Coverage Report

Created: 2024-05-08 16:10

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record describing one start tag.
 * NOTE(review): no user of this struct is visible in this part of the file.
 * Presumably prefix/URI are the element's namespace prefix and namespace
 * name, line is the input line of the tag and nsNr a namespace count --
 * confirm against the code that fills it in.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
360k
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
454
#define XML_PARSER_NON_LINEAR 10
129
130
24.2M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
154M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
4.31G
#define XML_PARSER_BUFFER_SIZE 100
147
142k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as inputs available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
33.6M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
1.99k
{
215
1.99k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
1.99k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
1.99k
    if (ctxt != NULL)
219
1.99k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
1.99k
    if (prefix == NULL)
222
1.16k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
1.16k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
1.16k
                        (const char *) localname, NULL, NULL, 0, 0,
225
1.16k
                        "Attribute %s redefined\n", localname);
226
830
    else
227
830
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
830
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
830
                        (const char *) prefix, (const char *) localname,
230
830
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
830
                        localname);
232
1.99k
    if (ctxt != NULL) {
233
1.99k
  ctxt->wellFormed = 0;
234
1.99k
  if (ctxt->recovery == 0)
235
961
      ctxt->disableSAX = 1;
236
1.99k
    }
237
1.99k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
403k
{
250
403k
    const char *errmsg;
251
252
403k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
403k
        (ctxt->instate == XML_PARSER_EOF))
254
6.74k
  return;
255
396k
    switch (error) {
256
3.71k
        case XML_ERR_INVALID_HEX_CHARREF:
257
3.71k
            errmsg = "CharRef: invalid hexadecimal value";
258
3.71k
            break;
259
8.21k
        case XML_ERR_INVALID_DEC_CHARREF:
260
8.21k
            errmsg = "CharRef: invalid decimal value";
261
8.21k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
154k
        case XML_ERR_INTERNAL_ERROR:
266
154k
            errmsg = "internal error";
267
154k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
475
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
475
            errmsg = "PEReference: expecting ';'";
282
475
            break;
283
425
        case XML_ERR_ENTITY_LOOP:
284
425
            errmsg = "Detected an entity reference loop";
285
425
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
61
        case XML_ERR_ENTITY_PE_INTERNAL:
290
61
            errmsg = "PEReferences forbidden in internal subset";
291
61
            break;
292
2.03k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
2.03k
            errmsg = "EntityValue: \" or ' expected";
294
2.03k
            break;
295
7.64k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
7.64k
            errmsg = "AttValue: \" or ' expected";
297
7.64k
            break;
298
8.55k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
8.55k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
8.55k
            break;
301
1.58k
        case XML_ERR_LITERAL_NOT_STARTED:
302
1.58k
            errmsg = "SystemLiteral \" or ' expected";
303
1.58k
            break;
304
3.16k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
3.16k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
3.16k
            break;
307
3.32k
        case XML_ERR_MISPLACED_CDATA_END:
308
3.32k
            errmsg = "Sequence ']]>' not allowed in content";
309
3.32k
            break;
310
1.41k
        case XML_ERR_URI_REQUIRED:
311
1.41k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
1.41k
            break;
313
172
        case XML_ERR_PUBID_REQUIRED:
314
172
            errmsg = "PUBLIC, the Public Identifier is missing";
315
172
            break;
316
6.69k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
6.69k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
6.69k
            break;
319
1.99k
        case XML_ERR_PI_NOT_STARTED:
320
1.99k
            errmsg = "xmlParsePI : no target name";
321
1.99k
            break;
322
198
        case XML_ERR_RESERVED_XML_NAME:
323
198
            errmsg = "Invalid PI name";
324
198
            break;
325
109
        case XML_ERR_NOTATION_NOT_STARTED:
326
109
            errmsg = "NOTATION: Name expected here";
327
109
            break;
328
206
        case XML_ERR_NOTATION_NOT_FINISHED:
329
206
            errmsg = "'>' required to close NOTATION declaration";
330
206
            break;
331
2.40k
        case XML_ERR_VALUE_REQUIRED:
332
2.40k
            errmsg = "Entity value required";
333
2.40k
            break;
334
100
        case XML_ERR_URI_FRAGMENT:
335
100
            errmsg = "Fragment not allowed";
336
100
            break;
337
1.57k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
1.57k
            errmsg = "'(' required to start ATTLIST enumeration";
339
1.57k
            break;
340
157
        case XML_ERR_NMTOKEN_REQUIRED:
341
157
            errmsg = "NmToken expected in ATTLIST enumeration";
342
157
            break;
343
354
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
354
            errmsg = "')' required to finish ATTLIST enumeration";
345
354
            break;
346
401
        case XML_ERR_MIXED_NOT_STARTED:
347
401
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
401
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
1.33k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
1.33k
            errmsg = "ContentDecl : Name or '(' expected";
354
1.33k
            break;
355
1.37k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
1.37k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
1.37k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
82.9k
        case XML_ERR_GT_REQUIRED:
363
82.9k
            errmsg = "expected '>'";
364
82.9k
            break;
365
60
        case XML_ERR_CONDSEC_INVALID:
366
60
            errmsg = "XML conditional section '[' expected";
367
60
            break;
368
2.38k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
2.38k
            errmsg = "Content error in the external subset";
370
2.38k
            break;
371
450
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
450
            errmsg =
373
450
                "conditional section INCLUDE or IGNORE keyword expected";
374
450
            break;
375
168
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
168
            errmsg = "XML conditional section not closed";
377
168
            break;
378
130
        case XML_ERR_XMLDECL_NOT_STARTED:
379
130
            errmsg = "Text declaration '<?xml' required";
380
130
            break;
381
20.3k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
20.3k
            errmsg = "parsing XML declaration: '?>' expected";
383
20.3k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
7.66k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
7.66k
            errmsg = "EntityRef: expecting ';'";
389
7.66k
            break;
390
5.63k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
5.63k
            errmsg = "DOCTYPE improperly terminated";
392
5.63k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
989
        case XML_ERR_EQUAL_REQUIRED:
397
989
            errmsg = "expected '='";
398
989
            break;
399
5.33k
        case XML_ERR_STRING_NOT_CLOSED:
400
5.33k
            errmsg = "String not closed expecting \" or '";
401
5.33k
            break;
402
1.10k
        case XML_ERR_STRING_NOT_STARTED:
403
1.10k
            errmsg = "String not started expecting ' or \"";
404
1.10k
            break;
405
248
        case XML_ERR_ENCODING_NAME:
406
248
            errmsg = "Invalid XML encoding name";
407
248
            break;
408
348
        case XML_ERR_STANDALONE_VALUE:
409
348
            errmsg = "standalone accepts only 'yes' or 'no'";
410
348
            break;
411
7.22k
        case XML_ERR_DOCUMENT_EMPTY:
412
7.22k
            errmsg = "Document is empty";
413
7.22k
            break;
414
36.3k
        case XML_ERR_DOCUMENT_END:
415
36.3k
            errmsg = "Extra content at the end of the document";
416
36.3k
            break;
417
515
        case XML_ERR_NOT_WELL_BALANCED:
418
515
            errmsg = "chunk is not well balanced";
419
515
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
11.5k
        case XML_ERR_VERSION_MISSING:
424
11.5k
            errmsg = "Malformed declaration expecting version";
425
11.5k
            break;
426
170
        case XML_ERR_NAME_TOO_LONG:
427
170
            errmsg = "Name too long";
428
170
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
138
        default:
435
138
            errmsg = "Unregistered error message";
436
396k
    }
437
396k
    if (ctxt != NULL)
438
396k
  ctxt->errNo = error;
439
396k
    if (info == NULL) {
440
241k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
241k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
241k
                        errmsg);
443
241k
    } else {
444
155k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
155k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
155k
                        errmsg, info);
447
155k
    }
448
396k
    if (ctxt != NULL) {
449
396k
  ctxt->wellFormed = 0;
450
396k
  if (ctxt->recovery == 0)
451
91.0k
      ctxt->disableSAX = 1;
452
396k
    }
453
396k
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
914k
{
467
914k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
914k
        (ctxt->instate == XML_PARSER_EOF))
469
0
  return;
470
914k
    if (ctxt != NULL)
471
914k
  ctxt->errNo = error;
472
914k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
914k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
914k
    if (ctxt != NULL) {
475
914k
  ctxt->wellFormed = 0;
476
914k
  if (ctxt->recovery == 0)
477
643k
      ctxt->disableSAX = 1;
478
914k
    }
479
914k
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
13.2k
{
495
13.2k
    xmlStructuredErrorFunc schannel = NULL;
496
497
13.2k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
13.2k
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
13.2k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
13.2k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
10.2k
        schannel = ctxt->sax->serror;
503
13.2k
    if (ctxt != NULL) {
504
13.2k
        __xmlRaiseError(schannel,
505
13.2k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
13.2k
                    ctxt->userData,
507
13.2k
                    ctxt, NULL, XML_FROM_PARSER, error,
508
13.2k
                    XML_ERR_WARNING, NULL, 0,
509
13.2k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
13.2k
        msg, (const char *) str1, (const char *) str2);
511
13.2k
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
13.2k
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
888
{
533
888
    xmlStructuredErrorFunc schannel = NULL;
534
535
888
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
888
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
888
    if (ctxt != NULL) {
539
888
  ctxt->errNo = error;
540
888
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
480
      schannel = ctxt->sax->serror;
542
888
    }
543
888
    if (ctxt != NULL) {
544
888
        __xmlRaiseError(schannel,
545
888
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
888
                    ctxt, NULL, XML_FROM_DTD, error,
547
888
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
888
        (const char *) str2, NULL, 0, 0,
549
888
        msg, (const char *) str1, (const char *) str2);
550
888
  ctxt->valid = 0;
551
888
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
888
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
95.4M
{
573
95.4M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
95.4M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
95.4M
    if (ctxt != NULL)
577
95.4M
  ctxt->errNo = error;
578
95.4M
    __xmlRaiseError(NULL, NULL, NULL,
579
95.4M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
95.4M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
95.4M
    if (ctxt != NULL) {
582
95.4M
  ctxt->wellFormed = 0;
583
95.4M
  if (ctxt->recovery == 0)
584
368k
      ctxt->disableSAX = 1;
585
95.4M
    }
586
95.4M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
180k
{
604
180k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
180k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
180k
    if (ctxt != NULL)
608
180k
  ctxt->errNo = error;
609
180k
    __xmlRaiseError(NULL, NULL, NULL,
610
180k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
180k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
180k
        NULL, val, 0, msg, str1, val, str2);
613
180k
    if (ctxt != NULL) {
614
180k
  ctxt->wellFormed = 0;
615
180k
  if (ctxt->recovery == 0)
616
24.0k
      ctxt->disableSAX = 1;
617
180k
    }
618
180k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
461k
{
633
461k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
461k
        (ctxt->instate == XML_PARSER_EOF))
635
0
  return;
636
461k
    if (ctxt != NULL)
637
461k
  ctxt->errNo = error;
638
461k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
461k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
461k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
461k
                    val);
642
461k
    if (ctxt != NULL) {
643
461k
  ctxt->wellFormed = 0;
644
461k
  if (ctxt->recovery == 0)
645
188k
      ctxt->disableSAX = 1;
646
461k
    }
647
461k
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
12.8k
{
662
12.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
12.8k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
12.8k
    if (ctxt != NULL)
666
12.8k
  ctxt->errNo = error;
667
12.8k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
12.8k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
12.8k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
12.8k
                    val);
671
12.8k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
187k
{
689
187k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
187k
        (ctxt->instate == XML_PARSER_EOF))
691
0
  return;
692
187k
    if (ctxt != NULL)
693
187k
  ctxt->errNo = error;
694
187k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
187k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
187k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
187k
                    info1, info2, info3);
698
187k
    if (ctxt != NULL)
699
187k
  ctxt->nsWellFormed = 0;
700
187k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
1.93k
{
718
1.93k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
1.93k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
1.93k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
1.93k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
1.93k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
1.93k
                    info1, info2, info3);
725
1.93k
}
726
727
/*
 * Add val to *dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned sum wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    const unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add a size_t amount to *dst, clamping the result to ULONG_MAX.
 *
 * val is taken as size_t rather than unsigned long so that, on platforms
 * where size_t is wider than unsigned long, a large value saturates the
 * counter instead of being truncated by the implicit conversion before
 * the overflow check is made.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
24.2M
{
770
24.2M
    unsigned long consumed;
771
24.2M
    xmlParserInputPtr input = ctxt->input;
772
24.2M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
24.2M
    consumed = input->parentConsumed;
779
24.2M
    if ((entity == NULL) ||
780
24.2M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
14.6M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
14.6M
        xmlSaturatedAdd(&consumed, input->consumed);
783
14.6M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
14.6M
    }
785
24.2M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
24.2M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
24.2M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
24.2M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
24.2M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
454
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
454
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
454
                       "Maximum entity amplification factor exceeded");
803
454
        xmlHaltParser(ctxt);
804
454
        return(1);
805
454
    }
806
807
24.2M
    return(0);
808
24.2M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exist, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
496k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
496k
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
496k
    (void) sax;
1048
1049
496k
    if (ctxt == NULL) return;
1050
496k
    sax = ctxt->sax;
1051
496k
#ifdef LIBXML_SAX1_ENABLED
1052
496k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
496k
        ((sax->startElementNs != NULL) ||
1054
307k
         (sax->endElementNs != NULL) ||
1055
307k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
307k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
496k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
496k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
496k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
496k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
496k
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
496k
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
20.7k
{
1103
20.7k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
31.4k
    while (*src == 0x20) src++;
1107
336k
    while (*src != 0) {
1108
316k
  if (*src == 0x20) {
1109
69.4k
      while (*src == 0x20) src++;
1110
17.0k
      if (*src != 0)
1111
13.1k
    *dst++ = 0x20;
1112
298k
  } else {
1113
298k
      *dst++ = *src++;
1114
298k
  }
1115
316k
    }
1116
20.7k
    *dst = 0;
1117
20.7k
    if (dst == src)
1118
15.4k
       return(NULL);
1119
5.32k
    return(dst);
1120
20.7k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
8.02k
{
1136
8.02k
    int i;
1137
8.02k
    int remove_head = 0;
1138
8.02k
    int need_realloc = 0;
1139
8.02k
    const xmlChar *cur;
1140
1141
8.02k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
8.02k
    i = *len;
1144
8.02k
    if (i <= 0)
1145
339
        return(NULL);
1146
1147
7.68k
    cur = src;
1148
9.41k
    while (*cur == 0x20) {
1149
1.72k
        cur++;
1150
1.72k
  remove_head++;
1151
1.72k
    }
1152
536k
    while (*cur != 0) {
1153
530k
  if (*cur == 0x20) {
1154
6.08k
      cur++;
1155
6.08k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
1.55k
          need_realloc = 1;
1157
1.55k
    break;
1158
1.55k
      }
1159
6.08k
  } else
1160
524k
      cur++;
1161
530k
    }
1162
7.68k
    if (need_realloc) {
1163
1.55k
        xmlChar *ret;
1164
1165
1.55k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
1.55k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
1.55k
  xmlAttrNormalizeSpace(ret, ret);
1171
1.55k
  *len = strlen((const char *)ret);
1172
1.55k
        return(ret);
1173
6.13k
    } else if (remove_head) {
1174
155
        *len -= remove_head;
1175
155
        memmove(src, src + remove_head, 1 + *len);
1176
155
  return(src);
1177
155
    }
1178
5.98k
    return(NULL);
1179
7.68k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
28.7k
               const xmlChar *value) {
1195
28.7k
    xmlDefAttrsPtr defaults;
1196
28.7k
    int len;
1197
28.7k
    const xmlChar *name;
1198
28.7k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
28.7k
    if (ctxt->attsSpecial != NULL) {
1204
19.9k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
197
      return;
1206
19.9k
    }
1207
1208
28.5k
    if (ctxt->attsDefault == NULL) {
1209
9.88k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
9.88k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
9.88k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
28.5k
    name = xmlSplitQName3(fullname, &len);
1219
28.5k
    if (name == NULL) {
1220
26.5k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
26.5k
  prefix = NULL;
1222
26.5k
    } else {
1223
2.05k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
2.05k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
2.05k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
28.5k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
28.5k
    if (defaults == NULL) {
1232
16.4k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
16.4k
                     (4 * 5) * sizeof(const xmlChar *));
1234
16.4k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
16.4k
  defaults->nbAttrs = 0;
1237
16.4k
  defaults->maxAttrs = 4;
1238
16.4k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
16.4k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
16.4k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
383
        xmlDefAttrsPtr temp;
1245
1246
383
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
383
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
383
  if (temp == NULL)
1249
0
      goto mem_error;
1250
383
  defaults = temp;
1251
383
  defaults->maxAttrs *= 2;
1252
383
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
383
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
383
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
28.5k
    name = xmlSplitQName3(fullattr, &len);
1264
28.5k
    if (name == NULL) {
1265
23.4k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
23.4k
  prefix = NULL;
1267
23.4k
    } else {
1268
5.09k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
5.09k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
5.09k
    }
1271
1272
28.5k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
28.5k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
28.5k
    len = xmlStrlen(value);
1276
28.5k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
28.5k
    if (value == NULL)
1278
0
        goto mem_error;
1279
28.5k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
28.5k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
28.5k
    if (ctxt->external)
1282
8.36k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
20.1k
    else
1284
20.1k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
28.5k
    defaults->nbAttrs++;
1286
1287
28.5k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
28.5k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
246k
{
1309
246k
    if (ctxt->attsSpecial == NULL) {
1310
16.4k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
16.4k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
16.4k
    }
1314
1315
246k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
381
        return;
1317
1318
245k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
245k
                     (void *) (ptrdiff_t) type);
1320
245k
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
246k
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
201k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
201k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
201k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
82.9k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
82.9k
    }
1341
201k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
54.8k
{
1354
54.8k
    if (ctxt->attsSpecial == NULL)
1355
43.9k
        return;
1356
1357
10.9k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
10.9k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
3.70k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
3.70k
        ctxt->attsSpecial = NULL;
1362
3.70k
    }
1363
10.9k
    return;
1364
54.8k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
3.22k
{
1427
3.22k
    const xmlChar *cur = lang, *nxt;
1428
1429
3.22k
    if (cur == NULL)
1430
52
        return (0);
1431
3.17k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
3.17k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
3.17k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
3.17k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
16
        cur += 2;
1441
16
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
16
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
0
            cur++;
1444
16
        return(cur[0] == 0);
1445
16
    }
1446
3.16k
    nxt = cur;
1447
18.7k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
18.7k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
15.5k
           nxt++;
1450
3.16k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
551
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
420
            return(0);
1456
131
        return(1);
1457
551
    }
1458
2.60k
    if (nxt - cur < 2)
1459
511
        return(0);
1460
    /* we got an ISO 639 code */
1461
2.09k
    if (nxt[0] == 0)
1462
846
        return(1);
1463
1.25k
    if (nxt[0] != '-')
1464
329
        return(0);
1465
1466
923
    nxt++;
1467
923
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
923
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
252
        goto region_m49;
1471
1472
4.23k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
4.23k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
3.56k
           nxt++;
1475
671
    if (nxt - cur == 4)
1476
136
        goto script;
1477
535
    if (nxt - cur == 2)
1478
60
        goto region;
1479
475
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
90
        goto variant;
1481
385
    if (nxt - cur != 3)
1482
176
        return(0);
1483
    /* we parsed an extlang */
1484
209
    if (nxt[0] == 0)
1485
30
        return(1);
1486
179
    if (nxt[0] != '-')
1487
47
        return(0);
1488
1489
132
    nxt++;
1490
132
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
132
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
85
        goto region_m49;
1494
1495
99
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
99
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
52
           nxt++;
1498
47
    if (nxt - cur == 2)
1499
5
        goto region;
1500
42
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
0
        goto variant;
1502
42
    if (nxt - cur != 4)
1503
42
        return(0);
1504
    /* we parsed a script */
1505
136
script:
1506
136
    if (nxt[0] == 0)
1507
23
        return(1);
1508
113
    if (nxt[0] != '-')
1509
41
        return(0);
1510
1511
72
    nxt++;
1512
72
    cur = nxt;
1513
    /* now we can have region or variant */
1514
72
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
28
        goto region_m49;
1516
1517
172
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
172
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
128
           nxt++;
1520
1521
44
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
7
        goto variant;
1523
37
    if (nxt - cur != 2)
1524
30
        return(0);
1525
    /* we parsed a region */
1526
275
region:
1527
275
    if (nxt[0] == 0)
1528
45
        return(1);
1529
230
    if (nxt[0] != '-')
1530
223
        return(0);
1531
1532
7
    nxt++;
1533
7
    cur = nxt;
1534
    /* now we can just have a variant */
1535
7
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
7
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
0
           nxt++;
1538
1539
7
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
7
        return(0);
1541
1542
    /* we parsed a variant */
1543
97
variant:
1544
97
    if (nxt[0] == 0)
1545
43
        return(1);
1546
54
    if (nxt[0] != '-')
1547
33
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
21
    return (1);
1550
1551
365
region_m49:
1552
365
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
365
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
203
        nxt += 3;
1555
203
        goto region;
1556
203
    }
1557
162
    return(0);
1558
365
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
28.6k
{
1584
28.6k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
7.51k
        int i;
1586
19.2k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
15.4k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
3.64k
          if (ctxt->nsTab[i + 1] == URL)
1590
752
        return(-2);
1591
    /* out of scope keep it */
1592
2.89k
    break;
1593
3.64k
      }
1594
15.4k
  }
1595
7.51k
    }
1596
27.8k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
16.6k
  ctxt->nsMax = 10;
1598
16.6k
  ctxt->nsNr = 0;
1599
16.6k
  ctxt->nsTab = (const xmlChar **)
1600
16.6k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
16.6k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
16.6k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
297
        const xmlChar ** tmp;
1608
297
        ctxt->nsMax *= 2;
1609
297
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
297
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
297
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
297
  ctxt->nsTab = tmp;
1617
297
    }
1618
27.8k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
27.8k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
27.8k
    return (ctxt->nsNr);
1621
27.8k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
10.2k
{
1634
10.2k
    int i;
1635
1636
10.2k
    if (ctxt->nsTab == NULL) return(0);
1637
10.2k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
10.2k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
35.7k
    for (i = 0;i < nr;i++) {
1645
25.4k
         ctxt->nsNr--;
1646
25.4k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
25.4k
    }
1648
10.2k
    return(nr);
1649
10.2k
}
1650
#endif
1651
1652
static int
1653
31.8k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
31.8k
    const xmlChar **atts;
1655
31.8k
    int *attallocs;
1656
31.8k
    int maxatts;
1657
1658
31.8k
    if (nr + 5 > ctxt->maxatts) {
1659
31.8k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
31.8k
  atts = (const xmlChar **) xmlMalloc(
1661
31.8k
             maxatts * sizeof(const xmlChar *));
1662
31.8k
  if (atts == NULL) goto mem_error;
1663
31.8k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
31.8k
                               (maxatts / 5) * sizeof(int));
1665
31.8k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
31.8k
        if (ctxt->maxatts > 0)
1670
6
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
31.8k
        xmlFree(ctxt->atts);
1672
31.8k
  ctxt->atts = atts;
1673
31.8k
  ctxt->attallocs = attallocs;
1674
31.8k
  ctxt->maxatts = maxatts;
1675
31.8k
    }
1676
31.8k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
31.8k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
9.83M
{
1694
9.83M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
9.83M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
180
        size_t newSize = ctxt->inputMax * 2;
1698
180
        xmlParserInputPtr *tmp;
1699
1700
180
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
180
                                               newSize * sizeof(*tmp));
1702
180
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
180
        ctxt->inputTab = tmp;
1707
180
        ctxt->inputMax = newSize;
1708
180
    }
1709
9.83M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
9.83M
    ctxt->input = value;
1711
9.83M
    return (ctxt->inputNr++);
1712
9.83M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
10.2M
{
1724
10.2M
    xmlParserInputPtr ret;
1725
1726
10.2M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
10.2M
    if (ctxt->inputNr <= 0)
1729
441k
        return (NULL);
1730
9.82M
    ctxt->inputNr--;
1731
9.82M
    if (ctxt->inputNr > 0)
1732
9.62M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
205k
    else
1734
205k
        ctxt->input = NULL;
1735
9.82M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
9.82M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
9.82M
    return (ret);
1738
10.2M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
2.59M
{
1751
2.59M
    if (ctxt == NULL) return(0);
1752
2.59M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
1.66k
        xmlNodePtr *tmp;
1754
1755
1.66k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
1.66k
                                      ctxt->nodeMax * 2 *
1757
1.66k
                                      sizeof(ctxt->nodeTab[0]));
1758
1.66k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
1.66k
        ctxt->nodeTab = tmp;
1763
1.66k
  ctxt->nodeMax *= 2;
1764
1.66k
    }
1765
2.59M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
2.59M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
2.59M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
2.59M
    ctxt->node = value;
1775
2.59M
    return (ctxt->nodeNr++);
1776
2.59M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
2.50M
{
1789
2.50M
    xmlNodePtr ret;
1790
1791
2.50M
    if (ctxt == NULL) return(NULL);
1792
2.50M
    if (ctxt->nodeNr <= 0)
1793
38.9k
        return (NULL);
1794
2.46M
    ctxt->nodeNr--;
1795
2.46M
    if (ctxt->nodeNr > 0)
1796
2.27M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
189k
    else
1798
189k
        ctxt->node = NULL;
1799
2.46M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
2.46M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
2.46M
    return (ret);
1802
2.50M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
1.69M
{
1821
1.69M
    xmlStartTag *tag;
1822
1823
1.69M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
4.86k
        const xmlChar * *tmp;
1825
4.86k
        xmlStartTag *tmp2;
1826
4.86k
        ctxt->nameMax *= 2;
1827
4.86k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
4.86k
                                    ctxt->nameMax *
1829
4.86k
                                    sizeof(ctxt->nameTab[0]));
1830
4.86k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
4.86k
  ctxt->nameTab = tmp;
1835
4.86k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
4.86k
                                    ctxt->nameMax *
1837
4.86k
                                    sizeof(ctxt->pushTab[0]));
1838
4.86k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
4.86k
  ctxt->pushTab = tmp2;
1843
1.68M
    } else if (ctxt->pushTab == NULL) {
1844
112k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
112k
                                            sizeof(ctxt->pushTab[0]));
1846
112k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
112k
    }
1849
1.69M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
1.69M
    ctxt->name = value;
1851
1.69M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
1.69M
    tag->prefix = prefix;
1853
1.69M
    tag->URI = URI;
1854
1.69M
    tag->line = line;
1855
1.69M
    tag->nsNr = nsNr;
1856
1.69M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
1.69M
}
1861
#ifdef LIBXML_PUSH_ENABLED
1862
/**
1863
 * nameNsPop:
1864
 * @ctxt: an XML parser context
1865
 *
1866
 * Pops the top element/prefix/URI name from the name stack
1867
 *
1868
 * Returns the name just removed
1869
 */
1870
static const xmlChar *
1871
nameNsPop(xmlParserCtxtPtr ctxt)
1872
435k
{
1873
435k
    const xmlChar *ret;
1874
1875
435k
    if (ctxt->nameNr <= 0)
1876
0
        return (NULL);
1877
435k
    ctxt->nameNr--;
1878
435k
    if (ctxt->nameNr > 0)
1879
425k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1880
10.5k
    else
1881
10.5k
        ctxt->name = NULL;
1882
435k
    ret = ctxt->nameTab[ctxt->nameNr];
1883
435k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1884
435k
    return (ret);
1885
435k
}
1886
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
984k
{
1931
984k
    const xmlChar *ret;
1932
1933
984k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
984k
    ctxt->nameNr--;
1936
984k
    if (ctxt->nameNr > 0)
1937
949k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
35.6k
    else
1939
35.6k
        ctxt->name = NULL;
1940
984k
    ret = ctxt->nameTab[ctxt->nameNr];
1941
984k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
984k
    return (ret);
1943
984k
}
1944
1945
2.93M
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack (ctxt->spaceTab), doubling the
 * size of the table first when it is full, and points ctxt->space at
 * the new top entry.
 *
 * Returns -1 in case of error (capacity overflow or allocation
 *         failure), the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *tmp;
        int newMax;

        /* Doubling a capacity above INT_MAX / 2 would overflow the int. */
        if (ctxt->spaceMax > INT_MAX / 2) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        newMax = ctxt->spaceMax * 2;

        tmp = (int *) xmlRealloc(ctxt->spaceTab,
                                 newMax * sizeof(ctxt->spaceTab[0]));
        if (tmp == NULL) {
            /* spaceMax was never touched, so there is nothing to undo. */
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }

        /* Commit pointer and capacity together, only after success. */
        ctxt->spaceTab = tmp;
        ctxt->spaceMax = newMax;
    }

    ctxt->spaceTab[ctxt->spaceNr] = val;
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr++);
}
1963
1964
2.84M
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the space stack (ctxt->spaceTab). The
 * vacated slot is overwritten with -1 and ctxt->space is pointed at the
 * entry below, or at slot 0 when the stack has become empty.
 *
 * Returns the value just removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int removed;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;

    removed = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;

    /* ctxt->space always stays a valid pointer into the table. */
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    return(removed);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often needs to make assumptions on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
102M
#define RAW (*ctxt->input->cur)
2013
179M
#define CUR (*ctxt->input->cur)
2014
84.1M
#define NXT(val) ctxt->input->cur[(val)]
2015
8.04M
#define CUR_PTR ctxt->input->cur
2016
285k
#define BASE_PTR ctxt->input->base
2017
2018
#define CMP4( s, c1, c2, c3, c4 ) \
2019
11.8M
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2020
5.96M
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2021
#define CMP5( s, c1, c2, c3, c4, c5 ) \
2022
11.0M
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2023
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2024
9.78M
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2025
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2026
8.63M
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2027
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2028
7.59M
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2029
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2030
3.58M
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2031
3.58M
    ((unsigned char *) s)[ 8 ] == c9 )
2032
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2033
41.3k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2034
41.3k
    ((unsigned char *) s)[ 9 ] == c10 )
2035
2036
33.5M
#define SKIP(val) do {             \
2037
33.5M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2038
33.5M
    if (*ctxt->input->cur == 0)           \
2039
33.5M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2040
33.5M
  } while (0)
2041
2042
9.91k
#define SKIPL(val) do {             \
2043
9.91k
    int skipl;                \
2044
2.34M
    for(skipl=0; skipl<val; skipl++) {         \
2045
2.33M
  if (*(ctxt->input->cur) == '\n') {       \
2046
20.5k
  ctxt->input->line++; ctxt->input->col = 1;      \
2047
2.31M
  } else ctxt->input->col++;         \
2048
2.33M
  ctxt->input->cur++;           \
2049
2.33M
    }                  \
2050
9.91k
    if (*ctxt->input->cur == 0)           \
2051
9.91k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2052
9.91k
  } while (0)
2053
2054
317M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
317M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
317M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
317M
  xmlSHRINK (ctxt);
2058
2059
156k
/*
 * xmlSHRINK:
 * @ctxt:  an XML parser context
 *
 * Out-of-line part of the SHRINK macro. Shrinks the current input, but
 * only when it is backed by a buffer that has an encoder or a read
 * callback; buffers with neither are left alone. Afterwards, if the
 * cursor sits on a zero byte, one more chunk of input is requested.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    int shrinkable = 0;

    /* Don't shrink memory buffers. */
    if (ctxt->input->buf != NULL) {
        if (ctxt->input->buf->encoder != NULL)
            shrinkable = 1;
        else if (ctxt->input->buf->readcallback != NULL)
            shrinkable = 1;
    }
    if (shrinkable)
        xmlParserInputShrink(ctxt->input);

    if (*ctxt->input->cur == 0)
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2067
2068
406M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
406M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
406M
  xmlGROW (ctxt);
2071
2072
19.1M
/*
 * xmlGROW:
 * @ctxt:  an XML parser context
 *
 * Out-of-line part of the GROW macro: requests another INPUT_CHUNK of
 * data for the current input.
 *
 * Unless XML_PARSE_HUGE is set, the request is refused with a fatal
 * "Huge input lookup" error and the parser is halted when the input is
 * fed by a read callback and more than XML_MAX_LOOKUP_LIMIT bytes lie
 * either before or after the cursor.
 *
 * After growing, a cursor outside [base, end] halts the parser.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    ptrdiff_t ahead = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t behind = ctxt->input->cur - ctxt->input->base;

    if ((ctxt->options & XML_PARSE_HUGE) == 0) {
        int overLimit = (ahead > XML_MAX_LOOKUP_LIMIT) ||
                        (behind > XML_MAX_LOOKUP_LIMIT);

        if (overLimit &&
            (ctxt->input->buf != NULL) &&
            (ctxt->input->buf->readcallback != NULL)) {
            xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
            xmlHaltParser(ctxt);
            return;
        }
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    if ((ctxt->input->cur < ctxt->input->base) ||
        (ctxt->input->cur > ctxt->input->end)) {
        /*
         * NOTE(review): the parser is halted before the error is raised
         * here, the reverse of the order used above. Presumably
         * deliberate, since the cursor is invalid at this point; confirm
         * before changing.
         */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    /* Still sitting on a zero byte: try once more to get data. */
    if (ctxt->input->cur != NULL) {
        if (*ctxt->input->cur == 0)
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    }
}
2095
2096
27.7M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2097
2098
137M
#define NEXT xmlNextChar(ctxt)
2099
2100
3.87M
#define NEXT1 {               \
2101
3.87M
  ctxt->input->col++;           \
2102
3.87M
  ctxt->input->cur++;           \
2103
3.87M
  if (*ctxt->input->cur == 0)         \
2104
3.87M
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);   \
2105
3.87M
    }
2106
2107
115M
#define NEXTL(l) do {             \
2108
115M
    if (*(ctxt->input->cur) == '\n') {         \
2109
1.43M
  ctxt->input->line++; ctxt->input->col = 1;      \
2110
114M
    } else ctxt->input->col++;           \
2111
115M
    ctxt->input->cur += l;        \
2112
115M
  } while (0)
2113
2114
213M
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2115
1.76G
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2116
2117
#define COPY_BUF(l,b,i,v)           \
2118
1.82G
    if (l == 1) b[i++] = v;           \
2119
1.82G
    else i += xmlCopyCharMultiByte(&b[i],v)
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
27.7M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
27.7M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
27.7M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
27.7M
        (ctxt->instate == XML_PARSER_START)) {
2141
14.7M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
14.7M
  cur = ctxt->input->cur;
2146
14.7M
  while (IS_BLANK_CH(*cur)) {
2147
11.2M
      if (*cur == '\n') {
2148
1.00M
    ctxt->input->line++; ctxt->input->col = 1;
2149
10.2M
      } else {
2150
10.2M
    ctxt->input->col++;
2151
10.2M
      }
2152
11.2M
      cur++;
2153
11.2M
      if (res < INT_MAX)
2154
11.2M
    res++;
2155
11.2M
      if (*cur == 0) {
2156
33.8k
    ctxt->input->cur = cur;
2157
33.8k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
33.8k
    cur = ctxt->input->cur;
2159
33.8k
      }
2160
11.2M
  }
2161
14.7M
  ctxt->input->cur = cur;
2162
14.7M
    } else {
2163
12.9M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
41.8M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
41.8M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
9.10M
    NEXT;
2168
32.7M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
10.2M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
59.5k
                    break;
2174
10.1M
          xmlParsePEReference(ctxt);
2175
22.5M
            } else if (CUR == 0) {
2176
9.63M
                unsigned long consumed;
2177
9.63M
                xmlEntityPtr ent;
2178
2179
9.63M
                if (ctxt->inputNr <= 1)
2180
18.1k
                    break;
2181
2182
9.61M
                consumed = ctxt->input->consumed;
2183
9.61M
                xmlSaturatedAddSizeT(&consumed,
2184
9.61M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
9.61M
                ent = ctxt->input->entity;
2191
9.61M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
9.61M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
630
                    ent->flags |= XML_ENT_PARSED;
2194
2195
630
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
630
                }
2197
2198
9.61M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
9.61M
                xmlPopInput(ctxt);
2201
12.8M
            } else {
2202
12.8M
                break;
2203
12.8M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
28.8M
      if (res < INT_MAX)
2213
28.8M
    res++;
2214
28.8M
        }
2215
12.9M
    }
2216
27.7M
    return(res);
2217
27.7M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
9.62M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
9.62M
    xmlParserInputPtr input;
2237
2238
9.62M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
9.61M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
9.61M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
9.61M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
9.61M
    input = inputPop(ctxt);
2247
9.61M
    if (input->entity != NULL)
2248
9.61M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
9.61M
    xmlFreeInputStream(input);
2250
9.61M
    if (*ctxt->input->cur == 0)
2251
4.76M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
9.61M
    return(CUR);
2253
9.62M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
9.63M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
9.63M
    int ret;
2267
9.63M
    if (input == NULL) return(-1);
2268
2269
9.63M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
9.63M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
9.63M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
9.63M
    ret = inputPush(ctxt, input);
2285
9.63M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
9.63M
    GROW;
2288
9.63M
    return(ret);
2289
9.63M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
118k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
118k
    int val = 0;
2311
118k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
118k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
118k
        (NXT(2) == 'x')) {
2318
22.4k
  SKIP(3);
2319
22.4k
  GROW;
2320
79.7k
  while (RAW != ';') { /* loop blocked by count */
2321
60.3k
      if (count++ > 20) {
2322
2.18k
    count = 0;
2323
2.18k
    GROW;
2324
2.18k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
2.18k
      }
2327
60.3k
      if ((RAW >= '0') && (RAW <= '9'))
2328
39.4k
          val = val * 16 + (CUR - '0');
2329
20.9k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
12.5k
          val = val * 16 + (CUR - 'a') + 10;
2331
8.40k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
5.43k
          val = val * 16 + (CUR - 'A') + 10;
2333
2.97k
      else {
2334
2.97k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
2.97k
    val = 0;
2336
2.97k
    break;
2337
2.97k
      }
2338
57.3k
      if (val > 0x110000)
2339
24.5k
          val = 0x110000;
2340
2341
57.3k
      NEXT;
2342
57.3k
      count++;
2343
57.3k
  }
2344
22.4k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
19.4k
      ctxt->input->col++;
2347
19.4k
      ctxt->input->cur++;
2348
19.4k
  }
2349
95.5k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
95.5k
  SKIP(2);
2351
95.5k
  GROW;
2352
447k
  while (RAW != ';') { /* loop blocked by count */
2353
359k
      if (count++ > 20) {
2354
7.60k
    count = 0;
2355
7.60k
    GROW;
2356
7.60k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
7.60k
      }
2359
359k
      if ((RAW >= '0') && (RAW <= '9'))
2360
351k
          val = val * 10 + (CUR - '0');
2361
7.83k
      else {
2362
7.83k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
7.83k
    val = 0;
2364
7.83k
    break;
2365
7.83k
      }
2366
351k
      if (val > 0x110000)
2367
83.6k
          val = 0x110000;
2368
2369
351k
      NEXT;
2370
351k
      count++;
2371
351k
  }
2372
95.5k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
87.7k
      ctxt->input->col++;
2375
87.7k
      ctxt->input->cur++;
2376
87.7k
  }
2377
95.5k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
118k
    if (val >= 0x110000) {
2389
964
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
964
                "xmlParseCharRef: character reference out of bounds\n",
2391
964
          val);
2392
117k
    } else if (IS_CHAR(val)) {
2393
105k
        return(val);
2394
105k
    } else {
2395
11.5k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
11.5k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
11.5k
                    val);
2398
11.5k
    }
2399
12.5k
    return(0);
2400
118k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
35.9k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
35.9k
    const xmlChar *ptr;
2423
35.9k
    xmlChar cur;
2424
35.9k
    int val = 0;
2425
2426
35.9k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
35.9k
    ptr = *str;
2428
35.9k
    cur = *ptr;
2429
35.9k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
8.33k
  ptr += 3;
2431
8.33k
  cur = *ptr;
2432
29.0k
  while (cur != ';') { /* Non input consuming loop */
2433
21.4k
      if ((cur >= '0') && (cur <= '9'))
2434
11.9k
          val = val * 16 + (cur - '0');
2435
9.52k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
2.08k
          val = val * 16 + (cur - 'a') + 10;
2437
7.44k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
6.70k
          val = val * 16 + (cur - 'A') + 10;
2439
745
      else {
2440
745
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
745
    val = 0;
2442
745
    break;
2443
745
      }
2444
20.7k
      if (val > 0x110000)
2445
7.45k
          val = 0x110000;
2446
2447
20.7k
      ptr++;
2448
20.7k
      cur = *ptr;
2449
20.7k
  }
2450
8.33k
  if (cur == ';')
2451
7.58k
      ptr++;
2452
27.6k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
27.6k
  ptr += 2;
2454
27.6k
  cur = *ptr;
2455
93.5k
  while (cur != ';') { /* Non input consuming loops */
2456
66.2k
      if ((cur >= '0') && (cur <= '9'))
2457
65.9k
          val = val * 10 + (cur - '0');
2458
374
      else {
2459
374
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
374
    val = 0;
2461
374
    break;
2462
374
      }
2463
65.9k
      if (val > 0x110000)
2464
396
          val = 0x110000;
2465
2466
65.9k
      ptr++;
2467
65.9k
      cur = *ptr;
2468
65.9k
  }
2469
27.6k
  if (cur == ';')
2470
27.2k
      ptr++;
2471
27.6k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
35.9k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
35.9k
    if (val >= 0x110000) {
2483
199
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
199
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
199
                val);
2486
35.7k
    } else if (IS_CHAR(val)) {
2487
34.2k
        return(val);
2488
34.2k
    } else {
2489
1.50k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
1.50k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
1.50k
        val);
2492
1.50k
    }
2493
1.70k
    return(0);
2494
35.9k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer.
 *
 * The new size is twice the current size plus n. The caller's scope
 * must provide:
 *   - buffer##_size, expected to be a size_t holding the current size;
 *   - a mem_error: label, expected to handle memory allocation
 *     failures; it is also the target when the size computation wraps
 *     around.
 * On failure buffer and buffer##_size are left untouched.
 * n is parenthesized so that any expression can be passed for it.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
13.8M
                           int check) {
2617
13.8M
    xmlChar *buffer = NULL;
2618
13.8M
    size_t buffer_size = 0;
2619
13.8M
    size_t nbchars = 0;
2620
2621
13.8M
    xmlChar *current = NULL;
2622
13.8M
    xmlChar *rep = NULL;
2623
13.8M
    const xmlChar *last;
2624
13.8M
    xmlEntityPtr ent;
2625
13.8M
    int c,l;
2626
2627
13.8M
    if (str == NULL)
2628
602
        return(NULL);
2629
13.8M
    last = str + len;
2630
2631
13.8M
    if (((ctxt->depth > 40) &&
2632
13.8M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
13.8M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
13.8M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
13.8M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
13.8M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK, loop until we reach one of the ending chars or the end of the string.
2648
     * we are operating on already parsed values.
2649
     */
2650
13.8M
    if (str < last)
2651
13.8M
  c = CUR_SCHAR(str, l);
2652
9.71k
    else
2653
9.71k
        c = 0;
2654
1.17G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
1.17G
           (c != end2) && (c != end3) &&
2656
1.17G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
1.16G
  if (c == 0) break;
2659
1.16G
        if ((c == '&') && (str[1] == '#')) {
2660
35.9k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
35.9k
      if (val == 0)
2662
1.70k
                goto int_error;
2663
34.2k
      COPY_BUF(0,buffer,nbchars,val);
2664
34.2k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
18
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
18
      }
2667
1.16G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
13.4M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
13.4M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
13.4M
      if ((ent != NULL) &&
2674
13.4M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
316
    if (ent->content != NULL) {
2676
316
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
316
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
0
        }
2680
316
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
13.4M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
13.4M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
151
                    goto int_error;
2688
2689
13.4M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
171
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
171
                    xmlHaltParser(ctxt);
2692
171
                    ent->content[0] = 0;
2693
171
                    goto int_error;
2694
171
                }
2695
2696
13.4M
                ent->flags |= XML_ENT_EXPANDING;
2697
13.4M
    ctxt->depth++;
2698
13.4M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
13.4M
                        ent->length, what, 0, 0, 0, check);
2700
13.4M
    ctxt->depth--;
2701
13.4M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
13.4M
    if (rep == NULL) {
2704
2.56k
                    ent->content[0] = 0;
2705
2.56k
                    goto int_error;
2706
2.56k
                }
2707
2708
13.4M
                current = rep;
2709
2.39G
                while (*current != 0) { /* non input consuming loop */
2710
2.38G
                    buffer[nbchars++] = *current++;
2711
2.38G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
863k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
863k
                    }
2714
2.38G
                }
2715
13.4M
                xmlFree(rep);
2716
13.4M
                rep = NULL;
2717
13.4M
      } else if (ent != NULL) {
2718
16
    int i = xmlStrlen(ent->name);
2719
16
    const xmlChar *cur = ent->name;
2720
2721
16
    buffer[nbchars++] = '&';
2722
16
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
0
    }
2725
56
    for (;i > 0;i--)
2726
40
        buffer[nbchars++] = *cur++;
2727
16
    buffer[nbchars++] = ';';
2728
16
      }
2729
1.15G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
38.5k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
38.5k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
38.5k
      if (ent != NULL) {
2735
35.5k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
590
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
590
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
590
      (ctxt->validate != 0)) {
2745
563
      xmlLoadEntityContent(ctxt, ent);
2746
563
        } else {
2747
27
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
27
      "not validating will not read content for PE entity %s\n",
2749
27
                          ent->name, NULL);
2750
27
        }
2751
590
    }
2752
2753
35.5k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
90
                    goto int_error;
2755
2756
35.4k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
9
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
9
                    xmlHaltParser(ctxt);
2759
9
                    if (ent->content != NULL)
2760
9
                        ent->content[0] = 0;
2761
9
                    goto int_error;
2762
9
                }
2763
2764
35.4k
                ent->flags |= XML_ENT_EXPANDING;
2765
35.4k
    ctxt->depth++;
2766
35.4k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
35.4k
                        ent->length, what, 0, 0, 0, check);
2768
35.4k
    ctxt->depth--;
2769
35.4k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
35.4k
    if (rep == NULL) {
2772
401
                    if (ent->content != NULL)
2773
12
                        ent->content[0] = 0;
2774
401
                    goto int_error;
2775
401
                }
2776
35.0k
                current = rep;
2777
774M
                while (*current != 0) { /* non input consuming loop */
2778
773M
                    buffer[nbchars++] = *current++;
2779
773M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
15.4k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
15.4k
                    }
2782
773M
                }
2783
35.0k
                xmlFree(rep);
2784
35.0k
                rep = NULL;
2785
35.0k
      }
2786
1.15G
  } else {
2787
1.15G
      COPY_BUF(l,buffer,nbchars,c);
2788
1.15G
      str += l;
2789
1.15G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
205k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
205k
      }
2792
1.15G
  }
2793
1.16G
  if (str < last)
2794
1.15G
      c = CUR_SCHAR(str, l);
2795
13.8M
  else
2796
13.8M
      c = 0;
2797
1.16G
    }
2798
13.8M
    buffer[nbchars] = 0;
2799
13.8M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
5.09k
int_error:
2804
5.09k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
5.09k
    if (buffer != NULL)
2807
5.09k
        xmlFree(buffer);
2808
5.09k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes an entity string content and processes it to perform the appropriate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
138
                           xmlChar end3) {
2836
138
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
138
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
138
                                      end, end2, end3, 0));
2840
138
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes an entity string content and processes it to perform the appropriate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
7.49k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
7.49k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
7.49k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
7.49k
                                      end, end2, end3, 0));
2868
7.49k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: non-zero if the caller already knows all chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
939k
                     int blank_chars) {
2890
939k
    int i, ret;
2891
939k
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
939k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
173k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
766k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
766k
        (*(ctxt->space) == -2))
2905
477k
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
288k
    if (blank_chars == 0) {
2911
1.27M
  for (i = 0;i < len;i++)
2912
1.16M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
181k
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
218k
    if (ctxt->node == NULL) return(0);
2919
205k
    if (ctxt->myDoc != NULL) {
2920
205k
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
205k
        if (ret == 0) return(1);
2922
200k
        if (ret == 1) return(0);
2923
200k
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
199k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
183k
    if ((ctxt->node->children == NULL) &&
2930
183k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
182k
    lastChild = xmlGetLastChild(ctxt->node);
2933
182k
    if (lastChild == NULL) {
2934
57.8k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
57.8k
            (ctxt->node->content != NULL)) return(0);
2936
124k
    } else if (xmlNodeIsText(lastChild))
2937
8.50k
        return(0);
2938
115k
    else if ((ctxt->node->children != NULL) &&
2939
115k
             (xmlNodeIsText(ctxt->node->children)))
2940
816
        return(0);
2941
172k
    return(1);
2942
182k
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  the qualified name (QName) string to split
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse a UTF-8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
1.10M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
1.10M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
1.10M
    xmlChar *buffer = NULL;
2973
1.10M
    int len = 0;
2974
1.10M
    int max = XML_MAX_NAMELEN;
2975
1.10M
    xmlChar *ret = NULL;
2976
1.10M
    const xmlChar *cur = name;
2977
1.10M
    int c;
2978
2979
1.10M
    if (prefix == NULL) return(NULL);
2980
1.10M
    *prefix = NULL;
2981
2982
1.10M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well-formed */
2992
1.10M
    if (cur[0] == ':')
2993
275
  return(xmlStrdup(name));
2994
2995
1.10M
    c = *cur++;
2996
4.30M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
3.19M
  buf[len++] = c;
2998
3.19M
  c = *cur++;
2999
3.19M
    }
3000
1.10M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
1.30k
  max = len * 2;
3006
3007
1.30k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
1.30k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
1.30k
  memcpy(buffer, buf, len);
3013
2.07M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
2.07M
      if (len + 10 > max) {
3015
1.66k
          xmlChar *tmp;
3016
3017
1.66k
    max *= 2;
3018
1.66k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
1.66k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
1.66k
    buffer = tmp;
3025
1.66k
      }
3026
2.07M
      buffer[len++] = c;
3027
2.07M
      c = *cur++;
3028
2.07M
  }
3029
1.30k
  buffer[len] = 0;
3030
1.30k
    }
3031
3032
1.10M
    if ((c == ':') && (*cur == 0)) {
3033
4.87k
        if (buffer != NULL)
3034
41
      xmlFree(buffer);
3035
4.87k
  *prefix = NULL;
3036
4.87k
  return(xmlStrdup(name));
3037
4.87k
    }
3038
3039
1.10M
    if (buffer == NULL)
3040
1.10M
  ret = xmlStrndup(buf, len);
3041
1.26k
    else {
3042
1.26k
  ret = buffer;
3043
1.26k
  buffer = NULL;
3044
1.26k
  max = XML_MAX_NAMELEN;
3045
1.26k
    }
3046
3047
3048
1.10M
    if (c == ':') {
3049
120k
  c = *cur;
3050
120k
        *prefix = ret;
3051
120k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
120k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
120k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
120k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
120k
        (c == '_') || (c == ':'))) {
3063
1.14k
      int l;
3064
1.14k
      int first = CUR_SCHAR(cur, l);
3065
3066
1.14k
      if (!IS_LETTER(first) && (first != '_')) {
3067
519
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
519
          "Name %s is not XML Namespace compliant\n",
3069
519
          name);
3070
519
      }
3071
1.14k
  }
3072
120k
  cur++;
3073
3074
877k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
756k
      buf[len++] = c;
3076
756k
      c = *cur++;
3077
756k
  }
3078
120k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
1.05k
      max = len * 2;
3084
3085
1.05k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
1.05k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
1.05k
      memcpy(buffer, buf, len);
3091
2.55M
      while (c != 0) { /* tested bigname2.xml */
3092
2.55M
    if (len + 10 > max) {
3093
1.17k
        xmlChar *tmp;
3094
3095
1.17k
        max *= 2;
3096
1.17k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
1.17k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
1.17k
        buffer = tmp;
3103
1.17k
    }
3104
2.55M
    buffer[len++] = c;
3105
2.55M
    c = *cur++;
3106
2.55M
      }
3107
1.05k
      buffer[len] = 0;
3108
1.05k
  }
3109
3110
120k
  if (buffer == NULL)
3111
119k
      ret = xmlStrndup(buf, len);
3112
1.05k
  else {
3113
1.05k
      ret = buffer;
3114
1.05k
  }
3115
120k
    }
3116
3117
1.10M
    return(ret);
3118
1.10M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
13.8M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
13.8M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] and [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
13.7M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
13.7M
      (((c >= 'a') && (c <= 'z')) ||
3160
13.7M
       ((c >= 'A') && (c <= 'Z')) ||
3161
13.7M
       (c == '_') || (c == ':') ||
3162
13.7M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
13.7M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
13.7M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
13.7M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
13.7M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
13.7M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
13.7M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
13.7M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
13.7M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
13.7M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
13.7M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
13.7M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
13.7M
      return(1);
3175
13.7M
    } else {
3176
38.9k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
23.7k
      return(1);
3178
38.9k
    }
3179
91.3k
    return(0);
3180
13.8M
}
3181
3182
static int
3183
599M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
599M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] and [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
596M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
596M
      (((c >= 'a') && (c <= 'z')) ||
3191
596M
       ((c >= 'A') && (c <= 'Z')) ||
3192
596M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
596M
       (c == '_') || (c == ':') ||
3194
596M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
596M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
596M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
596M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
596M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
596M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
596M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
596M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
596M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
596M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
596M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
596M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
596M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
596M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
596M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
582M
       return(1);
3210
596M
    } else {
3211
3.75M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
3.75M
            (c == '.') || (c == '-') ||
3213
3.75M
      (c == '_') || (c == ':') ||
3214
3.75M
      (IS_COMBINING(c)) ||
3215
3.75M
      (IS_EXTENDER(c)))
3216
3.72M
      return(1);
3217
3.75M
    }
3218
13.7M
    return(0);
3219
599M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
736k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
736k
    int len = 0, l;
3227
736k
    int c;
3228
736k
    int count = 0;
3229
736k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
72.1k
                    XML_MAX_TEXT_LENGTH :
3231
736k
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
736k
    GROW;
3241
736k
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
736k
    c = CUR_CHAR(l);
3244
736k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] and [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
640k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
640k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
635k
         ((c >= 'A') && (c <= 'Z')) ||
3252
635k
         (c == '_') || (c == ':') ||
3253
635k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
635k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
635k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
635k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
635k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
635k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
635k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
635k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
635k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
635k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
635k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
635k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
600k
      return(NULL);
3266
600k
  }
3267
40.6k
  len += l;
3268
40.6k
  NEXTL(l);
3269
40.6k
  c = CUR_CHAR(l);
3270
12.6M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
12.6M
         (((c >= 'a') && (c <= 'z')) ||
3272
12.6M
          ((c >= 'A') && (c <= 'Z')) ||
3273
12.6M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
12.6M
          (c == '_') || (c == ':') ||
3275
12.6M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
12.6M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
12.6M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
12.6M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
12.6M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
12.6M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
12.6M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
12.6M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
12.6M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
12.6M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
12.6M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
12.6M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
12.6M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
12.6M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
12.6M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
12.6M
    )) {
3291
12.6M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
121k
    count = 0;
3293
121k
    GROW;
3294
121k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
121k
      }
3297
12.6M
            if (len <= INT_MAX - l)
3298
12.6M
          len += l;
3299
12.6M
      NEXTL(l);
3300
12.6M
      c = CUR_CHAR(l);
3301
12.6M
  }
3302
95.3k
    } else {
3303
95.3k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
95.3k
      (!IS_LETTER(c) && (c != '_') &&
3305
90.3k
       (c != ':'))) {
3306
56.9k
      return(NULL);
3307
56.9k
  }
3308
38.4k
  len += l;
3309
38.4k
  NEXTL(l);
3310
38.4k
  c = CUR_CHAR(l);
3311
3312
7.89M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
7.89M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
7.88M
    (c == '.') || (c == '-') ||
3315
7.88M
    (c == '_') || (c == ':') ||
3316
7.88M
    (IS_COMBINING(c)) ||
3317
7.88M
    (IS_EXTENDER(c)))) {
3318
7.85M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
74.5k
    count = 0;
3320
74.5k
    GROW;
3321
74.5k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
74.5k
      }
3324
7.85M
            if (len <= INT_MAX - l)
3325
7.85M
          len += l;
3326
7.85M
      NEXTL(l);
3327
7.85M
      c = CUR_CHAR(l);
3328
7.85M
  }
3329
38.4k
    }
3330
79.1k
    if (len > maxLength) {
3331
61
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
61
        return(NULL);
3333
61
    }
3334
79.0k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs led to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
79.0k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
1.02k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
78.0k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
79.0k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
13.7M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
13.7M
    const xmlChar *in;
3370
13.7M
    const xmlChar *ret;
3371
13.7M
    size_t count = 0;
3372
13.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
656k
                       XML_MAX_TEXT_LENGTH :
3374
13.7M
                       XML_MAX_NAME_LENGTH;
3375
3376
13.7M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
13.7M
    in = ctxt->input->cur;
3386
13.7M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
13.7M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
13.7M
  (*in == '_') || (*in == ':')) {
3389
13.0M
  in++;
3390
73.4M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
73.4M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
73.4M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
73.4M
         (*in == '_') || (*in == '-') ||
3394
73.4M
         (*in == ':') || (*in == '.'))
3395
60.3M
      in++;
3396
13.0M
  if ((*in > 0) && (*in < 0x80)) {
3397
13.0M
      count = in - ctxt->input->cur;
3398
13.0M
            if (count > maxLength) {
3399
37
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
37
                return(NULL);
3401
37
            }
3402
13.0M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
13.0M
      ctxt->input->cur = in;
3404
13.0M
      ctxt->input->col += count;
3405
13.0M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
13.0M
      return(ret);
3408
13.0M
  }
3409
13.0M
    }
3410
    /* fall back to the general routine for names the ASCII fast path cannot handle */
3411
736k
    return(xmlParseNameComplex(ctxt));
3412
13.7M
}
3413
3414
static const xmlChar *
3415
193k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
193k
    int len = 0, l;
3417
193k
    int c;
3418
193k
    int count = 0;
3419
193k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
19.7k
                    XML_MAX_TEXT_LENGTH :
3421
193k
                    XML_MAX_NAME_LENGTH;
3422
193k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
193k
    GROW;
3432
193k
    startPosition = CUR_PTR - BASE_PTR;
3433
193k
    c = CUR_CHAR(l);
3434
193k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
193k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
101k
  return(NULL);
3437
101k
    }
3438
3439
10.9M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
10.9M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
10.8M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
98.1k
      count = 0;
3443
98.1k
      GROW;
3444
98.1k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
98.1k
  }
3447
10.8M
        if (len <= INT_MAX - l)
3448
10.8M
      len += l;
3449
10.8M
  NEXTL(l);
3450
10.8M
  c = CUR_CHAR(l);
3451
10.8M
  if (c == 0) {
3452
43.4k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
43.4k
      ctxt->input->cur -= l;
3459
43.4k
      GROW;
3460
43.4k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
43.4k
      ctxt->input->cur += l;
3463
43.4k
      c = CUR_CHAR(l);
3464
43.4k
  }
3465
10.8M
    }
3466
91.7k
    if (len > maxLength) {
3467
33
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
33
        return(NULL);
3469
33
    }
3470
91.6k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
91.7k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML non-colonized name (NCName).
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
6.34M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
6.34M
    const xmlChar *in, *e;
3491
6.34M
    const xmlChar *ret;
3492
6.34M
    size_t count = 0;
3493
6.34M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
223k
                       XML_MAX_TEXT_LENGTH :
3495
6.34M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
6.34M
    in = ctxt->input->cur;
3505
6.34M
    e = ctxt->input->end;
3506
6.34M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
6.34M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
6.34M
   (*in == '_')) && (in < e)) {
3509
6.22M
  in++;
3510
30.8M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
30.8M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
30.8M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
30.8M
          (*in == '_') || (*in == '-') ||
3514
30.8M
          (*in == '.')) && (in < e))
3515
24.6M
      in++;
3516
6.22M
  if (in >= e)
3517
2.93k
      goto complex;
3518
6.21M
  if ((*in > 0) && (*in < 0x80)) {
3519
6.14M
      count = in - ctxt->input->cur;
3520
6.14M
            if (count > maxLength) {
3521
29
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
29
                return(NULL);
3523
29
            }
3524
6.14M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
6.14M
      ctxt->input->cur = in;
3526
6.14M
      ctxt->input->col += count;
3527
6.14M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
6.14M
      return(ret);
3531
6.14M
  }
3532
6.21M
    }
3533
193k
complex:
3534
193k
    return(xmlParseNCNameComplex(ctxt));
3535
6.34M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
642k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
642k
    register const xmlChar *cmp = other;
3551
642k
    register const xmlChar *in;
3552
642k
    const xmlChar *ret;
3553
3554
642k
    GROW;
3555
642k
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
642k
    in = ctxt->input->cur;
3559
3.11M
    while (*in != 0 && *in == *cmp) {
3560
2.47M
  ++in;
3561
2.47M
  ++cmp;
3562
2.47M
    }
3563
642k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
556k
  ctxt->input->col += in - ctxt->input->cur;
3566
556k
  ctxt->input->cur = in;
3567
556k
  return (const xmlChar*) 1;
3568
556k
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
85.9k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
85.9k
    if (ret == other) {
3573
3.67k
  return (const xmlChar*) 1;
3574
3.67k
    }
3575
82.2k
    return ret;
3576
85.9k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
13.6M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
13.6M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
13.6M
    const xmlChar *cur = *str;
3600
13.6M
    int len = 0, l;
3601
13.6M
    int c;
3602
13.6M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
9.96k
                    XML_MAX_TEXT_LENGTH :
3604
13.6M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
13.6M
    c = CUR_SCHAR(cur, l);
3611
13.6M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
1.01k
  return(NULL);
3613
1.01k
    }
3614
3615
13.6M
    COPY_BUF(l,buf,len,c);
3616
13.6M
    cur += l;
3617
13.6M
    c = CUR_SCHAR(cur, l);
3618
223M
    while (xmlIsNameChar(ctxt, c)) {
3619
210M
  COPY_BUF(l,buf,len,c);
3620
210M
  cur += l;
3621
210M
  c = CUR_SCHAR(cur, l);
3622
210M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
1.59M
      xmlChar *buffer;
3628
1.59M
      int max = len * 2;
3629
3630
1.59M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
1.59M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
1.59M
      memcpy(buffer, buf, len);
3636
363M
      while (xmlIsNameChar(ctxt, c)) {
3637
361M
    if (len + 10 > max) {
3638
1.59M
        xmlChar *tmp;
3639
3640
1.59M
        max *= 2;
3641
1.59M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
1.59M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
1.59M
        buffer = tmp;
3648
1.59M
    }
3649
361M
    COPY_BUF(l,buffer,len,c);
3650
361M
    cur += l;
3651
361M
    c = CUR_SCHAR(cur, l);
3652
361M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
361M
      }
3658
1.59M
      buffer[len] = 0;
3659
1.59M
      *str = cur;
3660
1.59M
      return(buffer);
3661
1.59M
  }
3662
210M
    }
3663
12.0M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
12.0M
    *str = cur;
3668
12.0M
    return(xmlStrndup(buf, len));
3669
12.0M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
83.9k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
83.9k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
83.9k
    int len = 0, l;
3690
83.9k
    int c;
3691
83.9k
    int count = 0;
3692
83.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
5.75k
                    XML_MAX_TEXT_LENGTH :
3694
83.9k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
83.9k
    GROW;
3701
83.9k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
83.9k
    c = CUR_CHAR(l);
3704
3705
676k
    while (xmlIsNameChar(ctxt, c)) {
3706
594k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
594k
  COPY_BUF(l,buf,len,c);
3711
594k
  NEXTL(l);
3712
594k
  c = CUR_CHAR(l);
3713
594k
  if (c == 0) {
3714
1.06k
      count = 0;
3715
1.06k
      GROW;
3716
1.06k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
1.06k
            c = CUR_CHAR(l);
3719
1.06k
  }
3720
594k
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
2.04k
      xmlChar *buffer;
3726
2.04k
      int max = len * 2;
3727
3728
2.04k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
2.04k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
2.04k
      memcpy(buffer, buf, len);
3734
1.68M
      while (xmlIsNameChar(ctxt, c)) {
3735
1.68M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
17.6k
        count = 0;
3737
17.6k
        GROW;
3738
17.6k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
17.6k
    }
3743
1.68M
    if (len + 10 > max) {
3744
1.62k
        xmlChar *tmp;
3745
3746
1.62k
        max *= 2;
3747
1.62k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
1.62k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
1.62k
        buffer = tmp;
3754
1.62k
    }
3755
1.68M
    COPY_BUF(l,buffer,len,c);
3756
1.68M
    NEXTL(l);
3757
1.68M
    c = CUR_CHAR(l);
3758
1.68M
                if (len > maxLength) {
3759
7
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
7
                    xmlFree(buffer);
3761
7
                    return(NULL);
3762
7
                }
3763
1.68M
      }
3764
2.03k
      buffer[len] = 0;
3765
2.03k
      return(buffer);
3766
2.04k
  }
3767
594k
    }
3768
81.9k
    if (len == 0)
3769
8.21k
        return(NULL);
3770
73.6k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
73.6k
    return(xmlStrndup(buf, len));
3775
73.6k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
126k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
126k
    xmlChar *buf = NULL;
3795
126k
    int len = 0;
3796
126k
    int size = XML_PARSER_BUFFER_SIZE;
3797
126k
    int c, l;
3798
126k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
7.26k
                    XML_MAX_HUGE_LENGTH :
3800
126k
                    XML_MAX_TEXT_LENGTH;
3801
126k
    xmlChar stop;
3802
126k
    xmlChar *ret = NULL;
3803
126k
    const xmlChar *cur = NULL;
3804
126k
    xmlParserInputPtr input;
3805
3806
126k
    if (RAW == '"') stop = '"';
3807
22.6k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
126k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
126k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
126k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
126k
    input = ctxt->input;
3824
126k
    GROW;
3825
126k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
126k
    NEXT;
3828
126k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
15.0M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
15.0M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
14.9M
  if (len + 5 >= size) {
3841
27.4k
      xmlChar *tmp;
3842
3843
27.4k
      size *= 2;
3844
27.4k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
27.4k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
27.4k
      buf = tmp;
3850
27.4k
  }
3851
14.9M
  COPY_BUF(l,buf,len,c);
3852
14.9M
  NEXTL(l);
3853
3854
14.9M
  GROW;
3855
14.9M
  c = CUR_CHAR(l);
3856
14.9M
  if (c == 0) {
3857
1.59k
      GROW;
3858
1.59k
      c = CUR_CHAR(l);
3859
1.59k
  }
3860
3861
14.9M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
14.9M
    }
3867
126k
    buf[len] = 0;
3868
126k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
126k
    if (c != stop) {
3871
2.03k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
2.03k
        goto error;
3873
2.03k
    }
3874
124k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
124k
    cur = buf;
3882
4.82M
    while (*cur != 0) { /* non input consuming */
3883
4.70M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
112k
      xmlChar *name;
3885
112k
      xmlChar tmp = *cur;
3886
112k
            int nameOk = 0;
3887
3888
112k
      cur++;
3889
112k
      name = xmlParseStringName(ctxt, &cur);
3890
112k
            if (name != NULL) {
3891
111k
                nameOk = 1;
3892
111k
                xmlFree(name);
3893
111k
            }
3894
112k
            if ((nameOk == 0) || (*cur != ';')) {
3895
3.62k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
3.62k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
3.62k
                            tmp);
3898
3.62k
                goto error;
3899
3.62k
      }
3900
109k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
109k
    (ctxt->inputNr == 1)) {
3902
61
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
61
                goto error;
3904
61
      }
3905
109k
      if (*cur == 0)
3906
0
          break;
3907
109k
  }
3908
4.69M
  cur++;
3909
4.69M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
120k
    ++ctxt->depth;
3920
120k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
120k
                                     0, 0, 0, /* check */ 1);
3922
120k
    --ctxt->depth;
3923
3924
120k
    if (orig != NULL) {
3925
120k
        *orig = buf;
3926
120k
        buf = NULL;
3927
120k
    }
3928
3929
126k
error:
3930
126k
    if (buf != NULL)
3931
5.72k
        xmlFree(buf);
3932
126k
    return(ret);
3933
120k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
84.6k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
84.6k
    xmlChar limit = 0;
3950
84.6k
    xmlChar *buf = NULL;
3951
84.6k
    xmlChar *rep = NULL;
3952
84.6k
    size_t len = 0;
3953
84.6k
    size_t buf_size = 0;
3954
84.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
19.4k
                       XML_MAX_HUGE_LENGTH :
3956
84.6k
                       XML_MAX_TEXT_LENGTH;
3957
84.6k
    int c, l, in_space = 0;
3958
84.6k
    xmlChar *current = NULL;
3959
84.6k
    xmlEntityPtr ent;
3960
3961
84.6k
    if (NXT(0) == '"') {
3962
53.8k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
53.8k
  limit = '"';
3964
53.8k
        NEXT;
3965
53.8k
    } else if (NXT(0) == '\'') {
3966
30.7k
  limit = '\'';
3967
30.7k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
30.7k
        NEXT;
3969
30.7k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
84.6k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
84.6k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
84.6k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
84.6k
    c = CUR_CHAR(l);
3985
13.7M
    while (((NXT(0) != limit) && /* checked */
3986
13.7M
            (IS_CHAR(c)) && (c != '<')) &&
3987
13.7M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
13.6M
  if (c == '&') {
3989
367k
      in_space = 0;
3990
367k
      if (NXT(1) == '#') {
3991
55.6k
    int val = xmlParseCharRef(ctxt);
3992
3993
55.6k
    if (val == '&') {
3994
876
        if (ctxt->replaceEntities) {
3995
423
      if (len + 10 > buf_size) {
3996
0
          growBuffer(buf, 10);
3997
0
      }
3998
423
      buf[len++] = '&';
3999
453
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
453
      if (len + 10 > buf_size) {
4005
0
          growBuffer(buf, 10);
4006
0
      }
4007
453
      buf[len++] = '&';
4008
453
      buf[len++] = '#';
4009
453
      buf[len++] = '3';
4010
453
      buf[len++] = '8';
4011
453
      buf[len++] = ';';
4012
453
        }
4013
54.7k
    } else if (val != 0) {
4014
50.3k
        if (len + 10 > buf_size) {
4015
1.50k
      growBuffer(buf, 10);
4016
1.50k
        }
4017
50.3k
        len += xmlCopyChar(0, &buf[len], val);
4018
50.3k
    }
4019
311k
      } else {
4020
311k
    ent = xmlParseEntityRef(ctxt);
4021
311k
    if ((ent != NULL) &&
4022
311k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
14.5k
        if (len + 10 > buf_size) {
4024
18
      growBuffer(buf, 10);
4025
18
        }
4026
14.5k
        if ((ctxt->replaceEntities == 0) &&
4027
14.5k
            (ent->content[0] == '&')) {
4028
4.07k
      buf[len++] = '&';
4029
4.07k
      buf[len++] = '#';
4030
4.07k
      buf[len++] = '3';
4031
4.07k
      buf[len++] = '8';
4032
4.07k
      buf[len++] = ';';
4033
10.4k
        } else {
4034
10.4k
      buf[len++] = ent->content[0];
4035
10.4k
        }
4036
296k
    } else if ((ent != NULL) &&
4037
296k
               (ctxt->replaceEntities != 0)) {
4038
270k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
270k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
270k
      ++ctxt->depth;
4043
270k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
270k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
270k
                                /* check */ 1);
4046
270k
      --ctxt->depth;
4047
270k
      if (rep != NULL) {
4048
269k
          current = rep;
4049
55.0M
          while (*current != 0) { /* non input consuming */
4050
54.7M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
54.7M
                                    (*current == 0x9)) {
4052
7.73k
                                    buf[len++] = 0x20;
4053
7.73k
                                    current++;
4054
7.73k
                                } else
4055
54.7M
                                    buf[len++] = *current++;
4056
54.7M
        if (len + 10 > buf_size) {
4057
2.09k
            growBuffer(buf, 10);
4058
2.09k
        }
4059
54.7M
          }
4060
269k
          xmlFree(rep);
4061
269k
          rep = NULL;
4062
269k
      }
4063
270k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
270k
    } else if (ent != NULL) {
4071
3.52k
        int i = xmlStrlen(ent->name);
4072
3.52k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
3.52k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
3.52k
      (ent->content != NULL)) {
4081
3.26k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
1.35k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
1.35k
                            ctxt->sizeentcopy = ent->length;
4085
4086
1.35k
                            ++ctxt->depth;
4087
1.35k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
1.35k
                                    ent->content, ent->length,
4089
1.35k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
1.35k
                                    /* check */ 1);
4091
1.35k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
1.35k
                            if (ctxt->inSubset == 0) {
4100
1.17k
                                ent->flags |= XML_ENT_CHECKED;
4101
1.17k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
1.17k
                            }
4103
4104
1.35k
                            if (rep != NULL) {
4105
1.32k
                                xmlFree(rep);
4106
1.32k
                                rep = NULL;
4107
1.32k
                            } else {
4108
36
                                ent->content[0] = 0;
4109
36
                            }
4110
4111
1.35k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
0
                                goto error;
4113
1.90k
                        } else {
4114
1.90k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
1.90k
                        }
4117
3.26k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
3.52k
        buf[len++] = '&';
4123
3.52k
        while (len + i + 10 > buf_size) {
4124
0
      growBuffer(buf, i + 10);
4125
0
        }
4126
10.9k
        for (;i > 0;i--)
4127
7.38k
      buf[len++] = *cur++;
4128
3.52k
        buf[len++] = ';';
4129
3.52k
    }
4130
311k
      }
4131
13.3M
  } else {
4132
13.3M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
1.13M
          if ((len != 0) || (!normalize)) {
4134
1.10M
        if ((!normalize) || (!in_space)) {
4135
1.07M
      COPY_BUF(l,buf,len,0x20);
4136
1.08M
      while (len + 10 > buf_size) {
4137
3.27k
          growBuffer(buf, 10);
4138
3.27k
      }
4139
1.07M
        }
4140
1.10M
        in_space = 1;
4141
1.10M
    }
4142
12.1M
      } else {
4143
12.1M
          in_space = 0;
4144
12.1M
    COPY_BUF(l,buf,len,c);
4145
12.1M
    if (len + 10 > buf_size) {
4146
21.1k
        growBuffer(buf, 10);
4147
21.1k
    }
4148
12.1M
      }
4149
13.3M
      NEXTL(l);
4150
13.3M
  }
4151
13.6M
  GROW;
4152
13.6M
  c = CUR_CHAR(l);
4153
13.6M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
13.6M
    }
4159
84.6k
    if (ctxt->instate == XML_PARSER_EOF)
4160
322
        goto error;
4161
4162
84.3k
    if ((in_space) && (normalize)) {
4163
8.10k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
3.97k
    }
4165
84.3k
    buf[len] = 0;
4166
84.3k
    if (RAW == '<') {
4167
8.55k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
75.7k
    } else if (RAW != limit) {
4169
27.1k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
7.44k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
7.44k
         "invalid character in attribute value\n");
4172
19.6k
  } else {
4173
19.6k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
19.6k
         "AttValue: ' expected\n");
4175
19.6k
        }
4176
27.1k
    } else
4177
48.6k
  NEXT;
4178
4179
84.3k
    if (attlen != NULL) *attlen = len;
4180
84.3k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
322
error:
4185
322
    if (buf != NULL)
4186
322
        xmlFree(buf);
4187
322
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
322
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
535k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
535k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
535k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
535k
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
48.7k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
48.7k
    xmlChar *buf = NULL;
4250
48.7k
    int len = 0;
4251
48.7k
    int size = XML_PARSER_BUFFER_SIZE;
4252
48.7k
    int cur, l;
4253
48.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
8.51k
                    XML_MAX_TEXT_LENGTH :
4255
48.7k
                    XML_MAX_NAME_LENGTH;
4256
48.7k
    xmlChar stop;
4257
48.7k
    int state = ctxt->instate;
4258
48.7k
    int count = 0;
4259
4260
48.7k
    SHRINK;
4261
48.7k
    if (RAW == '"') {
4262
45.3k
        NEXT;
4263
45.3k
  stop = '"';
4264
45.3k
    } else if (RAW == '\'') {
4265
1.98k
        NEXT;
4266
1.98k
  stop = '\'';
4267
1.98k
    } else {
4268
1.41k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
1.41k
  return(NULL);
4270
1.41k
    }
4271
4272
47.3k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
47.3k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
47.3k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
47.3k
    cur = CUR_CHAR(l);
4279
2.43M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
2.38M
  if (len + 5 >= size) {
4281
3.38k
      xmlChar *tmp;
4282
4283
3.38k
      size *= 2;
4284
3.38k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
3.38k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
3.38k
      buf = tmp;
4292
3.38k
  }
4293
2.38M
  count++;
4294
2.38M
  if (count > 50) {
4295
31.5k
      SHRINK;
4296
31.5k
      GROW;
4297
31.5k
      count = 0;
4298
31.5k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
31.5k
  }
4303
2.38M
  COPY_BUF(l,buf,len,cur);
4304
2.38M
  NEXTL(l);
4305
2.38M
  cur = CUR_CHAR(l);
4306
2.38M
  if (cur == 0) {
4307
1.62k
      GROW;
4308
1.62k
      SHRINK;
4309
1.62k
      cur = CUR_CHAR(l);
4310
1.62k
  }
4311
2.38M
        if (len > maxLength) {
4312
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
0
            xmlFree(buf);
4314
0
            ctxt->instate = (xmlParserInputState) state;
4315
0
            return(NULL);
4316
0
        }
4317
2.38M
    }
4318
47.3k
    buf[len] = 0;
4319
47.3k
    ctxt->instate = (xmlParserInputState) state;
4320
47.3k
    if (!IS_CHAR(cur)) {
4321
2.23k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
45.0k
    } else {
4323
45.0k
  NEXT;
4324
45.0k
    }
4325
47.3k
    return(buf);
4326
47.3k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
11.3k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
11.3k
    xmlChar *buf = NULL;
4344
11.3k
    int len = 0;
4345
11.3k
    int size = XML_PARSER_BUFFER_SIZE;
4346
11.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
1.54k
                    XML_MAX_TEXT_LENGTH :
4348
11.3k
                    XML_MAX_NAME_LENGTH;
4349
11.3k
    xmlChar cur;
4350
11.3k
    xmlChar stop;
4351
11.3k
    int count = 0;
4352
11.3k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
11.3k
    SHRINK;
4355
11.3k
    if (RAW == '"') {
4356
10.7k
        NEXT;
4357
10.7k
  stop = '"';
4358
10.7k
    } else if (RAW == '\'') {
4359
442
        NEXT;
4360
442
  stop = '\'';
4361
442
    } else {
4362
169
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
169
  return(NULL);
4364
169
    }
4365
11.1k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
11.1k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
11.1k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
11.1k
    cur = CUR;
4372
980k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
968k
  if (len + 1 >= size) {
4374
1.49k
      xmlChar *tmp;
4375
4376
1.49k
      size *= 2;
4377
1.49k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
1.49k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
1.49k
      buf = tmp;
4384
1.49k
  }
4385
968k
  buf[len++] = cur;
4386
968k
  count++;
4387
968k
  if (count > 50) {
4388
13.5k
      SHRINK;
4389
13.5k
      GROW;
4390
13.5k
      count = 0;
4391
13.5k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
13.5k
  }
4396
968k
  NEXT;
4397
968k
  cur = CUR;
4398
968k
  if (cur == 0) {
4399
563
      GROW;
4400
563
      SHRINK;
4401
563
      cur = CUR;
4402
563
  }
4403
968k
        if (len > maxLength) {
4404
3
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
3
            xmlFree(buf);
4406
3
            return(NULL);
4407
3
        }
4408
968k
    }
4409
11.1k
    buf[len] = 0;
4410
11.1k
    if (cur != stop) {
4411
927
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
10.2k
    } else {
4413
10.2k
  NEXT;
4414
10.2k
    }
4415
11.1k
    ctxt->instate = oldstate;
4416
11.1k
    return(buf);
4417
11.1k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table used by the inner scanning loop of xmlParseCharData().
 *
 * An entry is non-zero (it simply repeats its own index) when the byte can
 * be consumed as plain character data, and zero when the scanner has to
 * stop and examine it: control characters other than TAB, '&', '<', ']'
 * and every byte >= 0x80.
 *
 * Only the first 128 entries are spelled out; the remaining ones (the
 * non-ASCII range) are zero through implicit initialization.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) is accepted, LF and CR are not */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F: control characters */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is rejected */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is rejected */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly zero */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
99.1M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
99.1M
    const xmlChar *in;
4482
99.1M
    int nbchar = 0;
4483
99.1M
    int line = ctxt->input->line;
4484
99.1M
    int col = ctxt->input->col;
4485
99.1M
    int ccol;
4486
4487
99.1M
    SHRINK;
4488
99.1M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
99.1M
    in = ctxt->input->cur;
4494
101M
    do {
4495
101M
get_more_space:
4496
104M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
101M
        if (*in == 0xA) {
4498
2.27M
            do {
4499
2.27M
                ctxt->input->line++; ctxt->input->col = 1;
4500
2.27M
                in++;
4501
2.27M
            } while (*in == 0xA);
4502
899k
            goto get_more_space;
4503
899k
        }
4504
101M
        if (*in == '<') {
4505
837k
            nbchar = in - ctxt->input->cur;
4506
837k
            if (nbchar > 0) {
4507
837k
                const xmlChar *tmp = ctxt->input->cur;
4508
837k
                ctxt->input->cur = in;
4509
4510
837k
                if ((ctxt->sax != NULL) &&
4511
837k
                    (ctxt->sax->ignorableWhitespace !=
4512
837k
                     ctxt->sax->characters)) {
4513
173k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
91.4k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
91.4k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
91.4k
                                                   tmp, nbchar);
4517
91.4k
                    } else {
4518
82.3k
                        if (ctxt->sax->characters != NULL)
4519
82.3k
                            ctxt->sax->characters(ctxt->userData,
4520
82.3k
                                                  tmp, nbchar);
4521
82.3k
                        if (*ctxt->space == -1)
4522
16.1k
                            *ctxt->space = -2;
4523
82.3k
                    }
4524
663k
                } else if ((ctxt->sax != NULL) &&
4525
663k
                           (ctxt->sax->characters != NULL)) {
4526
663k
                    ctxt->sax->characters(ctxt->userData,
4527
663k
                                          tmp, nbchar);
4528
663k
                }
4529
837k
            }
4530
837k
            return;
4531
837k
        }
4532
4533
101M
get_more:
4534
101M
        ccol = ctxt->input->col;
4535
180M
        while (test_char_data[*in]) {
4536
79.1M
            in++;
4537
79.1M
            ccol++;
4538
79.1M
        }
4539
101M
        ctxt->input->col = ccol;
4540
101M
        if (*in == 0xA) {
4541
2.14M
            do {
4542
2.14M
                ctxt->input->line++; ctxt->input->col = 1;
4543
2.14M
                in++;
4544
2.14M
            } while (*in == 0xA);
4545
745k
            goto get_more;
4546
745k
        }
4547
100M
        if (*in == ']') {
4548
58.5k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
1.89k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
1.89k
                ctxt->input->cur = in + 1;
4551
1.89k
                return;
4552
1.89k
            }
4553
56.6k
            in++;
4554
56.6k
            ctxt->input->col++;
4555
56.6k
            goto get_more;
4556
58.5k
        }
4557
100M
        nbchar = in - ctxt->input->cur;
4558
100M
        if (nbchar > 0) {
4559
3.11M
            if ((ctxt->sax != NULL) &&
4560
3.11M
                (ctxt->sax->ignorableWhitespace !=
4561
3.11M
                 ctxt->sax->characters) &&
4562
3.11M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
336k
                const xmlChar *tmp = ctxt->input->cur;
4564
336k
                ctxt->input->cur = in;
4565
4566
336k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
86.2k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
86.2k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
86.2k
                                                       tmp, nbchar);
4570
250k
                } else {
4571
250k
                    if (ctxt->sax->characters != NULL)
4572
250k
                        ctxt->sax->characters(ctxt->userData,
4573
250k
                                              tmp, nbchar);
4574
250k
                    if (*ctxt->space == -1)
4575
37.1k
                        *ctxt->space = -2;
4576
250k
                }
4577
336k
                line = ctxt->input->line;
4578
336k
                col = ctxt->input->col;
4579
2.77M
            } else if (ctxt->sax != NULL) {
4580
2.77M
                if (ctxt->sax->characters != NULL)
4581
2.77M
                    ctxt->sax->characters(ctxt->userData,
4582
2.77M
                                          ctxt->input->cur, nbchar);
4583
2.77M
                line = ctxt->input->line;
4584
2.77M
                col = ctxt->input->col;
4585
2.77M
            }
4586
3.11M
        }
4587
100M
        ctxt->input->cur = in;
4588
100M
        if (*in == 0xD) {
4589
1.93M
            in++;
4590
1.93M
            if (*in == 0xA) {
4591
1.92M
                ctxt->input->cur = in;
4592
1.92M
                in++;
4593
1.92M
                ctxt->input->line++; ctxt->input->col = 1;
4594
1.92M
                continue; /* while */
4595
1.92M
            }
4596
10.5k
            in--;
4597
10.5k
        }
4598
98.2M
        if (*in == '<') {
4599
2.51M
            return;
4600
2.51M
        }
4601
95.7M
        if (*in == '&') {
4602
141k
            return;
4603
141k
        }
4604
95.6M
        SHRINK;
4605
95.6M
        GROW;
4606
95.6M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
95.6M
        in = ctxt->input->cur;
4609
97.5M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
97.5M
             (*in == 0x09) || (*in == 0x0a));
4611
95.6M
    ctxt->input->line = line;
4612
95.6M
    ctxt->input->col = col;
4613
95.6M
    xmlParseCharDataComplex(ctxt);
4614
95.6M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
95.6M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
95.6M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
95.6M
    int nbchar = 0;
4631
95.6M
    int cur, l;
4632
95.6M
    int count = 0;
4633
4634
95.6M
    SHRINK;
4635
95.6M
    GROW;
4636
95.6M
    cur = CUR_CHAR(l);
4637
138M
    while ((cur != '<') && /* checked */
4638
138M
           (cur != '&') &&
4639
138M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
42.3M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
1.43k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
1.43k
  }
4643
42.3M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
42.3M
  NEXTL(l);
4646
42.3M
  cur = CUR_CHAR(l);
4647
42.3M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
120k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
120k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
115k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
28
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
28
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
28
                                     buf, nbchar);
4658
115k
    } else {
4659
115k
        if (ctxt->sax->characters != NULL)
4660
115k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
115k
        if ((ctxt->sax->characters !=
4662
115k
             ctxt->sax->ignorableWhitespace) &&
4663
115k
      (*ctxt->space == -1))
4664
2.94k
      *ctxt->space = -2;
4665
115k
    }
4666
115k
      }
4667
120k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
120k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
120k
  }
4672
42.3M
  count++;
4673
42.3M
  if (count > 50) {
4674
745k
      SHRINK;
4675
745k
      GROW;
4676
745k
      count = 0;
4677
745k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
745k
  }
4680
42.3M
    }
4681
95.6M
    if (nbchar != 0) {
4682
362k
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
362k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
313k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
686
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
686
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
313k
      } else {
4691
313k
    if (ctxt->sax->characters != NULL)
4692
313k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
313k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
313k
        (*ctxt->space == -1))
4695
54.2k
        *ctxt->space = -2;
4696
313k
      }
4697
313k
  }
4698
362k
    }
4699
95.6M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
95.4M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
95.4M
                          "PCDATA invalid Char value %d\n",
4703
95.4M
                    cur ? cur : CUR);
4704
95.4M
  NEXT;
4705
95.4M
    }
4706
95.6M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
100k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
100k
    xmlChar *URI = NULL;
4735
4736
100k
    SHRINK;
4737
4738
100k
    *publicID = NULL;
4739
100k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
37.5k
        SKIP(6);
4741
37.5k
  if (SKIP_BLANKS == 0) {
4742
121
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
121
                     "Space required after 'SYSTEM'\n");
4744
121
  }
4745
37.5k
  URI = xmlParseSystemLiteral(ctxt);
4746
37.5k
  if (URI == NULL) {
4747
132
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
132
        }
4749
62.4k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
11.3k
        SKIP(6);
4751
11.3k
  if (SKIP_BLANKS == 0) {
4752
112
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
112
        "Space required after 'PUBLIC'\n");
4754
112
  }
4755
11.3k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
11.3k
  if (*publicID == NULL) {
4757
172
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
172
  }
4759
11.3k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
11.1k
      if (SKIP_BLANKS == 0) {
4764
1.23k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
1.23k
      "Space required after the Public Identifier\n");
4766
1.23k
      }
4767
11.1k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
187
      if (SKIP_BLANKS == 0) return(NULL);
4775
6
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
6
  }
4777
11.1k
  URI = xmlParseSystemLiteral(ctxt);
4778
11.1k
  if (URI == NULL) {
4779
1.28k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
1.28k
        }
4781
11.1k
    }
4782
99.8k
    return(URI);
4783
100k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
13.9k
                       size_t len, size_t size) {
4802
13.9k
    int q, ql;
4803
13.9k
    int r, rl;
4804
13.9k
    int cur, l;
4805
13.9k
    size_t count = 0;
4806
13.9k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
1.57k
                       XML_MAX_HUGE_LENGTH :
4808
13.9k
                       XML_MAX_TEXT_LENGTH;
4809
13.9k
    int inputid;
4810
4811
13.9k
    inputid = ctxt->input->id;
4812
4813
13.9k
    if (buf == NULL) {
4814
828
        len = 0;
4815
828
  size = XML_PARSER_BUFFER_SIZE;
4816
828
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
828
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
828
    }
4822
13.9k
    GROW; /* Assure there's enough input data */
4823
13.9k
    q = CUR_CHAR(ql);
4824
13.9k
    if (q == 0)
4825
3.52k
        goto not_terminated;
4826
10.4k
    if (!IS_CHAR(q)) {
4827
1.39k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
1.39k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
1.39k
                    q);
4830
1.39k
  xmlFree (buf);
4831
1.39k
  return;
4832
1.39k
    }
4833
9.04k
    NEXTL(ql);
4834
9.04k
    r = CUR_CHAR(rl);
4835
9.04k
    if (r == 0)
4836
528
        goto not_terminated;
4837
8.51k
    if (!IS_CHAR(r)) {
4838
415
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
415
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
415
                    r);
4841
415
  xmlFree (buf);
4842
415
  return;
4843
415
    }
4844
8.10k
    NEXTL(rl);
4845
8.10k
    cur = CUR_CHAR(l);
4846
8.10k
    if (cur == 0)
4847
458
        goto not_terminated;
4848
4.25M
    while (IS_CHAR(cur) && /* checked */
4849
4.25M
           ((cur != '>') ||
4850
4.25M
      (r != '-') || (q != '-'))) {
4851
4.24M
  if ((r == '-') && (q == '-')) {
4852
6.69k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
6.69k
  }
4854
4.24M
  if (len + 5 >= size) {
4855
4.00k
      xmlChar *new_buf;
4856
4.00k
            size_t new_size;
4857
4858
4.00k
      new_size = size * 2;
4859
4.00k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
4.00k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
4.00k
      buf = new_buf;
4866
4.00k
            size = new_size;
4867
4.00k
  }
4868
4.24M
  COPY_BUF(ql,buf,len,q);
4869
4.24M
  q = r;
4870
4.24M
  ql = rl;
4871
4.24M
  r = cur;
4872
4.24M
  rl = l;
4873
4874
4.24M
  count++;
4875
4.24M
  if (count > 50) {
4876
80.8k
      SHRINK;
4877
80.8k
      GROW;
4878
80.8k
      count = 0;
4879
80.8k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
80.8k
  }
4884
4.24M
  NEXTL(l);
4885
4.24M
  cur = CUR_CHAR(l);
4886
4.24M
  if (cur == 0) {
4887
1.66k
      SHRINK;
4888
1.66k
      GROW;
4889
1.66k
      cur = CUR_CHAR(l);
4890
1.66k
  }
4891
4892
4.24M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
4.24M
    }
4899
7.64k
    buf[len] = 0;
4900
7.64k
    if (cur == 0) {
4901
1.66k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
1.66k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
5.98k
    } else if (!IS_CHAR(cur)) {
4904
764
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
764
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
764
                    cur);
4907
5.21k
    } else {
4908
5.21k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
5.21k
        NEXT;
4914
5.21k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
5.21k
      (!ctxt->disableSAX))
4916
4.09k
      ctxt->sax->comment(ctxt->userData, buf);
4917
5.21k
    }
4918
7.64k
    xmlFree(buf);
4919
7.64k
    return;
4920
4.51k
not_terminated:
4921
4.51k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
4.51k
       "Comment not terminated\n", NULL);
4923
4.51k
    xmlFree(buf);
4924
4.51k
    return;
4925
7.64k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
9.65M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
9.65M
    xmlChar *buf = NULL;
4943
9.65M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
9.65M
    size_t len = 0;
4945
9.65M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
6.42k
                       XML_MAX_HUGE_LENGTH :
4947
9.65M
                       XML_MAX_TEXT_LENGTH;
4948
9.65M
    xmlParserInputState state;
4949
9.65M
    const xmlChar *in;
4950
9.65M
    size_t nbchar = 0;
4951
9.65M
    int ccol;
4952
9.65M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
9.65M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
9.65M
    SKIP(2);
4960
9.65M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
85
        return;
4962
9.65M
    state = ctxt->instate;
4963
9.65M
    ctxt->instate = XML_PARSER_COMMENT;
4964
9.65M
    inputid = ctxt->input->id;
4965
9.65M
    SKIP(2);
4966
9.65M
    SHRINK;
4967
9.65M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
9.65M
    in = ctxt->input->cur;
4974
9.65M
    do {
4975
9.65M
  if (*in == 0xA) {
4976
15.3k
      do {
4977
15.3k
    ctxt->input->line++; ctxt->input->col = 1;
4978
15.3k
    in++;
4979
15.3k
      } while (*in == 0xA);
4980
14.0k
  }
4981
10.3M
get_more:
4982
10.3M
        ccol = ctxt->input->col;
4983
39.3M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
39.3M
         ((*in >= 0x20) && (*in < '-')) ||
4985
39.3M
         (*in == 0x09)) {
4986
28.9M
        in++;
4987
28.9M
        ccol++;
4988
28.9M
  }
4989
10.3M
  ctxt->input->col = ccol;
4990
10.3M
  if (*in == 0xA) {
4991
201k
      do {
4992
201k
    ctxt->input->line++; ctxt->input->col = 1;
4993
201k
    in++;
4994
201k
      } while (*in == 0xA);
4995
177k
      goto get_more;
4996
177k
  }
4997
10.2M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
10.2M
  if (nbchar > 0) {
5002
679k
      if ((ctxt->sax != NULL) &&
5003
679k
    (ctxt->sax->comment != NULL)) {
5004
679k
    if (buf == NULL) {
5005
133k
        if ((*in == '-') && (in[1] == '-'))
5006
64.3k
            size = nbchar + 1;
5007
69.0k
        else
5008
69.0k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
133k
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
133k
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
133k
        len = 0;
5016
546k
    } else if (len + nbchar + 1 >= size) {
5017
68.4k
        xmlChar *new_buf;
5018
68.4k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
68.4k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
68.4k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
68.4k
        buf = new_buf;
5027
68.4k
    }
5028
679k
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
679k
    len += nbchar;
5030
679k
    buf[len] = 0;
5031
679k
      }
5032
679k
  }
5033
10.2M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
10.2M
  ctxt->input->cur = in;
5040
10.2M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
10.2M
  if (*in == 0xD) {
5045
294k
      in++;
5046
294k
      if (*in == 0xA) {
5047
294k
    ctxt->input->cur = in;
5048
294k
    in++;
5049
294k
    ctxt->input->line++; ctxt->input->col = 1;
5050
294k
    goto get_more;
5051
294k
      }
5052
539
      in--;
5053
539
  }
5054
9.91M
  SHRINK;
5055
9.91M
  GROW;
5056
9.91M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
9.91M
  in = ctxt->input->cur;
5061
9.91M
  if (*in == '-') {
5062
9.90M
      if (in[1] == '-') {
5063
9.76M
          if (in[2] == '>') {
5064
9.64M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
9.64M
        SKIP(3);
5070
9.64M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
9.64M
            (!ctxt->disableSAX)) {
5072
9.63M
      if (buf != NULL)
5073
112k
          ctxt->sax->comment(ctxt->userData, buf);
5074
9.52M
      else
5075
9.52M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
9.63M
        }
5077
9.64M
        if (buf != NULL)
5078
120k
            xmlFree(buf);
5079
9.64M
        if (ctxt->instate != XML_PARSER_EOF)
5080
9.64M
      ctxt->instate = state;
5081
9.64M
        return;
5082
9.64M
    }
5083
120k
    if (buf != NULL) {
5084
120k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
120k
                          "Double hyphen within comment: "
5086
120k
                                      "<!--%.50s\n",
5087
120k
              buf);
5088
120k
    } else
5089
269
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
269
                          "Double hyphen within comment\n", NULL);
5091
120k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
120k
    in++;
5096
120k
    ctxt->input->col++;
5097
120k
      }
5098
256k
      in++;
5099
256k
      ctxt->input->col++;
5100
256k
      goto get_more;
5101
9.90M
  }
5102
9.91M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
13.9k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
13.9k
    ctxt->instate = state;
5105
13.9k
    return;
5106
9.65M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
33.0k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
33.0k
    const xmlChar *name;
5125
5126
33.0k
    name = xmlParseName(ctxt);
5127
33.0k
    if ((name != NULL) &&
5128
33.0k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
33.0k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
33.0k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
5.76k
  int i;
5132
5.76k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
5.76k
      (name[2] == 'l') && (name[3] == 0)) {
5134
1.97k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
1.97k
     "XML declaration allowed only at the start of the document\n");
5136
1.97k
      return(name);
5137
3.78k
  } else if (name[3] == 0) {
5138
198
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
198
      return(name);
5140
198
  }
5141
9.27k
  for (i = 0;;i++) {
5142
9.27k
      if (xmlW3CPIs[i] == NULL) break;
5143
6.44k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
757
          return(name);
5145
6.44k
  }
5146
2.83k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
2.83k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
2.83k
          NULL, NULL);
5149
2.83k
    }
5150
30.1k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
431
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
431
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
431
    }
5154
30.1k
    return(name);
5155
33.0k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be of the form: catalog = "URL" (either quote kind),
 * with optional blanks around each part and nothing after the closing
 * quote. Any other input raises an XML_WAR_CATALOG_PI warning, except a
 * missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *valueStart;
    xmlChar quote;

    /* pseudo attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* equal sign */
    for (; IS_BLANK_CH(*p); p++)
        ;
    /* NOTE(review): unlike the other syntax errors this one gives no
     * warning - confirm this is intended */
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    valueStart = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(valueStart, p - valueStart);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
33.0k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
33.0k
    xmlChar *buf = NULL;
5235
33.0k
    size_t len = 0;
5236
33.0k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
33.0k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
4.94k
                       XML_MAX_HUGE_LENGTH :
5239
33.0k
                       XML_MAX_TEXT_LENGTH;
5240
33.0k
    int cur, l;
5241
33.0k
    const xmlChar *target;
5242
33.0k
    xmlParserInputState state;
5243
33.0k
    int count = 0;
5244
5245
33.0k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
33.0k
  int inputid = ctxt->input->id;
5247
33.0k
  state = ctxt->instate;
5248
33.0k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
33.0k
  SKIP(2);
5253
33.0k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
33.0k
        target = xmlParsePITarget(ctxt);
5260
33.0k
  if (target != NULL) {
5261
31.0k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
1.17k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
1.17k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
1.17k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
1.17k
        (ctxt->sax->processingInstruction != NULL))
5274
1.04k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
1.04k
                                         target, NULL);
5276
1.17k
    if (ctxt->instate != XML_PARSER_EOF)
5277
1.17k
        ctxt->instate = state;
5278
1.17k
    return;
5279
1.17k
      }
5280
29.8k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
29.8k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
29.8k
      if (SKIP_BLANKS == 0) {
5287
7.37k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
7.37k
        "ParsePI: PI %s space expected\n", target);
5289
7.37k
      }
5290
29.8k
      cur = CUR_CHAR(l);
5291
3.48M
      while (IS_CHAR(cur) && /* checked */
5292
3.48M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
3.45M
    if (len + 5 >= size) {
5294
4.10k
        xmlChar *tmp;
5295
4.10k
                    size_t new_size = size * 2;
5296
4.10k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
4.10k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
4.10k
        buf = tmp;
5304
4.10k
                    size = new_size;
5305
4.10k
    }
5306
3.45M
    count++;
5307
3.45M
    if (count > 50) {
5308
61.8k
        SHRINK;
5309
61.8k
        GROW;
5310
61.8k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
61.8k
        count = 0;
5315
61.8k
    }
5316
3.45M
    COPY_BUF(l,buf,len,cur);
5317
3.45M
    NEXTL(l);
5318
3.45M
    cur = CUR_CHAR(l);
5319
3.45M
    if (cur == 0) {
5320
3.09k
        SHRINK;
5321
3.09k
        GROW;
5322
3.09k
        cur = CUR_CHAR(l);
5323
3.09k
    }
5324
3.45M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
3.45M
      }
5332
29.8k
      buf[len] = 0;
5333
29.8k
      if (cur != '?') {
5334
6.00k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
6.00k
          "ParsePI: PI %s never end ...\n", target);
5336
23.8k
      } else {
5337
23.8k
    if (inputid != ctxt->input->id) {
5338
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
0
                             "PI declaration doesn't start and stop in"
5340
0
                                   " the same entity\n");
5341
0
    }
5342
23.8k
    SKIP(2);
5343
5344
23.8k
#ifdef LIBXML_CATALOG_ENABLED
5345
23.8k
    if (((state == XML_PARSER_MISC) ||
5346
23.8k
               (state == XML_PARSER_START)) &&
5347
23.8k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
72
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
72
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
72
      (allow == XML_CATA_ALLOW_ALL))
5351
72
      xmlParseCatalogPI(ctxt, buf);
5352
72
    }
5353
23.8k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
23.8k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
23.8k
        (ctxt->sax->processingInstruction != NULL))
5361
21.6k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
21.6k
                                         target, buf);
5363
23.8k
      }
5364
29.8k
      xmlFree(buf);
5365
29.8k
  } else {
5366
1.99k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
1.99k
  }
5368
31.8k
  if (ctxt->instate != XML_PARSER_EOF)
5369
31.8k
      ctxt->instate = state;
5370
31.8k
    }
5371
33.0k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
906
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
906
    const xmlChar *name;
5394
906
    xmlChar *Pubid;
5395
906
    xmlChar *Systemid;
5396
5397
906
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
906
    SKIP(2);
5400
5401
906
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
662
  int inputid = ctxt->input->id;
5403
662
  SHRINK;
5404
662
  SKIP(8);
5405
662
  if (SKIP_BLANKS == 0) {
5406
87
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
87
         "Space required after '<!NOTATION'\n");
5408
87
      return;
5409
87
  }
5410
5411
575
        name = xmlParseName(ctxt);
5412
575
  if (name == NULL) {
5413
33
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
33
      return;
5415
33
  }
5416
542
  if (xmlStrchr(name, ':') != NULL) {
5417
22
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
22
         "colons are forbidden from notation names '%s'\n",
5419
22
         name, NULL, NULL);
5420
22
  }
5421
542
  if (SKIP_BLANKS == 0) {
5422
60
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
60
         "Space required after the NOTATION name'\n");
5424
60
      return;
5425
60
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
482
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
482
  SKIP_BLANKS;
5432
5433
482
  if (RAW == '>') {
5434
385
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
385
      NEXT;
5440
385
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
385
    (ctxt->sax->notationDecl != NULL))
5442
275
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
385
  } else {
5444
97
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
97
  }
5446
482
  if (Systemid != NULL) xmlFree(Systemid);
5447
482
  if (Pubid != NULL) xmlFree(Pubid);
5448
482
    }
5449
906
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
145k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
145k
    const xmlChar *name = NULL;
5478
145k
    xmlChar *value = NULL;
5479
145k
    xmlChar *URI = NULL, *literal = NULL;
5480
145k
    const xmlChar *ndata = NULL;
5481
145k
    int isParameter = 0;
5482
145k
    xmlChar *orig = NULL;
5483
5484
145k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
145k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
145k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
144k
  int inputid = ctxt->input->id;
5491
144k
  SHRINK;
5492
144k
  SKIP(6);
5493
144k
  if (SKIP_BLANKS == 0) {
5494
854
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
854
         "Space required after '<!ENTITY'\n");
5496
854
  }
5497
5498
144k
  if (RAW == '%') {
5499
46.4k
      NEXT;
5500
46.4k
      if (SKIP_BLANKS == 0) {
5501
121
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
121
             "Space required after '%%'\n");
5503
121
      }
5504
46.4k
      isParameter = 1;
5505
46.4k
  }
5506
5507
144k
        name = xmlParseName(ctxt);
5508
144k
  if (name == NULL) {
5509
826
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
826
                     "xmlParseEntityDecl: no name\n");
5511
826
            return;
5512
826
  }
5513
144k
  if (xmlStrchr(name, ':') != NULL) {
5514
90
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
90
         "colons are forbidden from entities names '%s'\n",
5516
90
         name, NULL, NULL);
5517
90
  }
5518
144k
  if (SKIP_BLANKS == 0) {
5519
1.77k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
1.77k
         "Space required after the entity name\n");
5521
1.77k
  }
5522
5523
144k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
144k
  if (isParameter) {
5528
46.2k
      if ((RAW == '"') || (RAW == '\'')) {
5529
43.0k
          value = xmlParseEntityValue(ctxt, &orig);
5530
43.0k
    if (value) {
5531
41.4k
        if ((ctxt->sax != NULL) &&
5532
41.4k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
37.9k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
37.9k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
37.9k
            NULL, NULL, value);
5536
41.4k
    }
5537
43.0k
      } else {
5538
3.26k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
3.26k
    if ((URI == NULL) && (literal == NULL)) {
5540
365
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
365
    }
5542
3.26k
    if (URI) {
5543
2.89k
        xmlURIPtr uri;
5544
5545
2.89k
        uri = xmlParseURI((const char *) URI);
5546
2.89k
        if (uri == NULL) {
5547
133
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
133
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
2.75k
        } else {
5555
2.75k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
9
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
2.74k
      } else {
5562
2.74k
          if ((ctxt->sax != NULL) &&
5563
2.74k
        (!ctxt->disableSAX) &&
5564
2.74k
        (ctxt->sax->entityDecl != NULL))
5565
2.63k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
2.63k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
2.63k
              literal, URI, NULL);
5568
2.74k
      }
5569
2.75k
      xmlFreeURI(uri);
5570
2.75k
        }
5571
2.89k
    }
5572
3.26k
      }
5573
97.8k
  } else {
5574
97.8k
      if ((RAW == '"') || (RAW == '\'')) {
5575
83.7k
          value = xmlParseEntityValue(ctxt, &orig);
5576
83.7k
    if ((ctxt->sax != NULL) &&
5577
83.7k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
74.8k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
74.8k
        XML_INTERNAL_GENERAL_ENTITY,
5580
74.8k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
83.7k
    if ((ctxt->myDoc == NULL) ||
5585
83.7k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
866
        if (ctxt->myDoc == NULL) {
5587
294
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
294
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
294
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
294
        }
5594
866
        if (ctxt->myDoc->intSubset == NULL)
5595
294
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
294
              BAD_CAST "fake", NULL, NULL);
5597
5598
866
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
866
                    NULL, NULL, value);
5600
866
    }
5601
83.7k
      } else {
5602
14.1k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
14.1k
    if ((URI == NULL) && (literal == NULL)) {
5604
2.03k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
2.03k
    }
5606
14.1k
    if (URI) {
5607
11.9k
        xmlURIPtr uri;
5608
5609
11.9k
        uri = xmlParseURI((const char *)URI);
5610
11.9k
        if (uri == NULL) {
5611
415
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
415
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
11.5k
        } else {
5619
11.5k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
91
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
91
      }
5626
11.5k
      xmlFreeURI(uri);
5627
11.5k
        }
5628
11.9k
    }
5629
14.1k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
2.43k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
2.43k
           "Space required before 'NDATA'\n");
5632
2.43k
    }
5633
14.1k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
1.34k
        SKIP(5);
5635
1.34k
        if (SKIP_BLANKS == 0) {
5636
62
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
62
               "Space required after 'NDATA'\n");
5638
62
        }
5639
1.34k
        ndata = xmlParseName(ctxt);
5640
1.34k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
1.34k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
1.22k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
1.22k
            literal, URI, ndata);
5644
12.7k
    } else {
5645
12.7k
        if ((ctxt->sax != NULL) &&
5646
12.7k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
10.5k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
10.5k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
10.5k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
12.7k
        if ((ctxt->replaceEntities != 0) &&
5655
12.7k
      ((ctxt->myDoc == NULL) ||
5656
9.52k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
171
      if (ctxt->myDoc == NULL) {
5658
49
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
49
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
49
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
49
      }
5665
5666
171
      if (ctxt->myDoc->intSubset == NULL)
5667
49
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
49
            BAD_CAST "fake", NULL, NULL);
5669
171
      xmlSAX2EntityDecl(ctxt, name,
5670
171
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
171
                  literal, URI, NULL);
5672
171
        }
5673
12.7k
    }
5674
14.1k
      }
5675
97.8k
  }
5676
144k
  if (ctxt->instate == XML_PARSER_EOF)
5677
99
      goto done;
5678
143k
  SKIP_BLANKS;
5679
143k
  if (RAW != '>') {
5680
7.11k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
7.11k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
7.11k
      xmlHaltParser(ctxt);
5683
136k
  } else {
5684
136k
      if (inputid != ctxt->input->id) {
5685
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
0
                         "Entity declaration doesn't start and stop in"
5687
0
                               " the same entity\n");
5688
0
      }
5689
136k
      NEXT;
5690
136k
  }
5691
143k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
120k
      xmlEntityPtr cur = NULL;
5696
5697
120k
      if (isParameter) {
5698
41.8k
          if ((ctxt->sax != NULL) &&
5699
41.8k
        (ctxt->sax->getParameterEntity != NULL))
5700
41.8k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
79.0k
      } else {
5702
79.0k
          if ((ctxt->sax != NULL) &&
5703
79.0k
        (ctxt->sax->getEntity != NULL))
5704
79.0k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
79.0k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
3.84k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
3.84k
    }
5708
79.0k
      }
5709
120k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
109k
    cur->orig = orig;
5711
109k
                orig = NULL;
5712
109k
      }
5713
120k
  }
5714
5715
144k
done:
5716
144k
  if (value != NULL) xmlFree(value);
5717
144k
  if (URI != NULL) xmlFree(URI);
5718
144k
  if (literal != NULL) xmlFree(literal);
5719
144k
        if (orig != NULL) xmlFree(orig);
5720
144k
    }
5721
145k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
261k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
261k
    int val;
5757
261k
    xmlChar *ret;
5758
5759
261k
    *value = NULL;
5760
261k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
17.4k
  SKIP(9);
5762
17.4k
  return(XML_ATTRIBUTE_REQUIRED);
5763
17.4k
    }
5764
243k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
204k
  SKIP(8);
5766
204k
  return(XML_ATTRIBUTE_IMPLIED);
5767
204k
    }
5768
39.2k
    val = XML_ATTRIBUTE_NONE;
5769
39.2k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
16.4k
  SKIP(6);
5771
16.4k
  val = XML_ATTRIBUTE_FIXED;
5772
16.4k
  if (SKIP_BLANKS == 0) {
5773
78
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
78
         "Space required after '#FIXED'\n");
5775
78
  }
5776
16.4k
    }
5777
39.2k
    ret = xmlParseAttValue(ctxt);
5778
39.2k
    ctxt->instate = XML_PARSER_DTD;
5779
39.2k
    if (ret == NULL) {
5780
1.32k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
1.32k
           "Attribute default value declaration error\n");
5782
1.32k
    } else
5783
37.8k
        *value = ret;
5784
39.2k
    return(val);
5785
243k
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
432
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
432
    const xmlChar *name;
5809
432
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
432
    if (RAW != '(') {
5812
76
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
76
  return(NULL);
5814
76
    }
5815
356
    SHRINK;
5816
374
    do {
5817
374
        NEXT;
5818
374
  SKIP_BLANKS;
5819
374
        name = xmlParseName(ctxt);
5820
374
  if (name == NULL) {
5821
75
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
75
         "Name expected in NOTATION declaration\n");
5823
75
            xmlFreeEnumeration(ret);
5824
75
      return(NULL);
5825
75
  }
5826
299
  tmp = ret;
5827
311
  while (tmp != NULL) {
5828
12
      if (xmlStrEqual(name, tmp->name)) {
5829
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
0
    "standalone: attribute notation value token %s duplicated\n",
5831
0
         name, NULL);
5832
0
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
0
    break;
5835
0
      }
5836
12
      tmp = tmp->next;
5837
12
  }
5838
299
  if (tmp == NULL) {
5839
299
      cur = xmlCreateEnumeration(name);
5840
299
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
299
      if (last == NULL) ret = last = cur;
5845
12
      else {
5846
12
    last->next = cur;
5847
12
    last = cur;
5848
12
      }
5849
299
  }
5850
299
  SKIP_BLANKS;
5851
299
    } while (RAW == '|');
5852
281
    if (RAW != ')') {
5853
109
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
109
        xmlFreeEnumeration(ret);
5855
109
  return(NULL);
5856
109
    }
5857
172
    NEXT;
5858
172
    return(ret);
5859
281
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
25.8k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
25.8k
    xmlChar *name;
5881
25.8k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
25.8k
    if (RAW != '(') {
5884
1.57k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
1.57k
  return(NULL);
5886
1.57k
    }
5887
24.2k
    SHRINK;
5888
71.0k
    do {
5889
71.0k
        NEXT;
5890
71.0k
  SKIP_BLANKS;
5891
71.0k
        name = xmlParseNmtoken(ctxt);
5892
71.0k
  if (name == NULL) {
5893
157
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
157
      return(ret);
5895
157
  }
5896
70.9k
  tmp = ret;
5897
179k
  while (tmp != NULL) {
5898
109k
      if (xmlStrEqual(name, tmp->name)) {
5899
788
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
788
    "standalone: attribute enumeration value token %s duplicated\n",
5901
788
         name, NULL);
5902
788
    if (!xmlDictOwns(ctxt->dict, name))
5903
788
        xmlFree(name);
5904
788
    break;
5905
788
      }
5906
108k
      tmp = tmp->next;
5907
108k
  }
5908
70.9k
  if (tmp == NULL) {
5909
70.1k
      cur = xmlCreateEnumeration(name);
5910
70.1k
      if (!xmlDictOwns(ctxt->dict, name))
5911
70.1k
    xmlFree(name);
5912
70.1k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
70.1k
      if (last == NULL) ret = last = cur;
5917
45.8k
      else {
5918
45.8k
    last->next = cur;
5919
45.8k
    last = cur;
5920
45.8k
      }
5921
70.1k
  }
5922
70.9k
  SKIP_BLANKS;
5923
70.9k
    } while (RAW == '|');
5924
24.1k
    if (RAW != ')') {
5925
354
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
354
  return(ret);
5927
354
    }
5928
23.7k
    NEXT;
5929
23.7k
    return(ret);
5930
24.1k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
26.3k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
26.3k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
485
  SKIP(8);
5953
485
  if (SKIP_BLANKS == 0) {
5954
53
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
53
         "Space required after 'NOTATION'\n");
5956
53
      return(0);
5957
53
  }
5958
432
  *tree = xmlParseNotationType(ctxt);
5959
432
  if (*tree == NULL) return(0);
5960
172
  return(XML_ATTRIBUTE_NOTATION);
5961
432
    }
5962
25.8k
    *tree = xmlParseEnumerationType(ctxt);
5963
25.8k
    if (*tree == NULL) return(0);
5964
24.2k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
25.8k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
263k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
263k
    SHRINK;
6017
263k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
106k
  SKIP(5);
6019
106k
  return(XML_ATTRIBUTE_CDATA);
6020
157k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
1.44k
  SKIP(6);
6022
1.44k
  return(XML_ATTRIBUTE_IDREFS);
6023
155k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
5.12k
  SKIP(5);
6025
5.12k
  return(XML_ATTRIBUTE_IDREF);
6026
150k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
62.8k
        SKIP(2);
6028
62.8k
  return(XML_ATTRIBUTE_ID);
6029
87.6k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
570
  SKIP(6);
6031
570
  return(XML_ATTRIBUTE_ENTITY);
6032
87.1k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
107
  SKIP(8);
6034
107
  return(XML_ATTRIBUTE_ENTITIES);
6035
87.0k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
17.4k
  SKIP(8);
6037
17.4k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
69.5k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
43.2k
  SKIP(7);
6040
43.2k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
43.2k
     }
6042
26.3k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
263k
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
104k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
104k
    const xmlChar *elemName;
6061
104k
    const xmlChar *attrName;
6062
104k
    xmlEnumerationPtr tree;
6063
6064
104k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
104k
    SKIP(2);
6067
6068
104k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
103k
  int inputid = ctxt->input->id;
6070
6071
103k
  SKIP(7);
6072
103k
  if (SKIP_BLANKS == 0) {
6073
210
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
210
                     "Space required after '<!ATTLIST'\n");
6075
210
  }
6076
103k
        elemName = xmlParseName(ctxt);
6077
103k
  if (elemName == NULL) {
6078
183
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
183
         "ATTLIST: no name for Element\n");
6080
183
      return;
6081
183
  }
6082
103k
  SKIP_BLANKS;
6083
103k
  GROW;
6084
361k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
266k
      int type;
6086
266k
      int def;
6087
266k
      xmlChar *defaultValue = NULL;
6088
6089
266k
      GROW;
6090
266k
            tree = NULL;
6091
266k
      attrName = xmlParseName(ctxt);
6092
266k
      if (attrName == NULL) {
6093
1.58k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
1.58k
             "ATTLIST: no name for Attribute\n");
6095
1.58k
    break;
6096
1.58k
      }
6097
265k
      GROW;
6098
265k
      if (SKIP_BLANKS == 0) {
6099
1.07k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
1.07k
            "Space required after the attribute name\n");
6101
1.07k
    break;
6102
1.07k
      }
6103
6104
263k
      type = xmlParseAttributeType(ctxt, &tree);
6105
263k
      if (type <= 0) {
6106
1.94k
          break;
6107
1.94k
      }
6108
6109
262k
      GROW;
6110
262k
      if (SKIP_BLANKS == 0) {
6111
980
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
980
             "Space required after the attribute type\n");
6113
980
          if (tree != NULL)
6114
525
        xmlFreeEnumeration(tree);
6115
980
    break;
6116
980
      }
6117
6118
261k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
261k
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
261k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
19.1k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
261k
      GROW;
6130
261k
            if (RAW != '>') {
6131
228k
    if (SKIP_BLANKS == 0) {
6132
2.66k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
2.66k
      "Space required after the attribute default value\n");
6134
2.66k
        if (defaultValue != NULL)
6135
1.30k
      xmlFree(defaultValue);
6136
2.66k
        if (tree != NULL)
6137
267
      xmlFreeEnumeration(tree);
6138
2.66k
        break;
6139
2.66k
    }
6140
228k
      }
6141
258k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
258k
    (ctxt->sax->attributeDecl != NULL))
6143
241k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
241k
                          type, def, defaultValue, tree);
6145
16.5k
      else if (tree != NULL)
6146
1.91k
    xmlFreeEnumeration(tree);
6147
6148
258k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
258k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
258k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
28.7k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
28.7k
      }
6153
258k
      if (ctxt->sax2) {
6154
246k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
246k
      }
6156
258k
      if (defaultValue != NULL)
6157
36.5k
          xmlFree(defaultValue);
6158
258k
      GROW;
6159
258k
  }
6160
103k
  if (RAW == '>') {
6161
95.2k
      if (inputid != ctxt->input->id) {
6162
3
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
3
                               "Attribute list declaration doesn't start and"
6164
3
                               " stop in the same entity\n");
6165
3
      }
6166
95.2k
      NEXT;
6167
95.2k
  }
6168
103k
    }
6169
104k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
54.2k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
54.2k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
54.2k
    const xmlChar *elem = NULL;
6196
6197
54.2k
    GROW;
6198
54.2k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
54.2k
  SKIP(7);
6200
54.2k
  SKIP_BLANKS;
6201
54.2k
  SHRINK;
6202
54.2k
  if (RAW == ')') {
6203
37.6k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
37.6k
      NEXT;
6209
37.6k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
37.6k
      if (ret == NULL)
6211
0
          return(NULL);
6212
37.6k
      if (RAW == '*') {
6213
9
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
9
    NEXT;
6215
9
      }
6216
37.6k
      return(ret);
6217
37.6k
  }
6218
16.6k
  if ((RAW == '(') || (RAW == '|')) {
6219
16.5k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
16.5k
      if (ret == NULL) return(NULL);
6221
16.5k
  }
6222
160k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
143k
      NEXT;
6224
143k
      if (elem == NULL) {
6225
16.4k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
16.4k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
16.4k
    ret->c1 = cur;
6231
16.4k
    if (cur != NULL)
6232
16.4k
        cur->parent = ret;
6233
16.4k
    cur = ret;
6234
127k
      } else {
6235
127k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
127k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
127k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
127k
    if (n->c1 != NULL)
6242
127k
        n->c1->parent = n;
6243
127k
          cur->c2 = n;
6244
127k
    if (n != NULL)
6245
127k
        n->parent = cur;
6246
127k
    cur = n;
6247
127k
      }
6248
143k
      SKIP_BLANKS;
6249
143k
      elem = xmlParseName(ctxt);
6250
143k
      if (elem == NULL) {
6251
42
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
42
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
42
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
42
    return(NULL);
6255
42
      }
6256
143k
      SKIP_BLANKS;
6257
143k
      GROW;
6258
143k
  }
6259
16.6k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
16.2k
      if (elem != NULL) {
6261
16.2k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
16.2k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
16.2k
    if (cur->c2 != NULL)
6264
16.2k
        cur->c2->parent = cur;
6265
16.2k
            }
6266
16.2k
            if (ret != NULL)
6267
16.2k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
16.2k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
16.2k
      SKIP(2);
6274
16.2k
  } else {
6275
401
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
401
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
401
      return(NULL);
6278
401
  }
6279
6280
16.6k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
16.2k
    return(ret);
6284
54.2k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
77.2k
                                       int depth) {
6321
77.2k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
77.2k
    const xmlChar *elem;
6323
77.2k
    xmlChar type = 0;
6324
6325
77.2k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
77.2k
        (depth >  2048)) {
6327
58
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
58
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
58
                          depth);
6330
58
  return(NULL);
6331
58
    }
6332
77.2k
    SKIP_BLANKS;
6333
77.2k
    GROW;
6334
77.2k
    if (RAW == '(') {
6335
25.9k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
25.9k
  NEXT;
6339
25.9k
  SKIP_BLANKS;
6340
25.9k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
25.9k
                                                           depth + 1);
6342
25.9k
        if (cur == NULL)
6343
23.4k
            return(NULL);
6344
2.49k
  SKIP_BLANKS;
6345
2.49k
  GROW;
6346
51.3k
    } else {
6347
51.3k
  elem = xmlParseName(ctxt);
6348
51.3k
  if (elem == NULL) {
6349
1.04k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
1.04k
      return(NULL);
6351
1.04k
  }
6352
50.2k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
50.2k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
50.2k
  GROW;
6358
50.2k
  if (RAW == '?') {
6359
4.55k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
4.55k
      NEXT;
6361
45.7k
  } else if (RAW == '*') {
6362
6.99k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
6.99k
      NEXT;
6364
38.7k
  } else if (RAW == '+') {
6365
8.87k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
8.87k
      NEXT;
6367
29.8k
  } else {
6368
29.8k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
29.8k
  }
6370
50.2k
  GROW;
6371
50.2k
    }
6372
52.7k
    SKIP_BLANKS;
6373
52.7k
    SHRINK;
6374
189k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
139k
        if (RAW == ',') {
6379
45.4k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
27.5k
      else if (type != CUR) {
6385
33
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
33
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
33
                      type);
6388
33
    if ((last != NULL) && (last != ret))
6389
33
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
33
    if (ret != NULL)
6391
33
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
33
    return(NULL);
6393
33
      }
6394
45.3k
      NEXT;
6395
6396
45.3k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
45.3k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
45.3k
      if (last == NULL) {
6404
17.8k
    op->c1 = ret;
6405
17.8k
    if (ret != NULL)
6406
17.8k
        ret->parent = op;
6407
17.8k
    ret = cur = op;
6408
27.5k
      } else {
6409
27.5k
          cur->c2 = op;
6410
27.5k
    if (op != NULL)
6411
27.5k
        op->parent = cur;
6412
27.5k
    op->c1 = last;
6413
27.5k
    if (last != NULL)
6414
27.5k
        last->parent = op;
6415
27.5k
    cur =op;
6416
27.5k
    last = NULL;
6417
27.5k
      }
6418
93.6k
  } else if (RAW == '|') {
6419
92.3k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
78.7k
      else if (type != CUR) {
6425
30
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
30
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
30
          type);
6428
30
    if ((last != NULL) && (last != ret))
6429
30
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
30
    if (ret != NULL)
6431
30
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
30
    return(NULL);
6433
30
      }
6434
92.2k
      NEXT;
6435
6436
92.2k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
92.2k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
92.2k
      if (last == NULL) {
6445
13.5k
    op->c1 = ret;
6446
13.5k
    if (ret != NULL)
6447
13.5k
        ret->parent = op;
6448
13.5k
    ret = cur = op;
6449
78.6k
      } else {
6450
78.6k
          cur->c2 = op;
6451
78.6k
    if (op != NULL)
6452
78.6k
        op->parent = cur;
6453
78.6k
    op->c1 = last;
6454
78.6k
    if (last != NULL)
6455
78.6k
        last->parent = op;
6456
78.6k
    cur =op;
6457
78.6k
    last = NULL;
6458
78.6k
      }
6459
92.2k
  } else {
6460
1.37k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
1.37k
      if ((last != NULL) && (last != ret))
6462
492
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
1.37k
      if (ret != NULL)
6464
1.37k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
1.37k
      return(NULL);
6466
1.37k
  }
6467
137k
  GROW;
6468
137k
  SKIP_BLANKS;
6469
137k
  GROW;
6470
137k
  if (RAW == '(') {
6471
6.25k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
6.25k
      NEXT;
6474
6.25k
      SKIP_BLANKS;
6475
6.25k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
6.25k
                                                          depth + 1);
6477
6.25k
            if (last == NULL) {
6478
246
    if (ret != NULL)
6479
246
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
246
    return(NULL);
6481
246
            }
6482
6.00k
      SKIP_BLANKS;
6483
131k
  } else {
6484
131k
      elem = xmlParseName(ctxt);
6485
131k
      if (elem == NULL) {
6486
295
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
295
    if (ret != NULL)
6488
295
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
295
    return(NULL);
6490
295
      }
6491
131k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
131k
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
131k
      if (RAW == '?') {
6498
17.9k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
17.9k
    NEXT;
6500
113k
      } else if (RAW == '*') {
6501
10.5k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
10.5k
    NEXT;
6503
102k
      } else if (RAW == '+') {
6504
2.84k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
2.84k
    NEXT;
6506
99.7k
      } else {
6507
99.7k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
99.7k
      }
6509
131k
  }
6510
137k
  SKIP_BLANKS;
6511
137k
  GROW;
6512
137k
    }
6513
50.7k
    if ((cur != NULL) && (last != NULL)) {
6514
30.3k
        cur->c2 = last;
6515
30.3k
  if (last != NULL)
6516
30.3k
      last->parent = cur;
6517
30.3k
    }
6518
50.7k
    if (ctxt->input->id != inputchk) {
6519
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
0
                       "Element content declaration doesn't start and stop in"
6521
0
                       " the same entity\n");
6522
0
    }
6523
50.7k
    NEXT;
6524
50.7k
    if (RAW == '?') {
6525
1.56k
  if (ret != NULL) {
6526
1.56k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
1.56k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
72
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
1.49k
      else
6530
1.49k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
1.56k
  }
6532
1.56k
  NEXT;
6533
49.2k
    } else if (RAW == '*') {
6534
11.3k
  if (ret != NULL) {
6535
11.3k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
11.3k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
66.7k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
55.3k
    if ((cur->c1 != NULL) &&
6543
55.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
55.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
2.55k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
55.3k
    if ((cur->c2 != NULL) &&
6547
55.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
55.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
420
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
55.3k
    cur = cur->c2;
6551
55.3k
      }
6552
11.3k
  }
6553
11.3k
  NEXT;
6554
37.8k
    } else if (RAW == '+') {
6555
5.90k
  if (ret != NULL) {
6556
5.90k
      int found = 0;
6557
6558
5.90k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
5.90k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
12
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
5.89k
      else
6562
5.89k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
9.73k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
3.83k
    if ((cur->c1 != NULL) &&
6570
3.83k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
3.83k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
0
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
0
        found = 1;
6574
0
    }
6575
3.83k
    if ((cur->c2 != NULL) &&
6576
3.83k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
3.83k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
0
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
0
        found = 1;
6580
0
    }
6581
3.83k
    cur = cur->c2;
6582
3.83k
      }
6583
5.90k
      if (found)
6584
0
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
5.90k
  }
6586
5.90k
  NEXT;
6587
5.90k
    }
6588
50.7k
    return(ret);
6589
52.7k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
99.4k
                           xmlElementContentPtr *result) {
6648
6649
99.4k
    xmlElementContentPtr tree = NULL;
6650
99.4k
    int inputid = ctxt->input->id;
6651
99.4k
    int res;
6652
6653
99.4k
    *result = NULL;
6654
6655
99.4k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
99.4k
    NEXT;
6661
99.4k
    GROW;
6662
99.4k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
99.4k
    SKIP_BLANKS;
6665
99.4k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
54.2k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
54.2k
  res = XML_ELEMENT_TYPE_MIXED;
6668
54.2k
    } else {
6669
45.1k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
45.1k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
45.1k
    }
6672
99.4k
    SKIP_BLANKS;
6673
99.4k
    *result = tree;
6674
99.4k
    return(res);
6675
99.4k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
136k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
136k
    const xmlChar *name;
6695
136k
    int ret = -1;
6696
136k
    xmlElementContentPtr content  = NULL;
6697
6698
136k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
136k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
136k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
135k
  int inputid = ctxt->input->id;
6705
6706
135k
  SKIP(7);
6707
135k
  if (SKIP_BLANKS == 0) {
6708
299
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
299
               "Space required after 'ELEMENT'\n");
6710
299
      return(-1);
6711
299
  }
6712
135k
        name = xmlParseName(ctxt);
6713
135k
  if (name == NULL) {
6714
269
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
269
         "xmlParseElementDecl: no name for Element\n");
6716
269
      return(-1);
6717
269
  }
6718
134k
  if (SKIP_BLANKS == 0) {
6719
870
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
870
         "Space required after the element name\n");
6721
870
  }
6722
134k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
33.2k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
33.2k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
101k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
101k
             (NXT(2) == 'Y')) {
6730
944
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
944
      ret = XML_ELEMENT_TYPE_ANY;
6735
100k
  } else if (RAW == '(') {
6736
99.4k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
99.4k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
1.41k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
1.41k
          (ctxt->inputNr == 1)) {
6743
48
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
48
    "PEReference: forbidden within markup decl in internal subset\n");
6745
1.36k
      } else {
6746
1.36k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
1.36k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
1.36k
            }
6749
1.41k
      return(-1);
6750
1.41k
  }
6751
6752
133k
  SKIP_BLANKS;
6753
6754
133k
  if (RAW != '>') {
6755
3.50k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
3.50k
      if (content != NULL) {
6757
335
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
335
      }
6759
130k
  } else {
6760
130k
      if (inputid != ctxt->input->id) {
6761
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
0
                               "Element declaration doesn't start and stop in"
6763
0
                               " the same entity\n");
6764
0
      }
6765
6766
130k
      NEXT;
6767
130k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
130k
    (ctxt->sax->elementDecl != NULL)) {
6769
121k
    if (content != NULL)
6770
89.6k
        content->parent = NULL;
6771
121k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
121k
                           content);
6773
121k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
366
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
366
    }
6782
121k
      } else if (content != NULL) {
6783
6.17k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
6.17k
      }
6785
130k
  }
6786
133k
    }
6787
134k
    return(ret);
6788
136k
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
1.99k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
1.99k
    int *inputIds = NULL;
6806
1.99k
    size_t inputIdsSize = 0;
6807
1.99k
    size_t depth = 0;
6808
6809
11.5k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
11.5k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
6.62k
            int id = ctxt->input->id;
6812
6813
6.62k
            SKIP(3);
6814
6.62k
            SKIP_BLANKS;
6815
6816
6.62k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
5.81k
                SKIP(7);
6818
5.81k
                SKIP_BLANKS;
6819
5.81k
                if (RAW != '[') {
6820
33
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
33
                    xmlHaltParser(ctxt);
6822
33
                    goto error;
6823
33
                }
6824
5.78k
                if (ctxt->input->id != id) {
6825
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
0
                                   "All markup of the conditional section is"
6827
0
                                   " not in the same entity\n");
6828
0
                }
6829
5.78k
                NEXT;
6830
6831
5.78k
                if (inputIdsSize <= depth) {
6832
1.81k
                    int *tmp;
6833
6834
1.81k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
1.81k
                    tmp = (int *) xmlRealloc(inputIds,
6836
1.81k
                            inputIdsSize * sizeof(int));
6837
1.81k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
1.81k
                    inputIds = tmp;
6842
1.81k
                }
6843
5.78k
                inputIds[depth] = id;
6844
5.78k
                depth++;
6845
5.78k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
354
                size_t ignoreDepth = 0;
6847
6848
354
                SKIP(6);
6849
354
                SKIP_BLANKS;
6850
354
                if (RAW != '[') {
6851
27
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
27
                    xmlHaltParser(ctxt);
6853
27
                    goto error;
6854
27
                }
6855
327
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
327
                NEXT;
6861
6862
44.7k
                while (RAW != 0) {
6863
44.5k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
882
                        SKIP(3);
6865
882
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
882
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
43.6k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
43.6k
                               (NXT(2) == '>')) {
6873
723
                        if (ignoreDepth == 0)
6874
159
                            break;
6875
564
                        SKIP(3);
6876
564
                        ignoreDepth--;
6877
42.9k
                    } else {
6878
42.9k
                        NEXT;
6879
42.9k
                    }
6880
44.5k
                }
6881
6882
327
    if (RAW == 0) {
6883
168
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
168
                    goto error;
6885
168
    }
6886
159
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
159
                SKIP(3);
6892
450
            } else {
6893
450
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
450
                xmlHaltParser(ctxt);
6895
450
                goto error;
6896
450
            }
6897
6.62k
        } else if ((depth > 0) &&
6898
4.89k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
2.49k
            depth--;
6900
2.49k
            if (ctxt->input->id != inputIds[depth]) {
6901
114
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
114
                               "All markup of the conditional section is not"
6903
114
                               " in the same entity\n");
6904
114
            }
6905
2.49k
            SKIP(3);
6906
2.49k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
1.62k
            xmlParseMarkupDecl(ctxt);
6908
1.62k
        } else {
6909
777
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
777
            xmlHaltParser(ctxt);
6911
777
            goto error;
6912
777
        }
6913
6914
10.0k
        if (depth == 0)
6915
525
            break;
6916
6917
9.53k
        SKIP_BLANKS;
6918
9.53k
        GROW;
6919
9.53k
    }
6920
6921
1.99k
error:
6922
1.99k
    xmlFree(inputIds);
6923
1.99k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
9.98M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
9.98M
    GROW;
6952
9.98M
    if (CUR == '<') {
6953
9.98M
        if (NXT(1) == '!') {
6954
9.98M
      switch (NXT(2)) {
6955
283k
          case 'E':
6956
283k
        if (NXT(3) == 'L')
6957
136k
      xmlParseElementDecl(ctxt);
6958
146k
        else if (NXT(3) == 'N')
6959
145k
      xmlParseEntityDecl(ctxt);
6960
728
                    else
6961
728
                        SKIP(2);
6962
283k
        break;
6963
104k
          case 'A':
6964
104k
        xmlParseAttributeListDecl(ctxt);
6965
104k
        break;
6966
906
          case 'N':
6967
906
        xmlParseNotationDecl(ctxt);
6968
906
        break;
6969
9.59M
          case '-':
6970
9.59M
        xmlParseComment(ctxt);
6971
9.59M
        break;
6972
1.51k
    default:
6973
        /* there is an error but it will be detected later */
6974
1.51k
                    SKIP(2);
6975
1.51k
        break;
6976
9.98M
      }
6977
9.98M
  } else if (NXT(1) == '?') {
6978
708
      xmlParsePI(ctxt);
6979
708
  }
6980
9.98M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
9.98M
    if (ctxt->instate == XML_PARSER_EOF)
6987
7.21k
        return;
6988
6989
9.97M
    ctxt->instate = XML_PARSER_DTD;
6990
9.97M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
1.60k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
1.60k
    xmlChar *version;
7006
1.60k
    const xmlChar *encoding;
7007
1.60k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
1.60k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
1.47k
  SKIP(5);
7014
1.47k
    } else {
7015
130
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
130
  return;
7017
130
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
1.47k
    oldstate = ctxt->instate;
7021
1.47k
    ctxt->instate = XML_PARSER_START;
7022
7023
1.47k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
1.47k
    version = xmlParseVersionInfo(ctxt);
7032
1.47k
    if (version == NULL)
7033
419
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
1.06k
    else {
7035
1.06k
  if (SKIP_BLANKS == 0) {
7036
87
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
87
               "Space needed here\n");
7038
87
  }
7039
1.06k
    }
7040
1.47k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
1.47k
    encoding = xmlParseEncodingDecl(ctxt);
7046
1.47k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
1.47k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
36
        ctxt->instate = oldstate;
7053
36
        return;
7054
36
    }
7055
1.44k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
376
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
376
           "Missing encoding in text declaration\n");
7058
376
    }
7059
7060
1.44k
    SKIP_BLANKS;
7061
1.44k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
750
        SKIP(2);
7063
750
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
24
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
24
  NEXT;
7067
669
    } else {
7068
669
        int c;
7069
7070
669
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
14.2k
        while ((c = CUR) != 0) {
7072
13.8k
            NEXT;
7073
13.8k
            if (c == '>')
7074
260
                break;
7075
13.8k
        }
7076
669
    }
7077
7078
1.44k
    ctxt->instate = oldstate;
7079
1.44k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
9.02k
                       const xmlChar *SystemID) {
7096
9.02k
    xmlDetectSAX2(ctxt);
7097
9.02k
    GROW;
7098
7099
9.02k
    if ((ctxt->encoding == NULL) &&
7100
9.02k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
8.94k
        xmlChar start[4];
7102
8.94k
  xmlCharEncoding enc;
7103
7104
8.94k
  start[0] = RAW;
7105
8.94k
  start[1] = NXT(1);
7106
8.94k
  start[2] = NXT(2);
7107
8.94k
  start[3] = NXT(3);
7108
8.94k
  enc = xmlDetectCharEncoding(start, 4);
7109
8.94k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
1.38k
      xmlSwitchEncoding(ctxt, enc);
7111
8.94k
    }
7112
7113
9.02k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
1.17k
  xmlParseTextDecl(ctxt);
7115
1.17k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
27
      xmlHaltParser(ctxt);
7120
27
      return;
7121
27
  }
7122
1.17k
    }
7123
8.99k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
8.99k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
8.99k
    ctxt->instate = XML_PARSER_DTD;
7135
8.99k
    ctxt->external = 1;
7136
8.99k
    SKIP_BLANKS;
7137
165k
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
157k
  GROW;
7139
157k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
1.99k
            xmlParseConditionalSections(ctxt);
7141
155k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
154k
            xmlParseMarkupDecl(ctxt);
7143
154k
        } else {
7144
1.60k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
1.60k
            xmlHaltParser(ctxt);
7146
1.60k
            return;
7147
1.60k
        }
7148
156k
        SKIP_BLANKS;
7149
156k
    }
7150
7151
7.38k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
7.38k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
1.14M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
1.14M
    xmlEntityPtr ent;
7175
1.14M
    xmlChar *val;
7176
1.14M
    int was_checked;
7177
1.14M
    xmlNodePtr list = NULL;
7178
1.14M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
1.14M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
1.14M
    if (NXT(1) == '#') {
7188
62.3k
  int i = 0;
7189
62.3k
  xmlChar out[16];
7190
62.3k
  int hex = NXT(2);
7191
62.3k
  int value = xmlParseCharRef(ctxt);
7192
7193
62.3k
  if (value == 0)
7194
8.13k
      return;
7195
54.2k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
42.8k
      if (value <= 0xFF) {
7202
41.5k
    out[0] = value;
7203
41.5k
    out[1] = 0;
7204
41.5k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
41.5k
        (!ctxt->disableSAX))
7206
40.0k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
41.5k
      } else {
7208
1.32k
    if ((hex == 'x') || (hex == 'X'))
7209
231
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
1.09k
    else
7211
1.09k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
1.32k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
1.32k
        (!ctxt->disableSAX))
7214
1.23k
        ctxt->sax->reference(ctxt->userData, out);
7215
1.32k
      }
7216
42.8k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
11.3k
      COPY_BUF(0 ,out, i, value);
7221
11.3k
      out[i] = 0;
7222
11.3k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
11.3k
    (!ctxt->disableSAX))
7224
10.0k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
11.3k
  }
7226
54.2k
  return;
7227
62.3k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
1.08M
    ent = xmlParseEntityRef(ctxt);
7233
1.08M
    if (ent == NULL) return;
7234
987k
    if (!ctxt->wellFormed)
7235
67.7k
  return;
7236
920k
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
920k
    if ((ent->name == NULL) ||
7240
920k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
25.0k
  val = ent->content;
7242
25.0k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
25.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
25.0k
      (!ctxt->disableSAX))
7248
25.0k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
25.0k
  return;
7250
25.0k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
895k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
895k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
19.4k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
18.9k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
18.9k
  void *user_data;
7273
18.9k
  if (ctxt->userData == ctxt)
7274
18.9k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
18.9k
        ctxt->sizeentcopy = 0;
7280
7281
18.9k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
170
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
170
            xmlHaltParser(ctxt);
7284
170
            return;
7285
170
        }
7286
7287
18.7k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
18.7k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
13.6k
      ctxt->depth++;
7297
13.6k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
13.6k
                                                user_data, &list);
7299
13.6k
      ctxt->depth--;
7300
7301
13.6k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
5.11k
      ctxt->depth++;
7303
5.11k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
5.11k
                                     user_data, ctxt->depth, ent->URI,
7305
5.11k
             ent->ExternalID, &list);
7306
5.11k
      ctxt->depth--;
7307
5.11k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
18.7k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
18.7k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
18.7k
        ent->expandedSize = ctxt->sizeentcopy;
7316
18.7k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
2.90k
            xmlHaltParser(ctxt);
7318
2.90k
      xmlFreeNodeList(list);
7319
2.90k
      return;
7320
2.90k
  }
7321
15.8k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
15.8k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
11.3k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
11.3k
            if ((ctxt->replaceEntities == 0) ||
7333
11.3k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
11.3k
                ((list->type == XML_TEXT_NODE) &&
7335
10.4k
                 (list->next == NULL))) {
7336
10.4k
                ent->owner = 1;
7337
23.0k
                while (list != NULL) {
7338
12.5k
                    list->parent = (xmlNodePtr) ent;
7339
12.5k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
12.5k
                    if (list->next == NULL)
7342
10.4k
                        ent->last = list;
7343
12.5k
                    list = list->next;
7344
12.5k
                }
7345
10.4k
                list = NULL;
7346
10.4k
            } else {
7347
923
                ent->owner = 0;
7348
3.77k
                while (list != NULL) {
7349
2.85k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
2.85k
                    list->doc = ctxt->myDoc;
7351
2.85k
                    if (list->next == NULL)
7352
923
                        ent->last = list;
7353
2.85k
                    list = list->next;
7354
2.85k
                }
7355
923
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
923
            }
7361
11.3k
  } else if ((ret != XML_ERR_OK) &&
7362
4.55k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
2.43k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
2.43k
         "Entity '%s' failed to parse\n", ent->name);
7365
2.43k
            if (ent->content != NULL)
7366
429
                ent->content[0] = 0;
7367
2.43k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
15.8k
        was_checked = 0;
7374
15.8k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
892k
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
18.6k
  if (was_checked != 0) {
7389
13.6k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
13.6k
      if (ctxt->userData == ctxt)
7396
13.6k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
13.6k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
3
    ctxt->depth++;
7402
3
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
3
           ent->content, user_data, NULL);
7404
3
    ctxt->depth--;
7405
13.6k
      } else if (ent->etype ==
7406
13.6k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
13.6k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
13.6k
    ctxt->depth++;
7410
13.6k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
13.6k
         ctxt->sax, user_data, ctxt->depth,
7412
13.6k
         ent->URI, ent->ExternalID, NULL);
7413
13.6k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
13.6k
                ctxt->sizeentities = oldsizeentities;
7417
13.6k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
13.6k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
13.6k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
13.6k
  }
7429
18.6k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
18.6k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
2.27k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
2.27k
  }
7437
18.6k
  return;
7438
18.6k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
873k
    if ((was_checked != 0) &&
7445
873k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
153
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
873k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
873k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
4.86k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
4.86k
  return;
7458
4.86k
    }
7459
7460
868k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
868k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
868k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
868k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
282k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
282k
    cur = ent->children;
7492
284k
    while (cur != NULL) {
7493
284k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
284k
        if (nw != NULL) {
7495
284k
      if (nw->_private == NULL)
7496
284k
          nw->_private = cur->_private;
7497
284k
      if (firstChild == NULL){
7498
282k
          firstChild = nw;
7499
282k
      }
7500
284k
      nw = xmlAddChild(ctxt->node, nw);
7501
284k
        }
7502
284k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
282k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
282k
          (nw != NULL) &&
7509
282k
          (nw->type == XML_ELEMENT_NODE) &&
7510
282k
          (nw->children == NULL))
7511
255
          nw->extra = 1;
7512
7513
282k
      break;
7514
282k
        }
7515
2.18k
        cur = cur->next;
7516
2.18k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
585k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
585k
    xmlNodePtr nw = NULL, cur, next, last,
7523
585k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
585k
    cur = ent->children;
7532
585k
    ent->children = NULL;
7533
585k
    last = ent->last;
7534
585k
    ent->last = NULL;
7535
590k
    while (cur != NULL) {
7536
590k
        next = cur->next;
7537
590k
        cur->next = NULL;
7538
590k
        cur->parent = NULL;
7539
590k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
590k
        if (nw != NULL) {
7541
590k
      if (nw->_private == NULL)
7542
590k
          nw->_private = cur->_private;
7543
590k
      if (firstChild == NULL){
7544
585k
          firstChild = cur;
7545
585k
      }
7546
590k
      xmlAddChild((xmlNodePtr) ent, nw);
7547
590k
        }
7548
590k
        xmlAddChild(ctxt->node, cur);
7549
590k
        if (cur == last)
7550
585k
      break;
7551
4.34k
        cur = next;
7552
4.34k
    }
7553
585k
    if (ent->owner == 0)
7554
923
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
585k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
868k
      ctxt->nodemem = 0;
7582
868k
      ctxt->nodelen = 0;
7583
868k
      return;
7584
868k
  }
7585
868k
    }
7586
868k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
1.39M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
1.39M
    const xmlChar *name;
7621
1.39M
    xmlEntityPtr ent = NULL;
7622
7623
1.39M
    GROW;
7624
1.39M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
1.39M
    if (RAW != '&')
7628
0
        return(NULL);
7629
1.39M
    NEXT;
7630
1.39M
    name = xmlParseName(ctxt);
7631
1.39M
    if (name == NULL) {
7632
40.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
40.4k
           "xmlParseEntityRef: no name\n");
7634
40.4k
        return(NULL);
7635
40.4k
    }
7636
1.35M
    if (RAW != ';') {
7637
7.65k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
7.65k
  return(NULL);
7639
7.65k
    }
7640
1.34M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
1.34M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
1.30M
        ent = xmlGetPredefinedEntity(name);
7647
1.30M
        if (ent != NULL)
7648
49.1k
            return(ent);
7649
1.30M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
1.29M
    if (ctxt->sax != NULL) {
7656
1.29M
  if (ctxt->sax->getEntity != NULL)
7657
1.29M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
1.29M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
1.29M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
380
      ent = xmlGetPredefinedEntity(name);
7661
1.29M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
1.29M
      (ctxt->userData==ctxt)) {
7663
1.80k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
1.80k
  }
7665
1.29M
    }
7666
1.29M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
1.29M
    if (ent == NULL) {
7690
71.4k
  if ((ctxt->standalone == 1) ||
7691
71.4k
      ((ctxt->hasExternalSubset == 0) &&
7692
69.1k
       (ctxt->hasPErefs == 0))) {
7693
62.3k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
62.3k
         "Entity '%s' not defined\n", name);
7695
62.3k
  } else {
7696
9.08k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
9.08k
         "Entity '%s' not defined\n", name);
7698
9.08k
      if ((ctxt->inSubset == 0) &&
7699
9.08k
    (ctxt->sax != NULL) &&
7700
9.08k
    (ctxt->sax->reference != NULL)) {
7701
9.00k
    ctxt->sax->reference(ctxt->userData, name);
7702
9.00k
      }
7703
9.08k
  }
7704
71.4k
  ctxt->valid = 0;
7705
71.4k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
1.22M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
3
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
3
     "Entity reference to unparsed entity %s\n", name);
7715
3
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
1.22M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
1.22M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
17
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
17
       "Attribute references external entity '%s'\n", name);
7726
17
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
1.22M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
1.22M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
273k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
5.35k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
45
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
5.35k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
5.35k
        }
7740
273k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
82
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
82
                    "'<' in entity '%s' is not allowed in attributes "
7743
82
                    "values\n", name);
7744
273k
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
953k
    else {
7750
953k
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
953k
      default:
7758
953k
      break;
7759
953k
  }
7760
953k
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
1.29M
    return(ent);
7769
1.29M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
13.4M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
13.4M
    xmlChar *name;
7805
13.4M
    const xmlChar *ptr;
7806
13.4M
    xmlChar cur;
7807
13.4M
    xmlEntityPtr ent = NULL;
7808
7809
13.4M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
13.4M
    ptr = *str;
7812
13.4M
    cur = *ptr;
7813
13.4M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
13.4M
    ptr++;
7817
13.4M
    name = xmlParseStringName(ctxt, &ptr);
7818
13.4M
    if (name == NULL) {
7819
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
0
           "xmlParseStringEntityRef: no name\n");
7821
0
  *str = ptr;
7822
0
  return(NULL);
7823
0
    }
7824
13.4M
    if (*ptr != ';') {
7825
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
0
        xmlFree(name);
7827
0
  *str = ptr;
7828
0
  return(NULL);
7829
0
    }
7830
13.4M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
13.4M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
13.4M
        ent = xmlGetPredefinedEntity(name);
7838
13.4M
        if (ent != NULL) {
7839
187
            xmlFree(name);
7840
187
            *str = ptr;
7841
187
            return(ent);
7842
187
        }
7843
13.4M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
13.4M
    if (ctxt->sax != NULL) {
7850
13.4M
  if (ctxt->sax->getEntity != NULL)
7851
13.4M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
13.4M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
2.83k
      ent = xmlGetPredefinedEntity(name);
7854
13.4M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
36.7k
      ent = xmlSAX2GetEntity(ctxt, name);
7856
36.7k
  }
7857
13.4M
    }
7858
13.4M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
13.4M
    if (ent == NULL) {
7885
36.7k
  if ((ctxt->standalone == 1) ||
7886
36.7k
      ((ctxt->hasExternalSubset == 0) &&
7887
36.6k
       (ctxt->hasPErefs == 0))) {
7888
33.5k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
33.5k
         "Entity '%s' not defined\n", name);
7890
33.5k
  } else {
7891
3.19k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
3.19k
        "Entity '%s' not defined\n",
7893
3.19k
        name);
7894
3.19k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
36.7k
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
13.4M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
0
     "Entity reference to unparsed entity %s\n", name);
7906
0
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
13.4M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
13.4M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
4
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
4
   "Attribute references external entity '%s'\n", name);
7917
4
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
13.4M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
13.4M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
13.4M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
3.32k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
23
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
3.32k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
3.32k
        }
7931
13.4M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
499
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
499
                    "'<' in entity '%s' is not allowed in attributes "
7934
499
                    "values\n", name);
7935
13.4M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
570
    else {
7941
570
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
570
      default:
7949
570
      break;
7950
570
  }
7951
570
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
13.4M
    xmlFree(name);
7961
13.4M
    *str = ptr;
7962
13.4M
    return(ent);
7963
13.4M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
10.1M
{
8000
10.1M
    const xmlChar *name;
8001
10.1M
    xmlEntityPtr entity = NULL;
8002
10.1M
    xmlParserInputPtr input;
8003
8004
10.1M
    if (RAW != '%')
8005
0
        return;
8006
10.1M
    NEXT;
8007
10.1M
    name = xmlParseName(ctxt);
8008
10.1M
    if (name == NULL) {
8009
547k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
547k
  return;
8011
547k
    }
8012
9.62M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
9.62M
    if (RAW != ';') {
8016
475
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
475
        return;
8018
475
    }
8019
8020
9.62M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
9.62M
    if ((ctxt->sax != NULL) &&
8026
9.62M
  (ctxt->sax->getParameterEntity != NULL))
8027
9.62M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
9.62M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
9.62M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
4.55k
  if ((ctxt->standalone == 1) ||
8040
4.55k
      ((ctxt->hasExternalSubset == 0) &&
8041
4.55k
       (ctxt->hasPErefs == 0))) {
8042
416
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
416
            "PEReference: %%%s; not found\n",
8044
416
            name);
8045
4.13k
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
4.13k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
100
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
100
                                 "PEReference: %%%s; not found\n",
8056
100
                                 name, NULL);
8057
100
            } else
8058
4.03k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
4.03k
                              "PEReference: %%%s; not found\n",
8060
4.03k
                              name, NULL);
8061
4.13k
            ctxt->valid = 0;
8062
4.13k
  }
8063
9.62M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
9.62M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
9.62M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
9.62M
  } else {
8073
9.62M
            xmlChar start[4];
8074
9.62M
            xmlCharEncoding enc;
8075
9.62M
            unsigned long parentConsumed;
8076
9.62M
            xmlEntityPtr oldEnt;
8077
8078
9.62M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
9.62M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
9.62M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
9.62M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
9.62M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
9.62M
    (ctxt->replaceEntities == 0) &&
8084
9.62M
    (ctxt->validate == 0))
8085
39
    return;
8086
8087
9.62M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
75
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
75
                xmlHaltParser(ctxt);
8090
75
                return;
8091
75
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
9.62M
            parentConsumed = ctxt->input->parentConsumed;
8095
9.62M
            oldEnt = ctxt->input->entity;
8096
9.62M
            if ((oldEnt == NULL) ||
8097
9.62M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
9.56M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
97.6k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
97.6k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
97.6k
                                     ctxt->input->cur - ctxt->input->base);
8102
97.6k
            }
8103
8104
9.62M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
9.62M
      if (xmlPushInput(ctxt, input) < 0) {
8106
503
                xmlFreeInputStream(input);
8107
503
    return;
8108
503
            }
8109
8110
9.62M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
9.62M
            input->parentConsumed = parentConsumed;
8113
8114
9.62M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
1.16k
                GROW
8125
1.16k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
1.16k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
1.16k
                    start[0] = RAW;
8129
1.16k
                    start[1] = NXT(1);
8130
1.16k
                    start[2] = NXT(2);
8131
1.16k
                    start[3] = NXT(3);
8132
1.16k
                    enc = xmlDetectCharEncoding(start, 4);
8133
1.16k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
0
                        xmlSwitchEncoding(ctxt, enc);
8135
0
                    }
8136
1.16k
                }
8137
8138
1.16k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
1.16k
                    (IS_BLANK_CH(NXT(5)))) {
8140
0
                    xmlParseTextDecl(ctxt);
8141
0
                }
8142
1.16k
            }
8143
9.62M
  }
8144
9.62M
    }
8145
9.62M
    ctxt->hasPErefs = 1;
8146
9.62M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
563
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
563
    xmlParserInputPtr input;
8162
563
    xmlBufferPtr buf;
8163
563
    int l, c;
8164
563
    int count = 0;
8165
8166
563
    if ((ctxt == NULL) || (entity == NULL) ||
8167
563
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
563
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
563
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
563
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
563
    buf = xmlBufferCreate();
8180
563
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
563
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
563
    input = xmlNewEntityInputStream(ctxt, entity);
8188
563
    if (input == NULL) {
8189
78
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
78
              "xmlLoadEntityContent input error");
8191
78
  xmlBufferFree(buf);
8192
78
        return(-1);
8193
78
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
485
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
485
    GROW;
8206
485
    c = CUR_CHAR(l);
8207
60.2k
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
60.2k
           (IS_CHAR(c))) {
8209
59.7k
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
59.7k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
441
      count = 0;
8212
441
      GROW;
8213
441
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
441
  }
8218
59.7k
  NEXTL(l);
8219
59.7k
  c = CUR_CHAR(l);
8220
59.7k
  if (c == 0) {
8221
336
      count = 0;
8222
336
      GROW;
8223
336
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
336
      c = CUR_CHAR(l);
8228
336
  }
8229
59.7k
    }
8230
8231
485
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
201
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
201
        xmlPopInput(ctxt);
8234
284
    } else if (!IS_CHAR(c)) {
8235
284
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
284
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
284
                    c);
8238
284
  xmlBufferFree(buf);
8239
284
  return(-1);
8240
284
    }
8241
201
    entity->content = buf->content;
8242
201
    entity->length = buf->use;
8243
201
    buf->content = NULL;
8244
201
    xmlBufferFree(buf);
8245
8246
201
    return(0);
8247
485
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the string of the entity content.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
38.5k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
38.5k
    const xmlChar *ptr;
8283
38.5k
    xmlChar cur;
8284
38.5k
    xmlChar *name;
8285
38.5k
    xmlEntityPtr entity = NULL;
8286
8287
38.5k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
38.5k
    ptr = *str;
8289
38.5k
    cur = *ptr;
8290
38.5k
    if (cur != '%')
8291
0
        return(NULL);
8292
38.5k
    ptr++;
8293
38.5k
    name = xmlParseStringName(ctxt, &ptr);
8294
38.5k
    if (name == NULL) {
8295
6
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
6
           "xmlParseStringPEReference: no name\n");
8297
6
  *str = ptr;
8298
6
  return(NULL);
8299
6
    }
8300
38.5k
    cur = *ptr;
8301
38.5k
    if (cur != ';') {
8302
6
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
6
  xmlFree(name);
8304
6
  *str = ptr;
8305
6
  return(NULL);
8306
6
    }
8307
38.5k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
38.5k
    if ((ctxt->sax != NULL) &&
8313
38.5k
  (ctxt->sax->getParameterEntity != NULL))
8314
38.5k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
38.5k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
38.5k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
2.96k
  if ((ctxt->standalone == 1) ||
8330
2.96k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
2.96k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
2.96k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
2.96k
        "PEReference: %%%s; not found\n",
8343
2.96k
        name, NULL);
8344
2.96k
      ctxt->valid = 0;
8345
2.96k
  }
8346
35.5k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
35.5k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
35.5k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
35.5k
    }
8357
38.5k
    ctxt->hasPErefs = 1;
8358
38.5k
    xmlFree(name);
8359
38.5k
    *str = ptr;
8360
38.5k
    return(entity);
8361
38.5k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
82.1k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
82.1k
    const xmlChar *name = NULL;
8382
82.1k
    xmlChar *ExternalID = NULL;
8383
82.1k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
82.1k
    SKIP(9);
8389
8390
82.1k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
82.1k
    name = xmlParseName(ctxt);
8396
82.1k
    if (name == NULL) {
8397
599
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
599
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
599
    }
8400
82.1k
    ctxt->intSubName = name;
8401
8402
82.1k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
82.1k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
82.1k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
33.2k
        ctxt->hasExternalSubset = 1;
8411
33.2k
    }
8412
82.1k
    ctxt->extSubURI = URI;
8413
82.1k
    ctxt->extSubSystem = ExternalID;
8414
8415
82.1k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
82.1k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
82.1k
  (!ctxt->disableSAX))
8422
77.8k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
82.1k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
82.1k
    if (RAW == '[')
8431
56.7k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
25.4k
    if (RAW != '>') {
8437
5.13k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
5.13k
    }
8439
25.4k
    NEXT;
8440
25.4k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
56.3k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
56.3k
    if (RAW == '[') {
8457
56.3k
        int baseInputNr = ctxt->inputNr;
8458
56.3k
        ctxt->instate = XML_PARSER_DTD;
8459
56.3k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
56.3k
  SKIP_BLANKS;
8466
9.89M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
9.89M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
9.85M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
9.85M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
9.85M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
9.82M
          xmlParseMarkupDecl(ctxt);
8478
9.82M
            } else if (RAW == '%') {
8479
12.6k
          xmlParsePEReference(ctxt);
8480
19.2k
            } else {
8481
19.2k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
19.2k
                        "xmlParseInternalSubset: error detected in"
8483
19.2k
                        " Markup declaration\n");
8484
19.2k
                xmlHaltParser(ctxt);
8485
19.2k
                return;
8486
19.2k
            }
8487
9.84M
      SKIP_BLANKS;
8488
9.84M
  }
8489
37.1k
  if (RAW == ']') {
8490
30.8k
      NEXT;
8491
30.8k
      SKIP_BLANKS;
8492
30.8k
  }
8493
37.1k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
37.1k
    if (RAW != '>') {
8499
6.77k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
6.77k
  return;
8501
6.77k
    }
8502
30.3k
    NEXT;
8503
30.3k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
546k
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
546k
    const xmlChar *name;
8544
546k
    xmlChar *val;
8545
8546
546k
    *value = NULL;
8547
546k
    GROW;
8548
546k
    name = xmlParseName(ctxt);
8549
546k
    if (name == NULL) {
8550
33.9k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
33.9k
                 "error parsing attribute name\n");
8552
33.9k
        return(NULL);
8553
33.9k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
512k
    SKIP_BLANKS;
8559
512k
    if (RAW == '=') {
8560
496k
        NEXT;
8561
496k
  SKIP_BLANKS;
8562
496k
  val = xmlParseAttValue(ctxt);
8563
496k
  ctxt->instate = XML_PARSER_CONTENT;
8564
496k
    } else {
8565
16.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
16.3k
         "Specification mandates value for attribute %s\n", name);
8567
16.3k
  return(name);
8568
16.3k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No longer reported as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
496k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
1.53k
  if (!xmlCheckLanguageID(val)) {
8577
983
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
983
              "Malformed value for xml:lang : %s\n",
8579
983
        val, NULL);
8580
983
  }
8581
1.53k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
496k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
200
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
0
      *(ctxt->space) = 0;
8589
200
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
57
      *(ctxt->space) = 1;
8591
143
  else {
8592
143
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
143
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
143
                                 val, NULL);
8595
143
  }
8596
200
    }
8597
8598
496k
    *value = val;
8599
496k
    return(name);
8600
512k
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
433k
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
433k
    const xmlChar *name;
8634
433k
    const xmlChar *attname;
8635
433k
    xmlChar *attvalue;
8636
433k
    const xmlChar **atts = ctxt->atts;
8637
433k
    int nbatts = 0;
8638
433k
    int maxatts = ctxt->maxatts;
8639
433k
    int i;
8640
8641
433k
    if (RAW != '<') return(NULL);
8642
433k
    NEXT1;
8643
8644
433k
    name = xmlParseName(ctxt);
8645
433k
    if (name == NULL) {
8646
22.3k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
22.3k
       "xmlParseStartTag: invalid element name\n");
8648
22.3k
        return(NULL);
8649
22.3k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
411k
    SKIP_BLANKS;
8657
411k
    GROW;
8658
8659
679k
    while (((RAW != '>') &&
8660
679k
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
679k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
546k
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
546k
        if (attname == NULL) {
8664
33.9k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
33.9k
         "xmlParseStartTag: problem parsing attributes\n");
8666
33.9k
      break;
8667
33.9k
  }
8668
512k
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
731k
      for (i = 0; i < nbatts;i += 2) {
8675
237k
          if (xmlStrEqual(atts[i], attname)) {
8676
484
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
484
        xmlFree(attvalue);
8678
484
        goto failed;
8679
484
    }
8680
237k
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
493k
      if (atts == NULL) {
8685
19.0k
          maxatts = 22; /* allow for 10 attrs by default */
8686
19.0k
          atts = (const xmlChar **)
8687
19.0k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
19.0k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
19.0k
    ctxt->atts = atts;
8695
19.0k
    ctxt->maxatts = maxatts;
8696
474k
      } else if (nbatts + 4 > maxatts) {
8697
1
          const xmlChar **n;
8698
8699
1
          maxatts *= 2;
8700
1
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
1
               maxatts * sizeof(const xmlChar *));
8702
1
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
1
    atts = n;
8709
1
    ctxt->atts = atts;
8710
1
    ctxt->maxatts = maxatts;
8711
1
      }
8712
493k
      atts[nbatts++] = attname;
8713
493k
      atts[nbatts++] = attvalue;
8714
493k
      atts[nbatts] = NULL;
8715
493k
      atts[nbatts + 1] = NULL;
8716
493k
  } else {
8717
18.2k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
18.2k
  }
8720
8721
512k
failed:
8722
8723
512k
  GROW
8724
512k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
245k
      break;
8726
267k
  if (SKIP_BLANKS == 0) {
8727
34.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
34.5k
         "attributes construct error\n");
8729
34.5k
  }
8730
267k
  SHRINK;
8731
267k
        GROW;
8732
267k
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
411k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
411k
  (!ctxt->disableSAX)) {
8739
394k
  if (nbatts > 0)
8740
251k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
143k
  else
8742
143k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
394k
    }
8744
8745
411k
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
847k
        for (i = 1;i < nbatts;i+=2)
8748
493k
      if (atts[i] != NULL)
8749
493k
         xmlFree((xmlChar *) atts[i]);
8750
353k
    }
8751
411k
    return(name);
8752
411k
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
82.3k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
82.3k
    const xmlChar *name;
8772
8773
82.3k
    GROW;
8774
82.3k
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
82.3k
    SKIP(2);
8780
8781
82.3k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
82.3k
    GROW;
8787
82.3k
    SKIP_BLANKS;
8788
82.3k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
29.7k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
29.7k
    } else
8791
52.6k
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
82.3k
    if (name != (xmlChar*)1) {
8800
43.7k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
43.7k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
43.7k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
43.7k
                    ctxt->name, line, name);
8804
43.7k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
82.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
82.3k
  (!ctxt->disableSAX))
8811
80.0k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
82.3k
    namePop(ctxt);
8814
82.3k
    spacePop(ctxt);
8815
82.3k
    return;
8816
82.3k
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which can be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
2.52M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
2.52M
    int i;
8858
8859
2.52M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
2.63M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
245k
        if (ctxt->nsTab[i] == prefix) {
8862
112k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
471
          return(NULL);
8864
112k
      return(ctxt->nsTab[i + 1]);
8865
112k
  }
8866
2.39M
    return(NULL);
8867
2.50M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
6.03M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
6.03M
    const xmlChar *l, *p;
8886
8887
6.03M
    GROW;
8888
8889
6.03M
    l = xmlParseNCName(ctxt);
8890
6.03M
    if (l == NULL) {
8891
88.8k
        if (CUR == ':') {
8892
585
      l = xmlParseName(ctxt);
8893
585
      if (l != NULL) {
8894
585
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
585
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
585
    *prefix = NULL;
8897
585
    return(l);
8898
585
      }
8899
585
  }
8900
88.2k
        return(NULL);
8901
88.8k
    }
8902
5.94M
    if (CUR == ':') {
8903
310k
        NEXT;
8904
310k
  p = l;
8905
310k
  l = xmlParseNCName(ctxt);
8906
310k
  if (l == NULL) {
8907
12.8k
      xmlChar *tmp;
8908
8909
12.8k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
12.8k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
12.8k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
12.8k
      l = xmlParseNmtoken(ctxt);
8914
12.8k
      if (l == NULL) {
8915
8.06k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
8.06k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
8.06k
            } else {
8919
4.82k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
4.82k
    xmlFree((char *)l);
8921
4.82k
      }
8922
12.8k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
12.8k
      if (tmp != NULL) xmlFree(tmp);
8924
12.8k
      *prefix = NULL;
8925
12.8k
      return(p);
8926
12.8k
  }
8927
297k
  if (CUR == ':') {
8928
6.83k
      xmlChar *tmp;
8929
8930
6.83k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
6.83k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
6.83k
      NEXT;
8933
6.83k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
6.83k
      if (tmp != NULL) {
8935
3.03k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
3.03k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
3.03k
    if (tmp != NULL) xmlFree(tmp);
8938
3.03k
    *prefix = p;
8939
3.03k
    return(l);
8940
3.03k
      }
8941
3.80k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
3.80k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
3.80k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
3.80k
      if (tmp != NULL) xmlFree(tmp);
8946
3.80k
      *prefix = p;
8947
3.80k
      return(l);
8948
3.80k
  }
8949
290k
  *prefix = p;
8950
290k
    } else
8951
5.63M
        *prefix = NULL;
8952
5.92M
    return(l);
8953
5.94M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
112k
                        xmlChar const *prefix) {
8971
112k
    const xmlChar *cmp;
8972
112k
    const xmlChar *in;
8973
112k
    const xmlChar *ret;
8974
112k
    const xmlChar *prefix2;
8975
8976
112k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
112k
    GROW;
8979
112k
    in = ctxt->input->cur;
8980
8981
112k
    cmp = prefix;
8982
304k
    while (*in != 0 && *in == *cmp) {
8983
191k
  ++in;
8984
191k
  ++cmp;
8985
191k
    }
8986
112k
    if ((*cmp == 0) && (*in == ':')) {
8987
98.5k
        in++;
8988
98.5k
  cmp = name;
8989
569k
  while (*in != 0 && *in == *cmp) {
8990
471k
      ++in;
8991
471k
      ++cmp;
8992
471k
  }
8993
98.5k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
57.6k
            ctxt->input->col += in - ctxt->input->cur;
8996
57.6k
      ctxt->input->cur = in;
8997
57.6k
      return((const xmlChar*) 1);
8998
57.6k
  }
8999
98.5k
    }
9000
    /*
9001
     * all strings come from the dictionary, so pointer equality can be used directly
9002
     */
9003
55.1k
    ret = xmlParseQName (ctxt, &prefix2);
9004
55.1k
    if ((ret == name) && (prefix == prefix2))
9005
247
  return((const xmlChar*) 1);
9006
54.8k
    return ret;
9007
55.1k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
3.36k
    const xmlChar *oldbase = ctxt->input->base;\
9045
3.36k
    GROW;\
9046
3.36k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
3.36k
        return(NULL);\
9048
3.36k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
3.36k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
3.94M
{
9059
3.94M
    xmlChar limit = 0;
9060
3.94M
    const xmlChar *in = NULL, *start, *end, *last;
9061
3.94M
    xmlChar *ret = NULL;
9062
3.94M
    int line, col;
9063
3.94M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
320k
                    XML_MAX_HUGE_LENGTH :
9065
3.94M
                    XML_MAX_TEXT_LENGTH;
9066
9067
3.94M
    GROW;
9068
3.94M
    in = (xmlChar *) CUR_PTR;
9069
3.94M
    line = ctxt->input->line;
9070
3.94M
    col = ctxt->input->col;
9071
3.94M
    if (*in != '"' && *in != '\'') {
9072
7.64k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
7.64k
        return (NULL);
9074
7.64k
    }
9075
3.93M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
3.93M
    limit = *in++;
9083
3.93M
    col++;
9084
3.93M
    end = ctxt->input->end;
9085
3.93M
    start = in;
9086
3.93M
    if (in >= end) {
9087
233
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
233
    }
9089
3.93M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
130k
  while ((in < end) && (*in != limit) &&
9094
130k
         ((*in == 0x20) || (*in == 0x9) ||
9095
130k
          (*in == 0xA) || (*in == 0xD))) {
9096
35.6k
      if (*in == 0xA) {
9097
28.0k
          line++; col = 1;
9098
28.0k
      } else {
9099
7.65k
          col++;
9100
7.65k
      }
9101
35.6k
      in++;
9102
35.6k
      start = in;
9103
35.6k
      if (in >= end) {
9104
5
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
5
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
5
      }
9111
35.6k
  }
9112
1.20M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
1.20M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
1.11M
      col++;
9115
1.11M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
1.11M
      if (in >= end) {
9117
64
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
64
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
64
      }
9124
1.11M
  }
9125
95.0k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
96.5k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
109k
  while ((in < end) && (*in != limit) &&
9131
109k
         ((*in == 0x20) || (*in == 0x9) ||
9132
22.7k
          (*in == 0xA) || (*in == 0xD))) {
9133
14.9k
      if (*in == 0xA) {
9134
8.13k
          line++, col = 1;
9135
8.13k
      } else {
9136
6.78k
          col++;
9137
6.78k
      }
9138
14.9k
      in++;
9139
14.9k
      if (in >= end) {
9140
61
    const xmlChar *oldbase = ctxt->input->base;
9141
61
    GROW;
9142
61
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
61
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
61
    end = ctxt->input->end;
9151
61
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
61
      }
9157
14.9k
  }
9158
95.0k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
95.0k
  if (*in != limit) goto need_complex;
9164
3.84M
    } else {
9165
47.2M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
47.2M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
43.4M
      in++;
9168
43.4M
      col++;
9169
43.4M
      if (in >= end) {
9170
3.06k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
3.06k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
3.06k
      }
9177
43.4M
  }
9178
3.84M
  last = in;
9179
3.84M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
3.84M
  if (*in != limit) goto need_complex;
9185
3.84M
    }
9186
3.85M
    in++;
9187
3.85M
    col++;
9188
3.85M
    if (len != NULL) {
9189
3.35M
        if (alloc) *alloc = 0;
9190
3.35M
        *len = last - start;
9191
3.35M
        ret = (xmlChar *) start;
9192
3.35M
    } else {
9193
501k
        if (alloc) *alloc = 1;
9194
501k
        ret = xmlStrndup(start, last - start);
9195
501k
    }
9196
3.85M
    CUR_PTR = in;
9197
3.85M
    ctxt->input->line = line;
9198
3.85M
    ctxt->input->col = col;
9199
3.85M
    return ret;
9200
84.6k
need_complex:
9201
84.6k
    if (alloc) *alloc = 1;
9202
84.6k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
3.93M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
3.47M
{
9226
3.47M
    const xmlChar *name;
9227
3.47M
    xmlChar *val, *internal_val = NULL;
9228
3.47M
    int normalize = 0;
9229
9230
3.47M
    *value = NULL;
9231
3.47M
    GROW;
9232
3.47M
    name = xmlParseQName(ctxt, prefix);
9233
3.47M
    if (name == NULL) {
9234
52.1k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
52.1k
                       "error parsing attribute name\n");
9236
52.1k
        return (NULL);
9237
52.1k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
3.42M
    if (ctxt->attsSpecial != NULL) {
9243
146k
        int type;
9244
9245
146k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
146k
                                                 pref, elem, *prefix, name);
9247
146k
        if (type != 0)
9248
95.1k
            normalize = 1;
9249
146k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
3.42M
    SKIP_BLANKS;
9255
3.42M
    if (RAW == '=') {
9256
3.41M
        NEXT;
9257
3.41M
        SKIP_BLANKS;
9258
3.41M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
3.41M
        if (val == NULL)
9260
4.67k
            return (NULL);
9261
3.40M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entity references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
95.0k
      if (*alloc) {
9269
8.02k
          const xmlChar *val2;
9270
9271
8.02k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
8.02k
    if ((val2 != NULL) && (val2 != val)) {
9273
1.55k
        xmlFree(val);
9274
1.55k
        val = (xmlChar *) val2;
9275
1.55k
    }
9276
8.02k
      }
9277
95.0k
  }
9278
3.40M
        ctxt->instate = XML_PARSER_CONTENT;
9279
3.40M
    } else {
9280
16.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
16.9k
                          "Specification mandates value for attribute %s\n",
9282
16.9k
                          name);
9283
16.9k
        return (name);
9284
16.9k
    }
9285
9286
3.40M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No longer reported as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
8.93k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
1.69k
            internal_val = xmlStrndup(val, *len);
9294
1.69k
            if (!xmlCheckLanguageID(internal_val)) {
9295
1.10k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
1.10k
                              "Malformed value for xml:lang : %s\n",
9297
1.10k
                              internal_val, NULL);
9298
1.10k
            }
9299
1.69k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
8.93k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
228
            internal_val = xmlStrndup(val, *len);
9306
228
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
0
                *(ctxt->space) = 0;
9308
228
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
100
                *(ctxt->space) = 1;
9310
128
            else {
9311
128
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
128
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
128
                              internal_val, NULL);
9314
128
            }
9315
228
        }
9316
8.93k
        if (internal_val) {
9317
1.92k
            xmlFree(internal_val);
9318
1.92k
        }
9319
8.93k
    }
9320
9321
3.40M
    *value = val;
9322
3.40M
    return (name);
9323
3.42M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
2.49M
                  const xmlChar **URI, int *tlen) {
9356
2.49M
    const xmlChar *localname;
9357
2.49M
    const xmlChar *prefix;
9358
2.49M
    const xmlChar *attname;
9359
2.49M
    const xmlChar *aprefix;
9360
2.49M
    const xmlChar *nsname;
9361
2.49M
    xmlChar *attvalue;
9362
2.49M
    const xmlChar **atts = ctxt->atts;
9363
2.49M
    int maxatts = ctxt->maxatts;
9364
2.49M
    int nratts, nbatts, nbdef, inputid;
9365
2.49M
    int i, j, nbNs, attval;
9366
2.49M
    unsigned long cur;
9367
2.49M
    int nsNr = ctxt->nsNr;
9368
9369
2.49M
    if (RAW != '<') return(NULL);
9370
2.49M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
2.49M
    SHRINK;
9380
2.49M
    cur = ctxt->input->cur - ctxt->input->base;
9381
2.49M
    inputid = ctxt->input->id;
9382
2.49M
    nbatts = 0;
9383
2.49M
    nratts = 0;
9384
2.49M
    nbdef = 0;
9385
2.49M
    nbNs = 0;
9386
2.49M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
2.49M
    ctxt->nsNr = nsNr;
9389
9390
2.49M
    localname = xmlParseQName(ctxt, &prefix);
9391
2.49M
    if (localname == NULL) {
9392
34.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
34.4k
           "StartTag: invalid element name\n");
9394
34.4k
        return(NULL);
9395
34.4k
    }
9396
2.46M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
2.46M
    SKIP_BLANKS;
9404
2.46M
    GROW;
9405
9406
4.02M
    while (((RAW != '>') &&
9407
4.02M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
4.02M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
3.47M
  int len = -1, alloc = 0;
9410
9411
3.47M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
3.47M
                               &aprefix, &attvalue, &len, &alloc);
9413
3.47M
        if (attname == NULL) {
9414
56.8k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
56.8k
           "xmlParseStartTag: problem parsing attributes\n");
9416
56.8k
      break;
9417
56.8k
  }
9418
3.42M
        if (attvalue == NULL)
9419
16.9k
            goto next_attr;
9420
3.40M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
3.40M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
10.1k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
10.1k
            xmlURIPtr uri;
9425
9426
10.1k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
10.1k
            if (*URL != 0) {
9434
10.0k
                uri = xmlParseURI((const char *) URL);
9435
10.0k
                if (uri == NULL) {
9436
3.22k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
3.22k
                             "xmlns: '%s' is not a valid URI\n",
9438
3.22k
                                       URL, NULL, NULL);
9439
6.80k
                } else {
9440
6.80k
                    if (uri->scheme == NULL) {
9441
1.46k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
1.46k
                                  "xmlns: URI %s is not absolute\n",
9443
1.46k
                                  URL, NULL, NULL);
9444
1.46k
                    }
9445
6.80k
                    xmlFreeURI(uri);
9446
6.80k
                }
9447
10.0k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
10.0k
                if ((len == 29) &&
9456
10.0k
                    (xmlStrEqual(URL,
9457
112
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
0
                         "reuse of the xmlns namespace name is forbidden\n",
9460
0
                             NULL, NULL, NULL);
9461
0
                    goto next_attr;
9462
0
                }
9463
10.0k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
11.3k
            for (j = 1;j <= nbNs;j++)
9468
1.49k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
371
                    break;
9470
10.1k
            if (j <= nbNs)
9471
371
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
9.82k
            else
9473
9.82k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
3.39M
        } else if (aprefix == ctxt->str_xmlns) {
9476
17.5k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
17.5k
            xmlURIPtr uri;
9478
9479
17.5k
            if (attname == ctxt->str_xml) {
9480
104
                if (URL != ctxt->str_xml_ns) {
9481
104
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
104
                             "xml namespace prefix mapped to wrong URI\n",
9483
104
                             NULL, NULL, NULL);
9484
104
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
104
                goto next_attr;
9489
104
            }
9490
17.4k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
17.4k
            if (attname == ctxt->str_xmlns) {
9499
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
0
                         "redefinition of the xmlns prefix is forbidden\n",
9501
0
                         NULL, NULL, NULL);
9502
0
                goto next_attr;
9503
0
            }
9504
17.4k
            if ((len == 29) &&
9505
17.4k
                (xmlStrEqual(URL,
9506
693
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
0
                         "reuse of the xmlns namespace name is forbidden\n",
9509
0
                         NULL, NULL, NULL);
9510
0
                goto next_attr;
9511
0
            }
9512
17.4k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
310
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
310
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
310
                              attname, NULL, NULL);
9516
310
                goto next_attr;
9517
17.1k
            } else {
9518
17.1k
                uri = xmlParseURI((const char *) URL);
9519
17.1k
                if (uri == NULL) {
9520
2.31k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
2.31k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
2.31k
                                       attname, URL, NULL);
9523
14.8k
                } else {
9524
14.8k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
469
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
469
                                  "xmlns:%s: URI %s is not absolute\n",
9527
469
                                  attname, URL, NULL);
9528
469
                    }
9529
14.8k
                    xmlFreeURI(uri);
9530
14.8k
                }
9531
17.1k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
24.2k
            for (j = 1;j <= nbNs;j++)
9537
7.68k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
606
                    break;
9539
17.1k
            if (j <= nbNs)
9540
606
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
16.5k
            else
9542
16.5k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
3.37M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
3.37M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
30.5k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
30.5k
                maxatts = ctxt->maxatts;
9553
30.5k
                atts = ctxt->atts;
9554
30.5k
            }
9555
3.37M
            ctxt->attallocs[nratts++] = alloc;
9556
3.37M
            atts[nbatts++] = attname;
9557
3.37M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
3.37M
            if (alloc)
9565
45.9k
                atts[nbatts++] = NULL;
9566
3.33M
            else
9567
3.33M
                atts[nbatts++] = ctxt->input->base;
9568
3.37M
            atts[nbatts++] = attvalue;
9569
3.37M
            attvalue += len;
9570
3.37M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
3.37M
            if (alloc != 0) attval = 1;
9575
3.37M
            attvalue = NULL; /* moved into atts */
9576
3.37M
        }
9577
9578
3.42M
next_attr:
9579
3.42M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
8.12k
            xmlFree(attvalue);
9581
8.12k
            attvalue = NULL;
9582
8.12k
        }
9583
9584
3.42M
  GROW
9585
3.42M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
3.42M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
1.81M
      break;
9589
1.60M
  if (SKIP_BLANKS == 0) {
9590
41.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
41.6k
         "attributes construct error\n");
9592
41.6k
      break;
9593
41.6k
  }
9594
1.56M
        GROW;
9595
1.56M
    }
9596
9597
2.46M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
5.84M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
3.37M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
3.33M
            const xmlChar *old = atts[i+2];
9612
3.33M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
3.33M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
3.33M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
3.33M
        }
9616
3.37M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
2.46M
    if (ctxt->attsDefault != NULL) {
9622
246k
        xmlDefAttrsPtr defaults;
9623
9624
246k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
246k
  if (defaults != NULL) {
9626
67.4k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
46.5k
          attname = defaults->values[5 * i];
9628
46.5k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
46.5k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
1.65k
        for (j = 1;j <= nbNs;j++)
9638
383
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
208
          break;
9640
1.47k
              if (j <= nbNs) continue;
9641
9642
1.26k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
1.26k
        if (nsname != defaults->values[5 * i + 2]) {
9644
1.26k
      if (nsPush(ctxt, NULL,
9645
1.26k
                 defaults->values[5 * i + 2]) > 0)
9646
1.26k
          nbNs++;
9647
1.26k
        }
9648
45.0k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
1.56k
        for (j = 1;j <= nbNs;j++)
9653
414
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
281
          break;
9655
1.43k
              if (j <= nbNs) continue;
9656
9657
1.15k
        nsname = xmlGetNamespace(ctxt, attname);
9658
1.15k
        if (nsname != defaults->values[5 * i + 2]) {
9659
986
      if (nsPush(ctxt, attname,
9660
986
                 defaults->values[5 * i + 2]) > 0)
9661
986
          nbNs++;
9662
986
        }
9663
43.6k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
121k
        for (j = 0;j < nbatts;j+=5) {
9668
78.6k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
783
          break;
9670
78.6k
        }
9671
43.6k
        if (j < nbatts) continue;
9672
9673
42.8k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
1.29k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
1.29k
      maxatts = ctxt->maxatts;
9679
1.29k
      atts = ctxt->atts;
9680
1.29k
        }
9681
42.8k
        atts[nbatts++] = attname;
9682
42.8k
        atts[nbatts++] = aprefix;
9683
42.8k
        if (aprefix == NULL)
9684
35.1k
      atts[nbatts++] = NULL;
9685
7.71k
        else
9686
7.71k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
42.8k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
42.8k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
42.8k
        if ((ctxt->standalone == 1) &&
9690
42.8k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
42.8k
        nbdef++;
9696
42.8k
    }
9697
46.5k
      }
9698
20.9k
  }
9699
246k
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
5.88M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
3.42M
  if (atts[i + 1] != NULL) {
9709
51.0k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
51.0k
      if (nsname == NULL) {
9711
26.4k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
26.4k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
26.4k
        atts[i + 1], atts[i], localname);
9714
26.4k
      }
9715
51.0k
      atts[i + 2] = nsname;
9716
51.0k
  } else
9717
3.36M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
5.06M
        for (j = 0; j < i;j += 5) {
9725
1.64M
      if (atts[i] == atts[j]) {
9726
2.62k
          if (atts[i+1] == atts[j+1]) {
9727
531
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
531
        break;
9729
531
    }
9730
2.09k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
117
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
117
           "Namespaced Attribute %s in '%s' redefined\n",
9733
117
           atts[i], nsname, NULL);
9734
117
        break;
9735
117
    }
9736
2.09k
      }
9737
1.64M
  }
9738
3.42M
    }
9739
9740
2.46M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
2.46M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
134k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
134k
           "Namespace prefix %s on %s is not defined\n",
9744
134k
     prefix, localname, NULL);
9745
134k
    }
9746
2.46M
    *pref = prefix;
9747
2.46M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
2.46M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
2.46M
  (!ctxt->disableSAX)) {
9754
2.18M
  if (nbNs > 0)
9755
17.1k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
17.1k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
17.1k
        nbatts / 5, nbdef, atts);
9758
2.16M
  else
9759
2.16M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
2.16M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
2.18M
    }
9762
9763
2.46M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
2.46M
    if (attval != 0) {
9768
97.9k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
54.1k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
45.9k
          xmlFree((xmlChar *) atts[i]);
9771
43.7k
    }
9772
9773
2.46M
    return(localname);
9774
2.46M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
672k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
672k
    const xmlChar *name;
9794
9795
672k
    GROW;
9796
672k
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
672k
    SKIP(2);
9801
9802
672k
    if (tag->prefix == NULL)
9803
559k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
112k
    else
9805
112k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
672k
    GROW;
9811
672k
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
672k
    SKIP_BLANKS;
9814
672k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
49.7k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
49.7k
    } else
9817
623k
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
672k
    if (name != (xmlChar*)1) {
9826
93.4k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
93.4k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
93.4k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
93.4k
                    ctxt->name, tag->line, name);
9830
93.4k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
672k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
672k
  (!ctxt->disableSAX))
9837
586k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
586k
                                tag->URI);
9839
9840
672k
    spacePop(ctxt);
9841
672k
    if (tag->nsNr != 0)
9842
4.64k
  nsPop(ctxt, tag->nsNr);
9843
672k
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
3.79k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
3.79k
    xmlChar *buf = NULL;
9864
3.79k
    int len = 0;
9865
3.79k
    int size = XML_PARSER_BUFFER_SIZE;
9866
3.79k
    int r, rl;
9867
3.79k
    int s, sl;
9868
3.79k
    int cur, l;
9869
3.79k
    int count = 0;
9870
3.79k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
323
                    XML_MAX_HUGE_LENGTH :
9872
3.79k
                    XML_MAX_TEXT_LENGTH;
9873
9874
3.79k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
3.79k
    SKIP(3);
9877
9878
3.79k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
3.79k
    SKIP(6);
9881
9882
3.79k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
3.79k
    r = CUR_CHAR(rl);
9884
3.79k
    if (!IS_CHAR(r)) {
9885
51
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
51
        goto out;
9887
51
    }
9888
3.74k
    NEXTL(rl);
9889
3.74k
    s = CUR_CHAR(sl);
9890
3.74k
    if (!IS_CHAR(s)) {
9891
87
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
87
        goto out;
9893
87
    }
9894
3.65k
    NEXTL(sl);
9895
3.65k
    cur = CUR_CHAR(l);
9896
3.65k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
3.65k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
1.43M
    while (IS_CHAR(cur) &&
9902
1.43M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
1.43M
  if (len + 5 >= size) {
9904
4.06k
      xmlChar *tmp;
9905
9906
4.06k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
4.06k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
4.06k
      buf = tmp;
9912
4.06k
      size *= 2;
9913
4.06k
  }
9914
1.43M
  COPY_BUF(rl,buf,len,r);
9915
1.43M
  r = s;
9916
1.43M
  rl = sl;
9917
1.43M
  s = cur;
9918
1.43M
  sl = l;
9919
1.43M
  count++;
9920
1.43M
  if (count > 50) {
9921
26.4k
      SHRINK;
9922
26.4k
      GROW;
9923
26.4k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
26.4k
      count = 0;
9927
26.4k
  }
9928
1.43M
  NEXTL(l);
9929
1.43M
  cur = CUR_CHAR(l);
9930
1.43M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
1.43M
    }
9936
3.65k
    buf[len] = 0;
9937
3.65k
    if (cur != '>') {
9938
1.15k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
1.15k
                       "CData section not finished\n%.50s\n", buf);
9940
1.15k
        goto out;
9941
1.15k
    }
9942
2.49k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
2.49k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
1.78k
  if (ctxt->sax->cdataBlock != NULL)
9949
1.49k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
288
  else if (ctxt->sax->characters != NULL)
9951
288
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
1.78k
    }
9953
9954
3.79k
out:
9955
3.79k
    if (ctxt->instate != XML_PARSER_EOF)
9956
3.79k
        ctxt->instate = XML_PARSER_CONTENT;
9957
3.79k
    xmlFree(buf);
9958
3.79k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
39.8k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
39.8k
    int nameNr = ctxt->nameNr;
9971
9972
39.8k
    GROW;
9973
3.21M
    while ((RAW != 0) &&
9974
3.21M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
3.18M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
3.18M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
1.30k
      xmlParsePI(ctxt);
9982
1.30k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
3.18M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
3.79k
      xmlParseCDSect(ctxt);
9990
3.79k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
3.17M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
3.17M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
11.0k
      xmlParseComment(ctxt);
9998
11.0k
      ctxt->instate = XML_PARSER_CONTENT;
9999
11.0k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
3.16M
  else if (*cur == '<') {
10005
1.18M
            if (NXT(1) == '/') {
10006
243k
                if (ctxt->nameNr <= nameNr)
10007
8.11k
                    break;
10008
235k
          xmlParseElementEnd(ctxt);
10009
940k
            } else {
10010
940k
          xmlParseElementStart(ctxt);
10011
940k
            }
10012
1.18M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
1.98M
  else if (*cur == '&') {
10020
415k
      xmlParseReference(ctxt);
10021
415k
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
1.56M
  else {
10027
1.56M
      xmlParseCharData(ctxt, 0);
10028
1.56M
  }
10029
10030
3.17M
  GROW;
10031
3.17M
  SHRINK;
10032
3.17M
    }
10033
39.8k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
17.6k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
17.6k
    int nameNr = ctxt->nameNr;
10047
10048
17.6k
    xmlParseContentInternal(ctxt);
10049
10050
17.6k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
141
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
141
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
141
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
141
                "Premature end of data in tag %s line %d\n",
10055
141
    name, line, NULL);
10056
141
    }
10057
17.6k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
43.0k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
43.0k
    if (xmlParseElementStart(ctxt) != 0)
10078
20.7k
        return;
10079
10080
22.2k
    xmlParseContentInternal(ctxt);
10081
22.2k
    if (ctxt->instate == XML_PARSER_EOF)
10082
124
  return;
10083
10084
22.1k
    if (CUR == 0) {
10085
14.1k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
14.1k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
14.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
14.1k
                "Premature end of data in tag %s line %d\n",
10089
14.1k
    name, line, NULL);
10090
14.1k
        return;
10091
14.1k
    }
10092
10093
8.02k
    xmlParseElementEnd(ctxt);
10094
8.02k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
983k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
983k
    const xmlChar *name;
10108
983k
    const xmlChar *prefix = NULL;
10109
983k
    const xmlChar *URI = NULL;
10110
983k
    xmlParserNodeInfo node_info;
10111
983k
    int line, tlen = 0;
10112
983k
    xmlNodePtr ret;
10113
983k
    int nsNr = ctxt->nsNr;
10114
10115
983k
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
983k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
983k
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
983k
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
983k
    else if (*ctxt->space == -2)
10134
29.8k
  spacePush(ctxt, -1);
10135
953k
    else
10136
953k
  spacePush(ctxt, *ctxt->space);
10137
10138
983k
    line = ctxt->input->line;
10139
983k
#ifdef LIBXML_SAX1_ENABLED
10140
983k
    if (ctxt->sax2)
10141
913k
#endif /* LIBXML_SAX1_ENABLED */
10142
913k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
69.4k
#ifdef LIBXML_SAX1_ENABLED
10144
69.4k
    else
10145
69.4k
  name = xmlParseStartTag(ctxt);
10146
983k
#endif /* LIBXML_SAX1_ENABLED */
10147
983k
    if (ctxt->instate == XML_PARSER_EOF)
10148
97
  return(-1);
10149
983k
    if (name == NULL) {
10150
49.1k
  spacePop(ctxt);
10151
49.1k
        return(-1);
10152
49.1k
    }
10153
934k
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
934k
    ret = ctxt->node;
10155
10156
934k
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
934k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
934k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
934k
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
934k
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
635k
        SKIP(2);
10172
635k
  if (ctxt->sax2) {
10173
609k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
609k
    (!ctxt->disableSAX))
10175
431k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
609k
#ifdef LIBXML_SAX1_ENABLED
10177
609k
  } else {
10178
26.4k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
26.4k
    (!ctxt->disableSAX))
10180
15.6k
    ctxt->sax->endElement(ctxt->userData, name);
10181
26.4k
#endif /* LIBXML_SAX1_ENABLED */
10182
26.4k
  }
10183
635k
  namePop(ctxt);
10184
635k
  spacePop(ctxt);
10185
635k
  if (nsNr != ctxt->nsNr)
10186
950
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
635k
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
635k
  return(1);
10195
635k
    }
10196
298k
    if (RAW == '>') {
10197
269k
        NEXT1;
10198
269k
    } else {
10199
29.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
29.1k
         "Couldn't find end of Start Tag %s line %d\n",
10201
29.1k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
29.1k
  nodePop(ctxt);
10207
29.1k
  namePop(ctxt);
10208
29.1k
  spacePop(ctxt);
10209
29.1k
  if (nsNr != ctxt->nsNr)
10210
2.57k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
29.1k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
29.1k
  return(-1);
10223
29.1k
    }
10224
10225
269k
    return(0);
10226
298k
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
243k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
243k
    xmlParserNodeInfo node_info;
10237
243k
    xmlNodePtr ret = ctxt->node;
10238
10239
243k
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
243k
    if (ctxt->sax2) {
10249
237k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
237k
  namePop(ctxt);
10251
237k
    }
10252
6.02k
#ifdef LIBXML_SAX1_ENABLED
10253
6.02k
    else
10254
6.02k
  xmlParseEndTag1(ctxt, 0);
10255
243k
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
243k
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
243k
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
70.4k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
70.4k
    xmlChar *buf = NULL;
10286
70.4k
    int len = 0;
10287
70.4k
    int size = 10;
10288
70.4k
    xmlChar cur;
10289
10290
70.4k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
70.4k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
70.4k
    cur = CUR;
10296
70.4k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
586
  xmlFree(buf);
10298
586
  return(NULL);
10299
586
    }
10300
69.8k
    buf[len++] = cur;
10301
69.8k
    NEXT;
10302
69.8k
    cur=CUR;
10303
69.8k
    if (cur != '.') {
10304
1.11k
  xmlFree(buf);
10305
1.11k
  return(NULL);
10306
1.11k
    }
10307
68.7k
    buf[len++] = cur;
10308
68.7k
    NEXT;
10309
68.7k
    cur=CUR;
10310
157k
    while ((cur >= '0') && (cur <= '9')) {
10311
89.0k
  if (len + 1 >= size) {
10312
468
      xmlChar *tmp;
10313
10314
468
      size *= 2;
10315
468
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
468
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
468
      buf = tmp;
10322
468
  }
10323
89.0k
  buf[len++] = cur;
10324
89.0k
  NEXT;
10325
89.0k
  cur=CUR;
10326
89.0k
    }
10327
68.7k
    buf[len] = 0;
10328
68.7k
    return(buf);
10329
68.7k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
80.7k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
80.7k
    xmlChar *version = NULL;
10349
10350
80.7k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
72.0k
  SKIP(7);
10352
72.0k
  SKIP_BLANKS;
10353
72.0k
  if (RAW != '=') {
10354
714
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
714
      return(NULL);
10356
714
        }
10357
71.2k
  NEXT;
10358
71.2k
  SKIP_BLANKS;
10359
71.2k
  if (RAW == '"') {
10360
63.3k
      NEXT;
10361
63.3k
      version = xmlParseVersionNum(ctxt);
10362
63.3k
      if (RAW != '"') {
10363
2.90k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
2.90k
      } else
10365
60.3k
          NEXT;
10366
63.3k
  } else if (RAW == '\''){
10367
7.16k
      NEXT;
10368
7.16k
      version = xmlParseVersionNum(ctxt);
10369
7.16k
      if (RAW != '\'') {
10370
366
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
366
      } else
10372
6.79k
          NEXT;
10373
7.16k
  } else {
10374
825
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
825
  }
10376
71.2k
    }
10377
80.0k
    return(version);
10378
80.7k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
24.1k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
24.1k
    xmlChar *buf = NULL;
10395
24.1k
    int len = 0;
10396
24.1k
    int size = 10;
10397
24.1k
    xmlChar cur;
10398
10399
24.1k
    cur = CUR;
10400
24.1k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
24.1k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
23.9k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
23.9k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
23.9k
  buf[len++] = cur;
10409
23.9k
  NEXT;
10410
23.9k
  cur = CUR;
10411
311k
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
311k
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
311k
         ((cur >= '0') && (cur <= '9')) ||
10414
311k
         (cur == '.') || (cur == '_') ||
10415
311k
         (cur == '-')) {
10416
287k
      if (len + 1 >= size) {
10417
11.3k
          xmlChar *tmp;
10418
10419
11.3k
    size *= 2;
10420
11.3k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
11.3k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
11.3k
    buf = tmp;
10427
11.3k
      }
10428
287k
      buf[len++] = cur;
10429
287k
      NEXT;
10430
287k
      cur = CUR;
10431
287k
      if (cur == 0) {
10432
480
          SHRINK;
10433
480
    GROW;
10434
480
    cur = CUR;
10435
480
      }
10436
287k
        }
10437
23.9k
  buf[len] = 0;
10438
23.9k
    } else {
10439
248
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
248
    }
10441
24.1k
    return(buf);
10442
24.1k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
50.8k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
50.8k
    xmlChar *encoding = NULL;
10462
10463
50.8k
    SKIP_BLANKS;
10464
50.8k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
24.4k
  SKIP(8);
10466
24.4k
  SKIP_BLANKS;
10467
24.4k
  if (RAW != '=') {
10468
134
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
134
      return(NULL);
10470
134
        }
10471
24.3k
  NEXT;
10472
24.3k
  SKIP_BLANKS;
10473
24.3k
  if (RAW == '"') {
10474
20.8k
      NEXT;
10475
20.8k
      encoding = xmlParseEncName(ctxt);
10476
20.8k
      if (RAW != '"') {
10477
1.21k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
1.21k
    xmlFree((xmlChar *) encoding);
10479
1.21k
    return(NULL);
10480
1.21k
      } else
10481
19.6k
          NEXT;
10482
20.8k
  } else if (RAW == '\''){
10483
3.31k
      NEXT;
10484
3.31k
      encoding = xmlParseEncName(ctxt);
10485
3.31k
      if (RAW != '\'') {
10486
219
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
219
    xmlFree((xmlChar *) encoding);
10488
219
    return(NULL);
10489
219
      } else
10490
3.09k
          NEXT;
10491
3.31k
  } else {
10492
153
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
153
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
22.8k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
2.38k
      xmlFree((xmlChar *) encoding);
10500
2.38k
            return(NULL);
10501
2.38k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
20.5k
        if ((encoding != NULL) &&
10508
20.5k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
20.3k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
3
      if ((ctxt->encoding == NULL) &&
10517
3
          (ctxt->input->buf != NULL) &&
10518
3
          (ctxt->input->buf->encoder == NULL)) {
10519
3
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
3
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
3
      }
10522
3
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
3
      ctxt->encoding = encoding;
10525
3
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
20.5k
        else if ((encoding != NULL) &&
10530
20.5k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
20.3k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
9.63k
      if (ctxt->encoding != NULL)
10533
0
    xmlFree((xmlChar *) ctxt->encoding);
10534
9.63k
      ctxt->encoding = encoding;
10535
9.63k
  }
10536
10.8k
  else if (encoding != NULL) {
10537
10.7k
      xmlCharEncodingHandlerPtr handler;
10538
10539
10.7k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
10.7k
      ctxt->input->encoding = encoding;
10542
10543
10.7k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
10.7k
      if (handler != NULL) {
10545
9.81k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
57
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
57
        return(NULL);
10549
57
    }
10550
9.81k
      } else {
10551
903
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
903
      "Unsupported encoding %s\n", encoding);
10553
903
    return(NULL);
10554
903
      }
10555
10.7k
  }
10556
20.5k
    }
10557
45.9k
    return(encoding);
10558
50.8k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
41.3k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
41.3k
    int standalone = -2;
10596
10597
41.3k
    SKIP_BLANKS;
10598
41.3k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
10.1k
  SKIP(10);
10600
10.1k
        SKIP_BLANKS;
10601
10.1k
  if (RAW != '=') {
10602
141
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
141
      return(standalone);
10604
141
        }
10605
10.0k
  NEXT;
10606
10.0k
  SKIP_BLANKS;
10607
10.0k
        if (RAW == '\''){
10608
2.47k
      NEXT;
10609
2.47k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
1.83k
          standalone = 0;
10611
1.83k
                SKIP(2);
10612
1.83k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
642
                 (NXT(2) == 's')) {
10614
507
          standalone = 1;
10615
507
    SKIP(3);
10616
507
            } else {
10617
135
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
135
      }
10619
2.47k
      if (RAW != '\'') {
10620
234
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
234
      } else
10622
2.24k
          NEXT;
10623
7.53k
  } else if (RAW == '"'){
10624
7.41k
      NEXT;
10625
7.41k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
3.31k
          standalone = 0;
10627
3.31k
    SKIP(2);
10628
4.10k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
4.10k
                 (NXT(2) == 's')) {
10630
3.89k
          standalone = 1;
10631
3.89k
                SKIP(3);
10632
3.89k
            } else {
10633
213
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
213
      }
10635
7.41k
      if (RAW != '"') {
10636
390
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
390
      } else
10638
7.02k
          NEXT;
10639
7.41k
  } else {
10640
123
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
123
        }
10642
10.0k
    }
10643
41.2k
    return(standalone);
10644
41.3k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
79.2k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
79.2k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
79.2k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
79.2k
    SKIP(5);
10672
10673
79.2k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
79.2k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
79.2k
    version = xmlParseVersionInfo(ctxt);
10683
79.2k
    if (version == NULL) {
10684
11.5k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
67.7k
    } else {
10686
67.7k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
1.84k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
429
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
429
                "Unsupported version '%s'\n",
10693
429
                version);
10694
1.41k
      } else {
10695
1.41k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
936
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
936
                      "Unsupported version '%s'\n",
10698
936
          version, NULL);
10699
936
    } else {
10700
477
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
477
              "Unsupported version '%s'\n",
10702
477
              version);
10703
477
    }
10704
1.41k
      }
10705
1.84k
  }
10706
67.7k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
67.7k
  ctxt->version = version;
10709
67.7k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
79.2k
    if (!IS_BLANK_CH(RAW)) {
10715
43.5k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
29.8k
      SKIP(2);
10717
29.8k
      return;
10718
29.8k
  }
10719
13.6k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
13.6k
    }
10721
49.4k
    xmlParseEncodingDecl(ctxt);
10722
49.4k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
49.4k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
924
        return;
10728
924
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
48.4k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
7.51k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
7.10k
      SKIP(2);
10736
7.10k
      return;
10737
7.10k
  }
10738
411
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
411
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
41.3k
    GROW;
10745
10746
41.3k
    SKIP_BLANKS;
10747
41.3k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
41.3k
    SKIP_BLANKS;
10750
41.3k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
21.7k
        SKIP(2);
10752
21.7k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
327
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
327
  NEXT;
10756
19.2k
    } else {
10757
19.2k
        int c;
10758
10759
19.2k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
1.14M
        while ((c = CUR) != 0) {
10761
1.14M
            NEXT;
10762
1.14M
            if (c == '>')
10763
15.2k
                break;
10764
1.14M
        }
10765
19.2k
    }
10766
41.3k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
123k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
134k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
134k
        SKIP_BLANKS;
10783
134k
        GROW;
10784
134k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
6.76k
      xmlParsePI(ctxt);
10786
127k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
4.40k
      xmlParseComment(ctxt);
10788
123k
        } else {
10789
123k
            break;
10790
123k
        }
10791
134k
    }
10792
123k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
62.7k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
62.7k
    xmlChar start[4];
10812
62.7k
    xmlCharEncoding enc;
10813
10814
62.7k
    xmlInitParser();
10815
10816
62.7k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
62.7k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
62.7k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
62.7k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
62.7k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
62.7k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
62.7k
    if ((ctxt->encoding == NULL) &&
10835
62.7k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
62.5k
  start[0] = RAW;
10842
62.5k
  start[1] = NXT(1);
10843
62.5k
  start[2] = NXT(2);
10844
62.5k
  start[3] = NXT(3);
10845
62.5k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
62.5k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
31.4k
      xmlSwitchEncoding(ctxt, enc);
10848
31.4k
  }
10849
62.5k
    }
10850
10851
10852
62.7k
    if (CUR == 0) {
10853
258
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
258
  return(-1);
10855
258
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
62.4k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
5.27k
       GROW;
10865
5.27k
    }
10866
62.4k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
26.4k
  xmlParseXMLDecl(ctxt);
10872
26.4k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
26.4k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
308
      return(-1);
10878
308
  }
10879
26.1k
  ctxt->standalone = ctxt->input->standalone;
10880
26.1k
  SKIP_BLANKS;
10881
36.0k
    } else {
10882
36.0k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
36.0k
    }
10884
62.1k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
58.9k
        ctxt->sax->startDocument(ctxt->userData);
10886
62.1k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
62.1k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
62.1k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
62.1k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
62.1k
    GROW;
10903
62.1k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
28.2k
  ctxt->inSubset = 1;
10906
28.2k
  xmlParseDocTypeDecl(ctxt);
10907
28.2k
  if (RAW == '[') {
10908
19.4k
      ctxt->instate = XML_PARSER_DTD;
10909
19.4k
      xmlParseInternalSubset(ctxt);
10910
19.4k
      if (ctxt->instate == XML_PARSER_EOF)
10911
8.93k
    return(-1);
10912
19.4k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
19.3k
  ctxt->inSubset = 2;
10918
19.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
19.3k
      (!ctxt->disableSAX))
10920
16.7k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
16.7k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
19.3k
  if (ctxt->instate == XML_PARSER_EOF)
10923
1.33k
      return(-1);
10924
17.9k
  ctxt->inSubset = 0;
10925
10926
17.9k
        xmlCleanSpecialAttr(ctxt);
10927
10928
17.9k
  ctxt->instate = XML_PARSER_PROLOG;
10929
17.9k
  xmlParseMisc(ctxt);
10930
17.9k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
51.9k
    GROW;
10936
51.9k
    if (RAW != '<') {
10937
8.87k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
8.87k
           "Start tag expected, '<' not found\n");
10939
43.0k
    } else {
10940
43.0k
  ctxt->instate = XML_PARSER_CONTENT;
10941
43.0k
  xmlParseElement(ctxt);
10942
43.0k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
43.0k
  xmlParseMisc(ctxt);
10949
10950
43.0k
  if (RAW != 0) {
10951
11.7k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
11.7k
  }
10953
43.0k
  ctxt->instate = XML_PARSER_EOF;
10954
43.0k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
51.9k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
51.9k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
51.9k
    if ((ctxt->myDoc != NULL) &&
10966
51.9k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
201
  xmlFreeDoc(ctxt->myDoc);
10968
201
  ctxt->myDoc = NULL;
10969
201
    }
10970
10971
51.9k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
6.80k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
6.80k
  if (ctxt->valid)
10974
5.32k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
6.80k
  if (ctxt->nsWellFormed)
10976
6.40k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
6.80k
  if (ctxt->options & XML_PARSE_OLD10)
10978
805
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
6.80k
    }
10980
51.9k
    if (! ctxt->wellFormed) {
10981
45.1k
  ctxt->valid = 0;
10982
45.1k
  return(-1);
10983
45.1k
    }
10984
6.80k
    return(0);
10985
51.9k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
1.12M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
1.12M
    const xmlChar *cur;
11110
11111
1.12M
    if (ctxt->checkIndex == 0) {
11112
1.02M
        cur = ctxt->input->cur + 1;
11113
1.02M
    } else {
11114
95.9k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
95.9k
    }
11116
11117
1.12M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
99.3k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
99.3k
        return(0);
11120
1.02M
    } else {
11121
1.02M
        ctxt->checkIndex = 0;
11122
1.02M
        return(1);
11123
1.02M
    }
11124
1.12M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
842k
                     const char *str, size_t strLen) {
11138
842k
    const xmlChar *cur, *term;
11139
11140
842k
    if (ctxt->checkIndex == 0) {
11141
227k
        cur = ctxt->input->cur + startDelta;
11142
614k
    } else {
11143
614k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
614k
    }
11145
11146
842k
    term = BAD_CAST strstr((const char *) cur, str);
11147
842k
    if (term == NULL) {
11148
697k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
697k
        if ((size_t) (end - cur) < strLen)
11152
6.34k
            end = cur;
11153
690k
        else
11154
690k
            end -= strLen - 1;
11155
697k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
697k
    } else {
11157
144k
        ctxt->checkIndex = 0;
11158
144k
    }
11159
11160
842k
    return(term);
11161
842k
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
1.93M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
1.93M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
1.93M
    const xmlChar *end = ctxt->input->end;
11173
11174
75.6M
    while (cur < end) {
11175
75.4M
        if ((*cur == '<') || (*cur == '&')) {
11176
1.67M
            ctxt->checkIndex = 0;
11177
1.67M
            return(1);
11178
1.67M
        }
11179
73.7M
        cur++;
11180
73.7M
    }
11181
11182
259k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
259k
    return(0);
11184
1.93M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
3.16M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
3.16M
    const xmlChar *cur;
11196
3.16M
    const xmlChar *end = ctxt->input->end;
11197
3.16M
    int state = ctxt->endCheckState;
11198
11199
3.16M
    if (ctxt->checkIndex == 0)
11200
1.65M
        cur = ctxt->input->cur + 1;
11201
1.50M
    else
11202
1.50M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
293M
    while (cur < end) {
11205
291M
        if (state) {
11206
173M
            if (*cur == state)
11207
3.52M
                state = 0;
11208
173M
        } else if (*cur == '\'' || *cur == '"') {
11209
3.54M
            state = *cur;
11210
114M
        } else if (*cur == '>') {
11211
1.62M
            ctxt->checkIndex = 0;
11212
1.62M
            ctxt->endCheckState = 0;
11213
1.62M
            return(1);
11214
1.62M
        }
11215
290M
        cur++;
11216
290M
    }
11217
11218
1.53M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
1.53M
    ctxt->endCheckState = state;
11220
1.53M
    return(0);
11221
3.16M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
364k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
364k
    const xmlChar *cur, *start;
11240
364k
    const xmlChar *end = ctxt->input->end;
11241
364k
    int state = ctxt->endCheckState;
11242
11243
364k
    if (ctxt->checkIndex == 0) {
11244
35.0k
        cur = ctxt->input->cur + 1;
11245
329k
    } else {
11246
329k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
329k
    }
11248
364k
    start = cur;
11249
11250
77.3M
    while (cur < end) {
11251
77.0M
        if (state == '-') {
11252
9.09M
            if ((*cur == '-') &&
11253
9.09M
                (cur[1] == '-') &&
11254
9.09M
                (cur[2] == '>')) {
11255
18.6k
                state = 0;
11256
18.6k
                cur += 3;
11257
18.6k
                start = cur;
11258
18.6k
                continue;
11259
18.6k
            }
11260
9.09M
        }
11261
67.9M
        else if (state == ']') {
11262
56.2k
            if (*cur == '>') {
11263
26.0k
                ctxt->checkIndex = 0;
11264
26.0k
                ctxt->endCheckState = 0;
11265
26.0k
                return(1);
11266
26.0k
            }
11267
30.2k
            if (IS_BLANK_CH(*cur)) {
11268
5.20k
                state = ' ';
11269
25.0k
            } else if (*cur != ']') {
11270
6.47k
                state = 0;
11271
6.47k
                start = cur;
11272
6.47k
                continue;
11273
6.47k
            }
11274
30.2k
        }
11275
67.8M
        else if (state == ' ') {
11276
13.8k
            if (*cur == '>') {
11277
594
                ctxt->checkIndex = 0;
11278
594
                ctxt->endCheckState = 0;
11279
594
                return(1);
11280
594
            }
11281
13.2k
            if (!IS_BLANK_CH(*cur)) {
11282
4.60k
                state = 0;
11283
4.60k
                start = cur;
11284
4.60k
                continue;
11285
4.60k
            }
11286
13.2k
        }
11287
67.8M
        else if (state != 0) {
11288
38.4M
            if (*cur == state) {
11289
219k
                state = 0;
11290
219k
                start = cur + 1;
11291
219k
            }
11292
38.4M
        }
11293
29.4M
        else if (*cur == '<') {
11294
270k
            if ((cur[1] == '!') &&
11295
270k
                (cur[2] == '-') &&
11296
270k
                (cur[3] == '-')) {
11297
18.9k
                state = '-';
11298
18.9k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
18.9k
                start = cur;
11301
18.9k
                continue;
11302
18.9k
            }
11303
270k
        }
11304
29.1M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
260k
            state = *cur;
11306
260k
        }
11307
11308
76.9M
        cur++;
11309
76.9M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
338k
    if ((state == 0) || (state == '-')) {
11316
161k
        if (cur - start < 3)
11317
3.90k
            cur = start;
11318
157k
        else
11319
157k
            cur -= 3;
11320
161k
    }
11321
338k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
338k
    ctxt->endCheckState = state;
11323
338k
    return(0);
11324
364k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
125k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
125k
    int ix;
11340
125k
    unsigned char c;
11341
125k
    int codepoint;
11342
11343
125k
    if ((utf == NULL) || (len <= 0))
11344
177
        return(0);
11345
11346
2.27M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
2.26M
        c = utf[ix];
11348
2.26M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
1.80M
      if (c >= 0x20)
11350
1.64M
    ix++;
11351
163k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
144k
          ix++;
11353
19.1k
      else
11354
19.1k
          return(-ix);
11355
1.80M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
176k
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
173k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
14.6k
          return(-ix);
11359
159k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
159k
      codepoint |= utf[ix+1] & 0x3f;
11361
159k
      if (!xmlIsCharQ(codepoint))
11362
3.66k
          return(-ix);
11363
155k
      ix += 2;
11364
284k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
110k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
108k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
108k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
22.4k
        return(-ix);
11369
85.7k
      codepoint = (utf[ix] & 0xf) << 12;
11370
85.7k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
85.7k
      codepoint |= utf[ix+2] & 0x3f;
11372
85.7k
      if (!xmlIsCharQ(codepoint))
11373
367
          return(-ix);
11374
85.3k
      ix += 3;
11375
174k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
151k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
148k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
148k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
148k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
25.7k
        return(-ix);
11381
122k
      codepoint = (utf[ix] & 0x7) << 18;
11382
122k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
122k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
122k
      codepoint |= utf[ix+3] & 0x3f;
11385
122k
      if (!xmlIsCharQ(codepoint))
11386
35
          return(-ix);
11387
122k
      ix += 4;
11388
122k
  } else       /* unknown encoding */
11389
23.0k
      return(-ix);
11390
2.26M
      }
11391
9.16k
      return(ix);
11392
125k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
3.20M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
3.20M
    int ret = 0;
11406
3.20M
    int avail, tlen;
11407
3.20M
    xmlChar cur, next;
11408
11409
3.20M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
3.20M
    if ((ctxt->input != NULL) &&
11466
3.20M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
23.6k
        xmlParserInputShrink(ctxt->input);
11468
23.6k
    }
11469
11470
106M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
106M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
40.2k
      return(0);
11473
11474
106M
  if (ctxt->input == NULL) break;
11475
106M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
106M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
106M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
106M
          (ctxt->input->buf->raw != NULL) &&
11488
106M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
723k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
723k
                                                 ctxt->input);
11491
723k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
723k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
723k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
723k
                                      base, current);
11496
723k
      }
11497
106M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
106M
        (ctxt->input->cur - ctxt->input->base);
11499
106M
  }
11500
106M
        if (avail < 1)
11501
104k
      goto done;
11502
106M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
653k
            case XML_PARSER_START:
11509
653k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
94.7k
        xmlChar start[4];
11511
94.7k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
94.7k
        if (avail < 4)
11517
1.18k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
93.5k
        start[0] = RAW;
11527
93.5k
        start[1] = NXT(1);
11528
93.5k
        start[2] = NXT(2);
11529
93.5k
        start[3] = NXT(3);
11530
93.5k
        enc = xmlDetectCharEncoding(start, 4);
11531
93.5k
        xmlSwitchEncoding(ctxt, enc);
11532
93.5k
        break;
11533
94.7k
    }
11534
11535
558k
    if (avail < 2)
11536
61
        goto done;
11537
558k
    cur = ctxt->input->cur[0];
11538
558k
    next = ctxt->input->cur[1];
11539
558k
    if (cur == 0) {
11540
424
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
424
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
424
                  &xmlDefaultSAXLocator);
11543
424
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
424
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
424
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
424
      ctxt->sax->endDocument(ctxt->userData);
11551
424
        goto done;
11552
424
    }
11553
558k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
496k
        if (avail < 5) goto done;
11556
495k
        if ((!terminate) &&
11557
495k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
434k
      goto done;
11559
61.7k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
61.7k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
61.7k
                  &xmlDefaultSAXLocator);
11562
61.7k
        if ((ctxt->input->cur[2] == 'x') &&
11563
61.7k
      (ctxt->input->cur[3] == 'm') &&
11564
61.7k
      (ctxt->input->cur[4] == 'l') &&
11565
61.7k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
52.8k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
52.8k
      xmlParseXMLDecl(ctxt);
11572
52.8k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
616
          xmlHaltParser(ctxt);
11578
616
          return(0);
11579
616
      }
11580
52.1k
      ctxt->standalone = ctxt->input->standalone;
11581
52.1k
      if ((ctxt->encoding == NULL) &&
11582
52.1k
          (ctxt->input->encoding != NULL))
11583
6.49k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
52.1k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
52.1k
          (!ctxt->disableSAX))
11586
45.7k
          ctxt->sax->startDocument(ctxt->userData);
11587
52.1k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
52.1k
        } else {
11593
8.95k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
8.95k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
8.95k
          (!ctxt->disableSAX))
11596
8.95k
          ctxt->sax->startDocument(ctxt->userData);
11597
8.95k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
8.95k
        }
11603
62.3k
    } else {
11604
62.3k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
62.3k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
62.3k
                  &xmlDefaultSAXLocator);
11607
62.3k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
62.3k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
62.3k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
62.3k
            (!ctxt->disableSAX))
11614
62.3k
      ctxt->sax->startDocument(ctxt->userData);
11615
62.3k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
62.3k
    }
11621
123k
    break;
11622
3.42M
            case XML_PARSER_START_TAG: {
11623
3.42M
          const xmlChar *name;
11624
3.42M
    const xmlChar *prefix = NULL;
11625
3.42M
    const xmlChar *URI = NULL;
11626
3.42M
                int line = ctxt->input->line;
11627
3.42M
    int nsNr = ctxt->nsNr;
11628
11629
3.42M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
3.42M
    cur = ctxt->input->cur[0];
11632
3.42M
          if (cur != '<') {
11633
6.57k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
6.57k
        xmlHaltParser(ctxt);
11635
6.57k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
6.57k
      ctxt->sax->endDocument(ctxt->userData);
11637
6.57k
        goto done;
11638
6.57k
    }
11639
3.41M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
1.47M
                    goto done;
11641
1.94M
    if (ctxt->spaceNr == 0)
11642
46.6k
        spacePush(ctxt, -1);
11643
1.90M
    else if (*ctxt->space == -2)
11644
254k
        spacePush(ctxt, -1);
11645
1.64M
    else
11646
1.64M
        spacePush(ctxt, *ctxt->space);
11647
1.94M
#ifdef LIBXML_SAX1_ENABLED
11648
1.94M
    if (ctxt->sax2)
11649
1.58M
#endif /* LIBXML_SAX1_ENABLED */
11650
1.58M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
364k
#ifdef LIBXML_SAX1_ENABLED
11652
364k
    else
11653
364k
        name = xmlParseStartTag(ctxt);
11654
1.94M
#endif /* LIBXML_SAX1_ENABLED */
11655
1.94M
    if (ctxt->instate == XML_PARSER_EOF)
11656
192
        goto done;
11657
1.94M
    if (name == NULL) {
11658
7.57k
        spacePop(ctxt);
11659
7.57k
        xmlHaltParser(ctxt);
11660
7.57k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
7.57k
      ctxt->sax->endDocument(ctxt->userData);
11662
7.57k
        goto done;
11663
7.57k
    }
11664
1.94M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
1.94M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
1.94M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
1.94M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
1.94M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
1.18M
        SKIP(2);
11680
11681
1.18M
        if (ctxt->sax2) {
11682
977k
      if ((ctxt->sax != NULL) &&
11683
977k
          (ctxt->sax->endElementNs != NULL) &&
11684
977k
          (!ctxt->disableSAX))
11685
976k
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
976k
                                  prefix, URI);
11687
977k
      if (ctxt->nsNr - nsNr > 0)
11688
2.09k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
977k
#ifdef LIBXML_SAX1_ENABLED
11690
977k
        } else {
11691
208k
      if ((ctxt->sax != NULL) &&
11692
208k
          (ctxt->sax->endElement != NULL) &&
11693
208k
          (!ctxt->disableSAX))
11694
207k
          ctxt->sax->endElement(ctxt->userData, name);
11695
208k
#endif /* LIBXML_SAX1_ENABLED */
11696
208k
        }
11697
1.18M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
1.18M
        spacePop(ctxt);
11700
1.18M
        if (ctxt->nameNr == 0) {
11701
5.90k
      ctxt->instate = XML_PARSER_EPILOG;
11702
1.17M
        } else {
11703
1.17M
      ctxt->instate = XML_PARSER_CONTENT;
11704
1.17M
        }
11705
1.18M
        break;
11706
1.18M
    }
11707
755k
    if (RAW == '>') {
11708
577k
        NEXT;
11709
577k
    } else {
11710
178k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
178k
           "Couldn't find end of Start Tag %s\n",
11712
178k
           name);
11713
178k
        nodePop(ctxt);
11714
178k
        spacePop(ctxt);
11715
178k
    }
11716
755k
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
755k
    ctxt->instate = XML_PARSER_CONTENT;
11719
755k
                break;
11720
1.94M
      }
11721
101M
            case XML_PARSER_CONTENT: {
11722
101M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
21.5k
        goto done;
11724
101M
    cur = ctxt->input->cur[0];
11725
101M
    next = ctxt->input->cur[1];
11726
11727
101M
    if ((cur == '<') && (next == '/')) {
11728
512k
        ctxt->instate = XML_PARSER_END_TAG;
11729
512k
        break;
11730
100M
          } else if ((cur == '<') && (next == '?')) {
11731
34.1k
        if ((!terminate) &&
11732
34.1k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
21.4k
      goto done;
11734
12.6k
        xmlParsePI(ctxt);
11735
12.6k
        ctxt->instate = XML_PARSER_CONTENT;
11736
100M
    } else if ((cur == '<') && (next != '!')) {
11737
1.87M
        ctxt->instate = XML_PARSER_START_TAG;
11738
1.87M
        break;
11739
98.8M
    } else if ((cur == '<') && (next == '!') &&
11740
98.8M
               (ctxt->input->cur[2] == '-') &&
11741
98.8M
         (ctxt->input->cur[3] == '-')) {
11742
156k
        if ((!terminate) &&
11743
156k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
113k
      goto done;
11745
42.8k
        xmlParseComment(ctxt);
11746
42.8k
        ctxt->instate = XML_PARSER_CONTENT;
11747
98.6M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
98.6M
        (ctxt->input->cur[2] == '[') &&
11749
98.6M
        (ctxt->input->cur[3] == 'C') &&
11750
98.6M
        (ctxt->input->cur[4] == 'D') &&
11751
98.6M
        (ctxt->input->cur[5] == 'A') &&
11752
98.6M
        (ctxt->input->cur[6] == 'T') &&
11753
98.6M
        (ctxt->input->cur[7] == 'A') &&
11754
98.6M
        (ctxt->input->cur[8] == '[')) {
11755
7.08k
        SKIP(9);
11756
7.08k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
7.08k
        break;
11758
98.6M
    } else if ((cur == '<') && (next == '!') &&
11759
98.6M
               (avail < 9)) {
11760
2.46k
        goto done;
11761
98.6M
    } else if (cur == '<') {
11762
78.9k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
78.9k
                    "detected an error in element content\n");
11764
78.9k
                    SKIP(1);
11765
98.5M
    } else if (cur == '&') {
11766
786k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
55.3k
      goto done;
11768
731k
        xmlParseReference(ctxt);
11769
97.8M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
97.8M
        if ((ctxt->inputNr == 1) &&
11783
97.8M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
2.22M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
259k
          goto done;
11786
2.22M
                    }
11787
97.5M
                    ctxt->checkIndex = 0;
11788
97.5M
        xmlParseCharData(ctxt, 0);
11789
97.5M
    }
11790
98.4M
    break;
11791
101M
      }
11792
98.4M
            case XML_PARSER_END_TAG:
11793
555k
    if (avail < 2)
11794
0
        goto done;
11795
555k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
43.9k
        goto done;
11797
511k
    if (ctxt->sax2) {
11798
435k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
435k
        nameNsPop(ctxt);
11800
435k
    }
11801
76.3k
#ifdef LIBXML_SAX1_ENABLED
11802
76.3k
      else
11803
76.3k
        xmlParseEndTag1(ctxt, 0);
11804
511k
#endif /* LIBXML_SAX1_ENABLED */
11805
511k
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
511k
    } else if (ctxt->nameNr == 0) {
11808
13.6k
        ctxt->instate = XML_PARSER_EPILOG;
11809
498k
    } else {
11810
498k
        ctxt->instate = XML_PARSER_CONTENT;
11811
498k
    }
11812
511k
    break;
11813
139k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
139k
    const xmlChar *term;
11819
11820
139k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
2.80k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
2.80k
                                           "]]>");
11827
136k
                } else {
11828
136k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
136k
                }
11830
11831
139k
    if (term == NULL) {
11832
85.5k
        int tmp, size;
11833
11834
85.5k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
2.12k
                        size = ctxt->input->end - ctxt->input->cur;
11837
83.4k
                    } else {
11838
83.4k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
13.3k
                            goto done;
11840
70.0k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
70.0k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
70.0k
                    }
11844
72.1k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
72.1k
                    if (tmp <= 0) {
11846
65.5k
                        tmp = -tmp;
11847
65.5k
                        ctxt->input->cur += tmp;
11848
65.5k
                        goto encoding_error;
11849
65.5k
                    }
11850
6.59k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
6.59k
                        if (ctxt->sax->cdataBlock != NULL)
11852
5.40k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
5.40k
                                                  ctxt->input->cur, tmp);
11854
1.19k
                        else if (ctxt->sax->characters != NULL)
11855
1.19k
                            ctxt->sax->characters(ctxt->userData,
11856
1.19k
                                                  ctxt->input->cur, tmp);
11857
6.59k
                    }
11858
6.59k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
6.59k
                    SKIPL(tmp);
11861
53.7k
    } else {
11862
53.7k
                    int base = term - CUR_PTR;
11863
53.7k
        int tmp;
11864
11865
53.7k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
53.7k
        if ((tmp < 0) || (tmp != base)) {
11867
50.4k
      tmp = -tmp;
11868
50.4k
      ctxt->input->cur += tmp;
11869
50.4k
      goto encoding_error;
11870
50.4k
        }
11871
3.32k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
3.32k
            (ctxt->sax->cdataBlock != NULL) &&
11873
3.32k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
83
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
83
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
83
                     "<![CDATA[", 9)))
11882
82
           ctxt->sax->cdataBlock(ctxt->userData,
11883
82
                                 BAD_CAST "", 0);
11884
3.24k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
3.24k
      (!ctxt->disableSAX)) {
11886
3.14k
      if (ctxt->sax->cdataBlock != NULL)
11887
2.58k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
2.58k
              ctxt->input->cur, base);
11889
563
      else if (ctxt->sax->characters != NULL)
11890
563
          ctxt->sax->characters(ctxt->userData,
11891
563
              ctxt->input->cur, base);
11892
3.14k
        }
11893
3.32k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
3.32k
        SKIPL(base + 3);
11896
3.32k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
3.32k
    }
11902
9.91k
    break;
11903
139k
      }
11904
232k
            case XML_PARSER_MISC:
11905
274k
            case XML_PARSER_PROLOG:
11906
295k
            case XML_PARSER_EPILOG:
11907
295k
    SKIP_BLANKS;
11908
295k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
295k
    else
11912
295k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
295k
                (ctxt->input->cur - ctxt->input->base);
11914
295k
    if (avail < 2)
11915
17.4k
        goto done;
11916
277k
    cur = ctxt->input->cur[0];
11917
277k
    next = ctxt->input->cur[1];
11918
277k
          if ((cur == '<') && (next == '?')) {
11919
28.1k
        if ((!terminate) &&
11920
28.1k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
16.4k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
11.6k
        xmlParsePI(ctxt);
11927
11.6k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
249k
    } else if ((cur == '<') && (next == '!') &&
11930
249k
        (ctxt->input->cur[2] == '-') &&
11931
249k
        (ctxt->input->cur[3] == '-')) {
11932
35.9k
        if ((!terminate) &&
11933
35.9k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
28.4k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
7.54k
        xmlParseComment(ctxt);
11940
7.54k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
213k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
213k
                    (cur == '<') && (next == '!') &&
11944
213k
        (ctxt->input->cur[2] == 'D') &&
11945
213k
        (ctxt->input->cur[3] == 'O') &&
11946
213k
        (ctxt->input->cur[4] == 'C') &&
11947
213k
        (ctxt->input->cur[5] == 'T') &&
11948
213k
        (ctxt->input->cur[6] == 'Y') &&
11949
213k
        (ctxt->input->cur[7] == 'P') &&
11950
213k
        (ctxt->input->cur[8] == 'E')) {
11951
123k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
69.6k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
53.9k
        ctxt->inSubset = 1;
11958
53.9k
        xmlParseDocTypeDecl(ctxt);
11959
53.9k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
53.9k
        if (RAW == '[') {
11962
37.3k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
37.3k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
16.5k
      ctxt->inSubset = 2;
11972
16.5k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
16.5k
          (ctxt->sax->externalSubset != NULL))
11974
13.9k
          ctxt->sax->externalSubset(ctxt->userData,
11975
13.9k
            ctxt->intSubName, ctxt->extSubSystem,
11976
13.9k
            ctxt->extSubURI);
11977
16.5k
      ctxt->inSubset = 0;
11978
16.5k
      xmlCleanSpecialAttr(ctxt);
11979
16.5k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
16.5k
        }
11985
90.3k
    } else if ((cur == '<') && (next == '!') &&
11986
90.3k
               (avail <
11987
2.90k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
1.23k
        goto done;
11989
89.0k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
2.62k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
2.62k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
2.62k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
2.62k
      ctxt->sax->endDocument(ctxt->userData);
11998
2.62k
        goto done;
11999
86.4k
                } else {
12000
86.4k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
86.4k
    }
12006
159k
    break;
12007
375k
            case XML_PARSER_DTD: {
12008
375k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
338k
                    goto done;
12010
36.9k
    xmlParseInternalSubset(ctxt);
12011
36.9k
    if (ctxt->instate == XML_PARSER_EOF)
12012
16.5k
        goto done;
12013
20.3k
    ctxt->inSubset = 2;
12014
20.3k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
20.3k
        (ctxt->sax->externalSubset != NULL))
12016
19.3k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
19.3k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
20.3k
    ctxt->inSubset = 0;
12019
20.3k
    xmlCleanSpecialAttr(ctxt);
12020
20.3k
    if (ctxt->instate == XML_PARSER_EOF)
12021
552
        goto done;
12022
19.7k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
19.7k
                break;
12028
20.3k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
106M
  }
12102
106M
    }
12103
3.04M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
3.04M
    return(ret);
12108
116k
encoding_error:
12109
116k
    {
12110
116k
        char buffer[150];
12111
12112
116k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
116k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
116k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
116k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
116k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
116k
         BAD_CAST buffer, NULL);
12118
116k
    }
12119
116k
    return(0);
12120
3.20M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
3.63M
              int terminate) {
12136
3.63M
    int end_in_lf = 0;
12137
3.63M
    int remain = 0;
12138
12139
3.63M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
3.63M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
580k
        return(ctxt->errNo);
12143
3.05M
    if (ctxt->instate == XML_PARSER_EOF)
12144
94
        return(-1);
12145
3.05M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
3.05M
    ctxt->progressive = 1;
12149
3.05M
    if (ctxt->instate == XML_PARSER_START)
12150
407k
        xmlDetectSAX2(ctxt);
12151
3.05M
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
3.05M
        (chunk[size - 1] == '\r')) {
12153
28.0k
  end_in_lf = 1;
12154
28.0k
  size--;
12155
28.0k
    }
12156
12157
3.20M
xmldecl_done:
12158
12159
3.20M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
3.20M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
3.10M
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
3.10M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
3.10M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
3.10M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
3.10M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
223k
            unsigned int len = 45;
12173
12174
223k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
223k
                               BAD_CAST "UTF-16")) ||
12176
223k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
69.1k
                               BAD_CAST "UTF16")))
12178
154k
                len = 90;
12179
69.1k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
69.1k
                                    BAD_CAST "UCS-4")) ||
12181
69.1k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
68.8k
                                    BAD_CAST "UCS4")))
12183
301
                len = 180;
12184
12185
223k
            if (ctxt->input->buf->rawconsumed < len)
12186
1.60k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
223k
            if ((unsigned int) size > len) {
12194
153k
                remain = size - len;
12195
153k
                size = len;
12196
153k
            } else {
12197
69.9k
                remain = 0;
12198
69.9k
            }
12199
223k
        }
12200
3.10M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
3.10M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
3.10M
  if (res < 0) {
12203
383
      ctxt->errNo = XML_PARSER_EOF;
12204
383
      xmlHaltParser(ctxt);
12205
383
      return (XML_PARSER_EOF);
12206
383
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
3.10M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
103k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
103k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
103k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
103k
        (in->raw != NULL)) {
12216
7.70k
    int nbchars;
12217
7.70k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
7.70k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
7.70k
    nbchars = xmlCharEncInput(in, terminate);
12221
7.70k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
7.70k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
204
        xmlGenericError(xmlGenericErrorContext,
12225
204
            "xmlParseChunk: encoder error\n");
12226
204
                    xmlHaltParser(ctxt);
12227
204
        return(XML_ERR_INVALID_ENCODING);
12228
204
    }
12229
7.70k
      }
12230
103k
  }
12231
103k
    }
12232
12233
3.20M
    if (remain != 0) {
12234
153k
        xmlParseTryOrFinish(ctxt, 0);
12235
3.05M
    } else {
12236
3.05M
        xmlParseTryOrFinish(ctxt, terminate);
12237
3.05M
    }
12238
3.20M
    if (ctxt->instate == XML_PARSER_EOF)
12239
35.4k
        return(ctxt->errNo);
12240
12241
3.16M
    if ((ctxt->input != NULL) &&
12242
3.16M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
3.16M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
3.16M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
3.16M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
41.4k
        return(ctxt->errNo);
12250
12251
3.12M
    if (remain != 0) {
12252
153k
        chunk += size;
12253
153k
        size = remain;
12254
153k
        remain = 0;
12255
153k
        goto xmldecl_done;
12256
153k
    }
12257
2.97M
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
2.97M
        (ctxt->input->buf != NULL)) {
12259
27.9k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
27.9k
           ctxt->input);
12261
27.9k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
27.9k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
27.9k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
27.9k
            base, current);
12267
27.9k
    }
12268
2.97M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
36.9k
  int cur_avail = 0;
12273
12274
36.9k
  if (ctxt->input != NULL) {
12275
36.9k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
36.9k
      else
12279
36.9k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
36.9k
                    (ctxt->input->cur - ctxt->input->base);
12281
36.9k
  }
12282
12283
36.9k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
36.9k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
21.9k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
21.9k
  }
12287
36.9k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
72
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
72
  }
12290
36.9k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
36.9k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
36.9k
    ctxt->sax->endDocument(ctxt->userData);
12293
36.9k
  }
12294
36.9k
  ctxt->instate = XML_PARSER_EOF;
12295
36.9k
    }
12296
2.97M
    if (ctxt->wellFormed == 0)
12297
903k
  return((xmlParserErrors) ctxt->errNo);
12298
2.07M
    else
12299
2.07M
        return(0);
12300
2.97M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
125k
                        const char *chunk, int size, const char *filename) {
12330
125k
    xmlParserCtxtPtr ctxt;
12331
125k
    xmlParserInputPtr inputStream;
12332
125k
    xmlParserInputBufferPtr buf;
12333
125k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
125k
    if ((chunk != NULL) && (size >= 4))
12339
62.5k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
125k
    buf = xmlAllocParserInputBuffer(enc);
12342
125k
    if (buf == NULL) return(NULL);
12343
12344
125k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
125k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
125k
    ctxt->dictNames = 1;
12351
125k
    if (filename == NULL) {
12352
62.7k
  ctxt->directory = NULL;
12353
62.7k
    } else {
12354
62.7k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
62.7k
    }
12356
12357
125k
    inputStream = xmlNewInputStream(ctxt);
12358
125k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
125k
    if (filename == NULL)
12365
62.7k
  inputStream->filename = NULL;
12366
62.7k
    else {
12367
62.7k
  inputStream->filename = (char *)
12368
62.7k
      xmlCanonicPath((const xmlChar *) filename);
12369
62.7k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
62.7k
    }
12376
125k
    inputStream->buf = buf;
12377
125k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
125k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
125k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
125k
    if ((size != 0) && (chunk != NULL) &&
12388
125k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
62.5k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
62.5k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
62.5k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
62.5k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
62.5k
    }
12399
12400
125k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
31.4k
        xmlSwitchEncoding(ctxt, enc);
12402
31.4k
    }
12403
12404
125k
    return(ctxt);
12405
125k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
114k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
114k
    if (ctxt == NULL)
12418
0
        return;
12419
114k
    ctxt->instate = XML_PARSER_EOF;
12420
114k
    ctxt->disableSAX = 1;
12421
117k
    while (ctxt->inputNr > 1)
12422
3.00k
        xmlFreeInputStream(inputPop(ctxt));
12423
114k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
114k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
114k
        if (ctxt->input->buf != NULL) {
12433
97.4k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
97.4k
            ctxt->input->buf = NULL;
12435
97.4k
        }
12436
114k
  ctxt->input->cur = BAD_CAST"";
12437
114k
        ctxt->input->length = 0;
12438
114k
  ctxt->input->base = ctxt->input->cur;
12439
114k
        ctxt->input->end = ctxt->input->cur;
12440
114k
    }
12441
114k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
62.8k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
62.8k
    if (ctxt == NULL)
12452
0
        return;
12453
62.8k
    xmlHaltParser(ctxt);
12454
62.8k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
62.8k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
18.7k
          const xmlChar *ID, xmlNodePtr *list) {
12832
18.7k
    xmlParserCtxtPtr ctxt;
12833
18.7k
    xmlDocPtr newDoc;
12834
18.7k
    xmlNodePtr newRoot;
12835
18.7k
    xmlParserErrors ret = XML_ERR_OK;
12836
18.7k
    xmlChar start[4];
12837
18.7k
    xmlCharEncoding enc;
12838
12839
18.7k
    if (((depth > 40) &&
12840
18.7k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
18.7k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
18.7k
    if (list != NULL)
12848
5.11k
        *list = NULL;
12849
18.7k
    if ((URL == NULL) && (ID == NULL))
12850
36
  return(XML_ERR_INTERNAL_ERROR);
12851
18.6k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
18.6k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
18.6k
                                             oldctxt);
12856
18.6k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
4.07k
    if (oldctxt != NULL) {
12858
4.07k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
4.07k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
4.07k
    }
12861
4.07k
    xmlDetectSAX2(ctxt);
12862
12863
4.07k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
4.07k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
4.07k
    newDoc->properties = XML_DOC_INTERNAL;
12869
4.07k
    if (doc) {
12870
4.07k
        newDoc->intSubset = doc->intSubset;
12871
4.07k
        newDoc->extSubset = doc->extSubset;
12872
4.07k
        if (doc->dict) {
12873
3.29k
            newDoc->dict = doc->dict;
12874
3.29k
            xmlDictReference(newDoc->dict);
12875
3.29k
        }
12876
4.07k
        if (doc->URL != NULL) {
12877
2.63k
            newDoc->URL = xmlStrdup(doc->URL);
12878
2.63k
        }
12879
4.07k
    }
12880
4.07k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
4.07k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
4.07k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
4.07k
    nodePush(ctxt, newDoc->children);
12891
4.07k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
4.07k
    } else {
12894
4.07k
        ctxt->myDoc = doc;
12895
4.07k
        newRoot->doc = doc;
12896
4.07k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
4.07k
    GROW;
12904
4.07k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
3.90k
  start[0] = RAW;
12906
3.90k
  start[1] = NXT(1);
12907
3.90k
  start[2] = NXT(2);
12908
3.90k
  start[3] = NXT(3);
12909
3.90k
  enc = xmlDetectCharEncoding(start, 4);
12910
3.90k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
628
      xmlSwitchEncoding(ctxt, enc);
12912
628
  }
12913
3.90k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
4.07k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
435
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
435
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
435
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
46
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
46
                           "Version mismatch between document and entity\n");
12927
46
        }
12928
435
    }
12929
12930
4.07k
    ctxt->instate = XML_PARSER_CONTENT;
12931
4.07k
    ctxt->depth = depth;
12932
4.07k
    if (oldctxt != NULL) {
12933
4.07k
  ctxt->_private = oldctxt->_private;
12934
4.07k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
4.07k
  ctxt->validate = oldctxt->validate;
12936
4.07k
  ctxt->valid = oldctxt->valid;
12937
4.07k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
4.07k
        if (oldctxt->validate) {
12939
809
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
809
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
809
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
809
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
809
        }
12944
4.07k
  ctxt->external = oldctxt->external;
12945
4.07k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
4.07k
        ctxt->dict = oldctxt->dict;
12947
4.07k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
4.07k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
4.07k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
4.07k
        ctxt->dictNames = oldctxt->dictNames;
12951
4.07k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
4.07k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
4.07k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
4.07k
  ctxt->record_info = oldctxt->record_info;
12955
4.07k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
4.07k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
4.07k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
4.07k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
4.07k
    xmlParseContent(ctxt);
12970
12971
4.07k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
51
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
4.02k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
4.07k
    if (ctxt->node != newDoc->children) {
12977
409
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
409
    }
12979
12980
4.07k
    if (!ctxt->wellFormed) {
12981
2.26k
  ret = (xmlParserErrors)ctxt->errNo;
12982
2.26k
        if (oldctxt != NULL) {
12983
2.26k
            oldctxt->errNo = ctxt->errNo;
12984
2.26k
            oldctxt->wellFormed = 0;
12985
2.26k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
2.26k
        }
12987
2.26k
    } else {
12988
1.81k
  if (list != NULL) {
12989
915
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
915
      cur = newDoc->children->children;
12996
915
      *list = cur;
12997
2.42k
      while (cur != NULL) {
12998
1.51k
    cur->parent = NULL;
12999
1.51k
    cur = cur->next;
13000
1.51k
      }
13001
915
            newDoc->children->children = NULL;
13002
915
  }
13003
1.81k
  ret = XML_ERR_OK;
13004
1.81k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
4.07k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
4.07k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
4.07k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
4.07k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
4.07k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
4.07k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
4.07k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
4.07k
    }
13020
13021
4.07k
    if (oldctxt != NULL) {
13022
4.07k
        ctxt->dict = NULL;
13023
4.07k
        ctxt->attsDefault = NULL;
13024
4.07k
        ctxt->attsSpecial = NULL;
13025
4.07k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
4.07k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
4.07k
        oldctxt->validate = ctxt->validate;
13028
4.07k
        oldctxt->valid = ctxt->valid;
13029
4.07k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
4.07k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
4.07k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
4.07k
    }
13033
4.07k
    ctxt->node_seq.maximum = 0;
13034
4.07k
    ctxt->node_seq.length = 0;
13035
4.07k
    ctxt->node_seq.buffer = NULL;
13036
4.07k
    xmlFreeParserCtxt(ctxt);
13037
4.07k
    newDoc->intSubset = NULL;
13038
4.07k
    newDoc->extSubset = NULL;
13039
4.07k
    xmlFreeDoc(newDoc);
13040
13041
4.07k
    return(ret);
13042
4.07k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
13.6k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
13.6k
    xmlParserCtxtPtr ctxt;
13125
13.6k
    xmlDocPtr newDoc = NULL;
13126
13.6k
    xmlNodePtr newRoot;
13127
13.6k
    xmlSAXHandlerPtr oldsax = NULL;
13128
13.6k
    xmlNodePtr content = NULL;
13129
13.6k
    xmlNodePtr last = NULL;
13130
13.6k
    int size;
13131
13.6k
    xmlParserErrors ret = XML_ERR_OK;
13132
13.6k
#ifdef SAX2
13133
13.6k
    int i;
13134
13.6k
#endif
13135
13136
13.6k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
13.6k
        (oldctxt->depth >  100)) {
13138
60
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
60
                       "Maximum entity nesting depth exceeded");
13140
60
  return(XML_ERR_ENTITY_LOOP);
13141
60
    }
13142
13143
13144
13.6k
    if (lst != NULL)
13145
13.6k
        *lst = NULL;
13146
13.6k
    if (string == NULL)
13147
18
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
13.5k
    size = xmlStrlen(string);
13150
13151
13.5k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
13.5k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
13.5k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
13.5k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
13.5k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
13.5k
    else
13158
13.5k
  ctxt->userData = ctxt;
13159
13.5k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
13.5k
    ctxt->dict = oldctxt->dict;
13161
13.5k
    ctxt->input_id = oldctxt->input_id;
13162
13.5k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
13.5k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
13.5k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
13.5k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
13.5k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
3
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
3
    }
13171
13.5k
#endif
13172
13173
13.5k
    oldsax = ctxt->sax;
13174
13.5k
    ctxt->sax = oldctxt->sax;
13175
13.5k
    xmlDetectSAX2(ctxt);
13176
13.5k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
13.5k
    ctxt->options = oldctxt->options;
13178
13179
13.5k
    ctxt->_private = oldctxt->_private;
13180
13.5k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
13.5k
    } else {
13193
13.5k
  ctxt->myDoc = oldctxt->myDoc;
13194
13.5k
        content = ctxt->myDoc->children;
13195
13.5k
  last = ctxt->myDoc->last;
13196
13.5k
    }
13197
13.5k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
13.5k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
13.5k
    ctxt->myDoc->children = NULL;
13208
13.5k
    ctxt->myDoc->last = NULL;
13209
13.5k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
13.5k
    nodePush(ctxt, ctxt->myDoc->children);
13211
13.5k
    ctxt->instate = XML_PARSER_CONTENT;
13212
13.5k
    ctxt->depth = oldctxt->depth;
13213
13214
13.5k
    ctxt->validate = 0;
13215
13.5k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
13.5k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
13.0k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
13.0k
    }
13222
13.5k
    ctxt->dictNames = oldctxt->dictNames;
13223
13.5k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
13.5k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
13.5k
    xmlParseContent(ctxt);
13227
13.5k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
33
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
13.5k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
13.5k
    if (ctxt->node != ctxt->myDoc->children) {
13233
168
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
168
    }
13235
13236
13.5k
    if (!ctxt->wellFormed) {
13237
2.95k
  ret = (xmlParserErrors)ctxt->errNo;
13238
2.95k
        oldctxt->errNo = ctxt->errNo;
13239
2.95k
        oldctxt->wellFormed = 0;
13240
2.95k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
10.5k
    } else {
13242
10.5k
        ret = XML_ERR_OK;
13243
10.5k
    }
13244
13245
13.5k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
10.5k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
10.5k
  cur = ctxt->myDoc->children->children;
13253
10.5k
  *lst = cur;
13254
24.5k
  while (cur != NULL) {
13255
13.9k
#ifdef LIBXML_VALID_ENABLED
13256
13.9k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
13.9k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
13.9k
    (cur->type == XML_ELEMENT_NODE)) {
13259
447
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
447
      oldctxt->myDoc, cur);
13261
447
      }
13262
13.9k
#endif /* LIBXML_VALID_ENABLED */
13263
13.9k
      cur->parent = NULL;
13264
13.9k
      cur = cur->next;
13265
13.9k
  }
13266
10.5k
  ctxt->myDoc->children->children = NULL;
13267
10.5k
    }
13268
13.5k
    if (ctxt->myDoc != NULL) {
13269
13.5k
  xmlFreeNode(ctxt->myDoc->children);
13270
13.5k
        ctxt->myDoc->children = content;
13271
13.5k
        ctxt->myDoc->last = last;
13272
13.5k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
13.5k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
13.5k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
13.5k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
13.5k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
13.5k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
13.5k
    }
13285
13286
13.5k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
13.5k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
13.5k
    ctxt->sax = oldsax;
13289
13.5k
    ctxt->dict = NULL;
13290
13.5k
    ctxt->attsDefault = NULL;
13291
13.5k
    ctxt->attsSpecial = NULL;
13292
13.5k
    xmlFreeParserCtxt(ctxt);
13293
13.5k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
13.5k
    return(ret);
13298
13.5k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
18.6k
        xmlParserCtxtPtr pctx) {
13783
18.6k
    xmlParserCtxtPtr ctxt;
13784
18.6k
    xmlParserInputPtr inputStream;
13785
18.6k
    char *directory = NULL;
13786
18.6k
    xmlChar *uri;
13787
13788
18.6k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
18.6k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
18.6k
    if (pctx != NULL) {
13794
18.6k
        ctxt->options = pctx->options;
13795
18.6k
        ctxt->_private = pctx->_private;
13796
18.6k
  ctxt->input_id = pctx->input_id;
13797
18.6k
    }
13798
13799
    /* Don't read from stdin. */
13800
18.6k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
18.6k
    uri = xmlBuildURI(URL, base);
13804
13805
18.6k
    if (uri == NULL) {
13806
129
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
129
  if (inputStream == NULL) {
13808
129
      xmlFreeParserCtxt(ctxt);
13809
129
      return(NULL);
13810
129
  }
13811
13812
0
  inputPush(ctxt, inputStream);
13813
13814
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
0
      directory = xmlParserGetDirectory((char *)URL);
13816
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
0
      ctxt->directory = directory;
13818
18.5k
    } else {
13819
18.5k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
18.5k
  if (inputStream == NULL) {
13821
14.4k
      xmlFree(uri);
13822
14.4k
      xmlFreeParserCtxt(ctxt);
13823
14.4k
      return(NULL);
13824
14.4k
  }
13825
13826
4.07k
  inputPush(ctxt, inputStream);
13827
13828
4.07k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
4.07k
      directory = xmlParserGetDirectory((char *)uri);
13830
4.07k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
4.07k
      ctxt->directory = directory;
13832
4.07k
  xmlFree(uri);
13833
4.07k
    }
13834
4.07k
    return(ctxt);
13835
18.6k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
76.3k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
76.3k
    xmlParserCtxtPtr ctxt;
14178
76.3k
    xmlParserInputPtr input;
14179
76.3k
    xmlParserInputBufferPtr buf;
14180
14181
76.3k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
76.3k
    if (size <= 0)
14184
65
  return(NULL);
14185
14186
76.2k
    ctxt = xmlNewParserCtxt();
14187
76.2k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
76.2k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
76.2k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
76.2k
    input = xmlNewInputStream(ctxt);
14197
76.2k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
76.2k
    input->filename = NULL;
14204
76.2k
    input->buf = buf;
14205
76.2k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
76.2k
    inputPush(ctxt, input);
14208
76.2k
    return(ctxt);
14209
76.2k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Returns immediately when the library is already flagged as
 * initialized. Otherwise the per-module internal initializers are run
 * in a fixed order and the flag is set. When thread support is compiled
 * in, that work happens with the global init mutex held and the flag is
 * tested a second time after the mutex has been taken.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Test the flag again now that the mutex is held. */
    if (!xmlParserInitialized) {
#endif
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Only register the exit handler with the default deallocator. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        xmlInitThreadsInternal();
        xmlInitGlobalsInternal();
        xmlInitMemoryInternal();
        __xmlInitializeDict();
        xmlInitEncodingInternal();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
        xmlInitXPathInternal();
#endif
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14602
    !defined(_WIN32)
14603
static void
14604
ATTRIBUTE_DESTRUCTOR
14605
0
xmlDestructor(void) {
14606
    /*
14607
     * Calling custom deallocation functions in a destructor can cause
14608
     * problems, for example with Nokogiri.
14609
     */
14610
0
    if (xmlFree == free)
14611
0
        xmlCleanupParser();
14612
0
}
14613
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement and cannot capture a following "else". Callers
 * supply the terminating semicolon. Note that @str is evaluated more
 * than once.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
188k
{
14843
188k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
188k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
188k
    if (options & XML_PARSE_RECOVER) {
14851
72.0k
        ctxt->recovery = 1;
14852
72.0k
        options -= XML_PARSE_RECOVER;
14853
72.0k
  ctxt->options |= XML_PARSE_RECOVER;
14854
72.0k
    } else
14855
116k
        ctxt->recovery = 0;
14856
188k
    if (options & XML_PARSE_DTDLOAD) {
14857
150k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
150k
        options -= XML_PARSE_DTDLOAD;
14859
150k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
150k
    } else
14861
37.7k
        ctxt->loadsubset = 0;
14862
188k
    if (options & XML_PARSE_DTDATTR) {
14863
44.9k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
44.9k
        options -= XML_PARSE_DTDATTR;
14865
44.9k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
44.9k
    }
14867
188k
    if (options & XML_PARSE_NOENT) {
14868
134k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
134k
        options -= XML_PARSE_NOENT;
14871
134k
  ctxt->options |= XML_PARSE_NOENT;
14872
134k
    } else
14873
53.7k
        ctxt->replaceEntities = 0;
14874
188k
    if (options & XML_PARSE_PEDANTIC) {
14875
34.2k
        ctxt->pedantic = 1;
14876
34.2k
        options -= XML_PARSE_PEDANTIC;
14877
34.2k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
34.2k
    } else
14879
154k
        ctxt->pedantic = 0;
14880
188k
    if (options & XML_PARSE_NOBLANKS) {
14881
50.2k
        ctxt->keepBlanks = 0;
14882
50.2k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
50.2k
        options -= XML_PARSE_NOBLANKS;
14884
50.2k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
50.2k
    } else
14886
138k
        ctxt->keepBlanks = 1;
14887
188k
    if (options & XML_PARSE_DTDVALID) {
14888
52.2k
        ctxt->validate = 1;
14889
52.2k
        if (options & XML_PARSE_NOWARNING)
14890
37.1k
            ctxt->vctxt.warning = NULL;
14891
52.2k
        if (options & XML_PARSE_NOERROR)
14892
38.5k
            ctxt->vctxt.error = NULL;
14893
52.2k
        options -= XML_PARSE_DTDVALID;
14894
52.2k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
52.2k
    } else
14896
136k
        ctxt->validate = 0;
14897
188k
    if (options & XML_PARSE_NOWARNING) {
14898
51.6k
        ctxt->sax->warning = NULL;
14899
51.6k
        options -= XML_PARSE_NOWARNING;
14900
51.6k
    }
14901
188k
    if (options & XML_PARSE_NOERROR) {
14902
56.9k
        ctxt->sax->error = NULL;
14903
56.9k
        ctxt->sax->fatalError = NULL;
14904
56.9k
        options -= XML_PARSE_NOERROR;
14905
56.9k
    }
14906
188k
#ifdef LIBXML_SAX1_ENABLED
14907
188k
    if (options & XML_PARSE_SAX1) {
14908
49.3k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
49.3k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
49.3k
        ctxt->sax->startElementNs = NULL;
14911
49.3k
        ctxt->sax->endElementNs = NULL;
14912
49.3k
        ctxt->sax->initialized = 1;
14913
49.3k
        options -= XML_PARSE_SAX1;
14914
49.3k
  ctxt->options |= XML_PARSE_SAX1;
14915
49.3k
    }
14916
188k
#endif /* LIBXML_SAX1_ENABLED */
14917
188k
    if (options & XML_PARSE_NODICT) {
14918
50.7k
        ctxt->dictNames = 0;
14919
50.7k
        options -= XML_PARSE_NODICT;
14920
50.7k
  ctxt->options |= XML_PARSE_NODICT;
14921
137k
    } else {
14922
137k
        ctxt->dictNames = 1;
14923
137k
    }
14924
188k
    if (options & XML_PARSE_NOCDATA) {
14925
47.4k
        ctxt->sax->cdataBlock = NULL;
14926
47.4k
        options -= XML_PARSE_NOCDATA;
14927
47.4k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
47.4k
    }
14929
188k
    if (options & XML_PARSE_NSCLEAN) {
14930
53.2k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
53.2k
        options -= XML_PARSE_NSCLEAN;
14932
53.2k
    }
14933
188k
    if (options & XML_PARSE_NONET) {
14934
42.6k
  ctxt->options |= XML_PARSE_NONET;
14935
42.6k
        options -= XML_PARSE_NONET;
14936
42.6k
    }
14937
188k
    if (options & XML_PARSE_COMPACT) {
14938
92.3k
  ctxt->options |= XML_PARSE_COMPACT;
14939
92.3k
        options -= XML_PARSE_COMPACT;
14940
92.3k
    }
14941
188k
    if (options & XML_PARSE_OLD10) {
14942
46.0k
  ctxt->options |= XML_PARSE_OLD10;
14943
46.0k
        options -= XML_PARSE_OLD10;
14944
46.0k
    }
14945
188k
    if (options & XML_PARSE_NOBASEFIX) {
14946
45.2k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
45.2k
        options -= XML_PARSE_NOBASEFIX;
14948
45.2k
    }
14949
188k
    if (options & XML_PARSE_HUGE) {
14950
41.3k
  ctxt->options |= XML_PARSE_HUGE;
14951
41.3k
        options -= XML_PARSE_HUGE;
14952
41.3k
        if (ctxt->dict != NULL)
14953
41.3k
            xmlDictSetLimit(ctxt->dict, 0);
14954
41.3k
    }
14955
188k
    if (options & XML_PARSE_OLDSAX) {
14956
40.9k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
40.9k
        options -= XML_PARSE_OLDSAX;
14958
40.9k
    }
14959
188k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
49.8k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
49.8k
        options -= XML_PARSE_IGNORE_ENC;
14962
49.8k
    }
14963
188k
    if (options & XML_PARSE_BIG_LINES) {
14964
46.8k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
46.8k
        options -= XML_PARSE_BIG_LINES;
14966
46.8k
    }
14967
188k
    ctxt->linenumbers = 1;
14968
188k
    return (options);
14969
188k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
125k
{
14984
125k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
125k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
62.7k
{
15003
62.7k
    xmlDocPtr ret;
15004
15005
62.7k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
62.7k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
62.7k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
62.7k
        (ctxt->input->filename == NULL))
15015
62.7k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
62.7k
    xmlParseDocument(ctxt);
15017
62.7k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
29.7k
        ret = ctxt->myDoc;
15019
33.0k
    else {
15020
33.0k
        ret = NULL;
15021
33.0k
  if (ctxt->myDoc != NULL) {
15022
29.4k
      xmlFreeDoc(ctxt->myDoc);
15023
29.4k
  }
15024
33.0k
    }
15025
62.7k
    ctxt->myDoc = NULL;
15026
62.7k
    if (!reuse) {
15027
62.7k
  xmlFreeParserCtxt(ctxt);
15028
62.7k
    }
15029
15030
62.7k
    return (ret);
15031
62.7k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
62.7k
{
15096
62.7k
    xmlParserCtxtPtr ctxt;
15097
15098
62.7k
    xmlInitParser();
15099
62.7k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
62.7k
    if (ctxt == NULL)
15101
11
        return (NULL);
15102
62.7k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
62.7k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387