Coverage Report

Created: 2023-09-28 22:19

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
12.3M
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacements per byte of input.
127
 */
128
1.37k
#define XML_PARSER_NON_LINEAR 10
129
130
99.2M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
103M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
15.2G
#define XML_PARSER_BUFFER_SIZE 100
147
741k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expects to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names.
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level.
157
 */
158
38.8M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle an attribute redefinition error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
38.5k
{
215
38.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
38.5k
        (ctxt->instate == XML_PARSER_EOF))
217
6
  return;
218
38.5k
    if (ctxt != NULL)
219
38.5k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
38.5k
    if (prefix == NULL)
222
21.6k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
21.6k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
21.6k
                        (const char *) localname, NULL, NULL, 0, 0,
225
21.6k
                        "Attribute %s redefined\n", localname);
226
16.8k
    else
227
16.8k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
16.8k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
16.8k
                        (const char *) prefix, (const char *) localname,
230
16.8k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
16.8k
                        localname);
232
38.5k
    if (ctxt != NULL) {
233
38.5k
  ctxt->wellFormed = 0;
234
38.5k
  if (ctxt->recovery == 0)
235
18.8k
      ctxt->disableSAX = 1;
236
38.5k
    }
237
38.5k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @info:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
3.69M
{
250
3.69M
    const char *errmsg;
251
252
3.69M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
3.69M
        (ctxt->instate == XML_PARSER_EOF))
254
18.6k
  return;
255
3.67M
    switch (error) {
256
22.0k
        case XML_ERR_INVALID_HEX_CHARREF:
257
22.0k
            errmsg = "CharRef: invalid hexadecimal value";
258
22.0k
            break;
259
37.9k
        case XML_ERR_INVALID_DEC_CHARREF:
260
37.9k
            errmsg = "CharRef: invalid decimal value";
261
37.9k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
576k
        case XML_ERR_INTERNAL_ERROR:
266
576k
            errmsg = "internal error";
267
576k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
239k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
239k
            errmsg = "PEReference: expecting ';'";
282
239k
            break;
283
2.05k
        case XML_ERR_ENTITY_LOOP:
284
2.05k
            errmsg = "Detected an entity reference loop";
285
2.05k
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
8.00k
        case XML_ERR_ENTITY_PE_INTERNAL:
290
8.00k
            errmsg = "PEReferences forbidden in internal subset";
291
8.00k
            break;
292
2.62k
        case XML_ERR_ENTITY_NOT_FINISHED:
293
2.62k
            errmsg = "EntityValue: \" or ' expected";
294
2.62k
            break;
295
41.8k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
41.8k
            errmsg = "AttValue: \" or ' expected";
297
41.8k
            break;
298
101k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
101k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
101k
            break;
301
17.4k
        case XML_ERR_LITERAL_NOT_STARTED:
302
17.4k
            errmsg = "SystemLiteral \" or ' expected";
303
17.4k
            break;
304
19.4k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
19.4k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
19.4k
            break;
307
22.0k
        case XML_ERR_MISPLACED_CDATA_END:
308
22.0k
            errmsg = "Sequence ']]>' not allowed in content";
309
22.0k
            break;
310
14.6k
        case XML_ERR_URI_REQUIRED:
311
14.6k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
14.6k
            break;
313
2.99k
        case XML_ERR_PUBID_REQUIRED:
314
2.99k
            errmsg = "PUBLIC, the Public Identifier is missing";
315
2.99k
            break;
316
1.58M
        case XML_ERR_HYPHEN_IN_COMMENT:
317
1.58M
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
1.58M
            break;
319
16.6k
        case XML_ERR_PI_NOT_STARTED:
320
16.6k
            errmsg = "xmlParsePI : no target name";
321
16.6k
            break;
322
5.74k
        case XML_ERR_RESERVED_XML_NAME:
323
5.74k
            errmsg = "Invalid PI name";
324
5.74k
            break;
325
4.39k
        case XML_ERR_NOTATION_NOT_STARTED:
326
4.39k
            errmsg = "NOTATION: Name expected here";
327
4.39k
            break;
328
19.4k
        case XML_ERR_NOTATION_NOT_FINISHED:
329
19.4k
            errmsg = "'>' required to close NOTATION declaration";
330
19.4k
            break;
331
11.6k
        case XML_ERR_VALUE_REQUIRED:
332
11.6k
            errmsg = "Entity value required";
333
11.6k
            break;
334
4.43k
        case XML_ERR_URI_FRAGMENT:
335
4.43k
            errmsg = "Fragment not allowed";
336
4.43k
            break;
337
8.49k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
8.49k
            errmsg = "'(' required to start ATTLIST enumeration";
339
8.49k
            break;
340
1.47k
        case XML_ERR_NMTOKEN_REQUIRED:
341
1.47k
            errmsg = "NmToken expected in ATTLIST enumeration";
342
1.47k
            break;
343
3.99k
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
3.99k
            errmsg = "')' required to finish ATTLIST enumeration";
345
3.99k
            break;
346
3.98k
        case XML_ERR_MIXED_NOT_STARTED:
347
3.98k
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
3.98k
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
7.85k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
7.85k
            errmsg = "ContentDecl : Name or '(' expected";
354
7.85k
            break;
355
10.7k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
10.7k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
10.7k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
117k
        case XML_ERR_GT_REQUIRED:
363
117k
            errmsg = "expected '>'";
364
117k
            break;
365
416
        case XML_ERR_CONDSEC_INVALID:
366
416
            errmsg = "XML conditional section '[' expected";
367
416
            break;
368
13.6k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
13.6k
            errmsg = "Content error in the external subset";
370
13.6k
            break;
371
1.06k
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
1.06k
            errmsg =
373
1.06k
                "conditional section INCLUDE or IGNORE keyword expected";
374
1.06k
            break;
375
2.12k
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
2.12k
            errmsg = "XML conditional section not closed";
377
2.12k
            break;
378
412
        case XML_ERR_XMLDECL_NOT_STARTED:
379
412
            errmsg = "Text declaration '<?xml' required";
380
412
            break;
381
88.1k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
88.1k
            errmsg = "parsing XML declaration: '?>' expected";
383
88.1k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
406k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
406k
            errmsg = "EntityRef: expecting ';'";
389
406k
            break;
390
9.60k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
9.60k
            errmsg = "DOCTYPE improperly terminated";
392
9.60k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
6.99k
        case XML_ERR_EQUAL_REQUIRED:
397
6.99k
            errmsg = "expected '='";
398
6.99k
            break;
399
26.3k
        case XML_ERR_STRING_NOT_CLOSED:
400
26.3k
            errmsg = "String not closed expecting \" or '";
401
26.3k
            break;
402
7.33k
        case XML_ERR_STRING_NOT_STARTED:
403
7.33k
            errmsg = "String not started expecting ' or \"";
404
7.33k
            break;
405
2.14k
        case XML_ERR_ENCODING_NAME:
406
2.14k
            errmsg = "Invalid XML encoding name";
407
2.14k
            break;
408
870
        case XML_ERR_STANDALONE_VALUE:
409
870
            errmsg = "standalone accepts only 'yes' or 'no'";
410
870
            break;
411
20.9k
        case XML_ERR_DOCUMENT_EMPTY:
412
20.9k
            errmsg = "Document is empty";
413
20.9k
            break;
414
129k
        case XML_ERR_DOCUMENT_END:
415
129k
            errmsg = "Extra content at the end of the document";
416
129k
            break;
417
22.4k
        case XML_ERR_NOT_WELL_BALANCED:
418
22.4k
            errmsg = "chunk is not well balanced";
419
22.4k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
25.6k
        case XML_ERR_VERSION_MISSING:
424
25.6k
            errmsg = "Malformed declaration expecting version";
425
25.6k
            break;
426
163
        case XML_ERR_NAME_TOO_LONG:
427
163
            errmsg = "Name too long";
428
163
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
4.08k
        default:
435
4.08k
            errmsg = "Unregistered error message";
436
3.67M
    }
437
3.67M
    if (ctxt != NULL)
438
3.67M
  ctxt->errNo = error;
439
3.67M
    if (info == NULL) {
440
3.09M
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
3.09M
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
3.09M
                        errmsg);
443
3.09M
    } else {
444
576k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
576k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
576k
                        errmsg, info);
447
576k
    }
448
3.67M
    if (ctxt != NULL) {
449
3.67M
  ctxt->wellFormed = 0;
450
3.67M
  if (ctxt->recovery == 0)
451
604k
      ctxt->disableSAX = 1;
452
3.67M
    }
453
3.67M
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
1.94M
{
467
1.94M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
1.94M
        (ctxt->instate == XML_PARSER_EOF))
469
188
  return;
470
1.94M
    if (ctxt != NULL)
471
1.94M
  ctxt->errNo = error;
472
1.94M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
1.94M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
1.94M
    if (ctxt != NULL) {
475
1.94M
  ctxt->wellFormed = 0;
476
1.94M
  if (ctxt->recovery == 0)
477
608k
      ctxt->disableSAX = 1;
478
1.94M
    }
479
1.94M
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
9.61M
{
495
9.61M
    xmlStructuredErrorFunc schannel = NULL;
496
497
9.61M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
9.61M
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
9.61M
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
9.61M
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
6.75M
        schannel = ctxt->sax->serror;
503
9.61M
    if (ctxt != NULL) {
504
9.61M
        __xmlRaiseError(schannel,
505
9.61M
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
9.61M
                    ctxt->userData,
507
9.61M
                    ctxt, NULL, XML_FROM_PARSER, error,
508
9.61M
                    XML_ERR_WARNING, NULL, 0,
509
9.61M
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
9.61M
        msg, (const char *) str1, (const char *) str2);
511
9.61M
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
9.61M
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
864k
{
533
864k
    xmlStructuredErrorFunc schannel = NULL;
534
535
864k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
864k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
864k
    if (ctxt != NULL) {
539
864k
  ctxt->errNo = error;
540
864k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
461k
      schannel = ctxt->sax->serror;
542
864k
    }
543
864k
    if (ctxt != NULL) {
544
864k
        __xmlRaiseError(schannel,
545
864k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
864k
                    ctxt, NULL, XML_FROM_DTD, error,
547
864k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
864k
        (const char *) str2, NULL, 0, 0,
549
864k
        msg, (const char *) str1, (const char *) str2);
550
864k
  ctxt->valid = 0;
551
864k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
864k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
1.56M
{
573
1.56M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
1.56M
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
1.56M
    if (ctxt != NULL)
577
1.56M
  ctxt->errNo = error;
578
1.56M
    __xmlRaiseError(NULL, NULL, NULL,
579
1.56M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
1.56M
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
1.56M
    if (ctxt != NULL) {
582
1.56M
  ctxt->wellFormed = 0;
583
1.56M
  if (ctxt->recovery == 0)
584
235k
      ctxt->disableSAX = 1;
585
1.56M
    }
586
1.56M
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  a string info
594
 * @val:  an integer value
595
 * @str2:  a string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
588k
{
604
588k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
588k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
588k
    if (ctxt != NULL)
608
588k
  ctxt->errNo = error;
609
588k
    __xmlRaiseError(NULL, NULL, NULL,
610
588k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
588k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
588k
        NULL, val, 0, msg, str1, val, str2);
613
588k
    if (ctxt != NULL) {
614
588k
  ctxt->wellFormed = 0;
615
588k
  if (ctxt->recovery == 0)
616
215k
      ctxt->disableSAX = 1;
617
588k
    }
618
588k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
11.6M
{
633
11.6M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
11.6M
        (ctxt->instate == XML_PARSER_EOF))
635
63
  return;
636
11.6M
    if (ctxt != NULL)
637
11.6M
  ctxt->errNo = error;
638
11.6M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
11.6M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
11.6M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
11.6M
                    val);
642
11.6M
    if (ctxt != NULL) {
643
11.6M
  ctxt->wellFormed = 0;
644
11.6M
  if (ctxt->recovery == 0)
645
5.65M
      ctxt->disableSAX = 1;
646
11.6M
    }
647
11.6M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
3.36M
{
662
3.36M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
3.36M
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
3.36M
    if (ctxt != NULL)
666
3.36M
  ctxt->errNo = error;
667
3.36M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
3.36M
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
3.36M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
3.36M
                    val);
671
3.36M
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a namespace error, i.e. violating namespace well-formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
453k
{
689
453k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
453k
        (ctxt->instate == XML_PARSER_EOF))
691
135
  return;
692
453k
    if (ctxt != NULL)
693
453k
  ctxt->errNo = error;
694
453k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
453k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
453k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
453k
                    info1, info2, info3);
698
453k
    if (ctxt != NULL)
699
453k
  ctxt->nsWellFormed = 0;
700
453k
}
701
702
/**
703
 * xmlNsWarn:
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
11.1k
{
718
11.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
11.1k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
11.1k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
11.1k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
11.1k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
11.1k
                    info1, info2, info3);
725
11.1k
}
726
727
static void
728
355M
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
729
355M
    if (val > ULONG_MAX - *dst)
730
0
        *dst = ULONG_MAX;
731
355M
    else
732
355M
        *dst += val;
733
355M
}
734
735
static void
736
100M
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
737
100M
    if (val > ULONG_MAX - *dst)
738
0
        *dst = ULONG_MAX;
739
100M
    else
740
100M
        *dst += val;
741
100M
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
99.2M
{
770
99.2M
    unsigned long consumed;
771
99.2M
    xmlParserInputPtr input = ctxt->input;
772
99.2M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
99.2M
    consumed = input->parentConsumed;
779
99.2M
    if ((entity == NULL) ||
780
99.2M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
55.5M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
55.5M
        xmlSaturatedAdd(&consumed, input->consumed);
783
55.5M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
55.5M
    }
785
99.2M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
99.2M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
99.2M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
99.2M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
99.2M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
1.37k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
1.37k
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
1.37k
                       "Maximum entity amplification factor exceeded");
803
1.37k
        xmlHaltParser(ctxt);
804
1.37k
        return(1);
805
1.37k
    }
806
807
99.2M
    return(0);
808
99.2M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an
824
  * unknown feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
1.13M
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
1.13M
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
1.13M
    (void) sax;
1048
1049
1.13M
    if (ctxt == NULL) return;
1050
1.13M
    sax = ctxt->sax;
1051
1.13M
#ifdef LIBXML_SAX1_ENABLED
1052
1.13M
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
1.13M
        ((sax->startElementNs != NULL) ||
1054
691k
         (sax->endElementNs != NULL) ||
1055
691k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
691k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
1.13M
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
1.13M
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
1.13M
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
1.13M
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
1.13M
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
1.13M
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
163k
{
1103
163k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
203k
    while (*src == 0x20) src++;
1107
2.40M
    while (*src != 0) {
1108
2.24M
  if (*src == 0x20) {
1109
536k
      while (*src == 0x20) src++;
1110
149k
      if (*src != 0)
1111
131k
    *dst++ = 0x20;
1112
2.09M
  } else {
1113
2.09M
      *dst++ = *src++;
1114
2.09M
  }
1115
2.24M
    }
1116
163k
    *dst = 0;
1117
163k
    if (dst == src)
1118
138k
       return(NULL);
1119
24.6k
    return(dst);
1120
163k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
30.2k
{
1136
30.2k
    int i;
1137
30.2k
    int remove_head = 0;
1138
30.2k
    int need_realloc = 0;
1139
30.2k
    const xmlChar *cur;
1140
1141
30.2k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
30.2k
    i = *len;
1144
30.2k
    if (i <= 0)
1145
1.07k
        return(NULL);
1146
1147
29.1k
    cur = src;
1148
45.1k
    while (*cur == 0x20) {
1149
15.9k
        cur++;
1150
15.9k
  remove_head++;
1151
15.9k
    }
1152
881k
    while (*cur != 0) {
1153
857k
  if (*cur == 0x20) {
1154
60.6k
      cur++;
1155
60.6k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
5.25k
          need_realloc = 1;
1157
5.25k
    break;
1158
5.25k
      }
1159
60.6k
  } else
1160
796k
      cur++;
1161
857k
    }
1162
29.1k
    if (need_realloc) {
1163
5.25k
        xmlChar *ret;
1164
1165
5.25k
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
5.25k
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
5.25k
  xmlAttrNormalizeSpace(ret, ret);
1171
5.25k
  *len = strlen((const char *)ret);
1172
5.25k
        return(ret);
1173
23.9k
    } else if (remove_head) {
1174
791
        *len -= remove_head;
1175
791
        memmove(src, src + remove_head, 1 + *len);
1176
791
  return(src);
1177
791
    }
1178
23.1k
    return(NULL);
1179
29.1k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
200k
               const xmlChar *value) {
1195
200k
    xmlDefAttrsPtr defaults;
1196
200k
    int len;
1197
200k
    const xmlChar *name;
1198
200k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
200k
    if (ctxt->attsSpecial != NULL) {
1204
172k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
33.8k
      return;
1206
172k
    }
1207
1208
167k
    if (ctxt->attsDefault == NULL) {
1209
36.0k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
36.0k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
36.0k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
167k
    name = xmlSplitQName3(fullname, &len);
1219
167k
    if (name == NULL) {
1220
154k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
154k
  prefix = NULL;
1222
154k
    } else {
1223
12.1k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
12.1k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
12.1k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
167k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
167k
    if (defaults == NULL) {
1232
91.7k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
91.7k
                     (4 * 5) * sizeof(const xmlChar *));
1234
91.7k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
91.7k
  defaults->nbAttrs = 0;
1237
91.7k
  defaults->maxAttrs = 4;
1238
91.7k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
91.7k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
91.7k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
4.70k
        xmlDefAttrsPtr temp;
1245
1246
4.70k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
4.70k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
4.70k
  if (temp == NULL)
1249
0
      goto mem_error;
1250
4.70k
  defaults = temp;
1251
4.70k
  defaults->maxAttrs *= 2;
1252
4.70k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
4.70k
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
4.70k
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
167k
    name = xmlSplitQName3(fullattr, &len);
1264
167k
    if (name == NULL) {
1265
141k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
141k
  prefix = NULL;
1267
141k
    } else {
1268
25.2k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
25.2k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
25.2k
    }
1271
1272
167k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
167k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
167k
    len = xmlStrlen(value);
1276
167k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
167k
    if (value == NULL)
1278
0
        goto mem_error;
1279
167k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
167k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
167k
    if (ctxt->external)
1282
39.8k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
127k
    else
1284
127k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
167k
    defaults->nbAttrs++;
1286
1287
167k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
167k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
2.38M
{
1309
2.38M
    if (ctxt->attsSpecial == NULL) {
1310
63.5k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
63.5k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
63.5k
    }
1314
1315
2.38M
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
480k
        return;
1317
1318
1.90M
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
1.90M
                     (void *) (ptrdiff_t) type);
1320
1.90M
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
2.38M
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
1.32M
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
1.32M
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
1.32M
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
532k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
532k
    }
1341
1.32M
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
276k
{
1354
276k
    if (ctxt->attsSpecial == NULL)
1355
227k
        return;
1356
1357
48.8k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
48.8k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
15.2k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
15.2k
        ctxt->attsSpecial = NULL;
1362
15.2k
    }
1363
48.8k
    return;
1364
276k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
29.1k
{
1427
29.1k
    const xmlChar *cur = lang, *nxt;
1428
1429
29.1k
    if (cur == NULL)
1430
641
        return (0);
1431
28.5k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
28.5k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
28.5k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
28.5k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
2.69k
        cur += 2;
1441
15.3k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
15.3k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
12.6k
            cur++;
1444
2.69k
        return(cur[0] == 0);
1445
2.69k
    }
1446
25.8k
    nxt = cur;
1447
141k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
141k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
115k
           nxt++;
1450
25.8k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
1.82k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
1.43k
            return(0);
1456
390
        return(1);
1457
1.82k
    }
1458
23.9k
    if (nxt - cur < 2)
1459
2.64k
        return(0);
1460
    /* we got an ISO 639 code */
1461
21.3k
    if (nxt[0] == 0)
1462
7.37k
        return(1);
1463
13.9k
    if (nxt[0] != '-')
1464
1.42k
        return(0);
1465
1466
12.5k
    nxt++;
1467
12.5k
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
12.5k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
1.07k
        goto region_m49;
1471
1472
57.8k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
57.8k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
46.3k
           nxt++;
1475
11.4k
    if (nxt - cur == 4)
1476
2.90k
        goto script;
1477
8.58k
    if (nxt - cur == 2)
1478
2.60k
        goto region;
1479
5.97k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
1.04k
        goto variant;
1481
4.92k
    if (nxt - cur != 3)
1482
1.39k
        return(0);
1483
    /* we parsed an extlang */
1484
3.53k
    if (nxt[0] == 0)
1485
363
        return(1);
1486
3.17k
    if (nxt[0] != '-')
1487
542
        return(0);
1488
1489
2.62k
    nxt++;
1490
2.62k
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
2.62k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
573
        goto region_m49;
1494
1495
23.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
23.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
21.4k
           nxt++;
1498
2.05k
    if (nxt - cur == 2)
1499
347
        goto region;
1500
1.70k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
456
        goto variant;
1502
1.25k
    if (nxt - cur != 4)
1503
780
        return(0);
1504
    /* we parsed a script */
1505
3.37k
script:
1506
3.37k
    if (nxt[0] == 0)
1507
423
        return(1);
1508
2.94k
    if (nxt[0] != '-')
1509
811
        return(0);
1510
1511
2.13k
    nxt++;
1512
2.13k
    cur = nxt;
1513
    /* now we can have region or variant */
1514
2.13k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
386
        goto region_m49;
1516
1517
25.1k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
25.1k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
23.4k
           nxt++;
1520
1521
1.75k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
411
        goto variant;
1523
1.34k
    if (nxt - cur != 2)
1524
872
        return(0);
1525
    /* we parsed a region */
1526
4.15k
region:
1527
4.15k
    if (nxt[0] == 0)
1528
2.06k
        return(1);
1529
2.09k
    if (nxt[0] != '-')
1530
1.15k
        return(0);
1531
1532
934
    nxt++;
1533
934
    cur = nxt;
1534
    /* now we can just have a variant */
1535
12.3k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
12.3k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
11.4k
           nxt++;
1538
1539
934
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
530
        return(0);
1541
1542
    /* we parsed a variant */
1543
2.31k
variant:
1544
2.31k
    if (nxt[0] == 0)
1545
655
        return(1);
1546
1.66k
    if (nxt[0] != '-')
1547
1.18k
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
472
    return (1);
1550
1551
2.03k
region_m49:
1552
2.03k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
2.03k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
729
        nxt += 3;
1555
729
        goto region;
1556
729
    }
1557
1.30k
    return(0);
1558
2.03k
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
164k
{
1584
164k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
84.0k
        int i;
1586
167k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
109k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
26.3k
          if (ctxt->nsTab[i + 1] == URL)
1590
9.62k
        return(-2);
1591
    /* out of scope keep it */
1592
16.7k
    break;
1593
26.3k
      }
1594
109k
  }
1595
84.0k
    }
1596
154k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
58.8k
  ctxt->nsMax = 10;
1598
58.8k
  ctxt->nsNr = 0;
1599
58.8k
  ctxt->nsTab = (const xmlChar **)
1600
58.8k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
58.8k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
95.6k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
5.01k
        const xmlChar ** tmp;
1608
5.01k
        ctxt->nsMax *= 2;
1609
5.01k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
5.01k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
5.01k
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
5.01k
  ctxt->nsTab = tmp;
1617
5.01k
    }
1618
154k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
154k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
154k
    return (ctxt->nsNr);
1621
154k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
55.2k
{
1634
55.2k
    int i;
1635
1636
55.2k
    if (ctxt->nsTab == NULL) return(0);
1637
55.2k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
55.2k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
207k
    for (i = 0;i < nr;i++) {
1645
152k
         ctxt->nsNr--;
1646
152k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
152k
    }
1648
55.2k
    return(nr);
1649
55.2k
}
1650
#endif
1651
1652
static int
1653
132k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
132k
    const xmlChar **atts;
1655
132k
    int *attallocs;
1656
132k
    int maxatts;
1657
1658
132k
    if (nr + 5 > ctxt->maxatts) {
1659
132k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
132k
  atts = (const xmlChar **) xmlMalloc(
1661
132k
             maxatts * sizeof(const xmlChar *));
1662
132k
  if (atts == NULL) goto mem_error;
1663
132k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
132k
                               (maxatts / 5) * sizeof(int));
1665
132k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
132k
        if (ctxt->maxatts > 0)
1670
800
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
132k
        xmlFree(ctxt->atts);
1672
132k
  ctxt->atts = atts;
1673
132k
  ctxt->attallocs = attallocs;
1674
132k
  ctxt->maxatts = maxatts;
1675
132k
    }
1676
132k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
132k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
44.5M
{
1694
44.5M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
44.5M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
2.14k
        size_t newSize = ctxt->inputMax * 2;
1698
2.14k
        xmlParserInputPtr *tmp;
1699
1700
2.14k
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
2.14k
                                               newSize * sizeof(*tmp));
1702
2.14k
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
2.14k
        ctxt->inputTab = tmp;
1707
2.14k
        ctxt->inputMax = newSize;
1708
2.14k
    }
1709
44.5M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
44.5M
    ctxt->input = value;
1711
44.5M
    return (ctxt->inputNr++);
1712
44.5M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
47.6M
{
1724
47.6M
    xmlParserInputPtr ret;
1725
1726
47.6M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
47.6M
    if (ctxt->inputNr <= 0)
1729
3.14M
        return (NULL);
1730
44.5M
    ctxt->inputNr--;
1731
44.5M
    if (ctxt->inputNr > 0)
1732
43.7M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
799k
    else
1734
799k
        ctxt->input = NULL;
1735
44.5M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
44.5M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
44.5M
    return (ret);
1738
47.6M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
11.4M
{
1751
11.4M
    if (ctxt == NULL) return(0);
1752
11.4M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
9.44k
        xmlNodePtr *tmp;
1754
1755
9.44k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
9.44k
                                      ctxt->nodeMax * 2 *
1757
9.44k
                                      sizeof(ctxt->nodeTab[0]));
1758
9.44k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
9.44k
        ctxt->nodeTab = tmp;
1763
9.44k
  ctxt->nodeMax *= 2;
1764
9.44k
    }
1765
11.4M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
11.4M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
11.4M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
11.4M
    ctxt->node = value;
1775
11.4M
    return (ctxt->nodeNr++);
1776
11.4M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
10.8M
{
1789
10.8M
    xmlNodePtr ret;
1790
1791
10.8M
    if (ctxt == NULL) return(NULL);
1792
10.8M
    if (ctxt->nodeNr <= 0)
1793
146k
        return (NULL);
1794
10.7M
    ctxt->nodeNr--;
1795
10.7M
    if (ctxt->nodeNr > 0)
1796
10.3M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
433k
    else
1798
433k
        ctxt->node = NULL;
1799
10.7M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
10.7M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
10.7M
    return (ret);
1802
10.8M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
9.84M
{
1821
9.84M
    xmlStartTag *tag;
1822
1823
9.84M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
22.7k
        const xmlChar * *tmp;
1825
22.7k
        xmlStartTag *tmp2;
1826
22.7k
        ctxt->nameMax *= 2;
1827
22.7k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
22.7k
                                    ctxt->nameMax *
1829
22.7k
                                    sizeof(ctxt->nameTab[0]));
1830
22.7k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
22.7k
  ctxt->nameTab = tmp;
1835
22.7k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
22.7k
                                    ctxt->nameMax *
1837
22.7k
                                    sizeof(ctxt->pushTab[0]));
1838
22.7k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
22.7k
  ctxt->pushTab = tmp2;
1843
9.82M
    } else if (ctxt->pushTab == NULL) {
1844
415k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
415k
                                            sizeof(ctxt->pushTab[0]));
1846
415k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
415k
    }
1849
9.84M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
9.84M
    ctxt->name = value;
1851
9.84M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
9.84M
    tag->prefix = prefix;
1853
9.84M
    tag->URI = URI;
1854
9.84M
    tag->line = line;
1855
9.84M
    tag->nsNr = nsNr;
1856
9.84M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
9.84M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack. The entry below it,
 * if any, becomes the current name (ctxt->name); otherwise the current
 * name is reset to NULL. The vacated slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *top;
    int idx;

    if (ctxt->nameNr <= 0)
        return (NULL);

    /* idx is the slot being vacated. */
    idx = --ctxt->nameNr;
    ctxt->name = (idx > 0) ? ctxt->nameTab[idx - 1] : NULL;

    top = ctxt->nameTab[idx];
    ctxt->nameTab[idx] = NULL;
    return (top);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
6.93M
{
1931
6.93M
    const xmlChar *ret;
1932
1933
6.93M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
6.93M
    ctxt->nameNr--;
1936
6.93M
    if (ctxt->nameNr > 0)
1937
6.47M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
467k
    else
1939
467k
        ctxt->name = NULL;
1940
6.93M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
6.93M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
6.93M
    return (ret);
1943
6.93M
}
1944
1945
13.3M
/*
 * spacePush:
 * Pushes val on top of the space stack (ctxt->spaceTab) and points
 * ctxt->space at the new top entry. The table is doubled when full.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the doubling, the old table is still in place. */
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;
    return(idx);
}
1963
1964
12.8M
/*
 * spacePop:
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below, or to slot 0 when the stack becomes empty. The vacated
 * slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int idx;

    if (ctxt->spaceNr <= 0)
        return(0);

    /* idx is the slot being vacated. */
    idx = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(idx > 0) ? (idx - 1) : 0];

    popped = ctxt->spaceTab[idx];
    ctxt->spaceTab[idx] = -1;
    return(popped);
}
1976
1977
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2011
2012
601M
/*
 * Raw accessors on the current input. All of them expect a variable
 * named ctxt to be in scope at the point of use.
 */

/* Current xmlChar; RAW and CUR expand to the same expression. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)

/* xmlChar located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]

/* Current read pointer and start of the current input buffer. */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base
2017
2018
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal
 * c1..cn, compared as unsigned char values. The comparison stops at
 * the first mismatch, so bytes after a mismatch are never read.
 * Each macro is built on top of the previous one.
 *
 * Every argument is parenthesized so that expressions such as
 * "ptr + 1" can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
193M
/*
 * SKIP(val): advance the read pointer and the column counter of the
 * current input by val xmlChars, then ask for more input when the new
 * position holds a 0 byte. No line accounting is done, so the skipped
 * characters must not contain newlines. val is evaluated twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)

/*
 * SKIPL(val): advance the read pointer by val xmlChars one at a time,
 * updating the line and column counters for each of them, then ask for
 * more input when the new position holds a 0 byte.
 * val is parenthesized so that any expression can be passed; it is
 * evaluated on every loop iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
  } while (0)
2053
2054
183M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
183M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
183M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
183M
  xmlSHRINK (ctxt);
2058
2059
2.18M
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
2.18M
    if ((ctxt->input->buf) &&
2062
2.18M
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
10.9k
        xmlParserInputShrink(ctxt->input);
2064
2.18M
    if (*ctxt->input->cur == 0)
2065
87.8k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
2.18M
}
2067
2068
604M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
604M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
604M
  xmlGROW (ctxt);
2071
2072
109M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
109M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
109M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
109M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
109M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
109M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
109M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
109M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
109M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
109M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
109M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
1.91M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
109M
}
2095
2096
156M
/* Skip blanks on the current input, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the column counter and the read pointer by one, then
 * ask for more input when the new position holds a 0 byte. No line
 * accounting is done. Expands to a brace block.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): account for one character in the line/column counters
 * (a '\n' starts a new line) and advance the read pointer by l.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
  } while (0)

/* Current character of the input, l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* Same, reading from the string s instead of the input. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): store v at b[i] and advance i; when l is 1 a
 * single xmlChar is written, otherwise xmlCopyCharMultiByte() does the
 * encoding. Expands to an unbraced if/else statement, so the caller
 * supplies the final ';'.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (v);                                     \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
156M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
156M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
156M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
156M
        (ctxt->instate == XML_PARSER_START)) {
2141
59.0M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
59.0M
  cur = ctxt->input->cur;
2146
59.0M
  while (IS_BLANK_CH(*cur)) {
2147
25.3M
      if (*cur == '\n') {
2148
1.16M
    ctxt->input->line++; ctxt->input->col = 1;
2149
24.2M
      } else {
2150
24.2M
    ctxt->input->col++;
2151
24.2M
      }
2152
25.3M
      cur++;
2153
25.3M
      if (res < INT_MAX)
2154
25.3M
    res++;
2155
25.3M
      if (*cur == 0) {
2156
91.3k
    ctxt->input->cur = cur;
2157
91.3k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
91.3k
    cur = ctxt->input->cur;
2159
91.3k
      }
2160
25.3M
  }
2161
59.0M
  ctxt->input->cur = cur;
2162
96.9M
    } else {
2163
96.9M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
302M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
302M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
109M
    NEXT;
2168
193M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
53.0M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
629k
                    break;
2174
52.4M
          xmlParsePEReference(ctxt);
2175
140M
            } else if (CUR == 0) {
2176
43.7M
                unsigned long consumed;
2177
43.7M
                xmlEntityPtr ent;
2178
2179
43.7M
                if (ctxt->inputNr <= 1)
2180
41.6k
                    break;
2181
2182
43.7M
                consumed = ctxt->input->consumed;
2183
43.7M
                xmlSaturatedAddSizeT(&consumed,
2184
43.7M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
43.7M
                ent = ctxt->input->entity;
2191
43.7M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
43.7M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
17.7k
                    ent->flags |= XML_ENT_PARSED;
2194
2195
17.7k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
17.7k
                }
2197
2198
43.7M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
43.7M
                xmlPopInput(ctxt);
2201
96.3M
            } else {
2202
96.3M
                break;
2203
96.3M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
205M
      if (res < INT_MAX)
2213
205M
    res++;
2214
205M
        }
2215
96.9M
    }
2216
156M
    return(res);
2217
156M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
43.7M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
43.7M
    xmlParserInputPtr input;
2237
2238
43.7M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
43.7M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
43.7M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
43.7M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
43.7M
    input = inputPop(ctxt);
2247
43.7M
    if (input->entity != NULL)
2248
43.7M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
43.7M
    xmlFreeInputStream(input);
2250
43.7M
    if (*ctxt->input->cur == 0)
2251
20.6M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
43.7M
    return(CUR);
2253
43.7M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
43.7M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
43.7M
    int ret;
2267
43.7M
    if (input == NULL) return(-1);
2268
2269
43.7M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
43.7M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
43.7M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
43.7M
    ret = inputPush(ctxt, input);
2285
43.7M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
43.7M
    GROW;
2288
43.7M
    return(ret);
2289
43.7M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
445k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
445k
    int val = 0;
2311
445k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
445k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
445k
        (NXT(2) == 'x')) {
2318
151k
  SKIP(3);
2319
151k
  GROW;
2320
642k
  while (RAW != ';') { /* loop blocked by count */
2321
511k
      if (count++ > 20) {
2322
27.0k
    count = 0;
2323
27.0k
    GROW;
2324
27.0k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
27.0k
      }
2327
511k
      if ((RAW >= '0') && (RAW <= '9'))
2328
370k
          val = val * 16 + (CUR - '0');
2329
140k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
102k
          val = val * 16 + (CUR - 'a') + 10;
2331
38.4k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
18.4k
          val = val * 16 + (CUR - 'A') + 10;
2333
19.9k
      else {
2334
19.9k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
19.9k
    val = 0;
2336
19.9k
    break;
2337
19.9k
      }
2338
491k
      if (val > 0x110000)
2339
300k
          val = 0x110000;
2340
2341
491k
      NEXT;
2342
491k
      count++;
2343
491k
  }
2344
151k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
131k
      ctxt->input->col++;
2347
131k
      ctxt->input->cur++;
2348
131k
  }
2349
294k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
294k
  SKIP(2);
2351
294k
  GROW;
2352
1.42M
  while (RAW != ';') { /* loop blocked by count */
2353
1.16M
      if (count++ > 20) {
2354
36.0k
    count = 0;
2355
36.0k
    GROW;
2356
36.0k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
36.0k
      }
2359
1.16M
      if ((RAW >= '0') && (RAW <= '9'))
2360
1.13M
          val = val * 10 + (CUR - '0');
2361
34.2k
      else {
2362
34.2k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
34.2k
    val = 0;
2364
34.2k
    break;
2365
34.2k
      }
2366
1.13M
      if (val > 0x110000)
2367
394k
          val = 0x110000;
2368
2369
1.13M
      NEXT;
2370
1.13M
      count++;
2371
1.13M
  }
2372
294k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
259k
      ctxt->input->col++;
2375
259k
      ctxt->input->cur++;
2376
259k
  }
2377
294k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
445k
    if (val >= 0x110000) {
2389
1.28k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
1.28k
                "xmlParseCharRef: character reference out of bounds\n",
2391
1.28k
          val);
2392
444k
    } else if (IS_CHAR(val)) {
2393
384k
        return(val);
2394
384k
    } else {
2395
59.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
59.9k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
59.9k
                    val);
2398
59.9k
    }
2399
61.2k
    return(0);
2400
445k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
276k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
276k
    const xmlChar *ptr;
2423
276k
    xmlChar cur;
2424
276k
    int val = 0;
2425
2426
276k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
276k
    ptr = *str;
2428
276k
    cur = *ptr;
2429
276k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
39.9k
  ptr += 3;
2431
39.9k
  cur = *ptr;
2432
119k
  while (cur != ';') { /* Non input consuming loop */
2433
81.8k
      if ((cur >= '0') && (cur <= '9'))
2434
44.2k
          val = val * 16 + (cur - '0');
2435
37.6k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
8.56k
          val = val * 16 + (cur - 'a') + 10;
2437
29.0k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
26.9k
          val = val * 16 + (cur - 'A') + 10;
2439
2.12k
      else {
2440
2.12k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
2.12k
    val = 0;
2442
2.12k
    break;
2443
2.12k
      }
2444
79.7k
      if (val > 0x110000)
2445
28.9k
          val = 0x110000;
2446
2447
79.7k
      ptr++;
2448
79.7k
      cur = *ptr;
2449
79.7k
  }
2450
39.9k
  if (cur == ';')
2451
37.7k
      ptr++;
2452
236k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
236k
  ptr += 2;
2454
236k
  cur = *ptr;
2455
788k
  while (cur != ';') { /* Non input consuming loops */
2456
555k
      if ((cur >= '0') && (cur <= '9'))
2457
551k
          val = val * 10 + (cur - '0');
2458
3.77k
      else {
2459
3.77k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
3.77k
    val = 0;
2461
3.77k
    break;
2462
3.77k
      }
2463
551k
      if (val > 0x110000)
2464
10.8k
          val = 0x110000;
2465
2466
551k
      ptr++;
2467
551k
      cur = *ptr;
2468
551k
  }
2469
236k
  if (cur == ';')
2470
233k
      ptr++;
2471
236k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
276k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
276k
    if (val >= 0x110000) {
2483
501
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
501
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
501
                val);
2486
276k
    } else if (IS_CHAR(val)) {
2487
268k
        return(val);
2488
268k
    } else {
2489
8.15k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
8.15k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
8.15k
        val);
2492
8.15k
    }
2493
8.65k
    return(0);
2494
276k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current one plus n; the macro jumps to
 * mem_error when that computation yields a smaller size than before
 * or when the reallocation fails. On failure buffer and buffer##_size
 * are left untouched. n is parenthesized so that any expression can
 * be passed.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
53.3M
                           int check) {
2617
53.3M
    xmlChar *buffer = NULL;
2618
53.3M
    size_t buffer_size = 0;
2619
53.3M
    size_t nbchars = 0;
2620
2621
53.3M
    xmlChar *current = NULL;
2622
53.3M
    xmlChar *rep = NULL;
2623
53.3M
    const xmlChar *last;
2624
53.3M
    xmlEntityPtr ent;
2625
53.3M
    int c,l;
2626
2627
53.3M
    if (str == NULL)
2628
22.0k
        return(NULL);
2629
53.3M
    last = str + len;
2630
2631
53.3M
    if (((ctxt->depth > 40) &&
2632
53.3M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
53.3M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
53.3M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
53.3M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
53.3M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
53.3M
    if (str < last)
2651
53.2M
  c = CUR_SCHAR(str, l);
2652
130k
    else
2653
130k
        c = 0;
2654
4.42G
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
4.42G
           (c != end2) && (c != end3) &&
2656
4.42G
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
4.37G
  if (c == 0) break;
2659
4.37G
        if ((c == '&') && (str[1] == '#')) {
2660
276k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
276k
      if (val == 0)
2662
8.65k
                goto int_error;
2663
268k
      COPY_BUF(0,buffer,nbchars,val);
2664
268k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
266
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
266
      }
2667
4.37G
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
395M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
395M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
395M
      if ((ent != NULL) &&
2674
395M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
194k
    if (ent->content != NULL) {
2676
194k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
194k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
6.34k
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
6.34k
        }
2680
194k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
395M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
47.7M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
423
                    goto int_error;
2688
2689
47.7M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
557
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
557
                    xmlHaltParser(ctxt);
2692
557
                    ent->content[0] = 0;
2693
557
                    goto int_error;
2694
557
                }
2695
2696
47.7M
                ent->flags |= XML_ENT_EXPANDING;
2697
47.7M
    ctxt->depth++;
2698
47.7M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
47.7M
                        ent->length, what, 0, 0, 0, check);
2700
47.7M
    ctxt->depth--;
2701
47.7M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
47.7M
    if (rep == NULL) {
2704
8.20k
                    ent->content[0] = 0;
2705
8.20k
                    goto int_error;
2706
8.20k
                }
2707
2708
47.7M
                current = rep;
2709
8.53G
                while (*current != 0) { /* non input consuming loop */
2710
8.48G
                    buffer[nbchars++] = *current++;
2711
8.48G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
4.70M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
4.70M
                    }
2714
8.48G
                }
2715
47.7M
                xmlFree(rep);
2716
47.7M
                rep = NULL;
2717
347M
      } else if (ent != NULL) {
2718
3.00M
    int i = xmlStrlen(ent->name);
2719
3.00M
    const xmlChar *cur = ent->name;
2720
2721
3.00M
    buffer[nbchars++] = '&';
2722
3.00M
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
2.65k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
2.65k
    }
2725
20.9M
    for (;i > 0;i--)
2726
17.9M
        buffer[nbchars++] = *cur++;
2727
3.00M
    buffer[nbchars++] = ';';
2728
3.00M
      }
2729
3.97G
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
4.78M
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
4.78M
      ent = xmlParseStringPEReference(ctxt, &str);
2734
4.78M
      if (ent != NULL) {
2735
2.54M
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
5.21k
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
5.21k
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
5.21k
      (ctxt->validate != 0)) {
2745
4.65k
      xmlLoadEntityContent(ctxt, ent);
2746
4.65k
        } else {
2747
561
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
561
      "not validating will not read content for PE entity %s\n",
2749
561
                          ent->name, NULL);
2750
561
        }
2751
5.21k
    }
2752
2753
2.54M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
297
                    goto int_error;
2755
2756
2.54M
                if (ent->flags & XML_ENT_EXPANDING) {
2757
361
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
361
                    xmlHaltParser(ctxt);
2759
361
                    if (ent->content != NULL)
2760
195
                        ent->content[0] = 0;
2761
361
                    goto int_error;
2762
361
                }
2763
2764
2.54M
                ent->flags |= XML_ENT_EXPANDING;
2765
2.54M
    ctxt->depth++;
2766
2.54M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
2.54M
                        ent->length, what, 0, 0, 0, check);
2768
2.54M
    ctxt->depth--;
2769
2.54M
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
2.54M
    if (rep == NULL) {
2772
3.57k
                    if (ent->content != NULL)
2773
570
                        ent->content[0] = 0;
2774
3.57k
                    goto int_error;
2775
3.57k
                }
2776
2.53M
                current = rep;
2777
2.76G
                while (*current != 0) { /* non input consuming loop */
2778
2.75G
                    buffer[nbchars++] = *current++;
2779
2.75G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
477k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
477k
                    }
2782
2.75G
                }
2783
2.53M
                xmlFree(rep);
2784
2.53M
                rep = NULL;
2785
2.53M
      }
2786
3.97G
  } else {
2787
3.97G
      COPY_BUF(l,buffer,nbchars,c);
2788
3.97G
      str += l;
2789
3.97G
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
732k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
732k
      }
2792
3.97G
  }
2793
4.37G
  if (str < last)
2794
4.31G
      c = CUR_SCHAR(str, l);
2795
53.1M
  else
2796
53.1M
      c = 0;
2797
4.37G
    }
2798
53.3M
    buffer[nbchars] = 0;
2799
53.3M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
22.0k
int_error:
2804
22.0k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
22.0k
    if (buffer != NULL)
2807
22.0k
        xmlFree(buffer);
2808
22.0k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
13.9k
                           xmlChar end3) {
2836
13.9k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
13.9k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
13.9k
                                      end, end2, end3, 0));
2840
13.9k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
230k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
230k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
230k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
230k
                                      end, end2, end3, 0));
2868
230k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
6.46M
                     int blank_chars) {
2890
6.46M
    int i, ret;
2891
6.46M
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
6.46M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
435k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
6.03M
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
6.03M
        (*(ctxt->space) == -2))
2905
2.56M
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
3.47M
    if (blank_chars == 0) {
2911
8.36M
  for (i = 0;i < len;i++)
2912
7.19M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
1.60M
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
3.03M
    if (ctxt->node == NULL) return(0);
2919
2.97M
    if (ctxt->myDoc != NULL) {
2920
2.97M
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
2.97M
        if (ret == 0) return(1);
2922
2.70M
        if (ret == 1) return(0);
2923
2.70M
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
2.68M
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
2.64M
    if ((ctxt->node->children == NULL) &&
2930
2.64M
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
2.64M
    lastChild = xmlGetLastChild(ctxt->node);
2933
2.64M
    if (lastChild == NULL) {
2934
495k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
495k
            (ctxt->node->content != NULL)) return(0);
2936
2.15M
    } else if (xmlNodeIsText(lastChild))
2937
37.3k
        return(0);
2938
2.11M
    else if ((ctxt->node->children != NULL) &&
2939
2.11M
             (xmlNodeIsText(ctxt->node->children)))
2940
23.2k
        return(0);
2941
2.58M
    return(1);
2942
2.64M
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
11.4M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
11.4M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
11.4M
    xmlChar *buffer = NULL;
2973
11.4M
    int len = 0;
2974
11.4M
    int max = XML_MAX_NAMELEN;
2975
11.4M
    xmlChar *ret = NULL;
2976
11.4M
    const xmlChar *cur = name;
2977
11.4M
    int c;
2978
2979
11.4M
    if (prefix == NULL) return(NULL);
2980
11.4M
    *prefix = NULL;
2981
2982
11.4M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
11.4M
    if (cur[0] == ':')
2993
5.52k
  return(xmlStrdup(name));
2994
2995
11.3M
    c = *cur++;
2996
52.5M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
41.1M
  buf[len++] = c;
2998
41.1M
  c = *cur++;
2999
41.1M
    }
3000
11.3M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
3.91k
  max = len * 2;
3006
3007
3.91k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
3.91k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
3.91k
  memcpy(buffer, buf, len);
3013
3.91M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
3.90M
      if (len + 10 > max) {
3015
5.13k
          xmlChar *tmp;
3016
3017
5.13k
    max *= 2;
3018
5.13k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
5.13k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
5.13k
    buffer = tmp;
3025
5.13k
      }
3026
3.90M
      buffer[len++] = c;
3027
3.90M
      c = *cur++;
3028
3.90M
  }
3029
3.91k
  buffer[len] = 0;
3030
3.91k
    }
3031
3032
11.3M
    if ((c == ':') && (*cur == 0)) {
3033
7.73k
        if (buffer != NULL)
3034
531
      xmlFree(buffer);
3035
7.73k
  *prefix = NULL;
3036
7.73k
  return(xmlStrdup(name));
3037
7.73k
    }
3038
3039
11.3M
    if (buffer == NULL)
3040
11.3M
  ret = xmlStrndup(buf, len);
3041
3.38k
    else {
3042
3.38k
  ret = buffer;
3043
3.38k
  buffer = NULL;
3044
3.38k
  max = XML_MAX_NAMELEN;
3045
3.38k
    }
3046
3047
3048
11.3M
    if (c == ':') {
3049
717k
  c = *cur;
3050
717k
        *prefix = ret;
3051
717k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
717k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
717k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
717k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
717k
        (c == '_') || (c == ':'))) {
3063
9.32k
      int l;
3064
9.32k
      int first = CUR_SCHAR(cur, l);
3065
3066
9.32k
      if (!IS_LETTER(first) && (first != '_')) {
3067
4.40k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
4.40k
          "Name %s is not XML Namespace compliant\n",
3069
4.40k
          name);
3070
4.40k
      }
3071
9.32k
  }
3072
717k
  cur++;
3073
3074
4.47M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
3.75M
      buf[len++] = c;
3076
3.75M
      c = *cur++;
3077
3.75M
  }
3078
717k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
3.11k
      max = len * 2;
3084
3085
3.11k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
3.11k
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
3.11k
      memcpy(buffer, buf, len);
3091
2.80M
      while (c != 0) { /* tested bigname2.xml */
3092
2.80M
    if (len + 10 > max) {
3093
3.32k
        xmlChar *tmp;
3094
3095
3.32k
        max *= 2;
3096
3.32k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
3.32k
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
3.32k
        buffer = tmp;
3103
3.32k
    }
3104
2.80M
    buffer[len++] = c;
3105
2.80M
    c = *cur++;
3106
2.80M
      }
3107
3.11k
      buffer[len] = 0;
3108
3.11k
  }
3109
3110
717k
  if (buffer == NULL)
3111
714k
      ret = xmlStrndup(buf, len);
3112
3.11k
  else {
3113
3.11k
      ret = buffer;
3114
3.11k
  }
3115
717k
    }
3116
3117
11.3M
    return(ret);
3118
11.3M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
/*
 * Debug-only call counters. They have static storage duration and are
 * therefore zero-initialized.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
65.6M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
65.6M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
57.5M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
57.5M
      (((c >= 'a') && (c <= 'z')) ||
3160
57.5M
       ((c >= 'A') && (c <= 'Z')) ||
3161
57.5M
       (c == '_') || (c == ':') ||
3162
57.5M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
57.5M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
57.5M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
57.5M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
57.5M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
57.5M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
57.5M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
57.5M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
57.5M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
57.5M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
57.5M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
57.5M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
57.2M
      return(1);
3175
57.5M
    } else {
3176
8.15M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
8.01M
      return(1);
3178
8.15M
    }
3179
416k
    return(0);
3180
65.6M
}
3181
3182
static int
3183
1.38G
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
1.38G
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
1.33G
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
1.33G
      (((c >= 'a') && (c <= 'z')) ||
3191
1.33G
       ((c >= 'A') && (c <= 'Z')) ||
3192
1.33G
       ((c >= '0') && (c <= '9')) || /* !start */
3193
1.33G
       (c == '_') || (c == ':') ||
3194
1.33G
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
1.33G
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
1.33G
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
1.33G
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
1.33G
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
1.33G
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
1.33G
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
1.33G
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
1.33G
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
1.33G
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
1.33G
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
1.33G
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
1.33G
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
1.33G
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
1.33G
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
1.27G
       return(1);
3210
1.33G
    } else {
3211
50.2M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
50.2M
            (c == '.') || (c == '-') ||
3213
50.2M
      (c == '_') || (c == ':') ||
3214
50.2M
      (IS_COMBINING(c)) ||
3215
50.2M
      (IS_EXTENDER(c)))
3216
41.7M
      return(1);
3217
50.2M
    }
3218
66.5M
    return(0);
3219
1.38G
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
1.14M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
1.14M
    int len = 0, l;
3227
1.14M
    int c;
3228
1.14M
    int count = 0;
3229
1.14M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
408k
                    XML_MAX_TEXT_LENGTH :
3231
1.14M
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
1.14M
    GROW;
3241
1.14M
    if (ctxt->instate == XML_PARSER_EOF)
3242
59
        return(NULL);
3243
1.14M
    c = CUR_CHAR(l);
3244
1.14M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
618k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
618k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
591k
         ((c >= 'A') && (c <= 'Z')) ||
3252
591k
         (c == '_') || (c == ':') ||
3253
591k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
591k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
591k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
591k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
591k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
591k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
591k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
591k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
591k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
591k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
591k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
591k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
319k
      return(NULL);
3266
319k
  }
3267
298k
  len += l;
3268
298k
  NEXTL(l);
3269
298k
  c = CUR_CHAR(l);
3270
5.64M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
5.64M
         (((c >= 'a') && (c <= 'z')) ||
3272
5.60M
          ((c >= 'A') && (c <= 'Z')) ||
3273
5.60M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
5.60M
          (c == '_') || (c == ':') ||
3275
5.60M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
5.60M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
5.60M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
5.60M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
5.60M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
5.60M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
5.60M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
5.60M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
5.60M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
5.60M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
5.60M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
5.60M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
5.60M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
5.60M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
5.60M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
5.60M
    )) {
3291
5.34M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
37.9k
    count = 0;
3293
37.9k
    GROW;
3294
37.9k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
37.9k
      }
3297
5.34M
            if (len <= INT_MAX - l)
3298
5.34M
          len += l;
3299
5.34M
      NEXTL(l);
3300
5.34M
      c = CUR_CHAR(l);
3301
5.34M
  }
3302
524k
    } else {
3303
524k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
524k
      (!IS_LETTER(c) && (c != '_') &&
3305
502k
       (c != ':'))) {
3306
282k
      return(NULL);
3307
282k
  }
3308
242k
  len += l;
3309
242k
  NEXTL(l);
3310
242k
  c = CUR_CHAR(l);
3311
3312
4.69M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
4.69M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
4.65M
    (c == '.') || (c == '-') ||
3315
4.65M
    (c == '_') || (c == ':') ||
3316
4.65M
    (IS_COMBINING(c)) ||
3317
4.65M
    (IS_EXTENDER(c)))) {
3318
4.44M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
32.3k
    count = 0;
3320
32.3k
    GROW;
3321
32.3k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
32.3k
      }
3324
4.44M
            if (len <= INT_MAX - l)
3325
4.44M
          len += l;
3326
4.44M
      NEXTL(l);
3327
4.44M
      c = CUR_CHAR(l);
3328
4.44M
  }
3329
242k
    }
3330
541k
    if (len > maxLength) {
3331
6
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
6
        return(NULL);
3333
6
    }
3334
541k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
541k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
2.72k
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
538k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
541k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
84.9M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
84.9M
    const xmlChar *in;
3370
84.9M
    const xmlChar *ret;
3371
84.9M
    size_t count = 0;
3372
84.9M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
18.8M
                       XML_MAX_TEXT_LENGTH :
3374
84.9M
                       XML_MAX_NAME_LENGTH;
3375
3376
84.9M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
84.9M
    in = ctxt->input->cur;
3386
84.9M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
84.9M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
84.9M
  (*in == '_') || (*in == ':')) {
3389
84.2M
  in++;
3390
364M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
364M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
364M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
364M
         (*in == '_') || (*in == '-') ||
3394
364M
         (*in == ':') || (*in == '.'))
3395
280M
      in++;
3396
84.2M
  if ((*in > 0) && (*in < 0x80)) {
3397
83.8M
      count = in - ctxt->input->cur;
3398
83.8M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
83.8M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
83.8M
      ctxt->input->cur = in;
3404
83.8M
      ctxt->input->col += count;
3405
83.8M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
83.8M
      return(ret);
3408
83.8M
  }
3409
84.2M
    }
3410
    /* accelerator for special cases */
3411
1.14M
    return(xmlParseNameComplex(ctxt));
3412
84.9M
}
3413
3414
static const xmlChar *
3415
556k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
556k
    int len = 0, l;
3417
556k
    int c;
3418
556k
    int count = 0;
3419
556k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
227k
                    XML_MAX_TEXT_LENGTH :
3421
556k
                    XML_MAX_NAME_LENGTH;
3422
556k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
556k
    GROW;
3432
556k
    startPosition = CUR_PTR - BASE_PTR;
3433
556k
    c = CUR_CHAR(l);
3434
556k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
556k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
428k
  return(NULL);
3437
428k
    }
3438
3439
4.12M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
4.12M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
4.00M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
31.9k
      count = 0;
3443
31.9k
      GROW;
3444
31.9k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
31.9k
  }
3447
4.00M
        if (len <= INT_MAX - l)
3448
4.00M
      len += l;
3449
4.00M
  NEXTL(l);
3450
4.00M
  c = CUR_CHAR(l);
3451
4.00M
  if (c == 0) {
3452
11.4k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
11.4k
      ctxt->input->cur -= l;
3459
11.4k
      GROW;
3460
11.4k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
11.4k
      ctxt->input->cur += l;
3463
11.4k
      c = CUR_CHAR(l);
3464
11.4k
  }
3465
4.00M
    }
3466
128k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
128k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
128k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
19.0M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
19.0M
    const xmlChar *in, *e;
3491
19.0M
    const xmlChar *ret;
3492
19.0M
    size_t count = 0;
3493
19.0M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
5.65M
                       XML_MAX_TEXT_LENGTH :
3495
19.0M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
19.0M
    in = ctxt->input->cur;
3505
19.0M
    e = ctxt->input->end;
3506
19.0M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
19.0M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
19.0M
   (*in == '_')) && (in < e)) {
3509
18.5M
  in++;
3510
66.5M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
66.5M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
66.5M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
66.5M
          (*in == '_') || (*in == '-') ||
3514
66.5M
          (*in == '.')) && (in < e))
3515
47.9M
      in++;
3516
18.5M
  if (in >= e)
3517
3.05k
      goto complex;
3518
18.5M
  if ((*in > 0) && (*in < 0x80)) {
3519
18.4M
      count = in - ctxt->input->cur;
3520
18.4M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
18.4M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
18.4M
      ctxt->input->cur = in;
3526
18.4M
      ctxt->input->col += count;
3527
18.4M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
18.4M
      return(ret);
3531
18.4M
  }
3532
18.5M
    }
3533
556k
complex:
3534
556k
    return(xmlParseNCNameComplex(ctxt));
3535
19.0M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
6.01M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
6.01M
    register const xmlChar *cmp = other;
3551
6.01M
    register const xmlChar *in;
3552
6.01M
    const xmlChar *ret;
3553
3554
6.01M
    GROW;
3555
6.01M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
6.01M
    in = ctxt->input->cur;
3559
31.0M
    while (*in != 0 && *in == *cmp) {
3560
25.0M
  ++in;
3561
25.0M
  ++cmp;
3562
25.0M
    }
3563
6.01M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
5.79M
  ctxt->input->col += in - ctxt->input->cur;
3566
5.79M
  ctxt->input->cur = in;
3567
5.79M
  return (const xmlChar*) 1;
3568
5.79M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
220k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
220k
    if (ret == other) {
3573
17.0k
  return (const xmlChar*) 1;
3574
17.0k
    }
3575
203k
    return ret;
3576
220k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
65.1M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
65.1M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
65.1M
    const xmlChar *cur = *str;
3600
65.1M
    int len = 0, l;
3601
65.1M
    int c;
3602
65.1M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
19.6M
                    XML_MAX_TEXT_LENGTH :
3604
65.1M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
65.1M
    c = CUR_SCHAR(cur, l);
3611
65.1M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
23.3k
  return(NULL);
3613
23.3k
    }
3614
3615
65.1M
    COPY_BUF(l,buf,len,c);
3616
65.1M
    cur += l;
3617
65.1M
    c = CUR_SCHAR(cur, l);
3618
635M
    while (xmlIsNameChar(ctxt, c)) {
3619
573M
  COPY_BUF(l,buf,len,c);
3620
573M
  cur += l;
3621
573M
  c = CUR_SCHAR(cur, l);
3622
573M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
3.21M
      xmlChar *buffer;
3628
3.21M
      int max = len * 2;
3629
3630
3.21M
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
3.21M
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
3.21M
      memcpy(buffer, buf, len);
3636
730M
      while (xmlIsNameChar(ctxt, c)) {
3637
727M
    if (len + 10 > max) {
3638
3.21M
        xmlChar *tmp;
3639
3640
3.21M
        max *= 2;
3641
3.21M
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
3.21M
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
3.21M
        buffer = tmp;
3648
3.21M
    }
3649
727M
    COPY_BUF(l,buffer,len,c);
3650
727M
    cur += l;
3651
727M
    c = CUR_SCHAR(cur, l);
3652
727M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
727M
      }
3658
3.21M
      buffer[len] = 0;
3659
3.21M
      *str = cur;
3660
3.21M
      return(buffer);
3661
3.21M
  }
3662
573M
    }
3663
61.9M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
61.9M
    *str = cur;
3668
61.9M
    return(xmlStrndup(buf, len));
3669
61.9M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
1.33M
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
1.33M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
1.33M
    int len = 0, l;
3690
1.33M
    int c;
3691
1.33M
    int count = 0;
3692
1.33M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
493k
                    XML_MAX_TEXT_LENGTH :
3694
1.33M
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
1.33M
    GROW;
3701
1.33M
    if (ctxt->instate == XML_PARSER_EOF)
3702
12
        return(NULL);
3703
1.33M
    c = CUR_CHAR(l);
3704
3705
7.84M
    while (xmlIsNameChar(ctxt, c)) {
3706
6.51M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
6.51M
  COPY_BUF(l,buf,len,c);
3711
6.51M
  NEXTL(l);
3712
6.51M
  c = CUR_CHAR(l);
3713
6.51M
  if (c == 0) {
3714
1.93k
      count = 0;
3715
1.93k
      GROW;
3716
1.93k
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
1.93k
            c = CUR_CHAR(l);
3719
1.93k
  }
3720
6.51M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
1.62k
      xmlChar *buffer;
3726
1.62k
      int max = len * 2;
3727
3728
1.62k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
1.62k
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
1.62k
      memcpy(buffer, buf, len);
3734
2.33M
      while (xmlIsNameChar(ctxt, c)) {
3735
2.33M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
23.7k
        count = 0;
3737
23.7k
        GROW;
3738
23.7k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
23.7k
    }
3743
2.33M
    if (len + 10 > max) {
3744
2.53k
        xmlChar *tmp;
3745
3746
2.53k
        max *= 2;
3747
2.53k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
2.53k
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
2.53k
        buffer = tmp;
3754
2.53k
    }
3755
2.33M
    COPY_BUF(l,buffer,len,c);
3756
2.33M
    NEXTL(l);
3757
2.33M
    c = CUR_CHAR(l);
3758
2.33M
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
2.33M
      }
3764
1.62k
      buffer[len] = 0;
3765
1.62k
      return(buffer);
3766
1.62k
  }
3767
6.51M
    }
3768
1.32M
    if (len == 0)
3769
12.9k
        return(NULL);
3770
1.31M
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
1.31M
    return(xmlStrndup(buf, len));
3775
1.31M
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
954k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
954k
    xmlChar *buf = NULL;
3795
954k
    int len = 0;
3796
954k
    int size = XML_PARSER_BUFFER_SIZE;
3797
954k
    int c, l;
3798
954k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
332k
                    XML_MAX_HUGE_LENGTH :
3800
954k
                    XML_MAX_TEXT_LENGTH;
3801
954k
    xmlChar stop;
3802
954k
    xmlChar *ret = NULL;
3803
954k
    const xmlChar *cur = NULL;
3804
954k
    xmlParserInputPtr input;
3805
3806
954k
    if (RAW == '"') stop = '"';
3807
214k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
954k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
954k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
954k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
954k
    input = ctxt->input;
3824
954k
    GROW;
3825
954k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
954k
    NEXT;
3828
954k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
67.8M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
67.8M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
66.8M
  if (len + 5 >= size) {
3841
158k
      xmlChar *tmp;
3842
3843
158k
      size *= 2;
3844
158k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
158k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
158k
      buf = tmp;
3850
158k
  }
3851
66.8M
  COPY_BUF(l,buf,len,c);
3852
66.8M
  NEXTL(l);
3853
3854
66.8M
  GROW;
3855
66.8M
  c = CUR_CHAR(l);
3856
66.8M
  if (c == 0) {
3857
1.86k
      GROW;
3858
1.86k
      c = CUR_CHAR(l);
3859
1.86k
  }
3860
3861
66.8M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
66.8M
    }
3867
954k
    buf[len] = 0;
3868
954k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
954k
    if (c != stop) {
3871
2.62k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
2.62k
        goto error;
3873
2.62k
    }
3874
951k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
951k
    cur = buf;
3882
44.7M
    while (*cur != 0) { /* non input consuming */
3883
43.8M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
717k
      xmlChar *name;
3885
717k
      xmlChar tmp = *cur;
3886
717k
            int nameOk = 0;
3887
3888
717k
      cur++;
3889
717k
      name = xmlParseStringName(ctxt, &cur);
3890
717k
            if (name != NULL) {
3891
714k
                nameOk = 1;
3892
714k
                xmlFree(name);
3893
714k
            }
3894
717k
            if ((nameOk == 0) || (*cur != ';')) {
3895
8.71k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
8.71k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
8.71k
                            tmp);
3898
8.71k
                goto error;
3899
8.71k
      }
3900
708k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
708k
    (ctxt->inputNr == 1)) {
3902
8.00k
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
8.00k
                goto error;
3904
8.00k
      }
3905
700k
      if (*cur == 0)
3906
0
          break;
3907
700k
  }
3908
43.7M
  cur++;
3909
43.7M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
935k
    ++ctxt->depth;
3920
935k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
935k
                                     0, 0, 0, /* check */ 1);
3922
935k
    --ctxt->depth;
3923
3924
935k
    if (orig != NULL) {
3925
935k
        *orig = buf;
3926
935k
        buf = NULL;
3927
935k
    }
3928
3929
954k
error:
3930
954k
    if (buf != NULL)
3931
19.3k
        xmlFree(buf);
3932
954k
    return(ret);
3933
935k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
533k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
533k
    xmlChar limit = 0;
3950
533k
    xmlChar *buf = NULL;
3951
533k
    xmlChar *rep = NULL;
3952
533k
    size_t len = 0;
3953
533k
    size_t buf_size = 0;
3954
533k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
216k
                       XML_MAX_HUGE_LENGTH :
3956
533k
                       XML_MAX_TEXT_LENGTH;
3957
533k
    int c, l, in_space = 0;
3958
533k
    xmlChar *current = NULL;
3959
533k
    xmlEntityPtr ent;
3960
3961
533k
    if (NXT(0) == '"') {
3962
338k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
338k
  limit = '"';
3964
338k
        NEXT;
3965
338k
    } else if (NXT(0) == '\'') {
3966
195k
  limit = '\'';
3967
195k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
195k
        NEXT;
3969
195k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
533k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
533k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
533k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
533k
    c = CUR_CHAR(l);
3985
27.1M
    while (((NXT(0) != limit) && /* checked */
3986
27.1M
            (IS_CHAR(c)) && (c != '<')) &&
3987
27.1M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
26.5M
  if (c == '&') {
3989
3.59M
      in_space = 0;
3990
3.59M
      if (NXT(1) == '#') {
3991
214k
    int val = xmlParseCharRef(ctxt);
3992
3993
214k
    if (val == '&') {
3994
4.19k
        if (ctxt->replaceEntities) {
3995
2.10k
      if (len + 10 > buf_size) {
3996
254
          growBuffer(buf, 10);
3997
254
      }
3998
2.10k
      buf[len++] = '&';
3999
2.10k
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
2.09k
      if (len + 10 > buf_size) {
4005
260
          growBuffer(buf, 10);
4006
260
      }
4007
2.09k
      buf[len++] = '&';
4008
2.09k
      buf[len++] = '#';
4009
2.09k
      buf[len++] = '3';
4010
2.09k
      buf[len++] = '8';
4011
2.09k
      buf[len++] = ';';
4012
2.09k
        }
4013
210k
    } else if (val != 0) {
4014
186k
        if (len + 10 > buf_size) {
4015
2.40k
      growBuffer(buf, 10);
4016
2.40k
        }
4017
186k
        len += xmlCopyChar(0, &buf[len], val);
4018
186k
    }
4019
3.37M
      } else {
4020
3.37M
    ent = xmlParseEntityRef(ctxt);
4021
3.37M
    if ((ent != NULL) &&
4022
3.37M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
76.6k
        if (len + 10 > buf_size) {
4024
282
      growBuffer(buf, 10);
4025
282
        }
4026
76.6k
        if ((ctxt->replaceEntities == 0) &&
4027
76.6k
            (ent->content[0] == '&')) {
4028
22.1k
      buf[len++] = '&';
4029
22.1k
      buf[len++] = '#';
4030
22.1k
      buf[len++] = '3';
4031
22.1k
      buf[len++] = '8';
4032
22.1k
      buf[len++] = ';';
4033
54.5k
        } else {
4034
54.5k
      buf[len++] = ent->content[0];
4035
54.5k
        }
4036
3.30M
    } else if ((ent != NULL) &&
4037
3.30M
               (ctxt->replaceEntities != 0)) {
4038
1.83M
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
1.83M
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
1.83M
      ++ctxt->depth;
4043
1.83M
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
1.83M
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
1.83M
                                /* check */ 1);
4046
1.83M
      --ctxt->depth;
4047
1.83M
      if (rep != NULL) {
4048
1.81M
          current = rep;
4049
355M
          while (*current != 0) { /* non input consuming */
4050
353M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
353M
                                    (*current == 0x9)) {
4052
469k
                                    buf[len++] = 0x20;
4053
469k
                                    current++;
4054
469k
                                } else
4055
353M
                                    buf[len++] = *current++;
4056
353M
        if (len + 10 > buf_size) {
4057
45.7k
            growBuffer(buf, 10);
4058
45.7k
        }
4059
353M
          }
4060
1.81M
          xmlFree(rep);
4061
1.81M
          rep = NULL;
4062
1.81M
      }
4063
1.83M
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
1.83M
    } else if (ent != NULL) {
4071
1.03M
        int i = xmlStrlen(ent->name);
4072
1.03M
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
1.03M
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
1.03M
      (ent->content != NULL)) {
4081
1.00M
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
17.4k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
17.4k
                            ctxt->sizeentcopy = ent->length;
4085
4086
17.4k
                            ++ctxt->depth;
4087
17.4k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
17.4k
                                    ent->content, ent->length,
4089
17.4k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
17.4k
                                    /* check */ 1);
4091
17.4k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
17.4k
                            if (ctxt->inSubset == 0) {
4100
12.1k
                                ent->flags |= XML_ENT_CHECKED;
4101
12.1k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
12.1k
                            }
4103
4104
17.4k
                            if (rep != NULL) {
4105
17.2k
                                xmlFree(rep);
4106
17.2k
                                rep = NULL;
4107
17.2k
                            } else {
4108
259
                                ent->content[0] = 0;
4109
259
                            }
4110
4111
17.4k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
67
                                goto error;
4113
989k
                        } else {
4114
989k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
989k
                        }
4117
1.00M
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
1.03M
        buf[len++] = '&';
4123
1.04M
        while (len + i + 10 > buf_size) {
4124
11.7k
      growBuffer(buf, i + 10);
4125
11.7k
        }
4126
2.45M
        for (;i > 0;i--)
4127
1.41M
      buf[len++] = *cur++;
4128
1.03M
        buf[len++] = ';';
4129
1.03M
    }
4130
3.37M
      }
4131
22.9M
  } else {
4132
22.9M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
3.73M
          if ((len != 0) || (!normalize)) {
4134
3.55M
        if ((!normalize) || (!in_space)) {
4135
3.08M
      COPY_BUF(l,buf,len,0x20);
4136
3.09M
      while (len + 10 > buf_size) {
4137
15.8k
          growBuffer(buf, 10);
4138
15.8k
      }
4139
3.08M
        }
4140
3.55M
        in_space = 1;
4141
3.55M
    }
4142
19.2M
      } else {
4143
19.2M
          in_space = 0;
4144
19.2M
    COPY_BUF(l,buf,len,c);
4145
19.2M
    if (len + 10 > buf_size) {
4146
92.6k
        growBuffer(buf, 10);
4147
92.6k
    }
4148
19.2M
      }
4149
22.9M
      NEXTL(l);
4150
22.9M
  }
4151
26.5M
  GROW;
4152
26.5M
  c = CUR_CHAR(l);
4153
26.5M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
26.5M
    }
4159
533k
    if (ctxt->instate == XML_PARSER_EOF)
4160
913
        goto error;
4161
4162
532k
    if ((in_space) && (normalize)) {
4163
35.9k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
13.6k
    }
4165
532k
    buf[len] = 0;
4166
532k
    if (RAW == '<') {
4167
101k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
431k
    } else if (RAW != limit) {
4169
79.8k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
36.4k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
36.4k
         "invalid character in attribute value\n");
4172
43.4k
  } else {
4173
43.4k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
43.4k
         "AttValue: ' expected\n");
4175
43.4k
        }
4176
79.8k
    } else
4177
351k
  NEXT;
4178
4179
532k
    if (attlen != NULL) *attlen = len;
4180
532k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
980
error:
4185
980
    if (buf != NULL)
4186
980
        xmlFree(buf);
4187
980
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
980
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
4.65M
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
4.65M
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
4.65M
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
4.65M
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
615k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
615k
    xmlChar *buf = NULL;
4250
615k
    int len = 0;
4251
615k
    int size = XML_PARSER_BUFFER_SIZE;
4252
615k
    int cur, l;
4253
615k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
224k
                    XML_MAX_TEXT_LENGTH :
4255
615k
                    XML_MAX_NAME_LENGTH;
4256
615k
    xmlChar stop;
4257
615k
    int state = ctxt->instate;
4258
615k
    int count = 0;
4259
4260
615k
    SHRINK;
4261
615k
    if (RAW == '"') {
4262
396k
        NEXT;
4263
396k
  stop = '"';
4264
396k
    } else if (RAW == '\'') {
4265
204k
        NEXT;
4266
204k
  stop = '\'';
4267
204k
    } else {
4268
14.5k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
14.5k
  return(NULL);
4270
14.5k
    }
4271
4272
600k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
600k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
600k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
600k
    cur = CUR_CHAR(l);
4279
24.4M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
23.8M
  if (len + 5 >= size) {
4281
18.0k
      xmlChar *tmp;
4282
4283
18.0k
      size *= 2;
4284
18.0k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
18.0k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
18.0k
      buf = tmp;
4292
18.0k
  }
4293
23.8M
  count++;
4294
23.8M
  if (count > 50) {
4295
355k
      SHRINK;
4296
355k
      GROW;
4297
355k
      count = 0;
4298
355k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
355k
  }
4303
23.8M
  COPY_BUF(l,buf,len,cur);
4304
23.8M
  NEXTL(l);
4305
23.8M
  cur = CUR_CHAR(l);
4306
23.8M
  if (cur == 0) {
4307
4.21k
      GROW;
4308
4.21k
      SHRINK;
4309
4.21k
      cur = CUR_CHAR(l);
4310
4.21k
  }
4311
23.8M
        if (len > maxLength) {
4312
142
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
142
            xmlFree(buf);
4314
142
            ctxt->instate = (xmlParserInputState) state;
4315
142
            return(NULL);
4316
142
        }
4317
23.8M
    }
4318
600k
    buf[len] = 0;
4319
600k
    ctxt->instate = (xmlParserInputState) state;
4320
600k
    if (!IS_CHAR(cur)) {
4321
6.17k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
594k
    } else {
4323
594k
  NEXT;
4324
594k
    }
4325
600k
    return(buf);
4326
600k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
242k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
242k
    xmlChar *buf = NULL;
4344
242k
    int len = 0;
4345
242k
    int size = XML_PARSER_BUFFER_SIZE;
4346
242k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
74.0k
                    XML_MAX_TEXT_LENGTH :
4348
242k
                    XML_MAX_NAME_LENGTH;
4349
242k
    xmlChar cur;
4350
242k
    xmlChar stop;
4351
242k
    int count = 0;
4352
242k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
242k
    SHRINK;
4355
242k
    if (RAW == '"') {
4356
53.2k
        NEXT;
4357
53.2k
  stop = '"';
4358
189k
    } else if (RAW == '\'') {
4359
186k
        NEXT;
4360
186k
  stop = '\'';
4361
186k
    } else {
4362
2.97k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
2.97k
  return(NULL);
4364
2.97k
    }
4365
239k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
239k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
239k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
239k
    cur = CUR;
4372
5.27M
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
5.03M
  if (len + 1 >= size) {
4374
5.52k
      xmlChar *tmp;
4375
4376
5.52k
      size *= 2;
4377
5.52k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
5.52k
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
5.52k
      buf = tmp;
4384
5.52k
  }
4385
5.03M
  buf[len++] = cur;
4386
5.03M
  count++;
4387
5.03M
  if (count > 50) {
4388
63.9k
      SHRINK;
4389
63.9k
      GROW;
4390
63.9k
      count = 0;
4391
63.9k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
63.9k
  }
4396
5.03M
  NEXT;
4397
5.03M
  cur = CUR;
4398
5.03M
  if (cur == 0) {
4399
907
      GROW;
4400
907
      SHRINK;
4401
907
      cur = CUR;
4402
907
  }
4403
5.03M
        if (len > maxLength) {
4404
15
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
15
            xmlFree(buf);
4406
15
            return(NULL);
4407
15
        }
4408
5.03M
    }
4409
239k
    buf[len] = 0;
4410
239k
    if (cur != stop) {
4411
13.2k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
226k
    } else {
4413
226k
  NEXT;
4414
226k
    }
4415
239k
    ctxt->instate = oldstate;
4416
239k
    return(buf);
4417
239k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table driving the inner scan loop of xmlParseCharData().
 *
 * An entry is non-zero (it holds the byte's own value) when the byte can
 * be skipped by that loop without further inspection. An entry is zero
 * for every byte that must stop the loop: control characters other than
 * TAB (so LF and CR are handled separately), '&', '<', ']' and all
 * bytes >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x07 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 - 0x0F */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* only TAB passes; LF/CR stop */
    /* 0x10 - 0x17 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 - 0x1F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x27 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* '&' stops */
    /* 0x28 - 0x2F */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x37 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 - 0x3F */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* '<' stops */
    /* 0x40 - 0x47 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 - 0x4F */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x57 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 - 0x5F */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ']' stops */
    /* 0x60 - 0x67 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 - 0x6F */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x77 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 - 0x7F */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0x87 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii from here on */
    /* 0x88 - 0x8F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 - 0x97 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 - 0x9F */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 - 0xA7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 - 0xAF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 - 0xB7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 - 0xBF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 - 0xC7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 - 0xCF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 - 0xD7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 - 0xDF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 - 0xE7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 - 0xEF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 - 0xF7 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 - 0xFF */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
20.9M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
20.9M
    const xmlChar *in;
4482
20.9M
    int nbchar = 0;
4483
20.9M
    int line = ctxt->input->line;
4484
20.9M
    int col = ctxt->input->col;
4485
20.9M
    int ccol;
4486
4487
20.9M
    SHRINK;
4488
20.9M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
20.9M
    in = ctxt->input->cur;
4494
26.9M
    do {
4495
33.4M
get_more_space:
4496
44.9M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
33.4M
        if (*in == 0xA) {
4498
6.82M
            do {
4499
6.82M
                ctxt->input->line++; ctxt->input->col = 1;
4500
6.82M
                in++;
4501
6.82M
            } while (*in == 0xA);
4502
6.57M
            goto get_more_space;
4503
6.57M
        }
4504
26.9M
        if (*in == '<') {
4505
5.42M
            nbchar = in - ctxt->input->cur;
4506
5.42M
            if (nbchar > 0) {
4507
5.42M
                const xmlChar *tmp = ctxt->input->cur;
4508
5.42M
                ctxt->input->cur = in;
4509
4510
5.42M
                if ((ctxt->sax != NULL) &&
4511
5.42M
                    (ctxt->sax->ignorableWhitespace !=
4512
5.42M
                     ctxt->sax->characters)) {
4513
2.29M
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
1.74M
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
1.74M
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
1.74M
                                                   tmp, nbchar);
4517
1.74M
                    } else {
4518
546k
                        if (ctxt->sax->characters != NULL)
4519
546k
                            ctxt->sax->characters(ctxt->userData,
4520
546k
                                                  tmp, nbchar);
4521
546k
                        if (*ctxt->space == -1)
4522
121k
                            *ctxt->space = -2;
4523
546k
                    }
4524
3.13M
                } else if ((ctxt->sax != NULL) &&
4525
3.13M
                           (ctxt->sax->characters != NULL)) {
4526
3.13M
                    ctxt->sax->characters(ctxt->userData,
4527
3.13M
                                          tmp, nbchar);
4528
3.13M
                }
4529
5.42M
            }
4530
5.42M
            return;
4531
5.42M
        }
4532
4533
27.3M
get_more:
4534
27.3M
        ccol = ctxt->input->col;
4535
373M
        while (test_char_data[*in]) {
4536
346M
            in++;
4537
346M
            ccol++;
4538
346M
        }
4539
27.3M
        ctxt->input->col = ccol;
4540
27.3M
        if (*in == 0xA) {
4541
5.55M
            do {
4542
5.55M
                ctxt->input->line++; ctxt->input->col = 1;
4543
5.55M
                in++;
4544
5.55M
            } while (*in == 0xA);
4545
5.41M
            goto get_more;
4546
5.41M
        }
4547
21.9M
        if (*in == ']') {
4548
459k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
12.6k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
12.6k
                ctxt->input->cur = in + 1;
4551
12.6k
                return;
4552
12.6k
            }
4553
447k
            in++;
4554
447k
            ctxt->input->col++;
4555
447k
            goto get_more;
4556
459k
        }
4557
21.4M
        nbchar = in - ctxt->input->cur;
4558
21.4M
        if (nbchar > 0) {
4559
15.1M
            if ((ctxt->sax != NULL) &&
4560
15.1M
                (ctxt->sax->ignorableWhitespace !=
4561
15.1M
                 ctxt->sax->characters) &&
4562
15.1M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
3.11M
                const xmlChar *tmp = ctxt->input->cur;
4564
3.11M
                ctxt->input->cur = in;
4565
4566
3.11M
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
1.10M
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
1.10M
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
1.10M
                                                       tmp, nbchar);
4570
2.01M
                } else {
4571
2.01M
                    if (ctxt->sax->characters != NULL)
4572
2.01M
                        ctxt->sax->characters(ctxt->userData,
4573
2.01M
                                              tmp, nbchar);
4574
2.01M
                    if (*ctxt->space == -1)
4575
411k
                        *ctxt->space = -2;
4576
2.01M
                }
4577
3.11M
                line = ctxt->input->line;
4578
3.11M
                col = ctxt->input->col;
4579
12.0M
            } else if (ctxt->sax != NULL) {
4580
12.0M
                if (ctxt->sax->characters != NULL)
4581
12.0M
                    ctxt->sax->characters(ctxt->userData,
4582
12.0M
                                          ctxt->input->cur, nbchar);
4583
12.0M
                line = ctxt->input->line;
4584
12.0M
                col = ctxt->input->col;
4585
12.0M
            }
4586
15.1M
        }
4587
21.4M
        ctxt->input->cur = in;
4588
21.4M
        if (*in == 0xD) {
4589
6.03M
            in++;
4590
6.03M
            if (*in == 0xA) {
4591
5.99M
                ctxt->input->cur = in;
4592
5.99M
                in++;
4593
5.99M
                ctxt->input->line++; ctxt->input->col = 1;
4594
5.99M
                continue; /* while */
4595
5.99M
            }
4596
37.5k
            in--;
4597
37.5k
        }
4598
15.4M
        if (*in == '<') {
4599
12.2M
            return;
4600
12.2M
        }
4601
3.18M
        if (*in == '&') {
4602
1.07M
            return;
4603
1.07M
        }
4604
2.10M
        SHRINK;
4605
2.10M
        GROW;
4606
2.10M
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
2.10M
        in = ctxt->input->cur;
4609
8.10M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
8.10M
             (*in == 0x09) || (*in == 0x0a));
4611
2.13M
    ctxt->input->line = line;
4612
2.13M
    ctxt->input->col = col;
4613
2.13M
    xmlParseCharDataComplex(ctxt);
4614
2.13M
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
2.13M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
2.13M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
2.13M
    int nbchar = 0;
4631
2.13M
    int cur, l;
4632
2.13M
    int count = 0;
4633
4634
2.13M
    SHRINK;
4635
2.13M
    GROW;
4636
2.13M
    cur = CUR_CHAR(l);
4637
39.3M
    while ((cur != '<') && /* checked */
4638
39.3M
           (cur != '&') &&
4639
39.3M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
37.1M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
9.46k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
9.46k
  }
4643
37.1M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
37.1M
  NEXTL(l);
4646
37.1M
  cur = CUR_CHAR(l);
4647
37.1M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
85.6k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
85.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
66.4k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
319
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
319
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
319
                                     buf, nbchar);
4658
66.1k
    } else {
4659
66.1k
        if (ctxt->sax->characters != NULL)
4660
66.1k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
66.1k
        if ((ctxt->sax->characters !=
4662
66.1k
             ctxt->sax->ignorableWhitespace) &&
4663
66.1k
      (*ctxt->space == -1))
4664
2.33k
      *ctxt->space = -2;
4665
66.1k
    }
4666
66.4k
      }
4667
85.6k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
85.6k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
85.6k
  }
4672
37.1M
  count++;
4673
37.1M
  if (count > 50) {
4674
492k
      SHRINK;
4675
492k
      GROW;
4676
492k
      count = 0;
4677
492k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
492k
  }
4680
37.1M
    }
4681
2.13M
    if (nbchar != 0) {
4682
1.13M
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
1.13M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
992k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
1.94k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
1.94k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
990k
      } else {
4691
990k
    if (ctxt->sax->characters != NULL)
4692
990k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
990k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
990k
        (*ctxt->space == -1))
4695
85.7k
        *ctxt->space = -2;
4696
990k
      }
4697
992k
  }
4698
1.13M
    }
4699
2.13M
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
1.47M
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
1.47M
                          "PCDATA invalid Char value %d\n",
4703
1.47M
                    cur ? cur : CUR);
4704
1.47M
  NEXT;
4705
1.47M
    }
4706
2.13M
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
864k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
864k
    xmlChar *URI = NULL;
4735
4736
864k
    SHRINK;
4737
4738
864k
    *publicID = NULL;
4739
864k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
392k
        SKIP(6);
4741
392k
  if (SKIP_BLANKS == 0) {
4742
949
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
949
                     "Space required after 'SYSTEM'\n");
4744
949
  }
4745
392k
  URI = xmlParseSystemLiteral(ctxt);
4746
392k
  if (URI == NULL) {
4747
2.77k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
2.77k
        }
4749
472k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
242k
        SKIP(6);
4751
242k
  if (SKIP_BLANKS == 0) {
4752
2.06k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
2.06k
        "Space required after 'PUBLIC'\n");
4754
2.06k
  }
4755
242k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
242k
  if (*publicID == NULL) {
4757
2.99k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
2.99k
  }
4759
242k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
221k
      if (SKIP_BLANKS == 0) {
4764
10.0k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
10.0k
      "Space required after the Public Identifier\n");
4766
10.0k
      }
4767
221k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
20.8k
      if (SKIP_BLANKS == 0) return(NULL);
4775
2.03k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
2.03k
  }
4777
222k
  URI = xmlParseSystemLiteral(ctxt);
4778
222k
  if (URI == NULL) {
4779
11.8k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
11.8k
        }
4781
222k
    }
4782
844k
    return(URI);
4783
864k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
1.46M
                       size_t len, size_t size) {
4802
1.46M
    int q, ql;
4803
1.46M
    int r, rl;
4804
1.46M
    int cur, l;
4805
1.46M
    size_t count = 0;
4806
1.46M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
113k
                       XML_MAX_HUGE_LENGTH :
4808
1.46M
                       XML_MAX_TEXT_LENGTH;
4809
1.46M
    int inputid;
4810
4811
1.46M
    inputid = ctxt->input->id;
4812
4813
1.46M
    if (buf == NULL) {
4814
9.75k
        len = 0;
4815
9.75k
  size = XML_PARSER_BUFFER_SIZE;
4816
9.75k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
9.75k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
9.75k
    }
4822
1.46M
    GROW; /* Assure there's enough input data */
4823
1.46M
    q = CUR_CHAR(ql);
4824
1.46M
    if (q == 0)
4825
1.36M
        goto not_terminated;
4826
99.5k
    if (!IS_CHAR(q)) {
4827
9.46k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
9.46k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
9.46k
                    q);
4830
9.46k
  xmlFree (buf);
4831
9.46k
  return;
4832
9.46k
    }
4833
90.1k
    NEXTL(ql);
4834
90.1k
    r = CUR_CHAR(rl);
4835
90.1k
    if (r == 0)
4836
1.97k
        goto not_terminated;
4837
88.1k
    if (!IS_CHAR(r)) {
4838
1.85k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
1.85k
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
1.85k
                    r);
4841
1.85k
  xmlFree (buf);
4842
1.85k
  return;
4843
1.85k
    }
4844
86.2k
    NEXTL(rl);
4845
86.2k
    cur = CUR_CHAR(l);
4846
86.2k
    if (cur == 0)
4847
1.61k
        goto not_terminated;
4848
40.0M
    while (IS_CHAR(cur) && /* checked */
4849
40.0M
           ((cur != '>') ||
4850
40.0M
      (r != '-') || (q != '-'))) {
4851
40.0M
  if ((r == '-') && (q == '-')) {
4852
1.58M
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
1.58M
  }
4854
40.0M
  if (len + 5 >= size) {
4855
47.8k
      xmlChar *new_buf;
4856
47.8k
            size_t new_size;
4857
4858
47.8k
      new_size = size * 2;
4859
47.8k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
47.8k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
47.8k
      buf = new_buf;
4866
47.8k
            size = new_size;
4867
47.8k
  }
4868
40.0M
  COPY_BUF(ql,buf,len,q);
4869
40.0M
  q = r;
4870
40.0M
  ql = rl;
4871
40.0M
  r = cur;
4872
40.0M
  rl = l;
4873
4874
40.0M
  count++;
4875
40.0M
  if (count > 50) {
4876
758k
      SHRINK;
4877
758k
      GROW;
4878
758k
      count = 0;
4879
758k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
758k
  }
4884
40.0M
  NEXTL(l);
4885
40.0M
  cur = CUR_CHAR(l);
4886
40.0M
  if (cur == 0) {
4887
24.9k
      SHRINK;
4888
24.9k
      GROW;
4889
24.9k
      cur = CUR_CHAR(l);
4890
24.9k
  }
4891
4892
40.0M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
40.0M
    }
4899
84.6k
    buf[len] = 0;
4900
84.6k
    if (cur == 0) {
4901
24.9k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
24.9k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
59.7k
    } else if (!IS_CHAR(cur)) {
4904
5.65k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
5.65k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
5.65k
                    cur);
4907
54.0k
    } else {
4908
54.0k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
54.0k
        NEXT;
4914
54.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
54.0k
      (!ctxt->disableSAX))
4916
41.6k
      ctxt->sax->comment(ctxt->userData, buf);
4917
54.0k
    }
4918
84.6k
    xmlFree(buf);
4919
84.6k
    return;
4920
1.36M
not_terminated:
4921
1.36M
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
1.36M
       "Comment not terminated\n", NULL);
4923
1.36M
    xmlFree(buf);
4924
1.36M
    return;
4925
84.6k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
53.9M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
53.9M
    xmlChar *buf = NULL;
4943
53.9M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
53.9M
    size_t len = 0;
4945
53.9M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
6.93M
                       XML_MAX_HUGE_LENGTH :
4947
53.9M
                       XML_MAX_TEXT_LENGTH;
4948
53.9M
    xmlParserInputState state;
4949
53.9M
    const xmlChar *in;
4950
53.9M
    size_t nbchar = 0;
4951
53.9M
    int ccol;
4952
53.9M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
53.9M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
53.9M
    SKIP(2);
4960
53.9M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
248
        return;
4962
53.9M
    state = ctxt->instate;
4963
53.9M
    ctxt->instate = XML_PARSER_COMMENT;
4964
53.9M
    inputid = ctxt->input->id;
4965
53.9M
    SKIP(2);
4966
53.9M
    SHRINK;
4967
53.9M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
53.9M
    in = ctxt->input->cur;
4974
53.9M
    do {
4975
53.9M
  if (*in == 0xA) {
4976
199k
      do {
4977
199k
    ctxt->input->line++; ctxt->input->col = 1;
4978
199k
    in++;
4979
199k
      } while (*in == 0xA);
4980
193k
  }
4981
64.2M
get_more:
4982
64.2M
        ccol = ctxt->input->col;
4983
294M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
294M
         ((*in >= 0x20) && (*in < '-')) ||
4985
294M
         (*in == 0x09)) {
4986
230M
        in++;
4987
230M
        ccol++;
4988
230M
  }
4989
64.2M
  ctxt->input->col = ccol;
4990
64.2M
  if (*in == 0xA) {
4991
2.21M
      do {
4992
2.21M
    ctxt->input->line++; ctxt->input->col = 1;
4993
2.21M
    in++;
4994
2.21M
      } while (*in == 0xA);
4995
2.14M
      goto get_more;
4996
2.14M
  }
4997
62.0M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
62.0M
  if (nbchar > 0) {
5002
12.9M
      if ((ctxt->sax != NULL) &&
5003
12.9M
    (ctxt->sax->comment != NULL)) {
5004
12.9M
    if (buf == NULL) {
5005
5.80M
        if ((*in == '-') && (in[1] == '-'))
5006
3.94M
            size = nbchar + 1;
5007
1.86M
        else
5008
1.86M
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
5.80M
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
5.80M
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
5.80M
        len = 0;
5016
7.09M
    } else if (len + nbchar + 1 >= size) {
5017
446k
        xmlChar *new_buf;
5018
446k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
446k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
446k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
446k
        buf = new_buf;
5027
446k
    }
5028
12.9M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
12.9M
    len += nbchar;
5030
12.9M
    buf[len] = 0;
5031
12.9M
      }
5032
12.9M
  }
5033
62.0M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
62.0M
  ctxt->input->cur = in;
5040
62.0M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
62.0M
  if (*in == 0xD) {
5045
2.29M
      in++;
5046
2.29M
      if (*in == 0xA) {
5047
2.28M
    ctxt->input->cur = in;
5048
2.28M
    in++;
5049
2.28M
    ctxt->input->line++; ctxt->input->col = 1;
5050
2.28M
    goto get_more;
5051
2.28M
      }
5052
7.45k
      in--;
5053
7.45k
  }
5054
59.8M
  SHRINK;
5055
59.8M
  GROW;
5056
59.8M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
59.8M
  in = ctxt->input->cur;
5061
59.8M
  if (*in == '-') {
5062
58.3M
      if (in[1] == '-') {
5063
55.1M
          if (in[2] == '>') {
5064
52.4M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
52.4M
        SKIP(3);
5070
52.4M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
52.4M
            (!ctxt->disableSAX)) {
5072
38.3M
      if (buf != NULL)
5073
1.45M
          ctxt->sax->comment(ctxt->userData, buf);
5074
36.8M
      else
5075
36.8M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
38.3M
        }
5077
52.4M
        if (buf != NULL)
5078
4.35M
            xmlFree(buf);
5079
52.4M
        if (ctxt->instate != XML_PARSER_EOF)
5080
52.4M
      ctxt->instate = state;
5081
52.4M
        return;
5082
52.4M
    }
5083
2.63M
    if (buf != NULL) {
5084
1.66M
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
1.66M
                          "Double hyphen within comment: "
5086
1.66M
                                      "<!--%.50s\n",
5087
1.66M
              buf);
5088
1.66M
    } else
5089
966k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
966k
                          "Double hyphen within comment\n", NULL);
5091
2.63M
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
2.63M
    in++;
5096
2.63M
    ctxt->input->col++;
5097
2.63M
      }
5098
5.84M
      in++;
5099
5.84M
      ctxt->input->col++;
5100
5.84M
      goto get_more;
5101
58.3M
  }
5102
59.8M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
1.46M
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
1.46M
    ctxt->instate = state;
5105
1.46M
    return;
5106
53.9M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
241k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
241k
    const xmlChar *name;
5125
5126
241k
    name = xmlParseName(ctxt);
5127
241k
    if ((name != NULL) &&
5128
241k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
241k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
241k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
61.0k
  int i;
5132
61.0k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
61.0k
      (name[2] == 'l') && (name[3] == 0)) {
5134
15.4k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
15.4k
     "XML declaration allowed only at the start of the document\n");
5136
15.4k
      return(name);
5137
45.5k
  } else if (name[3] == 0) {
5138
5.74k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
5.74k
      return(name);
5140
5.74k
  }
5141
81.8k
  for (i = 0;;i++) {
5142
81.8k
      if (xmlW3CPIs[i] == NULL) break;
5143
60.8k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
18.7k
          return(name);
5145
60.8k
  }
5146
21.0k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
21.0k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
21.0k
          NULL, NULL);
5149
21.0k
    }
5150
201k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
4.32k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
4.32k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
4.32k
    }
5154
201k
    return(name);
5155
241k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information
5169
 * This will add the given catalog to the parsing context in order
5170
 * to be used if there is a resolution need further down in the document
5171
 */
5172
5173
static void
5174
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
0
    xmlChar *URL = NULL;
5176
0
    const xmlChar *tmp, *base;
5177
0
    xmlChar marker;
5178
5179
0
    tmp = catalog;
5180
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5181
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
0
  goto error;
5183
0
    tmp += 7;
5184
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5185
0
    if (*tmp != '=') {
5186
0
  return;
5187
0
    }
5188
0
    tmp++;
5189
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
0
    marker = *tmp;
5191
0
    if ((marker != '\'') && (marker != '"'))
5192
0
  goto error;
5193
0
    tmp++;
5194
0
    base = tmp;
5195
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
0
    if (*tmp == 0)
5197
0
  goto error;
5198
0
    URL = xmlStrndup(base, tmp - base);
5199
0
    tmp++;
5200
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
0
    if (*tmp != 0)
5202
0
  goto error;
5203
5204
0
    if (URL != NULL) {
5205
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
0
  xmlFree(URL);
5207
0
    }
5208
0
    return;
5209
5210
0
error:
5211
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
0
            "Catalog PI syntax error: %s\n",
5213
0
      catalog, NULL);
5214
0
    if (URL != NULL)
5215
0
  xmlFree(URL);
5216
0
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
241k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
241k
    xmlChar *buf = NULL;
5235
241k
    size_t len = 0;
5236
241k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
241k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
98.6k
                       XML_MAX_HUGE_LENGTH :
5239
241k
                       XML_MAX_TEXT_LENGTH;
5240
241k
    int cur, l;
5241
241k
    const xmlChar *target;
5242
241k
    xmlParserInputState state;
5243
241k
    int count = 0;
5244
5245
241k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
241k
  int inputid = ctxt->input->id;
5247
241k
  state = ctxt->instate;
5248
241k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
241k
  SKIP(2);
5253
241k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
241k
        target = xmlParsePITarget(ctxt);
5260
241k
  if (target != NULL) {
5261
224k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
39.6k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
39.6k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
39.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
39.6k
        (ctxt->sax->processingInstruction != NULL))
5274
27.4k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
27.4k
                                         target, NULL);
5276
39.6k
    if (ctxt->instate != XML_PARSER_EOF)
5277
39.6k
        ctxt->instate = state;
5278
39.6k
    return;
5279
39.6k
      }
5280
185k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
185k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
185k
      if (SKIP_BLANKS == 0) {
5287
47.9k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
47.9k
        "ParsePI: PI %s space expected\n", target);
5289
47.9k
      }
5290
185k
      cur = CUR_CHAR(l);
5291
45.7M
      while (IS_CHAR(cur) && /* checked */
5292
45.7M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
45.5M
    if (len + 5 >= size) {
5294
53.2k
        xmlChar *tmp;
5295
53.2k
                    size_t new_size = size * 2;
5296
53.2k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
53.2k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
53.2k
        buf = tmp;
5304
53.2k
                    size = new_size;
5305
53.2k
    }
5306
45.5M
    count++;
5307
45.5M
    if (count > 50) {
5308
836k
        SHRINK;
5309
836k
        GROW;
5310
836k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
836k
        count = 0;
5315
836k
    }
5316
45.5M
    COPY_BUF(l,buf,len,cur);
5317
45.5M
    NEXTL(l);
5318
45.5M
    cur = CUR_CHAR(l);
5319
45.5M
    if (cur == 0) {
5320
29.1k
        SHRINK;
5321
29.1k
        GROW;
5322
29.1k
        cur = CUR_CHAR(l);
5323
29.1k
    }
5324
45.5M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
45.5M
      }
5332
185k
      buf[len] = 0;
5333
185k
      if (cur != '?') {
5334
38.9k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
38.9k
          "ParsePI: PI %s never end ...\n", target);
5336
146k
      } else {
5337
146k
    if (inputid != ctxt->input->id) {
5338
659
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
659
                             "PI declaration doesn't start and stop in"
5340
659
                                   " the same entity\n");
5341
659
    }
5342
146k
    SKIP(2);
5343
5344
146k
#ifdef LIBXML_CATALOG_ENABLED
5345
146k
    if (((state == XML_PARSER_MISC) ||
5346
146k
               (state == XML_PARSER_START)) &&
5347
146k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
0
      (allow == XML_CATA_ALLOW_ALL))
5351
0
      xmlParseCatalogPI(ctxt, buf);
5352
0
    }
5353
146k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
146k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
146k
        (ctxt->sax->processingInstruction != NULL))
5361
115k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
115k
                                         target, buf);
5363
146k
      }
5364
185k
      xmlFree(buf);
5365
185k
  } else {
5366
16.6k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
16.6k
  }
5368
201k
  if (ctxt->instate != XML_PARSER_EOF)
5369
201k
      ctxt->instate = state;
5370
201k
    }
5371
241k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
48.9k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
48.9k
    const xmlChar *name;
5394
48.9k
    xmlChar *Pubid;
5395
48.9k
    xmlChar *Systemid;
5396
5397
48.9k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
48.9k
    SKIP(2);
5400
5401
48.9k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
48.1k
  int inputid = ctxt->input->id;
5403
48.1k
  SHRINK;
5404
48.1k
  SKIP(8);
5405
48.1k
  if (SKIP_BLANKS == 0) {
5406
1.32k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
1.32k
         "Space required after '<!NOTATION'\n");
5408
1.32k
      return;
5409
1.32k
  }
5410
5411
46.8k
        name = xmlParseName(ctxt);
5412
46.8k
  if (name == NULL) {
5413
3.30k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
3.30k
      return;
5415
3.30k
  }
5416
43.5k
  if (xmlStrchr(name, ':') != NULL) {
5417
2.87k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
2.87k
         "colons are forbidden from notation names '%s'\n",
5419
2.87k
         name, NULL, NULL);
5420
2.87k
  }
5421
43.5k
  if (SKIP_BLANKS == 0) {
5422
1.93k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
1.93k
         "Space required after the NOTATION name'\n");
5424
1.93k
      return;
5425
1.93k
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
41.6k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
41.6k
  SKIP_BLANKS;
5432
5433
41.6k
  if (RAW == '>') {
5434
25.7k
      if (inputid != ctxt->input->id) {
5435
101
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
101
                         "Notation declaration doesn't start and stop"
5437
101
                               " in the same entity\n");
5438
101
      }
5439
25.7k
      NEXT;
5440
25.7k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
25.7k
    (ctxt->sax->notationDecl != NULL))
5442
18.4k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
25.7k
  } else {
5444
15.8k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
15.8k
  }
5446
41.6k
  if (Systemid != NULL) xmlFree(Systemid);
5447
41.6k
  if (Pubid != NULL) xmlFree(Pubid);
5448
41.6k
    }
5449
48.9k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
1.42M
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
1.42M
    const xmlChar *name = NULL;
5478
1.42M
    xmlChar *value = NULL;
5479
1.42M
    xmlChar *URI = NULL, *literal = NULL;
5480
1.42M
    const xmlChar *ndata = NULL;
5481
1.42M
    int isParameter = 0;
5482
1.42M
    xmlChar *orig = NULL;
5483
5484
1.42M
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
1.42M
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
1.42M
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
1.42M
  int inputid = ctxt->input->id;
5491
1.42M
  SHRINK;
5492
1.42M
  SKIP(6);
5493
1.42M
  if (SKIP_BLANKS == 0) {
5494
5.00k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
5.00k
         "Space required after '<!ENTITY'\n");
5496
5.00k
  }
5497
5498
1.42M
  if (RAW == '%') {
5499
550k
      NEXT;
5500
550k
      if (SKIP_BLANKS == 0) {
5501
1.36k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
1.36k
             "Space required after '%%'\n");
5503
1.36k
      }
5504
550k
      isParameter = 1;
5505
550k
  }
5506
5507
1.42M
        name = xmlParseName(ctxt);
5508
1.42M
  if (name == NULL) {
5509
6.13k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
6.13k
                     "xmlParseEntityDecl: no name\n");
5511
6.13k
            return;
5512
6.13k
  }
5513
1.41M
  if (xmlStrchr(name, ':') != NULL) {
5514
4.48k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
4.48k
         "colons are forbidden from entities names '%s'\n",
5516
4.48k
         name, NULL, NULL);
5517
4.48k
  }
5518
1.41M
  if (SKIP_BLANKS == 0) {
5519
6.46k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
6.46k
         "Space required after the entity name\n");
5521
6.46k
  }
5522
5523
1.41M
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
1.41M
  if (isParameter) {
5528
549k
      if ((RAW == '"') || (RAW == '\'')) {
5529
494k
          value = xmlParseEntityValue(ctxt, &orig);
5530
494k
    if (value) {
5531
481k
        if ((ctxt->sax != NULL) &&
5532
481k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
432k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
432k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
432k
            NULL, NULL, value);
5536
481k
    }
5537
494k
      } else {
5538
54.6k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
54.6k
    if ((URI == NULL) && (literal == NULL)) {
5540
2.42k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
2.42k
    }
5542
54.6k
    if (URI) {
5543
51.8k
        xmlURIPtr uri;
5544
5545
51.8k
        uri = xmlParseURI((const char *) URI);
5546
51.8k
        if (uri == NULL) {
5547
2.41k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
2.41k
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
49.4k
        } else {
5555
49.4k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
634
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
48.8k
      } else {
5562
48.8k
          if ((ctxt->sax != NULL) &&
5563
48.8k
        (!ctxt->disableSAX) &&
5564
48.8k
        (ctxt->sax->entityDecl != NULL))
5565
46.8k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
46.8k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
46.8k
              literal, URI, NULL);
5568
48.8k
      }
5569
49.4k
      xmlFreeURI(uri);
5570
49.4k
        }
5571
51.8k
    }
5572
54.6k
      }
5573
868k
  } else {
5574
868k
      if ((RAW == '"') || (RAW == '\'')) {
5575
459k
          value = xmlParseEntityValue(ctxt, &orig);
5576
459k
    if ((ctxt->sax != NULL) &&
5577
459k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
421k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
421k
        XML_INTERNAL_GENERAL_ENTITY,
5580
421k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
459k
    if ((ctxt->myDoc == NULL) ||
5585
459k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
5.06k
        if (ctxt->myDoc == NULL) {
5587
443
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
443
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
443
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
443
        }
5594
5.06k
        if (ctxt->myDoc->intSubset == NULL)
5595
443
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
443
              BAD_CAST "fake", NULL, NULL);
5597
5598
5.06k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
5.06k
                    NULL, NULL, value);
5600
5.06k
    }
5601
459k
      } else {
5602
409k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
409k
    if ((URI == NULL) && (literal == NULL)) {
5604
9.18k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
9.18k
    }
5606
409k
    if (URI) {
5607
391k
        xmlURIPtr uri;
5608
5609
391k
        uri = xmlParseURI((const char *)URI);
5610
391k
        if (uri == NULL) {
5611
9.92k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
9.92k
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
381k
        } else {
5619
381k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
3.80k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
3.80k
      }
5626
381k
      xmlFreeURI(uri);
5627
381k
        }
5628
391k
    }
5629
409k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
12.9k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
12.9k
           "Space required before 'NDATA'\n");
5632
12.9k
    }
5633
409k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
193k
        SKIP(5);
5635
193k
        if (SKIP_BLANKS == 0) {
5636
18.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
18.2k
               "Space required after 'NDATA'\n");
5638
18.2k
        }
5639
193k
        ndata = xmlParseName(ctxt);
5640
193k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
193k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
139k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
139k
            literal, URI, ndata);
5644
216k
    } else {
5645
216k
        if ((ctxt->sax != NULL) &&
5646
216k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
200k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
200k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
200k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
216k
        if ((ctxt->replaceEntities != 0) &&
5655
216k
      ((ctxt->myDoc == NULL) ||
5656
129k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
992
      if (ctxt->myDoc == NULL) {
5658
183
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
183
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
183
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
183
      }
5665
5666
992
      if (ctxt->myDoc->intSubset == NULL)
5667
183
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
183
            BAD_CAST "fake", NULL, NULL);
5669
992
      xmlSAX2EntityDecl(ctxt, name,
5670
992
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
992
                  literal, URI, NULL);
5672
992
        }
5673
216k
    }
5674
409k
      }
5675
868k
  }
5676
1.41M
  if (ctxt->instate == XML_PARSER_EOF)
5677
658
      goto done;
5678
1.41M
  SKIP_BLANKS;
5679
1.41M
  if (RAW != '>') {
5680
20.3k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
20.3k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
20.3k
      xmlHaltParser(ctxt);
5683
1.39M
  } else {
5684
1.39M
      if (inputid != ctxt->input->id) {
5685
174
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
174
                         "Entity declaration doesn't start and stop in"
5687
174
                               " the same entity\n");
5688
174
      }
5689
1.39M
      NEXT;
5690
1.39M
  }
5691
1.41M
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
934k
      xmlEntityPtr cur = NULL;
5696
5697
934k
      if (isParameter) {
5698
483k
          if ((ctxt->sax != NULL) &&
5699
483k
        (ctxt->sax->getParameterEntity != NULL))
5700
483k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
483k
      } else {
5702
451k
          if ((ctxt->sax != NULL) &&
5703
451k
        (ctxt->sax->getEntity != NULL))
5704
451k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
451k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
25.2k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
25.2k
    }
5708
451k
      }
5709
934k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
767k
    cur->orig = orig;
5711
767k
                orig = NULL;
5712
767k
      }
5713
934k
  }
5714
5715
1.41M
done:
5716
1.41M
  if (value != NULL) xmlFree(value);
5717
1.41M
  if (URI != NULL) xmlFree(URI);
5718
1.41M
  if (literal != NULL) xmlFree(literal);
5719
1.41M
        if (orig != NULL) xmlFree(orig);
5720
1.41M
    }
5721
1.42M
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
3.92M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
3.92M
    int val;
5757
3.92M
    xmlChar *ret;
5758
5759
3.92M
    *value = NULL;
5760
3.92M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
272k
  SKIP(9);
5762
272k
  return(XML_ATTRIBUTE_REQUIRED);
5763
272k
    }
5764
3.65M
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
3.31M
  SKIP(8);
5766
3.31M
  return(XML_ATTRIBUTE_IMPLIED);
5767
3.31M
    }
5768
332k
    val = XML_ATTRIBUTE_NONE;
5769
332k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
130k
  SKIP(6);
5771
130k
  val = XML_ATTRIBUTE_FIXED;
5772
130k
  if (SKIP_BLANKS == 0) {
5773
296
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
296
         "Space required after '#FIXED'\n");
5775
296
  }
5776
130k
    }
5777
332k
    ret = xmlParseAttValue(ctxt);
5778
332k
    ctxt->instate = XML_PARSER_DTD;
5779
332k
    if (ret == NULL) {
5780
5.82k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
5.82k
           "Attribute default value declaration error\n");
5782
5.82k
    } else
5783
327k
        *value = ret;
5784
332k
    return(val);
5785
3.65M
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
25.8k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
25.8k
    const xmlChar *name;
5809
25.8k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
25.8k
    if (RAW != '(') {
5812
1.10k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
1.10k
  return(NULL);
5814
1.10k
    }
5815
24.7k
    SHRINK;
5816
37.4k
    do {
5817
37.4k
        NEXT;
5818
37.4k
  SKIP_BLANKS;
5819
37.4k
        name = xmlParseName(ctxt);
5820
37.4k
  if (name == NULL) {
5821
1.07k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
1.07k
         "Name expected in NOTATION declaration\n");
5823
1.07k
            xmlFreeEnumeration(ret);
5824
1.07k
      return(NULL);
5825
1.07k
  }
5826
36.3k
  tmp = ret;
5827
86.0k
  while (tmp != NULL) {
5828
53.2k
      if (xmlStrEqual(name, tmp->name)) {
5829
3.45k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
3.45k
    "standalone: attribute notation value token %s duplicated\n",
5831
3.45k
         name, NULL);
5832
3.45k
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
3.45k
    break;
5835
3.45k
      }
5836
49.7k
      tmp = tmp->next;
5837
49.7k
  }
5838
36.3k
  if (tmp == NULL) {
5839
32.8k
      cur = xmlCreateEnumeration(name);
5840
32.8k
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
32.8k
      if (last == NULL) ret = last = cur;
5845
9.01k
      else {
5846
9.01k
    last->next = cur;
5847
9.01k
    last = cur;
5848
9.01k
      }
5849
32.8k
  }
5850
36.3k
  SKIP_BLANKS;
5851
36.3k
    } while (RAW == '|');
5852
23.6k
    if (RAW != ')') {
5853
3.64k
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
3.64k
        xmlFreeEnumeration(ret);
5855
3.64k
  return(NULL);
5856
3.64k
    }
5857
20.0k
    NEXT;
5858
20.0k
    return(ret);
5859
23.6k
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
436k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
436k
    xmlChar *name;
5881
436k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
436k
    if (RAW != '(') {
5884
8.54k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
8.54k
  return(NULL);
5886
8.54k
    }
5887
427k
    SHRINK;
5888
1.31M
    do {
5889
1.31M
        NEXT;
5890
1.31M
  SKIP_BLANKS;
5891
1.31M
        name = xmlParseNmtoken(ctxt);
5892
1.31M
  if (name == NULL) {
5893
1.49k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
1.49k
      return(ret);
5895
1.49k
  }
5896
1.31M
  tmp = ret;
5897
3.52M
  while (tmp != NULL) {
5898
2.21M
      if (xmlStrEqual(name, tmp->name)) {
5899
2.28k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
2.28k
    "standalone: attribute enumeration value token %s duplicated\n",
5901
2.28k
         name, NULL);
5902
2.28k
    if (!xmlDictOwns(ctxt->dict, name))
5903
2.28k
        xmlFree(name);
5904
2.28k
    break;
5905
2.28k
      }
5906
2.21M
      tmp = tmp->next;
5907
2.21M
  }
5908
1.31M
  if (tmp == NULL) {
5909
1.31M
      cur = xmlCreateEnumeration(name);
5910
1.31M
      if (!xmlDictOwns(ctxt->dict, name))
5911
1.31M
    xmlFree(name);
5912
1.31M
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
1.31M
      if (last == NULL) ret = last = cur;
5917
883k
      else {
5918
883k
    last->next = cur;
5919
883k
    last = cur;
5920
883k
      }
5921
1.31M
  }
5922
1.31M
  SKIP_BLANKS;
5923
1.31M
    } while (RAW == '|');
5924
426k
    if (RAW != ')') {
5925
4.01k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
4.01k
  return(ret);
5927
4.01k
    }
5928
422k
    NEXT;
5929
422k
    return(ret);
5930
426k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
462k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
462k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
26.2k
  SKIP(8);
5953
26.2k
  if (SKIP_BLANKS == 0) {
5954
397
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
397
         "Space required after 'NOTATION'\n");
5956
397
      return(0);
5957
397
  }
5958
25.8k
  *tree = xmlParseNotationType(ctxt);
5959
25.8k
  if (*tree == NULL) return(0);
5960
20.0k
  return(XML_ATTRIBUTE_NOTATION);
5961
25.8k
    }
5962
436k
    *tree = xmlParseEnumerationType(ctxt);
5963
436k
    if (*tree == NULL) return(0);
5964
426k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
436k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
3.94M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
3.94M
    SHRINK;
6017
3.94M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
1.57M
  SKIP(5);
6019
1.57M
  return(XML_ATTRIBUTE_CDATA);
6020
2.36M
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
18.3k
  SKIP(6);
6022
18.3k
  return(XML_ATTRIBUTE_IDREFS);
6023
2.34M
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
42.8k
  SKIP(5);
6025
42.8k
  return(XML_ATTRIBUTE_IDREF);
6026
2.30M
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
944k
        SKIP(2);
6028
944k
  return(XML_ATTRIBUTE_ID);
6029
1.36M
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
83.8k
  SKIP(6);
6031
83.8k
  return(XML_ATTRIBUTE_ENTITY);
6032
1.27M
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
3.84k
  SKIP(8);
6034
3.84k
  return(XML_ATTRIBUTE_ENTITIES);
6035
1.27M
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
210k
  SKIP(8);
6037
210k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
1.06M
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
599k
  SKIP(7);
6040
599k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
599k
     }
6042
462k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
3.94M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. If the input does not
 * start with '<!' the function returns without consuming anything; otherwise
 * '<!' is consumed before the 'ATTLIST' keyword is even checked.
 *
 * Each successfully parsed AttDef is reported through the SAX attributeDecl
 * callback (when SAX is enabled and the callback is set). When ctxt->sax2 is
 * set, xmlAddDefAttrs() and xmlAddSpecialAttr() are called as well.
 * Parsing of the AttDef list stops at the first AttDef that fails; the
 * closing '>' is only consumed if it is the current character at that point.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
1.38M
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
1.38M
    const xmlChar *elemName;
6061
1.38M
    const xmlChar *attrName;
6062
1.38M
    xmlEnumerationPtr tree;
6063
6064
1.38M
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
1.38M
    SKIP(2); /* consume "<!" before looking at the keyword */
6067
6068
1.38M
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
1.38M
  int inputid = ctxt->input->id; /* compared again when '>' is reached */
6070
6071
1.38M
  SKIP(7);
6072
1.38M
  if (SKIP_BLANKS == 0) {
6073
4.91k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
4.91k
                     "Space required after '<!ATTLIST'\n");
6075
4.91k
  } /* the error is reported but parsing goes on */
6076
1.38M
        elemName = xmlParseName(ctxt);
6077
1.38M
  if (elemName == NULL) {
6078
2.81k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
2.81k
         "ATTLIST: no name for Element\n");
6080
2.81k
      return;
6081
2.81k
  }
6082
1.37M
  SKIP_BLANKS;
6083
1.37M
  GROW;
6084
5.28M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) { /* one AttDef per iteration */
6085
3.96M
      int type;
6086
3.96M
      int def;
6087
3.96M
      xmlChar *defaultValue = NULL;
6088
6089
3.96M
      GROW;
6090
3.96M
            tree = NULL;
6091
3.96M
      attrName = xmlParseName(ctxt);
6092
3.96M
      if (attrName == NULL) {
6093
11.0k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
11.0k
             "ATTLIST: no name for Attribute\n");
6095
11.0k
    break;
6096
11.0k
      }
6097
3.95M
      GROW;
6098
3.95M
      if (SKIP_BLANKS == 0) {
6099
7.61k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
7.61k
            "Space required after the attribute name\n");
6101
7.61k
    break;
6102
7.61k
      }
6103
6104
3.94M
      type = xmlParseAttributeType(ctxt, &tree);
6105
3.94M
      if (type <= 0) { /* non-positive type: give up on the remaining AttDefs */
6106
15.7k
          break;
6107
15.7k
      }
6108
6109
3.92M
      GROW;
6110
3.92M
      if (SKIP_BLANKS == 0) {
6111
6.61k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
6.61k
             "Space required after the attribute type\n");
6113
6.61k
          if (tree != NULL)
6114
4.93k
        xmlFreeEnumeration(tree);
6115
6.61k
    break;
6116
6.61k
      }
6117
6118
3.92M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
3.92M
      if (def <= 0) { /* release whatever was produced so far for this AttDef */
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
3.92M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
157k
          xmlAttrNormalizeSpace(defaultValue, defaultValue); /* same buffer is source and destination */
6128
6129
3.92M
      GROW;
6130
3.92M
            if (RAW != '>') { /* anything but '>' must be preceded by blanks */
6131
3.57M
    if (SKIP_BLANKS == 0) {
6132
13.0k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
13.0k
      "Space required after the attribute default value\n");
6134
13.0k
        if (defaultValue != NULL)
6135
7.12k
      xmlFree(defaultValue);
6136
13.0k
        if (tree != NULL)
6137
1.55k
      xmlFreeEnumeration(tree);
6138
13.0k
        break;
6139
13.0k
    }
6140
3.57M
      }
6141
3.91M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
3.91M
    (ctxt->sax->attributeDecl != NULL))
6143
3.48M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
3.48M
                          type, def, defaultValue, tree); /* tree is not freed here on this path */
6145
423k
      else if (tree != NULL)
6146
55.0k
    xmlFreeEnumeration(tree); /* no callback ran, so the enumeration is released here */
6147
6148
3.91M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
3.91M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
3.91M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
200k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
200k
      }
6153
3.91M
      if (ctxt->sax2) {
6154
2.38M
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
2.38M
      }
6156
3.91M
      if (defaultValue != NULL)
6157
319k
          xmlFree(defaultValue); /* the default value is always released by this function */
6158
3.91M
      GROW;
6159
3.91M
  }
6160
1.37M
  if (RAW == '>') { /* nothing is reported here when '>' is missing */
6161
1.33M
      if (inputid != ctxt->input->id) {
6162
680
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
680
                               "Attribute list declaration doesn't start and"
6164
680
                               " stop in the same entity\n");
6165
680
      }
6166
1.33M
      NEXT;
6167
1.33M
  }
6168
1.37M
    }
6169
1.38M
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  id of the input that was current at the opening '('; it is
 *             compared with ctxt->input->id when the closing ')' is reached
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * Parse the declaration of Mixed element content.
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
 *
 * Resulting tree, as built by the code below:
 *  - "#PCDATA)" optionally followed by '*' yields a single PCDATA node
 *    (occurrence set to MULT when the '*' is present);
 *  - "#PCDATA | a | b)*" yields a chain of OR nodes linked through c2: the
 *    first OR has the PCDATA node as c1, every following OR has an ELEMENT
 *    node as c1, and the last name parsed becomes c2 of the last OR. The
 *    root OR gets occurrence MULT.
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the root of the xmlElementContentPtr tree describing the element
 * choices, or NULL on error (the partially built tree is freed first)
6191
 */
6192
xmlElementContentPtr
6193
586k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
586k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
586k
    const xmlChar *elem = NULL; /* last name parsed, not yet attached to the tree */
6196
6197
586k
    GROW;
6198
586k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
586k
  SKIP(7);
6200
586k
  SKIP_BLANKS;
6201
586k
  SHRINK;
6202
586k
  if (RAW == ')') { /* "(#PCDATA)" with an optional trailing '*' */
6203
353k
      if (ctxt->input->id != inputchk) {
6204
57
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
57
                               "Element content declaration doesn't start and"
6206
57
                               " stop in the same entity\n");
6207
57
      }
6208
353k
      NEXT;
6209
353k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
353k
      if (ret == NULL)
6211
0
          return(NULL);
6212
353k
      if (RAW == '*') {
6213
893
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
893
    NEXT;
6215
893
      }
6216
353k
      return(ret);
6217
353k
  }
6218
233k
  if ((RAW == '(') || (RAW == '|')) {
6219
231k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
231k
      if (ret == NULL) return(NULL);
6221
231k
  }
6222
2.48M
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) { /* one "| Name" per iteration */
6223
2.24M
      NEXT;
6224
2.24M
      if (elem == NULL) { /* first '|': create the root OR above the PCDATA node */
6225
230k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
230k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
230k
    ret->c1 = cur;
6231
230k
    if (cur != NULL)
6232
230k
        cur->parent = ret;
6233
230k
    cur = ret;
6234
2.01M
      } else { /* later '|': new OR holding the previous name, chained at cur->c2 */
6235
2.01M
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
2.01M
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
2.01M
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
2.01M
    if (n->c1 != NULL) /* a NULL result is left in place; no error is raised here */
6242
2.01M
        n->c1->parent = n;
6243
2.01M
          cur->c2 = n;
6244
2.01M
    if (n != NULL)
6245
2.01M
        n->parent = cur;
6246
2.01M
    cur = n;
6247
2.01M
      }
6248
2.24M
      SKIP_BLANKS;
6249
2.24M
      elem = xmlParseName(ctxt);
6250
2.24M
      if (elem == NULL) {
6251
868
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
868
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
868
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
868
    return(NULL);
6255
868
      }
6256
2.24M
      SKIP_BLANKS;
6257
2.24M
      GROW;
6258
2.24M
  }
6259
232k
  if ((RAW == ')') && (NXT(1) == '*')) { /* every path reaching this point requires ")*" */
6260
228k
      if (elem != NULL) { /* attach the last name as right child of the last OR */
6261
228k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
228k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
228k
    if (cur->c2 != NULL)
6264
228k
        cur->c2->parent = cur;
6265
228k
            }
6266
228k
            if (ret != NULL)
6267
228k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
228k
      if (ctxt->input->id != inputchk) {
6269
14
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
14
                               "Element content declaration doesn't start and"
6271
14
                               " stop in the same entity\n");
6272
14
      }
6273
228k
      SKIP(2); /* consume ")*" */
6274
228k
  } else {
6275
3.99k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
3.99k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
3.99k
      return(NULL);
6278
3.99k
  }
6279
6280
232k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL); /* ret is still NULL on this path */
6282
0
    }
6283
228k
    return(ret);
6284
586k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  id of the input that was current at the opening '('; it is
 *             compared with ctxt->input->id when the group is closed
6290
 * @depth: the level of recursion; more than 128 is rejected unless
 *         XML_PARSE_HUGE is set, more than 2048 is always rejected
6291
 *
6292
 * Parse the declaration of element children content (a choice or a
 * sequence group). Nested groups are handled by recursion with depth + 1.
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
 * Tree construction: the first item becomes the initial root. Each
 * separator creates an operator node (SEQ for ',', OR for '|'); operator
 * nodes are chained through c2, and the final item is attached as c2 of the
 * last operator. Mixing ',' and '|' within one group is reported as an error.
 *
 * NOTE(review): in the trailing '+' handling the normalization loop starts
 * from the current value of cur, whereas the '*' branch first resets cur to
 * ret. Confirm whether starting at the last operator node is intended.
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy, or NULL on error (anything built so far is freed).
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
602k
                                       int depth) {
6321
602k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
602k
    const xmlChar *elem;
6323
602k
    xmlChar type = 0; /* first separator seen in this group (',' or '|'), 0 if none yet */
6324
6325
602k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
602k
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
602k
    SKIP_BLANKS;
6333
602k
    GROW;
6334
602k
    if (RAW == '(') { /* first item is itself a parenthesized group */
6335
36.6k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
36.6k
  NEXT;
6339
36.6k
  SKIP_BLANKS;
6340
36.6k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
36.6k
                                                           depth + 1);
6342
36.6k
        if (cur == NULL)
6343
4.79k
            return(NULL);
6344
31.8k
  SKIP_BLANKS;
6345
31.8k
  GROW;
6346
565k
    } else { /* first item is a Name with an optional occurrence indicator */
6347
565k
  elem = xmlParseName(ctxt);
6348
565k
  if (elem == NULL) {
6349
4.49k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
4.49k
      return(NULL);
6351
4.49k
  }
6352
561k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
561k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
561k
  GROW;
6358
561k
  if (RAW == '?') {
6359
64.7k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
64.7k
      NEXT;
6361
496k
  } else if (RAW == '*') {
6362
66.4k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
66.4k
      NEXT;
6364
430k
  } else if (RAW == '+') {
6365
74.0k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
74.0k
      NEXT;
6367
356k
  } else {
6368
356k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
356k
  }
6370
561k
  GROW;
6371
561k
    }
6372
593k
    SKIP_BLANKS;
6373
593k
    SHRINK;
6374
2.34M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
1.76M
        if (RAW == ',') {
6379
585k
      if (type == 0) type = CUR; /* the first separator fixes the kind of group */
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
365k
      else if (type != CUR) {
6385
96
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
96
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
96
                      type);
6388
96
    if ((last != NULL) && (last != ret))
6389
96
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
96
    if (ret != NULL)
6391
96
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
96
    return(NULL);
6393
96
      }
6394
585k
      NEXT;
6395
6396
585k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
585k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
585k
      if (last == NULL) { /* first separator: op becomes the new root above ret */
6404
220k
    op->c1 = ret;
6405
220k
    if (ret != NULL)
6406
220k
        ret->parent = op;
6407
220k
    ret = cur = op;
6408
365k
      } else { /* later separator: chain op at cur->c2 and move the pending item to op->c1 */
6409
365k
          cur->c2 = op;
6410
365k
    if (op != NULL)
6411
365k
        op->parent = cur;
6412
365k
    op->c1 = last;
6413
365k
    if (last != NULL)
6414
365k
        last->parent = op;
6415
365k
    cur =op;
6416
365k
    last = NULL;
6417
365k
      }
6418
1.18M
  } else if (RAW == '|') { /* same steps as the ',' branch, building OR nodes */
6419
1.17M
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
1.00M
      else if (type != CUR) {
6425
101
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
101
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
101
          type);
6428
101
    if ((last != NULL) && (last != ret))
6429
101
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
101
    if (ret != NULL)
6431
101
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
101
    return(NULL);
6433
101
      }
6434
1.17M
      NEXT;
6435
6436
1.17M
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
1.17M
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
1.17M
      if (last == NULL) {
6445
164k
    op->c1 = ret;
6446
164k
    if (ret != NULL)
6447
164k
        ret->parent = op;
6448
164k
    ret = cur = op;
6449
1.00M
      } else {
6450
1.00M
          cur->c2 = op;
6451
1.00M
    if (op != NULL)
6452
1.00M
        op->parent = cur;
6453
1.00M
    op->c1 = last;
6454
1.00M
    if (last != NULL)
6455
1.00M
        last->parent = op;
6456
1.00M
    cur =op;
6457
1.00M
    last = NULL;
6458
1.00M
      }
6459
1.17M
  } else { /* neither ')' nor a separator: the group is malformed */
6460
10.7k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
10.7k
      if ((last != NULL) && (last != ret))
6462
4.22k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
10.7k
      if (ret != NULL)
6464
10.7k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
10.7k
      return(NULL);
6466
10.7k
  }
6467
1.75M
  GROW;
6468
1.75M
  SKIP_BLANKS;
6469
1.75M
  GROW;
6470
1.75M
  if (RAW == '(') { /* the item after the separator is a nested group */
6471
81.3k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
81.3k
      NEXT;
6474
81.3k
      SKIP_BLANKS;
6475
81.3k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
81.3k
                                                          depth + 1);
6477
81.3k
            if (last == NULL) {
6478
2.04k
    if (ret != NULL)
6479
2.04k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
2.04k
    return(NULL);
6481
2.04k
            }
6482
79.2k
      SKIP_BLANKS;
6483
1.67M
  } else { /* the item after the separator is a Name */
6484
1.67M
      elem = xmlParseName(ctxt);
6485
1.67M
      if (elem == NULL) {
6486
3.38k
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
3.38k
    if (ret != NULL)
6488
3.38k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
3.38k
    return(NULL);
6490
3.38k
      }
6491
1.67M
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
1.67M
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
1.67M
      if (RAW == '?') {
6498
241k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
241k
    NEXT;
6500
1.43M
      } else if (RAW == '*') {
6501
148k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
148k
    NEXT;
6503
1.28M
      } else if (RAW == '+') {
6504
28.2k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
28.2k
    NEXT;
6506
1.25M
      } else {
6507
1.25M
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
1.25M
      }
6509
1.67M
  }
6510
1.75M
  SKIP_BLANKS;
6511
1.75M
  GROW;
6512
1.75M
    }
6513
576k
    if ((cur != NULL) && (last != NULL)) { /* attach the final item as right child of the last operator */
6514
375k
        cur->c2 = last;
6515
375k
  if (last != NULL)
6516
375k
      last->parent = cur;
6517
375k
    }
6518
576k
    if (ctxt->input->id != inputchk) {
6519
207
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
207
                       "Element content declaration doesn't start and stop in"
6521
207
                       " the same entity\n");
6522
207
    }
6523
576k
    NEXT; /* skips the current char: the ')' unless the loop stopped on XML_PARSER_EOF */
6524
576k
    if (RAW == '?') {
6525
23.1k
  if (ret != NULL) {
6526
23.1k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
23.1k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
6
          ret->ocur = XML_ELEMENT_CONTENT_MULT; /* '?' on a root already marked '+' or '*' becomes '*' */
6529
23.1k
      else
6530
23.1k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
23.1k
  }
6532
23.1k
  NEXT;
6533
553k
    } else if (RAW == '*') {
6534
152k
  if (ret != NULL) {
6535
152k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
152k
      cur = ret; /* walk the OR chain from the root */
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
899k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
746k
    if ((cur->c1 != NULL) &&
6543
746k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
746k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
17.0k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
746k
    if ((cur->c2 != NULL) &&
6547
746k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
746k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
2.74k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
746k
    cur = cur->c2;
6551
746k
      }
6552
152k
  }
6553
152k
  NEXT;
6554
401k
    } else if (RAW == '+') {
6555
73.8k
  if (ret != NULL) {
6556
73.8k
      int found = 0; /* set when an alternative marked '?' or '*' was reset to ONCE */
6557
6558
73.8k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
73.8k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
18
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
73.8k
      else
6562
73.8k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
120k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) { /* NOTE(review): cur is not reset to ret here, unlike the '*' branch */
6569
46.8k
    if ((cur->c1 != NULL) &&
6570
46.8k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
46.8k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
590
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
590
        found = 1;
6574
590
    }
6575
46.8k
    if ((cur->c2 != NULL) &&
6576
46.8k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
46.8k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
332
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
332
        found = 1;
6580
332
    }
6581
46.8k
    cur = cur->c2;
6582
46.8k
      }
6583
73.8k
      if (found)
6584
530
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
73.8k
  }
6586
73.8k
  NEXT;
6587
73.8k
    }
6588
576k
    return(ret);
6589
593k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * Parse the declaration of element children content (choice or sequence).
 * This public entry point only forwards to
 * xmlParseElementChildrenContentDeclPriv() with a starting depth of 1.
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy, as returned by the private implementation.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1)); /* depth starts at 1 */
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined (only used in the error
 *         message emitted when '(' is missing)
6633
 * @result:  the Element Content pointer will be stored here if any; it is
 *           reset to NULL on entry and may stay NULL when the sub-parser fails
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
 *
 * The current character must be '('. After it and any blanks, a '#PCDATA'
 * keyword selects the Mixed parser; anything else goes to the children parser.
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: XML_ELEMENT_TYPE_MIXED or XML_ELEMENT_TYPE_ELEMENT depending on
 * the parser selected (even if that parser returned NULL), or -1 when '('
 * is missing or the parser state is XML_PARSER_EOF after the '('
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
1.07M
                           xmlElementContentPtr *result) {
6648
6649
1.07M
    xmlElementContentPtr tree = NULL;
6650
1.07M
    int inputid = ctxt->input->id; /* passed down for the entity-boundary check at ')' */
6651
1.07M
    int res;
6652
6653
1.07M
    *result = NULL;
6654
6655
1.07M
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
1.07M
    NEXT;
6661
1.07M
    GROW;
6662
1.07M
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
1.07M
    SKIP_BLANKS;
6665
1.07M
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
586k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
586k
  res = XML_ELEMENT_TYPE_MIXED;
6668
586k
    } else {
6669
484k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
484k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
484k
    }
6672
1.07M
    SKIP_BLANKS;
6673
1.07M
    *result = tree; /* stored as is; a NULL tree does not change the return value */
6674
1.07M
    return(res);
6675
1.07M
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. If the input does not start with '<!' the
 * function returns -1 without consuming anything; otherwise '<!' is consumed
 * before the 'ELEMENT' keyword is checked.
 *
 * On success the declaration is reported through the SAX elementDecl
 * callback (when SAX is enabled and the callback is set). The content tree
 * is freed by this function unless the callback changed content->parent.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error. Note that when
 * only the closing '>' is missing, the error is reported and the content
 * tree is freed, but the type parsed so far is still returned.
6691
 */
6692
int
6693
1.47M
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
1.47M
    const xmlChar *name;
6695
1.47M
    int ret = -1;
6696
1.47M
    xmlElementContentPtr content  = NULL;
6697
6698
1.47M
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
1.47M
    SKIP(2); /* consume "<!" before looking at the keyword */
6701
6702
    /* GROW; done in the caller */
6703
1.47M
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
1.47M
  int inputid = ctxt->input->id; /* compared again when '>' is reached */
6705
6706
1.47M
  SKIP(7);
6707
1.47M
  if (SKIP_BLANKS == 0) {
6708
1.80k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
1.80k
               "Space required after 'ELEMENT'\n");
6710
1.80k
      return(-1);
6711
1.80k
  }
6712
1.46M
        name = xmlParseName(ctxt);
6713
1.46M
  if (name == NULL) {
6714
3.21k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
3.21k
         "xmlParseElementDecl: no name for Element\n");
6716
3.21k
      return(-1);
6717
3.21k
  }
6718
1.46M
  if (SKIP_BLANKS == 0) {
6719
6.87k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
6.87k
         "Space required after the element name\n");
6721
6.87k
  } /* reported, but parsing of the contentspec is still attempted */
6722
1.46M
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
379k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
379k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
1.08M
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
1.08M
             (NXT(2) == 'Y')) {
6730
6.66k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
6.66k
      ret = XML_ELEMENT_TYPE_ANY;
6735
1.08M
  } else if (RAW == '(') {
6736
1.07M
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
1.07M
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
8.47k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
8.47k
          (ctxt->inputNr == 1)) {
6743
620
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
620
    "PEReference: forbidden within markup decl in internal subset\n");
6745
7.85k
      } else {
6746
7.85k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
7.85k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
7.85k
            }
6749
8.47k
      return(-1);
6750
8.47k
  }
6751
6752
1.45M
  SKIP_BLANKS;
6753
6754
1.45M
  if (RAW != '>') { /* missing '>': report and free, but ret keeps the parsed type */
6755
23.5k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
23.5k
      if (content != NULL) {
6757
3.82k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
3.82k
      }
6759
1.43M
  } else {
6760
1.43M
      if (inputid != ctxt->input->id) {
6761
106
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
106
                               "Element declaration doesn't start and stop in"
6763
106
                               " the same entity\n");
6764
106
      }
6765
6766
1.43M
      NEXT;
6767
1.43M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
1.43M
    (ctxt->sax->elementDecl != NULL)) {
6769
1.28M
    if (content != NULL)
6770
942k
        content->parent = NULL; /* marker checked again after the callback */
6771
1.28M
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
1.28M
                           content);
6773
1.28M
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
101k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
101k
    }
6782
1.28M
      } else if (content != NULL) { /* no callback ran, so the tree is released here */
6783
101k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
101k
      }
6785
1.43M
  }
6786
1.45M
    }
6787
1.45M
    return(ret); /* still -1 when the 'ELEMENT' keyword was not matched */
6788
1.47M
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
15.5k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
15.5k
    int *inputIds = NULL;
6806
15.5k
    size_t inputIdsSize = 0;
6807
15.5k
    size_t depth = 0;
6808
6809
76.3k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
76.0k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
36.2k
            int id = ctxt->input->id;
6812
6813
36.2k
            SKIP(3);
6814
36.2k
            SKIP_BLANKS;
6815
6816
36.2k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
27.3k
                SKIP(7);
6818
27.3k
                SKIP_BLANKS;
6819
27.3k
                if (RAW != '[') {
6820
197
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
197
                    xmlHaltParser(ctxt);
6822
197
                    goto error;
6823
197
                }
6824
27.1k
                if (ctxt->input->id != id) {
6825
125
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
125
                                   "All markup of the conditional section is"
6827
125
                                   " not in the same entity\n");
6828
125
                }
6829
27.1k
                NEXT;
6830
6831
27.1k
                if (inputIdsSize <= depth) {
6832
8.93k
                    int *tmp;
6833
6834
8.93k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
8.93k
                    tmp = (int *) xmlRealloc(inputIds,
6836
8.93k
                            inputIdsSize * sizeof(int));
6837
8.93k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
8.93k
                    inputIds = tmp;
6842
8.93k
                }
6843
27.1k
                inputIds[depth] = id;
6844
27.1k
                depth++;
6845
27.1k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
7.86k
                size_t ignoreDepth = 0;
6847
6848
7.86k
                SKIP(6);
6849
7.86k
                SKIP_BLANKS;
6850
7.86k
                if (RAW != '[') {
6851
219
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
219
                    xmlHaltParser(ctxt);
6853
219
                    goto error;
6854
219
                }
6855
7.64k
                if (ctxt->input->id != id) {
6856
201
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
201
                                   "All markup of the conditional section is"
6858
201
                                   " not in the same entity\n");
6859
201
                }
6860
7.64k
                NEXT;
6861
6862
9.81M
                while (RAW != 0) {
6863
9.81M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
10.5k
                        SKIP(3);
6865
10.5k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
10.5k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
9.79M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
9.79M
                               (NXT(2) == '>')) {
6873
11.7k
                        if (ignoreDepth == 0)
6874
5.51k
                            break;
6875
6.21k
                        SKIP(3);
6876
6.21k
                        ignoreDepth--;
6877
9.78M
                    } else {
6878
9.78M
                        NEXT;
6879
9.78M
                    }
6880
9.81M
                }
6881
6882
7.64k
    if (RAW == 0) {
6883
2.12k
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
2.12k
                    goto error;
6885
2.12k
    }
6886
5.51k
                if (ctxt->input->id != id) {
6887
21
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
21
                                   "All markup of the conditional section is"
6889
21
                                   " not in the same entity\n");
6890
21
                }
6891
5.51k
                SKIP(3);
6892
5.51k
            } else {
6893
1.06k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
1.06k
                xmlHaltParser(ctxt);
6895
1.06k
                goto error;
6896
1.06k
            }
6897
39.7k
        } else if ((depth > 0) &&
6898
39.7k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
15.2k
            depth--;
6900
15.2k
            if (ctxt->input->id != inputIds[depth]) {
6901
483
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
483
                               "All markup of the conditional section is not"
6903
483
                               " in the same entity\n");
6904
483
            }
6905
15.2k
            SKIP(3);
6906
24.5k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
22.2k
            xmlParseMarkupDecl(ctxt);
6908
22.2k
        } else {
6909
2.30k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
2.30k
            xmlHaltParser(ctxt);
6911
2.30k
            goto error;
6912
2.30k
        }
6913
6914
70.0k
        if (depth == 0)
6915
9.26k
            break;
6916
6917
60.8k
        SKIP_BLANKS;
6918
60.8k
        GROW;
6919
60.8k
    }
6920
6921
15.5k
error:
6922
15.5k
    xmlFree(inputIds);
6923
15.5k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
57.9M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
57.9M
    GROW;
6952
57.9M
    if (CUR == '<') {
6953
57.9M
        if (NXT(1) == '!') {
6954
57.8M
      switch (NXT(2)) {
6955
2.90M
          case 'E':
6956
2.90M
        if (NXT(3) == 'L')
6957
1.47M
      xmlParseElementDecl(ctxt);
6958
1.42M
        else if (NXT(3) == 'N')
6959
1.42M
      xmlParseEntityDecl(ctxt);
6960
858
                    else
6961
858
                        SKIP(2);
6962
2.90M
        break;
6963
1.38M
          case 'A':
6964
1.38M
        xmlParseAttributeListDecl(ctxt);
6965
1.38M
        break;
6966
48.9k
          case 'N':
6967
48.9k
        xmlParseNotationDecl(ctxt);
6968
48.9k
        break;
6969
53.5M
          case '-':
6970
53.5M
        xmlParseComment(ctxt);
6971
53.5M
        break;
6972
13.6k
    default:
6973
        /* there is an error but it will be detected later */
6974
13.6k
                    SKIP(2);
6975
13.6k
        break;
6976
57.8M
      }
6977
57.8M
  } else if (NXT(1) == '?') {
6978
79.3k
      xmlParsePI(ctxt);
6979
79.3k
  }
6980
57.9M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
57.9M
    if (ctxt->instate == XML_PARSER_EOF)
6987
21.3k
        return;
6988
6989
57.9M
    ctxt->instate = XML_PARSER_DTD;
6990
57.9M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
50.3k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
50.3k
    xmlChar *version;
7006
50.3k
    const xmlChar *encoding;
7007
50.3k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
50.3k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
49.9k
  SKIP(5);
7014
49.9k
    } else {
7015
412
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
412
  return;
7017
412
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
49.9k
    oldstate = ctxt->instate;
7021
49.9k
    ctxt->instate = XML_PARSER_START;
7022
7023
49.9k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
49.9k
    version = xmlParseVersionInfo(ctxt);
7032
49.9k
    if (version == NULL)
7033
16.6k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
33.2k
    else {
7035
33.2k
  if (SKIP_BLANKS == 0) {
7036
2.90k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
2.90k
               "Space needed here\n");
7038
2.90k
  }
7039
33.2k
    }
7040
49.9k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
49.9k
    encoding = xmlParseEncodingDecl(ctxt);
7046
49.9k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
49.9k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
792
        ctxt->instate = oldstate;
7053
792
        return;
7054
792
    }
7055
49.1k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
5.94k
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
5.94k
           "Missing encoding in text declaration\n");
7058
5.94k
    }
7059
7060
49.1k
    SKIP_BLANKS;
7061
49.1k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
12.8k
        SKIP(2);
7063
36.2k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
1.31k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
1.31k
  NEXT;
7067
34.9k
    } else {
7068
34.9k
        int c;
7069
7070
34.9k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
2.87M
        while ((c = CUR) != 0) {
7072
2.85M
            NEXT;
7073
2.85M
            if (c == '>')
7074
11.7k
                break;
7075
2.85M
        }
7076
34.9k
    }
7077
7078
49.1k
    ctxt->instate = oldstate;
7079
49.1k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
43.7k
                       const xmlChar *SystemID) {
7096
43.7k
    xmlDetectSAX2(ctxt);
7097
43.7k
    GROW;
7098
7099
43.7k
    if ((ctxt->encoding == NULL) &&
7100
43.7k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
43.6k
        xmlChar start[4];
7102
43.6k
  xmlCharEncoding enc;
7103
7104
43.6k
  start[0] = RAW;
7105
43.6k
  start[1] = NXT(1);
7106
43.6k
  start[2] = NXT(2);
7107
43.6k
  start[3] = NXT(3);
7108
43.6k
  enc = xmlDetectCharEncoding(start, 4);
7109
43.6k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
7.84k
      xmlSwitchEncoding(ctxt, enc);
7111
43.6k
    }
7112
7113
43.7k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
7.31k
  xmlParseTextDecl(ctxt);
7115
7.31k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
96
      xmlHaltParser(ctxt);
7120
96
      return;
7121
96
  }
7122
7.31k
    }
7123
43.6k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
43.6k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
43.6k
    ctxt->instate = XML_PARSER_DTD;
7135
43.6k
    ctxt->external = 1;
7136
43.6k
    SKIP_BLANKS;
7137
13.4M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
13.4M
  GROW;
7139
13.4M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
15.5k
            xmlParseConditionalSections(ctxt);
7141
13.4M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
13.4M
            xmlParseMarkupDecl(ctxt);
7143
13.4M
        } else {
7144
11.3k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
11.3k
            xmlHaltParser(ctxt);
7146
11.3k
            return;
7147
11.3k
        }
7148
13.4M
        SKIP_BLANKS;
7149
13.4M
    }
7150
7151
32.2k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
32.2k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
5.93M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
5.93M
    xmlEntityPtr ent;
7175
5.93M
    xmlChar *val;
7176
5.93M
    int was_checked;
7177
5.93M
    xmlNodePtr list = NULL;
7178
5.93M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
5.93M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
5.93M
    if (NXT(1) == '#') {
7188
230k
  int i = 0;
7189
230k
  xmlChar out[16];
7190
230k
  int hex = NXT(2);
7191
230k
  int value = xmlParseCharRef(ctxt);
7192
7193
230k
  if (value == 0)
7194
36.6k
      return;
7195
193k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
118k
      if (value <= 0xFF) {
7202
113k
    out[0] = value;
7203
113k
    out[1] = 0;
7204
113k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
113k
        (!ctxt->disableSAX))
7206
79.7k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
113k
      } else {
7208
4.99k
    if ((hex == 'x') || (hex == 'X'))
7209
1.40k
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
3.59k
    else
7211
3.59k
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
4.99k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
4.99k
        (!ctxt->disableSAX))
7214
2.55k
        ctxt->sax->reference(ctxt->userData, out);
7215
4.99k
      }
7216
118k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
75.2k
      COPY_BUF(0 ,out, i, value);
7221
75.2k
      out[i] = 0;
7222
75.2k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
75.2k
    (!ctxt->disableSAX))
7224
55.5k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
75.2k
  }
7226
193k
  return;
7227
230k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
5.70M
    ent = xmlParseEntityRef(ctxt);
7233
5.70M
    if (ent == NULL) return;
7234
4.59M
    if (!ctxt->wellFormed)
7235
1.56M
  return;
7236
3.02M
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
3.02M
    if ((ent->name == NULL) ||
7240
3.02M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
177k
  val = ent->content;
7242
177k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
177k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
177k
      (!ctxt->disableSAX))
7248
177k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
177k
  return;
7250
177k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
2.84M
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
2.84M
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
169k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
160k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
160k
  void *user_data;
7273
160k
  if (ctxt->userData == ctxt)
7274
160k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
160k
        ctxt->sizeentcopy = 0;
7280
7281
160k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
560
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
560
            xmlHaltParser(ctxt);
7284
560
            return;
7285
560
        }
7286
7287
159k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
159k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
77.0k
      ctxt->depth++;
7297
77.0k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
77.0k
                                                user_data, &list);
7299
77.0k
      ctxt->depth--;
7300
7301
82.9k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
82.9k
      ctxt->depth++;
7303
82.9k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
82.9k
                                     user_data, ctxt->depth, ent->URI,
7305
82.9k
             ent->ExternalID, &list);
7306
82.9k
      ctxt->depth--;
7307
82.9k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
159k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
159k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
159k
        ent->expandedSize = ctxt->sizeentcopy;
7316
159k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
7.52k
            xmlHaltParser(ctxt);
7318
7.52k
      xmlFreeNodeList(list);
7319
7.52k
      return;
7320
7.52k
  }
7321
152k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
152k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
67.8k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
67.8k
            if ((ctxt->replaceEntities == 0) ||
7333
67.8k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
67.8k
                ((list->type == XML_TEXT_NODE) &&
7335
54.5k
                 (list->next == NULL))) {
7336
54.5k
                ent->owner = 1;
7337
479k
                while (list != NULL) {
7338
425k
                    list->parent = (xmlNodePtr) ent;
7339
425k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
425k
                    if (list->next == NULL)
7342
54.5k
                        ent->last = list;
7343
425k
                    list = list->next;
7344
425k
                }
7345
54.5k
                list = NULL;
7346
54.5k
            } else {
7347
13.3k
                ent->owner = 0;
7348
629k
                while (list != NULL) {
7349
616k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
616k
                    list->doc = ctxt->myDoc;
7351
616k
                    if (list->next == NULL)
7352
13.3k
                        ent->last = list;
7353
616k
                    list = list->next;
7354
616k
                }
7355
13.3k
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
13.3k
            }
7361
84.6k
  } else if ((ret != XML_ERR_OK) &&
7362
84.6k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
42.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
42.9k
         "Entity '%s' failed to parse\n", ent->name);
7365
42.9k
            if (ent->content != NULL)
7366
7.87k
                ent->content[0] = 0;
7367
42.9k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
152k
        was_checked = 0;
7374
152k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
2.83M
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
956k
  if (was_checked != 0) {
7389
862k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
862k
      if (ctxt->userData == ctxt)
7396
862k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
862k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
57.4k
    ctxt->depth++;
7402
57.4k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
57.4k
           ent->content, user_data, NULL);
7404
57.4k
    ctxt->depth--;
7405
805k
      } else if (ent->etype ==
7406
805k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
805k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
805k
    ctxt->depth++;
7410
805k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
805k
         ctxt->sax, user_data, ctxt->depth,
7412
805k
         ent->URI, ent->ExternalID, NULL);
7413
805k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
805k
                ctxt->sizeentities = oldsizeentities;
7417
805k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
862k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
862k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
862k
  }
7429
956k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
956k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
178k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
178k
  }
7437
956k
  return;
7438
956k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
1.88M
    if ((was_checked != 0) &&
7445
1.88M
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
335
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
1.88M
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
1.88M
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
416k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
416k
  return;
7458
416k
    }
7459
7460
1.46M
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
1.46M
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
1.46M
      if (((list == NULL) && (ent->owner == 0)) ||
7481
1.46M
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
454k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
454k
    cur = ent->children;
7492
1.02M
    while (cur != NULL) {
7493
1.02M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
1.02M
        if (nw != NULL) {
7495
1.02M
      if (nw->_private == NULL)
7496
1.02M
          nw->_private = cur->_private;
7497
1.02M
      if (firstChild == NULL){
7498
454k
          firstChild = nw;
7499
454k
      }
7500
1.02M
      nw = xmlAddChild(ctxt->node, nw);
7501
1.02M
        }
7502
1.02M
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
454k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
454k
          (nw != NULL) &&
7509
454k
          (nw->type == XML_ELEMENT_NODE) &&
7510
454k
          (nw->children == NULL))
7511
10.8k
          nw->extra = 1;
7512
7513
454k
      break;
7514
454k
        }
7515
574k
        cur = cur->next;
7516
574k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
1.01M
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
1.01M
    xmlNodePtr nw = NULL, cur, next, last,
7523
1.01M
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
1.01M
    cur = ent->children;
7532
1.01M
    ent->children = NULL;
7533
1.01M
    last = ent->last;
7534
1.01M
    ent->last = NULL;
7535
3.47M
    while (cur != NULL) {
7536
3.47M
        next = cur->next;
7537
3.47M
        cur->next = NULL;
7538
3.47M
        cur->parent = NULL;
7539
3.47M
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
3.47M
        if (nw != NULL) {
7541
3.47M
      if (nw->_private == NULL)
7542
3.47M
          nw->_private = cur->_private;
7543
3.47M
      if (firstChild == NULL){
7544
1.01M
          firstChild = cur;
7545
1.01M
      }
7546
3.47M
      xmlAddChild((xmlNodePtr) ent, nw);
7547
3.47M
        }
7548
3.47M
        xmlAddChild(ctxt->node, cur);
7549
3.47M
        if (cur == last)
7550
1.01M
      break;
7551
2.46M
        cur = next;
7552
2.46M
    }
7553
1.01M
    if (ent->owner == 0)
7554
13.3k
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
1.01M
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
1.46M
      ctxt->nodemem = 0;
7582
1.46M
      ctxt->nodelen = 0;
7583
1.46M
      return;
7584
1.46M
  }
7585
1.46M
    }
7586
1.46M
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
9.08M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
9.08M
    const xmlChar *name;
7621
9.08M
    xmlEntityPtr ent = NULL;
7622
7623
9.08M
    GROW;
7624
9.08M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
9.08M
    if (RAW != '&')
7628
0
        return(NULL);
7629
9.08M
    NEXT;
7630
9.08M
    name = xmlParseName(ctxt);
7631
9.08M
    if (name == NULL) {
7632
108k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
108k
           "xmlParseEntityRef: no name\n");
7634
108k
        return(NULL);
7635
108k
    }
7636
8.97M
    if (RAW != ';') {
7637
95.4k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
95.4k
  return(NULL);
7639
95.4k
    }
7640
8.87M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
8.87M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
6.47M
        ent = xmlGetPredefinedEntity(name);
7647
6.47M
        if (ent != NULL)
7648
306k
            return(ent);
7649
6.47M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
8.57M
    if (ctxt->sax != NULL) {
7656
8.57M
  if (ctxt->sax->getEntity != NULL)
7657
8.57M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
8.57M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
8.57M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
43.1k
      ent = xmlGetPredefinedEntity(name);
7661
8.57M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
8.57M
      (ctxt->userData==ctxt)) {
7663
144k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
144k
  }
7665
8.57M
    }
7666
8.57M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
8.57M
    if (ent == NULL) {
7690
1.33M
  if ((ctxt->standalone == 1) ||
7691
1.33M
      ((ctxt->hasExternalSubset == 0) &&
7692
1.30M
       (ctxt->hasPErefs == 0))) {
7693
851k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
851k
         "Entity '%s' not defined\n", name);
7695
851k
  } else {
7696
487k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
487k
         "Entity '%s' not defined\n", name);
7698
487k
      if ((ctxt->inSubset == 0) &&
7699
487k
    (ctxt->sax != NULL) &&
7700
487k
    (ctxt->sax->reference != NULL)) {
7701
463k
    ctxt->sax->reference(ctxt->userData, name);
7702
463k
      }
7703
487k
  }
7704
1.33M
  ctxt->valid = 0;
7705
1.33M
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
7.23M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
4.18k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
4.18k
     "Entity reference to unparsed entity %s\n", name);
7715
4.18k
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
7.22M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
7.22M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
21.5k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
21.5k
       "Attribute references external entity '%s'\n", name);
7726
21.5k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
7.20M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
7.20M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
2.84M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
30.8k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
936
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
30.8k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
30.8k
        }
7740
2.84M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
26.6k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
26.6k
                    "'<' in entity '%s' is not allowed in attributes "
7743
26.6k
                    "values\n", name);
7744
2.84M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
4.35M
    else {
7750
4.35M
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
4.35M
      default:
7758
4.35M
      break;
7759
4.35M
  }
7760
4.35M
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
8.57M
    return(ent);
7769
8.57M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
395M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
395M
    xmlChar *name;
7805
395M
    const xmlChar *ptr;
7806
395M
    xmlChar cur;
7807
395M
    xmlEntityPtr ent = NULL;
7808
7809
395M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
395M
    ptr = *str;
7812
395M
    cur = *ptr;
7813
395M
    if (cur != '&')
7814
336M
  return(NULL);
7815
7816
59.6M
    ptr++;
7817
59.6M
    name = xmlParseStringName(ctxt, &ptr);
7818
59.6M
    if (name == NULL) {
7819
10.1k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
10.1k
           "xmlParseStringEntityRef: no name\n");
7821
10.1k
  *str = ptr;
7822
10.1k
  return(NULL);
7823
10.1k
    }
7824
59.6M
    if (*ptr != ';') {
7825
18.3k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
18.3k
        xmlFree(name);
7827
18.3k
  *str = ptr;
7828
18.3k
  return(NULL);
7829
18.3k
    }
7830
59.6M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
59.6M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
51.9M
        ent = xmlGetPredefinedEntity(name);
7838
51.9M
        if (ent != NULL) {
7839
107k
            xmlFree(name);
7840
107k
            *str = ptr;
7841
107k
            return(ent);
7842
107k
        }
7843
51.9M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
59.5M
    if (ctxt->sax != NULL) {
7850
59.5M
  if (ctxt->sax->getEntity != NULL)
7851
59.5M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
59.5M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
1.89M
      ent = xmlGetPredefinedEntity(name);
7854
59.5M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
8.63M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
8.63M
  }
7857
59.5M
    }
7858
59.5M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
59.5M
    if (ent == NULL) {
7885
8.63M
  if ((ctxt->standalone == 1) ||
7886
8.63M
      ((ctxt->hasExternalSubset == 0) &&
7887
8.62M
       (ctxt->hasPErefs == 0))) {
7888
5.77M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
5.77M
         "Entity '%s' not defined\n", name);
7890
5.77M
  } else {
7891
2.86M
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
2.86M
        "Entity '%s' not defined\n",
7893
2.86M
        name);
7894
2.86M
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
8.63M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
50.8M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
2.59k
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
2.59k
     "Entity reference to unparsed entity %s\n", name);
7906
2.59k
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
50.8M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
50.8M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
20.2k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
20.2k
   "Attribute references external entity '%s'\n", name);
7917
20.2k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
50.8M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
50.8M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
50.2M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
21.2k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
1.59k
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
21.2k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
21.2k
        }
7931
50.2M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
135k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
135k
                    "'<' in entity '%s' is not allowed in attributes "
7934
135k
                    "values\n", name);
7935
50.2M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
583k
    else {
7941
583k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
583k
      default:
7949
583k
      break;
7950
583k
  }
7951
583k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
59.5M
    xmlFree(name);
7961
59.5M
    *str = ptr;
7962
59.5M
    return(ent);
7963
59.5M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
52.5M
{
8000
52.5M
    const xmlChar *name;
8001
52.5M
    xmlEntityPtr entity = NULL;
8002
52.5M
    xmlParserInputPtr input;
8003
8004
52.5M
    if (RAW != '%')
8005
0
        return;
8006
52.5M
    NEXT;
8007
52.5M
    name = xmlParseName(ctxt);
8008
52.5M
    if (name == NULL) {
8009
22.3k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
22.3k
  return;
8011
22.3k
    }
8012
52.4M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
52.4M
    if (RAW != ';') {
8016
239k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
239k
        return;
8018
239k
    }
8019
8020
52.2M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
52.2M
    if ((ctxt->sax != NULL) &&
8026
52.2M
  (ctxt->sax->getParameterEntity != NULL))
8027
52.2M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
52.2M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
52.2M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
8.49M
  if ((ctxt->standalone == 1) ||
8040
8.49M
      ((ctxt->hasExternalSubset == 0) &&
8041
8.49M
       (ctxt->hasPErefs == 0))) {
8042
3.36k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
3.36k
            "PEReference: %%%s; not found\n",
8044
3.36k
            name);
8045
8.49M
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
8.49M
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
858k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
858k
                                 "PEReference: %%%s; not found\n",
8056
858k
                                 name, NULL);
8057
858k
            } else
8058
7.63M
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
7.63M
                              "PEReference: %%%s; not found\n",
8060
7.63M
                              name, NULL);
8061
8.49M
            ctxt->valid = 0;
8062
8.49M
  }
8063
43.7M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
43.7M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
43.7M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
43.7M
  } else {
8073
43.7M
            xmlChar start[4];
8074
43.7M
            xmlCharEncoding enc;
8075
43.7M
            unsigned long parentConsumed;
8076
43.7M
            xmlEntityPtr oldEnt;
8077
8078
43.7M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
43.7M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
43.7M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
43.7M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
43.7M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
43.7M
    (ctxt->replaceEntities == 0) &&
8084
43.7M
    (ctxt->validate == 0))
8085
1.89k
    return;
8086
8087
43.7M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
580
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
580
                xmlHaltParser(ctxt);
8090
580
                return;
8091
580
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
43.7M
            parentConsumed = ctxt->input->parentConsumed;
8095
43.7M
            oldEnt = ctxt->input->entity;
8096
43.7M
            if ((oldEnt == NULL) ||
8097
43.7M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
43.1M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
1.25M
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
1.25M
                xmlSaturatedAddSizeT(&parentConsumed,
8101
1.25M
                                     ctxt->input->cur - ctxt->input->base);
8102
1.25M
            }
8103
8104
43.7M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
43.7M
      if (xmlPushInput(ctxt, input) < 0) {
8106
12.2k
                xmlFreeInputStream(input);
8107
12.2k
    return;
8108
12.2k
            }
8109
8110
43.7M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
43.7M
            input->parentConsumed = parentConsumed;
8113
8114
43.7M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
88.9k
                GROW
8125
88.9k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
88.9k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
87.9k
                    start[0] = RAW;
8129
87.9k
                    start[1] = NXT(1);
8130
87.9k
                    start[2] = NXT(2);
8131
87.9k
                    start[3] = NXT(3);
8132
87.9k
                    enc = xmlDetectCharEncoding(start, 4);
8133
87.9k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
43.0k
                        xmlSwitchEncoding(ctxt, enc);
8135
43.0k
                    }
8136
87.9k
                }
8137
8138
88.9k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
88.9k
                    (IS_BLANK_CH(NXT(5)))) {
8140
36.3k
                    xmlParseTextDecl(ctxt);
8141
36.3k
                }
8142
88.9k
            }
8143
43.7M
  }
8144
43.7M
    }
8145
52.2M
    ctxt->hasPErefs = 1;
8146
52.2M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
4.65k
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
4.65k
    xmlParserInputPtr input;
8162
4.65k
    xmlBufferPtr buf;
8163
4.65k
    int l, c;
8164
4.65k
    int count = 0;
8165
8166
4.65k
    if ((ctxt == NULL) || (entity == NULL) ||
8167
4.65k
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
4.65k
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
4.65k
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
4.65k
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
4.65k
    buf = xmlBufferCreate();
8180
4.65k
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
4.65k
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
4.65k
    input = xmlNewEntityInputStream(ctxt, entity);
8188
4.65k
    if (input == NULL) {
8189
1.45k
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
1.45k
              "xmlLoadEntityContent input error");
8191
1.45k
  xmlBufferFree(buf);
8192
1.45k
        return(-1);
8193
1.45k
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
3.20k
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
3.20k
    GROW;
8206
3.20k
    c = CUR_CHAR(l);
8207
16.1M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
16.1M
           (IS_CHAR(c))) {
8209
16.1M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
16.1M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
157k
      count = 0;
8212
157k
      GROW;
8213
157k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
157k
  }
8218
16.1M
  NEXTL(l);
8219
16.1M
  c = CUR_CHAR(l);
8220
16.1M
  if (c == 0) {
8221
2.79k
      count = 0;
8222
2.79k
      GROW;
8223
2.79k
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
2.79k
      c = CUR_CHAR(l);
8228
2.79k
  }
8229
16.1M
    }
8230
8231
3.20k
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
2.04k
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
2.04k
        xmlPopInput(ctxt);
8234
2.04k
    } else if (!IS_CHAR(c)) {
8235
1.15k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
1.15k
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
1.15k
                    c);
8238
1.15k
  xmlBufferFree(buf);
8239
1.15k
  return(-1);
8240
1.15k
    }
8241
2.04k
    entity->content = buf->content;
8242
2.04k
    entity->length = buf->use;
8243
2.04k
    buf->content = NULL;
8244
2.04k
    xmlBufferFree(buf);
8245
8246
2.04k
    return(0);
8247
3.20k
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
4.78M
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
4.78M
    const xmlChar *ptr;
8283
4.78M
    xmlChar cur;
8284
4.78M
    xmlChar *name;
8285
4.78M
    xmlEntityPtr entity = NULL;
8286
8287
4.78M
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
4.78M
    ptr = *str;
8289
4.78M
    cur = *ptr;
8290
4.78M
    if (cur != '%')
8291
0
        return(NULL);
8292
4.78M
    ptr++;
8293
4.78M
    name = xmlParseStringName(ctxt, &ptr);
8294
4.78M
    if (name == NULL) {
8295
10.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
10.2k
           "xmlParseStringPEReference: no name\n");
8297
10.2k
  *str = ptr;
8298
10.2k
  return(NULL);
8299
10.2k
    }
8300
4.77M
    cur = *ptr;
8301
4.77M
    if (cur != ';') {
8302
292k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
292k
  xmlFree(name);
8304
292k
  *str = ptr;
8305
292k
  return(NULL);
8306
292k
    }
8307
4.48M
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
4.48M
    if ((ctxt->sax != NULL) &&
8313
4.48M
  (ctxt->sax->getParameterEntity != NULL))
8314
4.48M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
4.48M
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
4.48M
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
1.93M
  if ((ctxt->standalone == 1) ||
8330
1.93M
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
1.04k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
1.04k
     "PEReference: %%%s; not found\n", name);
8333
1.93M
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
1.93M
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
1.93M
        "PEReference: %%%s; not found\n",
8343
1.93M
        name, NULL);
8344
1.93M
      ctxt->valid = 0;
8345
1.93M
  }
8346
2.54M
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
2.54M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
2.54M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
2.54M
    }
8357
4.48M
    ctxt->hasPErefs = 1;
8358
4.48M
    xmlFree(name);
8359
4.48M
    *str = ptr;
8360
4.48M
    return(entity);
8361
4.48M
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
358k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
358k
    const xmlChar *name = NULL;
8382
358k
    xmlChar *ExternalID = NULL;
8383
358k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
358k
    SKIP(9);
8389
8390
358k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
358k
    name = xmlParseName(ctxt);
8396
358k
    if (name == NULL) {
8397
777
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
777
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
777
    }
8400
358k
    ctxt->intSubName = name;
8401
8402
358k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
358k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
358k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
145k
        ctxt->hasExternalSubset = 1;
8411
145k
    }
8412
358k
    ctxt->extSubURI = URI;
8413
358k
    ctxt->extSubSystem = ExternalID;
8414
8415
358k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
358k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
358k
  (!ctxt->disableSAX))
8422
353k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
358k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
358k
    if (RAW == '[')
8431
285k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
73.4k
    if (RAW != '>') {
8437
7.45k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
7.45k
    }
8439
73.4k
    NEXT;
8440
73.4k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
285k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
285k
    if (RAW == '[') {
8457
285k
        int baseInputNr = ctxt->inputNr;
8458
285k
        ctxt->instate = XML_PARSER_DTD;
8459
285k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
285k
  SKIP_BLANKS;
8466
44.8M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
44.8M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
44.6M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
44.6M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
44.6M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
44.5M
          xmlParseMarkupDecl(ctxt);
8478
44.5M
            } else if (RAW == '%') {
8479
70.8k
          xmlParsePEReference(ctxt);
8480
70.8k
            } else {
8481
57.3k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
57.3k
                        "xmlParseInternalSubset: error detected in"
8483
57.3k
                        " Markup declaration\n");
8484
57.3k
                xmlHaltParser(ctxt);
8485
57.3k
                return;
8486
57.3k
            }
8487
44.5M
      SKIP_BLANKS;
8488
44.5M
  }
8489
228k
  if (RAW == ']') {
8490
210k
      NEXT;
8491
210k
      SKIP_BLANKS;
8492
210k
  }
8493
228k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
228k
    if (RAW != '>') {
8499
19.4k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
19.4k
  return;
8501
19.4k
    }
8502
208k
    NEXT;
8503
208k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
4.69M
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
4.69M
    const xmlChar *name;
8544
4.69M
    xmlChar *val;
8545
8546
4.69M
    *value = NULL;
8547
4.69M
    GROW;
8548
4.69M
    name = xmlParseName(ctxt);
8549
4.69M
    if (name == NULL) {
8550
227k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
227k
                 "error parsing attribute name\n");
8552
227k
        return(NULL);
8553
227k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
4.46M
    SKIP_BLANKS;
8559
4.46M
    if (RAW == '=') {
8560
4.31M
        NEXT;
8561
4.31M
  SKIP_BLANKS;
8562
4.31M
  val = xmlParseAttValue(ctxt);
8563
4.31M
  ctxt->instate = XML_PARSER_CONTENT;
8564
4.31M
    } else {
8565
151k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
151k
         "Specification mandates value for attribute %s\n", name);
8567
151k
  return(name);
8568
151k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
4.31M
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
14.1k
  if (!xmlCheckLanguageID(val)) {
8577
7.76k
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
7.76k
              "Malformed value for xml:lang : %s\n",
8579
7.76k
        val, NULL);
8580
7.76k
  }
8581
14.1k
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
4.31M
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
2.14k
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
0
      *(ctxt->space) = 0;
8589
2.14k
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
593
      *(ctxt->space) = 1;
8591
1.55k
  else {
8592
1.55k
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
1.55k
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
1.55k
                                 val, NULL);
8595
1.55k
  }
8596
2.14k
    }
8597
8598
4.31M
    *value = val;
8599
4.31M
    return(name);
8600
4.46M
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
4.82M
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
4.82M
    const xmlChar *name;
8634
4.82M
    const xmlChar *attname;
8635
4.82M
    xmlChar *attvalue;
8636
4.82M
    const xmlChar **atts = ctxt->atts;
8637
4.82M
    int nbatts = 0;
8638
4.82M
    int maxatts = ctxt->maxatts;
8639
4.82M
    int i;
8640
8641
4.82M
    if (RAW != '<') return(NULL);
8642
4.82M
    NEXT1;
8643
8644
4.82M
    name = xmlParseName(ctxt);
8645
4.82M
    if (name == NULL) {
8646
176k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
176k
       "xmlParseStartTag: invalid element name\n");
8648
176k
        return(NULL);
8649
176k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
4.64M
    SKIP_BLANKS;
8657
4.64M
    GROW;
8658
8659
6.51M
    while (((RAW != '>') &&
8660
6.51M
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
6.51M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
4.69M
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
4.69M
        if (attname == NULL) {
8664
227k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
227k
         "xmlParseStartTag: problem parsing attributes\n");
8666
227k
      break;
8667
227k
  }
8668
4.46M
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
5.97M
      for (i = 0; i < nbatts;i += 2) {
8675
1.68M
          if (xmlStrEqual(atts[i], attname)) {
8676
9.40k
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
9.40k
        xmlFree(attvalue);
8678
9.40k
        goto failed;
8679
9.40k
    }
8680
1.68M
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
4.29M
      if (atts == NULL) {
8685
95.6k
          maxatts = 22; /* allow for 10 attrs by default */
8686
95.6k
          atts = (const xmlChar **)
8687
95.6k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
95.6k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
95.6k
    ctxt->atts = atts;
8695
95.6k
    ctxt->maxatts = maxatts;
8696
4.19M
      } else if (nbatts + 4 > maxatts) {
8697
155
          const xmlChar **n;
8698
8699
155
          maxatts *= 2;
8700
155
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
155
               maxatts * sizeof(const xmlChar *));
8702
155
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
155
    atts = n;
8709
155
    ctxt->atts = atts;
8710
155
    ctxt->maxatts = maxatts;
8711
155
      }
8712
4.29M
      atts[nbatts++] = attname;
8713
4.29M
      atts[nbatts++] = attvalue;
8714
4.29M
      atts[nbatts] = NULL;
8715
4.29M
      atts[nbatts + 1] = NULL;
8716
4.29M
  } else {
8717
169k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
169k
  }
8720
8721
4.46M
failed:
8722
8723
4.46M
  GROW
8724
4.46M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
2.59M
      break;
8726
1.87M
  if (SKIP_BLANKS == 0) {
8727
256k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
256k
         "attributes construct error\n");
8729
256k
  }
8730
1.87M
  SHRINK;
8731
1.87M
        GROW;
8732
1.87M
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
4.64M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
4.64M
  (!ctxt->disableSAX)) {
8739
4.11M
  if (nbatts > 0)
8740
2.34M
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
1.77M
  else
8742
1.77M
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
4.11M
    }
8744
8745
4.64M
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
8.59M
        for (i = 1;i < nbatts;i+=2)
8748
4.29M
      if (atts[i] != NULL)
8749
4.29M
         xmlFree((xmlChar *) atts[i]);
8750
4.30M
    }
8751
4.64M
    return(name);
8752
4.64M
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
2.49M
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
2.49M
    const xmlChar *name;
8772
8773
2.49M
    GROW;
8774
2.49M
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
2.49M
    SKIP(2);
8780
8781
2.49M
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
2.49M
    GROW;
8787
2.49M
    SKIP_BLANKS;
8788
2.49M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
39.6k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
39.6k
    } else
8791
2.45M
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
2.49M
    if (name != (xmlChar*)1) {
8800
102k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
102k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
102k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
102k
                    ctxt->name, line, name);
8804
102k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
2.49M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
2.49M
  (!ctxt->disableSAX))
8811
2.27M
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
2.49M
    namePop(ctxt);
8814
2.49M
    spacePop(ctxt);
8815
2.49M
    return;
8816
2.49M
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
8.73M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
8.73M
    int i;
8858
8859
8.73M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
9.47M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
1.51M
        if (ctxt->nsTab[i] == prefix) {
8862
648k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
4.15k
          return(NULL);
8864
644k
      return(ctxt->nsTab[i + 1]);
8865
648k
  }
8866
7.96M
    return(NULL);
8867
8.61M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
18.1M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
18.1M
    const xmlChar *l, *p;
8886
8887
18.1M
    GROW;
8888
8889
18.1M
    l = xmlParseNCName(ctxt);
8890
18.1M
    if (l == NULL) {
8891
411k
        if (CUR == ':') {
8892
7.29k
      l = xmlParseName(ctxt);
8893
7.29k
      if (l != NULL) {
8894
7.29k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
7.29k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
7.29k
    *prefix = NULL;
8897
7.29k
    return(l);
8898
7.29k
      }
8899
7.29k
  }
8900
403k
        return(NULL);
8901
411k
    }
8902
17.7M
    if (CUR == ':') {
8903
916k
        NEXT;
8904
916k
  p = l;
8905
916k
  l = xmlParseNCName(ctxt);
8906
916k
  if (l == NULL) {
8907
17.0k
      xmlChar *tmp;
8908
8909
17.0k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
17.0k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
17.0k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
17.0k
      l = xmlParseNmtoken(ctxt);
8914
17.0k
      if (l == NULL) {
8915
11.4k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
11.4k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
11.4k
            } else {
8919
5.56k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
5.56k
    xmlFree((char *)l);
8921
5.56k
      }
8922
17.0k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
17.0k
      if (tmp != NULL) xmlFree(tmp);
8924
17.0k
      *prefix = NULL;
8925
17.0k
      return(p);
8926
17.0k
  }
8927
899k
  if (CUR == ':') {
8928
12.0k
      xmlChar *tmp;
8929
8930
12.0k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
12.0k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
12.0k
      NEXT;
8933
12.0k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
12.0k
      if (tmp != NULL) {
8935
9.80k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
9.80k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
9.80k
    if (tmp != NULL) xmlFree(tmp);
8938
9.80k
    *prefix = p;
8939
9.80k
    return(l);
8940
9.80k
      }
8941
2.20k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
2.20k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
2.20k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
2.20k
      if (tmp != NULL) xmlFree(tmp);
8946
2.20k
      *prefix = p;
8947
2.20k
      return(l);
8948
2.20k
  }
8949
887k
  *prefix = p;
8950
887k
    } else
8951
16.8M
        *prefix = NULL;
8952
17.6M
    return(l);
8953
17.7M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
225k
                        xmlChar const *prefix) {
8971
225k
    const xmlChar *cmp;
8972
225k
    const xmlChar *in;
8973
225k
    const xmlChar *ret;
8974
225k
    const xmlChar *prefix2;
8975
8976
225k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
225k
    GROW;
8979
225k
    in = ctxt->input->cur;
8980
8981
225k
    cmp = prefix;
8982
784k
    while (*in != 0 && *in == *cmp) {
8983
558k
  ++in;
8984
558k
  ++cmp;
8985
558k
    }
8986
225k
    if ((*cmp == 0) && (*in == ':')) {
8987
203k
        in++;
8988
203k
  cmp = name;
8989
1.60M
  while (*in != 0 && *in == *cmp) {
8990
1.40M
      ++in;
8991
1.40M
      ++cmp;
8992
1.40M
  }
8993
203k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
183k
            ctxt->input->col += in - ctxt->input->cur;
8996
183k
      ctxt->input->cur = in;
8997
183k
      return((const xmlChar*) 1);
8998
183k
  }
8999
203k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
42.1k
    ret = xmlParseQName (ctxt, &prefix2);
9004
42.1k
    if ((ret == name) && (prefix == prefix2))
9005
682
  return((const xmlChar*) 1);
9006
41.4k
    return ret;
9007
42.1k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
6.34k
    const xmlChar *oldbase = ctxt->input->base;\
9045
6.34k
    GROW;\
9046
6.34k
    if (ctxt->instate == XML_PARSER_EOF)\
9047
6.34k
        return(NULL);\
9048
6.34k
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
6.34k
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
13.9M
{
9059
13.9M
    xmlChar limit = 0;
9060
13.9M
    const xmlChar *in = NULL, *start, *end, *last;
9061
13.9M
    xmlChar *ret = NULL;
9062
13.9M
    int line, col;
9063
13.9M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
4.67M
                    XML_MAX_HUGE_LENGTH :
9065
13.9M
                    XML_MAX_TEXT_LENGTH;
9066
9067
13.9M
    GROW;
9068
13.9M
    in = (xmlChar *) CUR_PTR;
9069
13.9M
    line = ctxt->input->line;
9070
13.9M
    col = ctxt->input->col;
9071
13.9M
    if (*in != '"' && *in != '\'') {
9072
41.8k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
41.8k
        return (NULL);
9074
41.8k
    }
9075
13.9M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
13.9M
    limit = *in++;
9083
13.9M
    col++;
9084
13.9M
    end = ctxt->input->end;
9085
13.9M
    start = in;
9086
13.9M
    if (in >= end) {
9087
887
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
887
    }
9089
13.9M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
603k
  while ((in < end) && (*in != limit) &&
9094
603k
         ((*in == 0x20) || (*in == 0x9) ||
9095
600k
          (*in == 0xA) || (*in == 0xD))) {
9096
212k
      if (*in == 0xA) {
9097
27.2k
          line++; col = 1;
9098
185k
      } else {
9099
185k
          col++;
9100
185k
      }
9101
212k
      in++;
9102
212k
      start = in;
9103
212k
      if (in >= end) {
9104
170
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
170
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
170
      }
9111
212k
  }
9112
3.89M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
3.89M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
3.50M
      col++;
9115
3.50M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
3.50M
      if (in >= end) {
9117
324
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
324
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
324
      }
9124
3.50M
  }
9125
391k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
396k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
718k
  while ((in < end) && (*in != limit) &&
9131
718k
         ((*in == 0x20) || (*in == 0x9) ||
9132
356k
          (*in == 0xA) || (*in == 0xD))) {
9133
326k
      if (*in == 0xA) {
9134
18.4k
          line++, col = 1;
9135
308k
      } else {
9136
308k
          col++;
9137
308k
      }
9138
326k
      in++;
9139
326k
      if (in >= end) {
9140
269
    const xmlChar *oldbase = ctxt->input->base;
9141
269
    GROW;
9142
269
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
269
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
269
    end = ctxt->input->end;
9151
269
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
269
      }
9157
326k
  }
9158
391k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
391k
  if (*in != limit) goto need_complex;
9164
13.5M
    } else {
9165
152M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
152M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
138M
      in++;
9168
138M
      col++;
9169
138M
      if (in >= end) {
9170
4.96k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
4.96k
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
4.96k
      }
9177
138M
  }
9178
13.5M
  last = in;
9179
13.5M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
13.5M
  if (*in != limit) goto need_complex;
9185
13.5M
    }
9186
13.3M
    in++;
9187
13.3M
    col++;
9188
13.3M
    if (len != NULL) {
9189
9.00M
        if (alloc) *alloc = 0;
9190
9.00M
        *len = last - start;
9191
9.00M
        ret = (xmlChar *) start;
9192
9.00M
    } else {
9193
4.38M
        if (alloc) *alloc = 1;
9194
4.38M
        ret = xmlStrndup(start, last - start);
9195
4.38M
    }
9196
13.3M
    CUR_PTR = in;
9197
13.3M
    ctxt->input->line = line;
9198
13.3M
    ctxt->input->col = col;
9199
13.3M
    return ret;
9200
533k
need_complex:
9201
533k
    if (alloc) *alloc = 1;
9202
533k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
13.9M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
9.52M
{
9226
9.52M
    const xmlChar *name;
9227
9.52M
    xmlChar *val, *internal_val = NULL;
9228
9.52M
    int normalize = 0;
9229
9230
9.52M
    *value = NULL;
9231
9.52M
    GROW;
9232
9.52M
    name = xmlParseQName(ctxt, prefix);
9233
9.52M
    if (name == NULL) {
9234
139k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
139k
                       "error parsing attribute name\n");
9236
139k
        return (NULL);
9237
139k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
9.38M
    if (ctxt->attsSpecial != NULL) {
9243
1.09M
        int type;
9244
9245
1.09M
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
1.09M
                                                 pref, elem, *prefix, name);
9247
1.09M
        if (type != 0)
9248
392k
            normalize = 1;
9249
1.09M
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
9.38M
    SKIP_BLANKS;
9255
9.38M
    if (RAW == '=') {
9256
9.31M
        NEXT;
9257
9.31M
        SKIP_BLANKS;
9258
9.31M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
9.31M
        if (val == NULL)
9260
19.5k
            return (NULL);
9261
9.29M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
391k
      if (*alloc) {
9269
30.2k
          const xmlChar *val2;
9270
9271
30.2k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
30.2k
    if ((val2 != NULL) && (val2 != val)) {
9273
5.25k
        xmlFree(val);
9274
5.25k
        val = (xmlChar *) val2;
9275
5.25k
    }
9276
30.2k
      }
9277
391k
  }
9278
9.29M
        ctxt->instate = XML_PARSER_CONTENT;
9279
9.29M
    } else {
9280
69.4k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
69.4k
                          "Specification mandates value for attribute %s\n",
9282
69.4k
                          name);
9283
69.4k
        return (name);
9284
69.4k
    }
9285
9286
9.29M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
68.0k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
15.0k
            internal_val = xmlStrndup(val, *len);
9294
15.0k
            if (!xmlCheckLanguageID(internal_val)) {
9295
8.37k
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
8.37k
                              "Malformed value for xml:lang : %s\n",
9297
8.37k
                              internal_val, NULL);
9298
8.37k
            }
9299
15.0k
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
68.0k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
4.58k
            internal_val = xmlStrndup(val, *len);
9306
4.58k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
0
                *(ctxt->space) = 0;
9308
4.58k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
454
                *(ctxt->space) = 1;
9310
4.12k
            else {
9311
4.12k
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
4.12k
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
4.12k
                              internal_val, NULL);
9314
4.12k
            }
9315
4.58k
        }
9316
68.0k
        if (internal_val) {
9317
19.6k
            xmlFree(internal_val);
9318
19.6k
        }
9319
68.0k
    }
9320
9321
9.29M
    *value = val;
9322
9.29M
    return (name);
9323
9.38M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
8.57M
                  const xmlChar **URI, int *tlen) {
9356
8.57M
    const xmlChar *localname;
9357
8.57M
    const xmlChar *prefix;
9358
8.57M
    const xmlChar *attname;
9359
8.57M
    const xmlChar *aprefix;
9360
8.57M
    const xmlChar *nsname;
9361
8.57M
    xmlChar *attvalue;
9362
8.57M
    const xmlChar **atts = ctxt->atts;
9363
8.57M
    int maxatts = ctxt->maxatts;
9364
8.57M
    int nratts, nbatts, nbdef, inputid;
9365
8.57M
    int i, j, nbNs, attval;
9366
8.57M
    unsigned long cur;
9367
8.57M
    int nsNr = ctxt->nsNr;
9368
9369
8.57M
    if (RAW != '<') return(NULL);
9370
8.57M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
8.57M
    SHRINK;
9380
8.57M
    cur = ctxt->input->cur - ctxt->input->base;
9381
8.57M
    inputid = ctxt->input->id;
9382
8.57M
    nbatts = 0;
9383
8.57M
    nratts = 0;
9384
8.57M
    nbdef = 0;
9385
8.57M
    nbNs = 0;
9386
8.57M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
8.57M
    ctxt->nsNr = nsNr;
9389
9390
8.57M
    localname = xmlParseQName(ctxt, &prefix);
9391
8.57M
    if (localname == NULL) {
9392
261k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
261k
           "StartTag: invalid element name\n");
9394
261k
        return(NULL);
9395
261k
    }
9396
8.30M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
8.30M
    SKIP_BLANKS;
9404
8.30M
    GROW;
9405
9406
12.1M
    while (((RAW != '>') &&
9407
12.1M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
12.1M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
9.52M
  int len = -1, alloc = 0;
9410
9411
9.52M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
9.52M
                               &aprefix, &attvalue, &len, &alloc);
9413
9.52M
        if (attname == NULL) {
9414
159k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
159k
           "xmlParseStartTag: problem parsing attributes\n");
9416
159k
      break;
9417
159k
  }
9418
9.36M
        if (attvalue == NULL)
9419
69.4k
            goto next_attr;
9420
9.29M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
9.29M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
50.5k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
50.5k
            xmlURIPtr uri;
9425
9426
50.5k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
50.5k
            if (*URL != 0) {
9434
49.5k
                uri = xmlParseURI((const char *) URL);
9435
49.5k
                if (uri == NULL) {
9436
14.3k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
14.3k
                             "xmlns: '%s' is not a valid URI\n",
9438
14.3k
                                       URL, NULL, NULL);
9439
35.1k
                } else {
9440
35.1k
                    if (uri->scheme == NULL) {
9441
8.19k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
8.19k
                                  "xmlns: URI %s is not absolute\n",
9443
8.19k
                                  URL, NULL, NULL);
9444
8.19k
                    }
9445
35.1k
                    xmlFreeURI(uri);
9446
35.1k
                }
9447
49.5k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
49.5k
                if ((len == 29) &&
9456
49.5k
                    (xmlStrEqual(URL,
9457
987
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
0
                         "reuse of the xmlns namespace name is forbidden\n",
9460
0
                             NULL, NULL, NULL);
9461
0
                    goto next_attr;
9462
0
                }
9463
49.5k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
66.0k
            for (j = 1;j <= nbNs;j++)
9468
23.0k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
7.54k
                    break;
9470
50.5k
            if (j <= nbNs)
9471
7.54k
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
42.9k
            else
9473
42.9k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
9.24M
        } else if (aprefix == ctxt->str_xmlns) {
9476
117k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
117k
            xmlURIPtr uri;
9478
9479
117k
            if (attname == ctxt->str_xml) {
9480
898
                if (URL != ctxt->str_xml_ns) {
9481
898
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
898
                             "xml namespace prefix mapped to wrong URI\n",
9483
898
                             NULL, NULL, NULL);
9484
898
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
898
                goto next_attr;
9489
898
            }
9490
117k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
117k
            if (attname == ctxt->str_xmlns) {
9499
912
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
912
                         "redefinition of the xmlns prefix is forbidden\n",
9501
912
                         NULL, NULL, NULL);
9502
912
                goto next_attr;
9503
912
            }
9504
116k
            if ((len == 29) &&
9505
116k
                (xmlStrEqual(URL,
9506
2.74k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
0
                         "reuse of the xmlns namespace name is forbidden\n",
9509
0
                         NULL, NULL, NULL);
9510
0
                goto next_attr;
9511
0
            }
9512
116k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
2.09k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
2.09k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
2.09k
                              attname, NULL, NULL);
9516
2.09k
                goto next_attr;
9517
114k
            } else {
9518
114k
                uri = xmlParseURI((const char *) URL);
9519
114k
                if (uri == NULL) {
9520
24.0k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
24.0k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
24.0k
                                       attname, URL, NULL);
9523
89.9k
                } else {
9524
89.9k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
2.99k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
2.99k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
2.99k
                                  attname, URL, NULL);
9528
2.99k
                    }
9529
89.9k
                    xmlFreeURI(uri);
9530
89.9k
                }
9531
114k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
177k
            for (j = 1;j <= nbNs;j++)
9537
68.9k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
5.22k
                    break;
9539
114k
            if (j <= nbNs)
9540
5.22k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
108k
            else
9542
108k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
9.12M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
9.12M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
126k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
126k
                maxatts = ctxt->maxatts;
9553
126k
                atts = ctxt->atts;
9554
126k
            }
9555
9.12M
            ctxt->attallocs[nratts++] = alloc;
9556
9.12M
            atts[nbatts++] = attname;
9557
9.12M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
9.12M
            if (alloc)
9565
253k
                atts[nbatts++] = NULL;
9566
8.87M
            else
9567
8.87M
                atts[nbatts++] = ctxt->input->base;
9568
9.12M
            atts[nbatts++] = attvalue;
9569
9.12M
            attvalue += len;
9570
9.12M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
9.12M
            if (alloc != 0) attval = 1;
9575
9.12M
            attvalue = NULL; /* moved into atts */
9576
9.12M
        }
9577
9578
9.36M
next_attr:
9579
9.36M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
39.5k
            xmlFree(attvalue);
9581
39.5k
            attvalue = NULL;
9582
39.5k
        }
9583
9584
9.36M
  GROW
9585
9.36M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
9.36M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
5.34M
      break;
9589
4.02M
  if (SKIP_BLANKS == 0) {
9590
199k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
199k
         "attributes construct error\n");
9592
199k
      break;
9593
199k
  }
9594
3.82M
        GROW;
9595
3.82M
    }
9596
9597
8.30M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
17.4M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
9.12M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
8.87M
            const xmlChar *old = atts[i+2];
9612
8.87M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
8.87M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
8.87M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
8.87M
        }
9616
9.12M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
8.30M
    if (ctxt->attsDefault != NULL) {
9622
1.37M
        xmlDefAttrsPtr defaults;
9623
9624
1.37M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
1.37M
  if (defaults != NULL) {
9626
379k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
259k
          attname = defaults->values[5 * i];
9628
259k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
259k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
16.7k
        for (j = 1;j <= nbNs;j++)
9638
6.61k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
2.79k
          break;
9640
12.9k
              if (j <= nbNs) continue;
9641
9642
10.1k
        nsname = xmlGetNamespace(ctxt, NULL);
9643
10.1k
        if (nsname != defaults->values[5 * i + 2]) {
9644
4.70k
      if (nsPush(ctxt, NULL,
9645
4.70k
                 defaults->values[5 * i + 2]) > 0)
9646
4.68k
          nbNs++;
9647
4.70k
        }
9648
246k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
22.9k
        for (j = 1;j <= nbNs;j++)
9653
7.19k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
3.85k
          break;
9655
19.5k
              if (j <= nbNs) continue;
9656
9657
15.7k
        nsname = xmlGetNamespace(ctxt, attname);
9658
15.7k
        if (nsname != defaults->values[5 * i + 2]) {
9659
6.62k
      if (nsPush(ctxt, attname,
9660
6.62k
                 defaults->values[5 * i + 2]) > 0)
9661
6.62k
          nbNs++;
9662
6.62k
        }
9663
226k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
615k
        for (j = 0;j < nbatts;j+=5) {
9668
391k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
2.78k
          break;
9670
391k
        }
9671
226k
        if (j < nbatts) continue;
9672
9673
224k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
6.48k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
6.48k
      maxatts = ctxt->maxatts;
9679
6.48k
      atts = ctxt->atts;
9680
6.48k
        }
9681
224k
        atts[nbatts++] = attname;
9682
224k
        atts[nbatts++] = aprefix;
9683
224k
        if (aprefix == NULL)
9684
168k
      atts[nbatts++] = NULL;
9685
55.2k
        else
9686
55.2k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
224k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
224k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
224k
        if ((ctxt->standalone == 1) &&
9690
224k
            (defaults->values[5 * i + 4] != NULL)) {
9691
8
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
8
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
8
                                   attname, localname);
9694
8
        }
9695
224k
        nbdef++;
9696
224k
    }
9697
259k
      }
9698
120k
  }
9699
1.37M
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
17.6M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
9.35M
  if (atts[i + 1] != NULL) {
9709
344k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
344k
      if (nsname == NULL) {
9711
145k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
145k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
145k
        atts[i + 1], atts[i], localname);
9714
145k
      }
9715
344k
      atts[i + 2] = nsname;
9716
344k
  } else
9717
9.00M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
13.5M
        for (j = 0; j < i;j += 5) {
9725
4.22M
      if (atts[i] == atts[j]) {
9726
35.1k
          if (atts[i+1] == atts[j+1]) {
9727
16.3k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
16.3k
        break;
9729
16.3k
    }
9730
18.7k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
1.29k
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
1.29k
           "Namespaced Attribute %s in '%s' redefined\n",
9733
1.29k
           atts[i], nsname, NULL);
9734
1.29k
        break;
9735
1.29k
    }
9736
18.7k
      }
9737
4.22M
  }
9738
9.35M
    }
9739
9740
8.30M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
8.30M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
217k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
217k
           "Namespace prefix %s on %s is not defined\n",
9744
217k
     prefix, localname, NULL);
9745
217k
    }
9746
8.30M
    *pref = prefix;
9747
8.30M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
8.30M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
8.30M
  (!ctxt->disableSAX)) {
9754
7.13M
  if (nbNs > 0)
9755
84.4k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
84.4k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
84.4k
        nbatts / 5, nbdef, atts);
9758
7.04M
  else
9759
7.04M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
7.04M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
7.13M
    }
9762
9763
8.30M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
8.30M
    if (attval != 0) {
9768
552k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
313k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
253k
          xmlFree((xmlChar *) atts[i]);
9771
238k
    }
9772
9773
8.30M
    return(localname);
9774
8.30M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
3.74M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
3.74M
    const xmlChar *name;
9794
9795
3.74M
    GROW;
9796
3.74M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
3.74M
    SKIP(2);
9801
9802
3.74M
    if (tag->prefix == NULL)
9803
3.51M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
225k
    else
9805
225k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
3.74M
    GROW;
9811
3.74M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
3.74M
    SKIP_BLANKS;
9814
3.74M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
54.3k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
54.3k
    } else
9817
3.68M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
3.74M
    if (name != (xmlChar*)1) {
9826
142k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
142k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
142k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
142k
                    ctxt->name, tag->line, name);
9830
142k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
3.74M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
3.74M
  (!ctxt->disableSAX))
9837
3.16M
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
3.16M
                                tag->URI);
9839
9840
3.74M
    spacePop(ctxt);
9841
3.74M
    if (tag->nsNr != 0)
9842
22.1k
  nsPop(ctxt, tag->nsNr);
9843
3.74M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
33.0k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
33.0k
    xmlChar *buf = NULL;
9864
33.0k
    int len = 0;
9865
33.0k
    int size = XML_PARSER_BUFFER_SIZE;
9866
33.0k
    int r, rl;
9867
33.0k
    int s, sl;
9868
33.0k
    int cur, l;
9869
33.0k
    int count = 0;
9870
33.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
12.4k
                    XML_MAX_HUGE_LENGTH :
9872
33.0k
                    XML_MAX_TEXT_LENGTH;
9873
9874
33.0k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
33.0k
    SKIP(3);
9877
9878
33.0k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
33.0k
    SKIP(6);
9881
9882
33.0k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
33.0k
    r = CUR_CHAR(rl);
9884
33.0k
    if (!IS_CHAR(r)) {
9885
2.09k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
2.09k
        goto out;
9887
2.09k
    }
9888
30.9k
    NEXTL(rl);
9889
30.9k
    s = CUR_CHAR(sl);
9890
30.9k
    if (!IS_CHAR(s)) {
9891
1.98k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
1.98k
        goto out;
9893
1.98k
    }
9894
28.9k
    NEXTL(sl);
9895
28.9k
    cur = CUR_CHAR(l);
9896
28.9k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
28.9k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
19.2M
    while (IS_CHAR(cur) &&
9902
19.2M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
19.2M
  if (len + 5 >= size) {
9904
33.9k
      xmlChar *tmp;
9905
9906
33.9k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
33.9k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
33.9k
      buf = tmp;
9912
33.9k
      size *= 2;
9913
33.9k
  }
9914
19.2M
  COPY_BUF(rl,buf,len,r);
9915
19.2M
  r = s;
9916
19.2M
  rl = sl;
9917
19.2M
  s = cur;
9918
19.2M
  sl = l;
9919
19.2M
  count++;
9920
19.2M
  if (count > 50) {
9921
366k
      SHRINK;
9922
366k
      GROW;
9923
366k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
366k
      count = 0;
9927
366k
  }
9928
19.2M
  NEXTL(l);
9929
19.2M
  cur = CUR_CHAR(l);
9930
19.2M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
19.2M
    }
9936
28.9k
    buf[len] = 0;
9937
28.9k
    if (cur != '>') {
9938
5.22k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
5.22k
                       "CData section not finished\n%.50s\n", buf);
9940
5.22k
        goto out;
9941
5.22k
    }
9942
23.7k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
23.7k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
17.2k
  if (ctxt->sax->cdataBlock != NULL)
9949
11.0k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
6.21k
  else if (ctxt->sax->characters != NULL)
9951
6.21k
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
17.2k
    }
9953
9954
33.0k
out:
9955
33.0k
    if (ctxt->instate != XML_PARSER_EOF)
9956
33.0k
        ctxt->instate = XML_PARSER_CONTENT;
9957
33.0k
    xmlFree(buf);
9958
33.0k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
330k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
330k
    int nameNr = ctxt->nameNr;
9971
9972
330k
    GROW;
9973
22.2M
    while ((RAW != 0) &&
9974
22.2M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
22.0M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
22.0M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
45.3k
      xmlParsePI(ctxt);
9982
45.3k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
21.9M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
33.0k
      xmlParseCDSect(ctxt);
9990
33.0k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
21.9M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
21.9M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
176k
      xmlParseComment(ctxt);
9998
176k
      ctxt->instate = XML_PARSER_CONTENT;
9999
176k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
21.7M
  else if (*cur == '<') {
10005
8.98M
            if (NXT(1) == '/') {
10006
2.90M
                if (ctxt->nameNr <= nameNr)
10007
46.0k
                    break;
10008
2.85M
          xmlParseElementEnd(ctxt);
10009
6.08M
            } else {
10010
6.08M
          xmlParseElementStart(ctxt);
10011
6.08M
            }
10012
8.98M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
12.7M
  else if (*cur == '&') {
10020
3.23M
      xmlParseReference(ctxt);
10021
3.23M
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
9.53M
  else {
10027
9.53M
      xmlParseCharData(ctxt, 0);
10028
9.53M
  }
10029
10030
21.9M
  GROW;
10031
21.9M
  SHRINK;
10032
21.9M
    }
10033
330k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
231k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
231k
    int nameNr = ctxt->nameNr;
10047
10048
231k
    xmlParseContentInternal(ctxt);
10049
10050
231k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
10.3k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
10.3k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
10.3k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
10.3k
                "Premature end of data in tag %s line %d\n",
10055
10.3k
    name, line, NULL);
10056
10.3k
    }
10057
231k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
138k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
138k
    if (xmlParseElementStart(ctxt) != 0)
10078
39.8k
        return;
10079
10080
99.1k
    xmlParseContentInternal(ctxt);
10081
99.1k
    if (ctxt->instate == XML_PARSER_EOF)
10082
382
  return;
10083
10084
98.7k
    if (CUR == 0) {
10085
56.7k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
56.7k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
56.7k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
56.7k
                "Premature end of data in tag %s line %d\n",
10089
56.7k
    name, line, NULL);
10090
56.7k
        return;
10091
56.7k
    }
10092
10093
42.0k
    xmlParseElementEnd(ctxt);
10094
42.0k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
6.22M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
6.22M
    const xmlChar *name;
10108
6.22M
    const xmlChar *prefix = NULL;
10109
6.22M
    const xmlChar *URI = NULL;
10110
6.22M
    xmlParserNodeInfo node_info;
10111
6.22M
    int line, tlen = 0;
10112
6.22M
    xmlNodePtr ret;
10113
6.22M
    int nsNr = ctxt->nsNr;
10114
10115
6.22M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
6.22M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
6.22M
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
6.22M
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
6.22M
    else if (*ctxt->space == -2)
10134
1.17M
  spacePush(ctxt, -1);
10135
5.04M
    else
10136
5.04M
  spacePush(ctxt, *ctxt->space);
10137
10138
6.22M
    line = ctxt->input->line;
10139
6.22M
#ifdef LIBXML_SAX1_ENABLED
10140
6.22M
    if (ctxt->sax2)
10141
3.94M
#endif /* LIBXML_SAX1_ENABLED */
10142
3.94M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
2.28M
#ifdef LIBXML_SAX1_ENABLED
10144
2.28M
    else
10145
2.28M
  name = xmlParseStartTag(ctxt);
10146
6.22M
#endif /* LIBXML_SAX1_ENABLED */
10147
6.22M
    if (ctxt->instate == XML_PARSER_EOF)
10148
311
  return(-1);
10149
6.22M
    if (name == NULL) {
10150
415k
  spacePop(ctxt);
10151
415k
        return(-1);
10152
415k
    }
10153
5.80M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
5.80M
    ret = ctxt->node;
10155
10156
5.80M
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
5.80M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
5.80M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
5.80M
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
5.80M
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
2.41M
        SKIP(2);
10172
2.41M
  if (ctxt->sax2) {
10173
1.64M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
1.64M
    (!ctxt->disableSAX))
10175
1.17M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
1.64M
#ifdef LIBXML_SAX1_ENABLED
10177
1.64M
  } else {
10178
767k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
767k
    (!ctxt->disableSAX))
10180
514k
    ctxt->sax->endElement(ctxt->userData, name);
10181
767k
#endif /* LIBXML_SAX1_ENABLED */
10182
767k
  }
10183
2.41M
  namePop(ctxt);
10184
2.41M
  spacePop(ctxt);
10185
2.41M
  if (nsNr != ctxt->nsNr)
10186
8.18k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
2.41M
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
2.41M
  return(1);
10195
2.41M
    }
10196
3.39M
    if (RAW == '>') {
10197
3.11M
        NEXT1;
10198
3.11M
    } else {
10199
276k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
276k
         "Couldn't find end of Start Tag %s line %d\n",
10201
276k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
276k
  nodePop(ctxt);
10207
276k
  namePop(ctxt);
10208
276k
  spacePop(ctxt);
10209
276k
  if (nsNr != ctxt->nsNr)
10210
16.1k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
276k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
276k
  return(-1);
10223
276k
    }
10224
10225
3.11M
    return(0);
10226
3.39M
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
2.90M
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
2.90M
    xmlParserNodeInfo node_info;
10237
2.90M
    xmlNodePtr ret = ctxt->node;
10238
10239
2.90M
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
2.90M
    if (ctxt->sax2) {
10249
1.75M
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
1.75M
  namePop(ctxt);
10251
1.75M
    }
10252
1.14M
#ifdef LIBXML_SAX1_ENABLED
10253
1.14M
    else
10254
1.14M
  xmlParseEndTag1(ctxt, 0);
10255
2.90M
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
2.90M
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
2.90M
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
323k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
323k
    xmlChar *buf = NULL;
10286
323k
    int len = 0;
10287
323k
    int size = 10;
10288
323k
    xmlChar cur;
10289
10290
323k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
323k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
323k
    cur = CUR;
10296
323k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
3.41k
  xmlFree(buf);
10298
3.41k
  return(NULL);
10299
3.41k
    }
10300
320k
    buf[len++] = cur;
10301
320k
    NEXT;
10302
320k
    cur=CUR;
10303
320k
    if (cur != '.') {
10304
5.60k
  xmlFree(buf);
10305
5.60k
  return(NULL);
10306
5.60k
    }
10307
314k
    buf[len++] = cur;
10308
314k
    NEXT;
10309
314k
    cur=CUR;
10310
1.26M
    while ((cur >= '0') && (cur <= '9')) {
10311
953k
  if (len + 1 >= size) {
10312
1.78k
      xmlChar *tmp;
10313
10314
1.78k
      size *= 2;
10315
1.78k
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
1.78k
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
1.78k
      buf = tmp;
10322
1.78k
  }
10323
953k
  buf[len++] = cur;
10324
953k
  NEXT;
10325
953k
  cur=CUR;
10326
953k
    }
10327
314k
    buf[len] = 0;
10328
314k
    return(buf);
10329
314k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
357k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
357k
    xmlChar *version = NULL;
10349
10350
357k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
333k
  SKIP(7);
10352
333k
  SKIP_BLANKS;
10353
333k
  if (RAW != '=') {
10354
4.82k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
4.82k
      return(NULL);
10356
4.82k
        }
10357
328k
  NEXT;
10358
328k
  SKIP_BLANKS;
10359
328k
  if (RAW == '"') {
10360
300k
      NEXT;
10361
300k
      version = xmlParseVersionNum(ctxt);
10362
300k
      if (RAW != '"') {
10363
13.0k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
13.0k
      } else
10365
287k
          NEXT;
10366
300k
  } else if (RAW == '\''){
10367
23.4k
      NEXT;
10368
23.4k
      version = xmlParseVersionNum(ctxt);
10369
23.4k
      if (RAW != '\'') {
10370
2.47k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
2.47k
      } else
10372
20.9k
          NEXT;
10373
23.4k
  } else {
10374
4.38k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
4.38k
  }
10376
328k
    }
10377
352k
    return(version);
10378
357k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
143k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
143k
    xmlChar *buf = NULL;
10395
143k
    int len = 0;
10396
143k
    int size = 10;
10397
143k
    xmlChar cur;
10398
10399
143k
    cur = CUR;
10400
143k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
143k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
141k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
141k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
141k
  buf[len++] = cur;
10409
141k
  NEXT;
10410
141k
  cur = CUR;
10411
2.41M
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
2.41M
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
2.41M
         ((cur >= '0') && (cur <= '9')) ||
10414
2.41M
         (cur == '.') || (cur == '_') ||
10415
2.41M
         (cur == '-')) {
10416
2.27M
      if (len + 1 >= size) {
10417
65.7k
          xmlChar *tmp;
10418
10419
65.7k
    size *= 2;
10420
65.7k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
65.7k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
65.7k
    buf = tmp;
10427
65.7k
      }
10428
2.27M
      buf[len++] = cur;
10429
2.27M
      NEXT;
10430
2.27M
      cur = CUR;
10431
2.27M
      if (cur == 0) {
10432
1.81k
          SHRINK;
10433
1.81k
    GROW;
10434
1.81k
    cur = CUR;
10435
1.81k
      }
10436
2.27M
        }
10437
141k
  buf[len] = 0;
10438
141k
    } else {
10439
2.14k
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
2.14k
    }
10441
143k
    return(buf);
10442
143k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
236k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
236k
    xmlChar *encoding = NULL;
10462
10463
236k
    SKIP_BLANKS;
10464
236k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
147k
  SKIP(8);
10466
147k
  SKIP_BLANKS;
10467
147k
  if (RAW != '=') {
10468
1.86k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
1.86k
      return(NULL);
10470
1.86k
        }
10471
145k
  NEXT;
10472
145k
  SKIP_BLANKS;
10473
145k
  if (RAW == '"') {
10474
128k
      NEXT;
10475
128k
      encoding = xmlParseEncName(ctxt);
10476
128k
      if (RAW != '"') {
10477
7.50k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
7.50k
    xmlFree((xmlChar *) encoding);
10479
7.50k
    return(NULL);
10480
7.50k
      } else
10481
121k
          NEXT;
10482
128k
  } else if (RAW == '\''){
10483
14.9k
      NEXT;
10484
14.9k
      encoding = xmlParseEncName(ctxt);
10485
14.9k
      if (RAW != '\'') {
10486
2.01k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
2.01k
    xmlFree((xmlChar *) encoding);
10488
2.01k
    return(NULL);
10489
2.01k
      } else
10490
12.9k
          NEXT;
10491
14.9k
  } else {
10492
2.52k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
2.52k
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
136k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
64.2k
      xmlFree((xmlChar *) encoding);
10500
64.2k
            return(NULL);
10501
64.2k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
72.1k
        if ((encoding != NULL) &&
10508
72.1k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
71.2k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
2.41k
      if ((ctxt->encoding == NULL) &&
10517
2.41k
          (ctxt->input->buf != NULL) &&
10518
2.41k
          (ctxt->input->buf->encoder == NULL)) {
10519
2.18k
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
2.18k
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
2.18k
      }
10522
2.41k
      if (ctxt->encoding != NULL)
10523
230
    xmlFree((xmlChar *) ctxt->encoding);
10524
2.41k
      ctxt->encoding = encoding;
10525
2.41k
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
69.7k
        else if ((encoding != NULL) &&
10530
69.7k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
68.7k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
26.6k
      if (ctxt->encoding != NULL)
10533
810
    xmlFree((xmlChar *) ctxt->encoding);
10534
26.6k
      ctxt->encoding = encoding;
10535
26.6k
  }
10536
43.0k
  else if (encoding != NULL) {
10537
42.1k
      xmlCharEncodingHandlerPtr handler;
10538
10539
42.1k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
42.1k
      ctxt->input->encoding = encoding;
10542
10543
42.1k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
42.1k
      if (handler != NULL) {
10545
41.3k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
612
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
612
        return(NULL);
10549
612
    }
10550
41.3k
      } else {
10551
750
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
750
      "Unsupported encoding %s\n", encoding);
10553
750
    return(NULL);
10554
750
      }
10555
42.1k
  }
10556
72.1k
    }
10557
159k
    return(encoding);
10558
236k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
160k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
160k
    int standalone = -2;
10596
10597
160k
    SKIP_BLANKS;
10598
160k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
24.9k
  SKIP(10);
10600
24.9k
        SKIP_BLANKS;
10601
24.9k
  if (RAW != '=') {
10602
297
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
297
      return(standalone);
10604
297
        }
10605
24.6k
  NEXT;
10606
24.6k
  SKIP_BLANKS;
10607
24.6k
        if (RAW == '\''){
10608
11.9k
      NEXT;
10609
11.9k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
9.21k
          standalone = 0;
10611
9.21k
                SKIP(2);
10612
9.21k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
2.76k
                 (NXT(2) == 's')) {
10614
2.34k
          standalone = 1;
10615
2.34k
    SKIP(3);
10616
2.34k
            } else {
10617
426
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
426
      }
10619
11.9k
      if (RAW != '\'') {
10620
675
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
675
      } else
10622
11.3k
          NEXT;
10623
12.6k
  } else if (RAW == '"'){
10624
12.2k
      NEXT;
10625
12.2k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
7.79k
          standalone = 0;
10627
7.79k
    SKIP(2);
10628
7.79k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
4.44k
                 (NXT(2) == 's')) {
10630
4.00k
          standalone = 1;
10631
4.00k
                SKIP(3);
10632
4.00k
            } else {
10633
444
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
444
      }
10635
12.2k
      if (RAW != '"') {
10636
627
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
627
      } else
10638
11.6k
          NEXT;
10639
12.2k
  } else {
10640
432
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
432
        }
10642
24.6k
    }
10643
160k
    return(standalone);
10644
160k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
307k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
307k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
307k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
307k
    SKIP(5);
10672
10673
307k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
307k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
307k
    version = xmlParseVersionInfo(ctxt);
10683
307k
    if (version == NULL) {
10684
25.6k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
281k
    } else {
10686
281k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
3.87k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
1.22k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
1.22k
                "Unsupported version '%s'\n",
10693
1.22k
                version);
10694
2.64k
      } else {
10695
2.64k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
2.34k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
2.34k
                      "Unsupported version '%s'\n",
10698
2.34k
          version, NULL);
10699
2.34k
    } else {
10700
303
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
303
              "Unsupported version '%s'\n",
10702
303
              version);
10703
303
    }
10704
2.64k
      }
10705
3.87k
  }
10706
281k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
281k
  ctxt->version = version;
10709
281k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
307k
    if (!IS_BLANK_CH(RAW)) {
10715
152k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
120k
      SKIP(2);
10717
120k
      return;
10718
120k
  }
10719
31.5k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
31.5k
    }
10721
186k
    xmlParseEncodingDecl(ctxt);
10722
186k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
186k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
570
        return;
10728
570
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
185k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
27.2k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
25.3k
      SKIP(2);
10736
25.3k
      return;
10737
25.3k
  }
10738
1.88k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
1.88k
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
160k
    GROW;
10745
10746
160k
    SKIP_BLANKS;
10747
160k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
160k
    SKIP_BLANKS;
10750
160k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
108k
        SKIP(2);
10752
108k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
975
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
975
  NEXT;
10756
50.9k
    } else {
10757
50.9k
        int c;
10758
10759
50.9k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
3.92M
        while ((c = CUR) != 0) {
10761
3.91M
            NEXT;
10762
3.91M
            if (c == '>')
10763
46.6k
                break;
10764
3.91M
        }
10765
50.9k
    }
10766
160k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
416k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
464k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
464k
        SKIP_BLANKS;
10783
464k
        GROW;
10784
464k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
28.7k
      xmlParsePI(ctxt);
10786
436k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
19.6k
      xmlParseComment(ctxt);
10788
416k
        } else {
10789
416k
            break;
10790
416k
        }
10791
464k
    }
10792
416k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
189k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
189k
    xmlChar start[4];
10812
189k
    xmlCharEncoding enc;
10813
10814
189k
    xmlInitParser();
10815
10816
189k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
189k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
189k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
189k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
189k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
189k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
189k
    if ((ctxt->encoding == NULL) &&
10835
189k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
188k
  start[0] = RAW;
10842
188k
  start[1] = NXT(1);
10843
188k
  start[2] = NXT(2);
10844
188k
  start[3] = NXT(3);
10845
188k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
188k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
109k
      xmlSwitchEncoding(ctxt, enc);
10848
109k
  }
10849
188k
    }
10850
10851
10852
189k
    if (CUR == 0) {
10853
328
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
328
  return(-1);
10855
328
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
189k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
7.22k
       GROW;
10865
7.22k
    }
10866
189k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
102k
  xmlParseXMLDecl(ctxt);
10872
102k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
102k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
190
      return(-1);
10878
190
  }
10879
102k
  ctxt->standalone = ctxt->input->standalone;
10880
102k
  SKIP_BLANKS;
10881
102k
    } else {
10882
86.5k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
86.5k
    }
10884
188k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
184k
        ctxt->sax->startDocument(ctxt->userData);
10886
188k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
188k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
188k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
188k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
188k
    GROW;
10903
188k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
121k
  ctxt->inSubset = 1;
10906
121k
  xmlParseDocTypeDecl(ctxt);
10907
121k
  if (RAW == '[') {
10908
96.7k
      ctxt->instate = XML_PARSER_DTD;
10909
96.7k
      xmlParseInternalSubset(ctxt);
10910
96.7k
      if (ctxt->instate == XML_PARSER_EOF)
10911
26.0k
    return(-1);
10912
96.7k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
95.1k
  ctxt->inSubset = 2;
10918
95.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
95.1k
      (!ctxt->disableSAX))
10920
89.7k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
89.7k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
95.1k
  if (ctxt->instate == XML_PARSER_EOF)
10923
6.65k
      return(-1);
10924
88.5k
  ctxt->inSubset = 0;
10925
10926
88.5k
        xmlCleanSpecialAttr(ctxt);
10927
10928
88.5k
  ctxt->instate = XML_PARSER_PROLOG;
10929
88.5k
  xmlParseMisc(ctxt);
10930
88.5k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
156k
    GROW;
10936
156k
    if (RAW != '<') {
10937
17.1k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
17.1k
           "Start tag expected, '<' not found\n");
10939
138k
    } else {
10940
138k
  ctxt->instate = XML_PARSER_CONTENT;
10941
138k
  xmlParseElement(ctxt);
10942
138k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
138k
  xmlParseMisc(ctxt);
10949
10950
138k
  if (RAW != 0) {
10951
41.3k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
41.3k
  }
10953
138k
  ctxt->instate = XML_PARSER_EOF;
10954
138k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
156k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
156k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
156k
    if ((ctxt->myDoc != NULL) &&
10966
156k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
273
  xmlFreeDoc(ctxt->myDoc);
10968
273
  ctxt->myDoc = NULL;
10969
273
    }
10970
10971
156k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
17.9k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
17.9k
  if (ctxt->valid)
10974
12.9k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
17.9k
  if (ctxt->nsWellFormed)
10976
16.7k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
17.9k
  if (ctxt->options & XML_PARSE_OLD10)
10978
3.01k
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
17.9k
    }
10980
156k
    if (! ctxt->wellFormed) {
10981
138k
  ctxt->valid = 0;
10982
138k
  return(-1);
10983
138k
    }
10984
17.9k
    return(0);
10985
156k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
5.49M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
5.49M
    const xmlChar *cur;
11110
11111
5.49M
    if (ctxt->checkIndex == 0) {
11112
5.31M
        cur = ctxt->input->cur + 1;
11113
5.31M
    } else {
11114
186k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
186k
    }
11116
11117
5.49M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
200k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
200k
        return(0);
11120
5.29M
    } else {
11121
5.29M
        ctxt->checkIndex = 0;
11122
5.29M
        return(1);
11123
5.29M
    }
11124
5.49M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
1.40M
                     const char *str, size_t strLen) {
11138
1.40M
    const xmlChar *cur, *term;
11139
11140
1.40M
    if (ctxt->checkIndex == 0) {
11141
740k
        cur = ctxt->input->cur + startDelta;
11142
740k
    } else {
11143
667k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
667k
    }
11145
11146
1.40M
    term = BAD_CAST strstr((const char *) cur, str);
11147
1.40M
    if (term == NULL) {
11148
829k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
829k
        if ((size_t) (end - cur) < strLen)
11152
13.8k
            end = cur;
11153
815k
        else
11154
815k
            end -= strLen - 1;
11155
829k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
829k
    } else {
11157
579k
        ctxt->checkIndex = 0;
11158
579k
    }
11159
11160
1.40M
    return(term);
11161
1.40M
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
8.24M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
8.24M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
8.24M
    const xmlChar *end = ctxt->input->end;
11173
11174
148M
    while (cur < end) {
11175
147M
        if ((*cur == '<') || (*cur == '&')) {
11176
7.33M
            ctxt->checkIndex = 0;
11177
7.33M
            return(1);
11178
7.33M
        }
11179
139M
        cur++;
11180
139M
    }
11181
11182
911k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
911k
    return(0);
11184
8.24M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
8.25M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
8.25M
    const xmlChar *cur;
11196
8.25M
    const xmlChar *end = ctxt->input->end;
11197
8.25M
    int state = ctxt->endCheckState;
11198
11199
8.25M
    if (ctxt->checkIndex == 0)
11200
6.64M
        cur = ctxt->input->cur + 1;
11201
1.61M
    else
11202
1.61M
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
345M
    while (cur < end) {
11205
343M
        if (state) {
11206
197M
            if (*cur == state)
11207
8.84M
                state = 0;
11208
197M
        } else if (*cur == '\'' || *cur == '"') {
11209
8.90M
            state = *cur;
11210
137M
        } else if (*cur == '>') {
11211
6.55M
            ctxt->checkIndex = 0;
11212
6.55M
            ctxt->endCheckState = 0;
11213
6.55M
            return(1);
11214
6.55M
        }
11215
337M
        cur++;
11216
337M
    }
11217
11218
1.69M
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
1.69M
    ctxt->endCheckState = state;
11220
1.69M
    return(0);
11221
8.25M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
692k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
692k
    const xmlChar *cur, *start;
11240
692k
    const xmlChar *end = ctxt->input->end;
11241
692k
    int state = ctxt->endCheckState;
11242
11243
692k
    if (ctxt->checkIndex == 0) {
11244
184k
        cur = ctxt->input->cur + 1;
11245
507k
    } else {
11246
507k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
507k
    }
11248
692k
    start = cur;
11249
11250
120M
    while (cur < end) {
11251
119M
        if (state == '-') {
11252
12.6M
            if ((*cur == '-') &&
11253
12.6M
                (cur[1] == '-') &&
11254
12.6M
                (cur[2] == '>')) {
11255
88.1k
                state = 0;
11256
88.1k
                cur += 3;
11257
88.1k
                start = cur;
11258
88.1k
                continue;
11259
88.1k
            }
11260
12.6M
        }
11261
107M
        else if (state == ']') {
11262
182k
            if (*cur == '>') {
11263
152k
                ctxt->checkIndex = 0;
11264
152k
                ctxt->endCheckState = 0;
11265
152k
                return(1);
11266
152k
            }
11267
30.5k
            if (IS_BLANK_CH(*cur)) {
11268
18.8k
                state = ' ';
11269
18.8k
            } else if (*cur != ']') {
11270
5.52k
                state = 0;
11271
5.52k
                start = cur;
11272
5.52k
                continue;
11273
5.52k
            }
11274
30.5k
        }
11275
106M
        else if (state == ' ') {
11276
337k
            if (*cur == '>') {
11277
578
                ctxt->checkIndex = 0;
11278
578
                ctxt->endCheckState = 0;
11279
578
                return(1);
11280
578
            }
11281
337k
            if (!IS_BLANK_CH(*cur)) {
11282
18.2k
                state = 0;
11283
18.2k
                start = cur;
11284
18.2k
                continue;
11285
18.2k
            }
11286
337k
        }
11287
106M
        else if (state != 0) {
11288
54.7M
            if (*cur == state) {
11289
1.17M
                state = 0;
11290
1.17M
                start = cur + 1;
11291
1.17M
            }
11292
54.7M
        }
11293
51.8M
        else if (*cur == '<') {
11294
1.42M
            if ((cur[1] == '!') &&
11295
1.42M
                (cur[2] == '-') &&
11296
1.42M
                (cur[3] == '-')) {
11297
90.2k
                state = '-';
11298
90.2k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
90.2k
                start = cur;
11301
90.2k
                continue;
11302
90.2k
            }
11303
1.42M
        }
11304
50.4M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
1.36M
            state = *cur;
11306
1.36M
        }
11307
11308
119M
        cur++;
11309
119M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
539k
    if ((state == 0) || (state == '-')) {
11316
271k
        if (cur - start < 3)
11317
23.5k
            cur = start;
11318
248k
        else
11319
248k
            cur -= 3;
11320
271k
    }
11321
539k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
539k
    ctxt->endCheckState = state;
11323
539k
    return(0);
11324
692k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
278k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
278k
    int ix;
11340
278k
    unsigned char c;
11341
278k
    int codepoint;
11342
11343
278k
    if ((utf == NULL) || (len <= 0))
11344
3.89k
        return(0);
11345
11346
14.3M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
14.3M
        c = utf[ix];
11348
14.3M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
12.9M
      if (c >= 0x20)
11350
11.8M
    ix++;
11351
1.09M
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
1.08M
          ix++;
11353
12.8k
      else
11354
12.8k
          return(-ix);
11355
12.9M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
435k
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
428k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
70.0k
          return(-ix);
11359
358k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
358k
      codepoint |= utf[ix+1] & 0x3f;
11361
358k
      if (!xmlIsCharQ(codepoint))
11362
8.20k
          return(-ix);
11363
350k
      ix += 2;
11364
949k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
365k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
359k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
359k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
21.8k
        return(-ix);
11369
337k
      codepoint = (utf[ix] & 0xf) << 12;
11370
337k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
337k
      codepoint |= utf[ix+2] & 0x3f;
11372
337k
      if (!xmlIsCharQ(codepoint))
11373
9.87k
          return(-ix);
11374
327k
      ix += 3;
11375
584k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
559k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
553k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
553k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
553k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
34.2k
        return(-ix);
11381
518k
      codepoint = (utf[ix] & 0x7) << 18;
11382
518k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
518k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
518k
      codepoint |= utf[ix+3] & 0x3f;
11385
518k
      if (!xmlIsCharQ(codepoint))
11386
13.3k
          return(-ix);
11387
505k
      ix += 4;
11388
505k
  } else       /* unknown encoding */
11389
24.3k
      return(-ix);
11390
14.3M
      }
11391
60.7k
      return(ix);
11392
274k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
4.97M
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
4.97M
    int ret = 0;
11406
4.97M
    int avail, tlen;
11407
4.97M
    xmlChar cur, next;
11408
11409
4.97M
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
4.97M
    if ((ctxt->input != NULL) &&
11466
4.97M
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
76.6k
        xmlParserInputShrink(ctxt->input);
11468
76.6k
    }
11469
11470
41.9M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
41.9M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
100k
      return(0);
11473
11474
41.8M
  if (ctxt->input == NULL) break;
11475
41.8M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
41.8M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
41.8M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
41.8M
          (ctxt->input->buf->raw != NULL) &&
11488
41.8M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
178k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
178k
                                                 ctxt->input);
11491
178k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
178k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
178k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
178k
                                      base, current);
11496
178k
      }
11497
41.8M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
41.8M
        (ctxt->input->cur - ctxt->input->base);
11499
41.8M
  }
11500
41.8M
        if (avail < 1)
11501
257k
      goto done;
11502
41.5M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
974k
            case XML_PARSER_START:
11509
974k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
271k
        xmlChar start[4];
11511
271k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
271k
        if (avail < 4)
11517
3.18k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
268k
        start[0] = RAW;
11527
268k
        start[1] = NXT(1);
11528
268k
        start[2] = NXT(2);
11529
268k
        start[3] = NXT(3);
11530
268k
        enc = xmlDetectCharEncoding(start, 4);
11531
268k
        xmlSwitchEncoding(ctxt, enc);
11532
268k
        break;
11533
271k
    }
11534
11535
702k
    if (avail < 2)
11536
120
        goto done;
11537
702k
    cur = ctxt->input->cur[0];
11538
702k
    next = ctxt->input->cur[1];
11539
702k
    if (cur == 0) {
11540
414
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
414
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
414
                  &xmlDefaultSAXLocator);
11543
414
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
414
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
414
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
414
      ctxt->sax->endDocument(ctxt->userData);
11551
414
        goto done;
11552
414
    }
11553
702k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
546k
        if (avail < 5) goto done;
11556
546k
        if ((!terminate) &&
11557
546k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
325k
      goto done;
11559
220k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
220k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
220k
                  &xmlDefaultSAXLocator);
11562
220k
        if ((ctxt->input->cur[2] == 'x') &&
11563
220k
      (ctxt->input->cur[3] == 'm') &&
11564
220k
      (ctxt->input->cur[4] == 'l') &&
11565
220k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
204k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
204k
      xmlParseXMLDecl(ctxt);
11572
204k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
380
          xmlHaltParser(ctxt);
11578
380
          return(0);
11579
380
      }
11580
204k
      ctxt->standalone = ctxt->input->standalone;
11581
204k
      if ((ctxt->encoding == NULL) &&
11582
204k
          (ctxt->input->encoding != NULL))
11583
23.7k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
204k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
204k
          (!ctxt->disableSAX))
11586
195k
          ctxt->sax->startDocument(ctxt->userData);
11587
204k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
204k
        } else {
11593
15.7k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
15.7k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
15.7k
          (!ctxt->disableSAX))
11596
15.7k
          ctxt->sax->startDocument(ctxt->userData);
11597
15.7k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
15.7k
        }
11603
220k
    } else {
11604
155k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
155k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
155k
                  &xmlDefaultSAXLocator);
11607
155k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
155k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
155k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
155k
            (!ctxt->disableSAX))
11614
155k
      ctxt->sax->startDocument(ctxt->userData);
11615
155k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
155k
    }
11621
375k
    break;
11622
8.75M
            case XML_PARSER_START_TAG: {
11623
8.75M
          const xmlChar *name;
11624
8.75M
    const xmlChar *prefix = NULL;
11625
8.75M
    const xmlChar *URI = NULL;
11626
8.75M
                int line = ctxt->input->line;
11627
8.75M
    int nsNr = ctxt->nsNr;
11628
11629
8.75M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
8.75M
    cur = ctxt->input->cur[0];
11632
8.75M
          if (cur != '<') {
11633
20.3k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
20.3k
        xmlHaltParser(ctxt);
11635
20.3k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
20.3k
      ctxt->sax->endDocument(ctxt->userData);
11637
20.3k
        goto done;
11638
20.3k
    }
11639
8.73M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
1.55M
                    goto done;
11641
7.17M
    if (ctxt->spaceNr == 0)
11642
60.2k
        spacePush(ctxt, -1);
11643
7.11M
    else if (*ctxt->space == -2)
11644
832k
        spacePush(ctxt, -1);
11645
6.28M
    else
11646
6.28M
        spacePush(ctxt, *ctxt->space);
11647
7.17M
#ifdef LIBXML_SAX1_ENABLED
11648
7.17M
    if (ctxt->sax2)
11649
4.63M
#endif /* LIBXML_SAX1_ENABLED */
11650
4.63M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
2.54M
#ifdef LIBXML_SAX1_ENABLED
11652
2.54M
    else
11653
2.54M
        name = xmlParseStartTag(ctxt);
11654
7.17M
#endif /* LIBXML_SAX1_ENABLED */
11655
7.17M
    if (ctxt->instate == XML_PARSER_EOF)
11656
474
        goto done;
11657
7.17M
    if (name == NULL) {
11658
22.5k
        spacePop(ctxt);
11659
22.5k
        xmlHaltParser(ctxt);
11660
22.5k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
22.5k
      ctxt->sax->endDocument(ctxt->userData);
11662
22.5k
        goto done;
11663
22.5k
    }
11664
7.15M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
7.15M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
7.15M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
7.15M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
7.15M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
3.11M
        SKIP(2);
11680
11681
3.11M
        if (ctxt->sax2) {
11682
2.23M
      if ((ctxt->sax != NULL) &&
11683
2.23M
          (ctxt->sax->endElementNs != NULL) &&
11684
2.23M
          (!ctxt->disableSAX))
11685
2.23M
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
2.23M
                                  prefix, URI);
11687
2.23M
      if (ctxt->nsNr - nsNr > 0)
11688
8.73k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
2.23M
#ifdef LIBXML_SAX1_ENABLED
11690
2.23M
        } else {
11691
874k
      if ((ctxt->sax != NULL) &&
11692
874k
          (ctxt->sax->endElement != NULL) &&
11693
874k
          (!ctxt->disableSAX))
11694
874k
          ctxt->sax->endElement(ctxt->userData, name);
11695
874k
#endif /* LIBXML_SAX1_ENABLED */
11696
874k
        }
11697
3.11M
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
3.11M
        spacePop(ctxt);
11700
3.11M
        if (ctxt->nameNr == 0) {
11701
11.8k
      ctxt->instate = XML_PARSER_EPILOG;
11702
3.09M
        } else {
11703
3.09M
      ctxt->instate = XML_PARSER_CONTENT;
11704
3.09M
        }
11705
3.11M
        break;
11706
3.11M
    }
11707
4.03M
    if (RAW == '>') {
11708
3.66M
        NEXT;
11709
3.66M
    } else {
11710
376k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
376k
           "Couldn't find end of Start Tag %s\n",
11712
376k
           name);
11713
376k
        nodePop(ctxt);
11714
376k
        spacePop(ctxt);
11715
376k
    }
11716
4.03M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
4.03M
    ctxt->instate = XML_PARSER_CONTENT;
11719
4.03M
                break;
11720
7.15M
      }
11721
26.3M
            case XML_PARSER_CONTENT: {
11722
26.3M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
114k
        goto done;
11724
26.2M
    cur = ctxt->input->cur[0];
11725
26.2M
    next = ctxt->input->cur[1];
11726
11727
26.2M
    if ((cur == '<') && (next == '/')) {
11728
3.34M
        ctxt->instate = XML_PARSER_END_TAG;
11729
3.34M
        break;
11730
22.8M
          } else if ((cur == '<') && (next == '?')) {
11731
77.2k
        if ((!terminate) &&
11732
77.2k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
35.9k
      goto done;
11734
41.2k
        xmlParsePI(ctxt);
11735
41.2k
        ctxt->instate = XML_PARSER_CONTENT;
11736
22.8M
    } else if ((cur == '<') && (next != '!')) {
11737
6.93M
        ctxt->instate = XML_PARSER_START_TAG;
11738
6.93M
        break;
11739
15.8M
    } else if ((cur == '<') && (next == '!') &&
11740
15.8M
               (ctxt->input->cur[2] == '-') &&
11741
15.8M
         (ctxt->input->cur[3] == '-')) {
11742
335k
        if ((!terminate) &&
11743
335k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
149k
      goto done;
11745
186k
        xmlParseComment(ctxt);
11746
186k
        ctxt->instate = XML_PARSER_CONTENT;
11747
15.5M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
15.5M
        (ctxt->input->cur[2] == '[') &&
11749
15.5M
        (ctxt->input->cur[3] == 'C') &&
11750
15.5M
        (ctxt->input->cur[4] == 'D') &&
11751
15.5M
        (ctxt->input->cur[5] == 'A') &&
11752
15.5M
        (ctxt->input->cur[6] == 'T') &&
11753
15.5M
        (ctxt->input->cur[7] == 'A') &&
11754
15.5M
        (ctxt->input->cur[8] == '[')) {
11755
30.8k
        SKIP(9);
11756
30.8k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
30.8k
        break;
11758
15.5M
    } else if ((cur == '<') && (next == '!') &&
11759
15.5M
               (avail < 9)) {
11760
29.3k
        goto done;
11761
15.4M
    } else if (cur == '<') {
11762
358k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
358k
                    "detected an error in element content\n");
11764
358k
                    SKIP(1);
11765
15.1M
    } else if (cur == '&') {
11766
2.79M
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
100k
      goto done;
11768
2.69M
        xmlParseReference(ctxt);
11769
12.3M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
12.3M
        if ((ctxt->inputNr == 1) &&
11783
12.3M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
8.59M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
911k
          goto done;
11786
8.59M
                    }
11787
11.4M
                    ctxt->checkIndex = 0;
11788
11.4M
        xmlParseCharData(ctxt, 0);
11789
11.4M
    }
11790
14.6M
    break;
11791
26.2M
      }
11792
14.6M
            case XML_PARSER_END_TAG:
11793
3.43M
    if (avail < 2)
11794
0
        goto done;
11795
3.43M
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
99.7k
        goto done;
11797
3.33M
    if (ctxt->sax2) {
11798
1.99M
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
1.99M
        nameNsPop(ctxt);
11800
1.99M
    }
11801
1.34M
#ifdef LIBXML_SAX1_ENABLED
11802
1.34M
      else
11803
1.34M
        xmlParseEndTag1(ctxt, 0);
11804
3.33M
#endif /* LIBXML_SAX1_ENABLED */
11805
3.33M
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
3.33M
    } else if (ctxt->nameNr == 0) {
11808
54.0k
        ctxt->instate = XML_PARSER_EPILOG;
11809
3.28M
    } else {
11810
3.28M
        ctxt->instate = XML_PARSER_CONTENT;
11811
3.28M
    }
11812
3.33M
    break;
11813
369k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
369k
    const xmlChar *term;
11819
11820
369k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
8.26k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
8.26k
                                           "]]>");
11827
361k
                } else {
11828
361k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
361k
                }
11830
11831
369k
    if (term == NULL) {
11832
226k
        int tmp, size;
11833
11834
226k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
3.46k
                        size = ctxt->input->end - ctxt->input->cur;
11837
223k
                    } else {
11838
223k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
91.7k
                            goto done;
11840
131k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
131k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
131k
                    }
11844
134k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
134k
                    if (tmp <= 0) {
11846
91.0k
                        tmp = -tmp;
11847
91.0k
                        ctxt->input->cur += tmp;
11848
91.0k
                        goto encoding_error;
11849
91.0k
                    }
11850
43.7k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
43.7k
                        if (ctxt->sax->cdataBlock != NULL)
11852
24.2k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
24.2k
                                                  ctxt->input->cur, tmp);
11854
19.5k
                        else if (ctxt->sax->characters != NULL)
11855
19.5k
                            ctxt->sax->characters(ctxt->userData,
11856
19.5k
                                                  ctxt->input->cur, tmp);
11857
43.7k
                    }
11858
43.7k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
43.7k
                    SKIPL(tmp);
11861
143k
    } else {
11862
143k
                    int base = term - CUR_PTR;
11863
143k
        int tmp;
11864
11865
143k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
143k
        if ((tmp < 0) || (tmp != base)) {
11867
120k
      tmp = -tmp;
11868
120k
      ctxt->input->cur += tmp;
11869
120k
      goto encoding_error;
11870
120k
        }
11871
22.6k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
22.6k
            (ctxt->sax->cdataBlock != NULL) &&
11873
22.6k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
2.66k
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
2.66k
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
2.66k
                     "<![CDATA[", 9)))
11882
2.64k
           ctxt->sax->cdataBlock(ctxt->userData,
11883
2.64k
                                 BAD_CAST "", 0);
11884
20.0k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
20.0k
      (!ctxt->disableSAX)) {
11886
18.7k
      if (ctxt->sax->cdataBlock != NULL)
11887
11.5k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
11.5k
              ctxt->input->cur, base);
11889
7.21k
      else if (ctxt->sax->characters != NULL)
11890
7.21k
          ctxt->sax->characters(ctxt->userData,
11891
7.21k
              ctxt->input->cur, base);
11892
18.7k
        }
11893
22.6k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
22.6k
        SKIPL(base + 3);
11896
22.6k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
22.6k
    }
11902
66.3k
    break;
11903
369k
      }
11904
664k
            case XML_PARSER_MISC:
11905
881k
            case XML_PARSER_PROLOG:
11906
952k
            case XML_PARSER_EPILOG:
11907
952k
    SKIP_BLANKS;
11908
952k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
952k
    else
11912
952k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
952k
                (ctxt->input->cur - ctxt->input->base);
11914
952k
    if (avail < 2)
11915
55.9k
        goto done;
11916
896k
    cur = ctxt->input->cur[0];
11917
896k
    next = ctxt->input->cur[1];
11918
896k
          if ((cur == '<') && (next == '?')) {
11919
74.7k
        if ((!terminate) &&
11920
74.7k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
28.1k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
46.6k
        xmlParsePI(ctxt);
11927
46.6k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
821k
    } else if ((cur == '<') && (next == '!') &&
11930
821k
        (ctxt->input->cur[2] == '-') &&
11931
821k
        (ctxt->input->cur[3] == '-')) {
11932
101k
        if ((!terminate) &&
11933
101k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
66.5k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
35.2k
        xmlParseComment(ctxt);
11940
35.2k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
720k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
720k
                    (cur == '<') && (next == '!') &&
11944
720k
        (ctxt->input->cur[2] == 'D') &&
11945
720k
        (ctxt->input->cur[3] == 'O') &&
11946
720k
        (ctxt->input->cur[4] == 'C') &&
11947
720k
        (ctxt->input->cur[5] == 'T') &&
11948
720k
        (ctxt->input->cur[6] == 'Y') &&
11949
720k
        (ctxt->input->cur[7] == 'P') &&
11950
720k
        (ctxt->input->cur[8] == 'E')) {
11951
376k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
138k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
237k
        ctxt->inSubset = 1;
11958
237k
        xmlParseDocTypeDecl(ctxt);
11959
237k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
237k
        if (RAW == '[') {
11962
190k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
190k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
47.6k
      ctxt->inSubset = 2;
11972
47.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
47.6k
          (ctxt->sax->externalSubset != NULL))
11974
45.2k
          ctxt->sax->externalSubset(ctxt->userData,
11975
45.2k
            ctxt->intSubName, ctxt->extSubSystem,
11976
45.2k
            ctxt->extSubURI);
11977
47.6k
      ctxt->inSubset = 0;
11978
47.6k
      xmlCleanSpecialAttr(ctxt);
11979
47.6k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
47.6k
        }
11985
343k
    } else if ((cur == '<') && (next == '!') &&
11986
343k
               (avail <
11987
50.1k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
47.5k
        goto done;
11989
295k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
13.5k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
13.5k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
13.5k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
13.5k
      ctxt->sax->endDocument(ctxt->userData);
11998
13.5k
        goto done;
11999
282k
                } else {
12000
282k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
282k
    }
12006
601k
    break;
12007
727k
            case XML_PARSER_DTD: {
12008
727k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
539k
                    goto done;
12010
188k
    xmlParseInternalSubset(ctxt);
12011
188k
    if (ctxt->instate == XML_PARSER_EOF)
12012
48.5k
        goto done;
12013
140k
    ctxt->inSubset = 2;
12014
140k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
140k
        (ctxt->sax->externalSubset != NULL))
12016
134k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
134k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
140k
    ctxt->inSubset = 0;
12019
140k
    xmlCleanSpecialAttr(ctxt);
12020
140k
    if (ctxt->instate == XML_PARSER_EOF)
12021
3.65k
        goto done;
12022
136k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
136k
                break;
12028
140k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
41.5M
  }
12102
41.5M
    }
12103
4.66M
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
4.66M
    return(ret);
12108
211k
encoding_error:
12109
211k
    {
12110
211k
        char buffer[150];
12111
12112
211k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
211k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
211k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
211k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
211k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
211k
         BAD_CAST buffer, NULL);
12118
211k
    }
12119
211k
    return(0);
12120
4.97M
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  a char array holding the next piece of the document
12126
 * @size:  the size in bytes of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a chunk of memory: push it into the context's input buffer and
 * run xmlParseTryOrFinish() on the data accumulated so far.
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
 * XML_ERR_INTERNAL_ERROR is returned for a NULL @ctxt and -1 when the
 * parser is already in the XML_PARSER_EOF state or has no input.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
6.22M
              int terminate) {
12136
6.22M
    int end_in_lf = 0;
12137
6.22M
    int remain = 0;
12138
12139
6.22M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
6.22M
    /* A previous error already disabled SAX: report it again, push nothing. */
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
1.28M
        return(ctxt->errNo);
12143
4.93M
    if (ctxt->instate == XML_PARSER_EOF)
12144
171
        return(-1);
12145
4.93M
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
4.93M
    ctxt->progressive = 1;
12149
4.93M
    if (ctxt->instate == XML_PARSER_START)
12150
667k
        xmlDetectSAX2(ctxt);
12151
4.93M
    /*
     * A chunk ending in '\r' (and not flagged as the last one) is shortened
     * by one byte; the held-back '\r' is pushed separately after parsing,
     * near the end of this function.
     * NOTE(review): presumably so that a CR/LF pair split across two chunks
     * is not parsed with only its first half available - confirm.
     */
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
4.93M
        (chunk[size - 1] == '\r')) {
12153
38.2k
  end_in_lf = 1;
12154
38.2k
  size--;
12155
38.2k
    }
12156
12157
4.97M
    /*
     * Reached a second time through the goto further down when only the
     * head of the chunk was pushed on the first pass (remain != 0).
     */
xmldecl_done:
12158
12159
4.97M
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
4.97M
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
4.70M
  /*
   * Record the input position as offsets; they are re-applied with
   * xmlBufSetInputBaseCur() right after the push.
   */
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
4.70M
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
4.70M
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
4.70M
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
4.70M
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
61.9k
            /* Cap, in bytes, for the first push; raised below for
             * UTF-16 and UCS-4 encoder names. */
            unsigned int len = 45;
12173
12174
61.9k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
61.9k
                               BAD_CAST "UTF-16")) ||
12176
61.9k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
7.64k
                               BAD_CAST "UTF16")))
12178
54.2k
                len = 90;
12179
7.64k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
7.64k
                                    BAD_CAST "UCS-4")) ||
12181
7.64k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
7.47k
                                    BAD_CAST "UCS4")))
12183
168
                len = 180;
12184
12185
61.9k
            /* Deduct the raw bytes already consumed from the cap. */
            if (ctxt->input->buf->rawconsumed < len)
12186
2.19k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
61.9k
            if ((unsigned int) size > len) {
12194
40.8k
                remain = size - len;
12195
40.8k
                size = len;
12196
40.8k
            } else {
12197
21.0k
                remain = 0;
12198
21.0k
            }
12199
61.9k
        }
12200
4.70M
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
4.70M
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
4.70M
  /*
   * NOTE(review): XML_PARSER_EOF is the value instate is compared against
   * throughout this function (a parser state), yet here it is stored in
   * errNo and returned as the error code - confirm this is intended.
   */
  if (res < 0) {
12203
539
      ctxt->errNo = XML_PARSER_EOF;
12204
539
      xmlHaltParser(ctxt);
12205
539
      return (XML_PARSER_EOF);
12206
539
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
4.70M
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
271k
  /* Nothing was pushed on this pass and the parser is not at EOF. */
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
271k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
271k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
271k
        (in->raw != NULL)) {
12216
16.9k
    int nbchars;
12217
16.9k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
16.9k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
16.9k
    /*
     * An encoder with a raw buffer is attached: call xmlCharEncInput()
     * with the terminate flag, restoring base/cur afterwards.
     */
    nbchars = xmlCharEncInput(in, terminate);
12221
16.9k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
16.9k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
335
        xmlGenericError(xmlGenericErrorContext,
12225
335
            "xmlParseChunk: encoder error\n");
12226
335
                    xmlHaltParser(ctxt);
12227
335
        return(XML_ERR_INVALID_ENCODING);
12228
335
    }
12229
16.9k
      }
12230
271k
  }
12231
271k
    }
12232
12233
4.97M
    /* While part of the chunk is still held back, parse without the
     * terminate flag. */
    if (remain != 0) {
12234
40.6k
        xmlParseTryOrFinish(ctxt, 0);
12235
4.93M
    } else {
12236
4.93M
        xmlParseTryOrFinish(ctxt, terminate);
12237
4.93M
    }
12238
4.97M
    if (ctxt->instate == XML_PARSER_EOF)
12239
110k
        return(ctxt->errNo);
12240
12241
4.86M
    /*
     * Unless XML_PARSE_HUGE is set, halt when more than
     * XML_MAX_LOOKUP_LIMIT bytes lie after or before the current position.
     */
    if ((ctxt->input != NULL) &&
12242
4.86M
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
4.86M
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
4.86M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
4.86M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
102k
        return(ctxt->errNo);
12250
12251
4.76M
    /* Second pass: push the held-back remainder of the chunk and parse
     * again. */
    if (remain != 0) {
12252
40.1k
        chunk += size;
12253
40.1k
        size = remain;
12254
40.1k
        remain = 0;
12255
40.1k
        goto xmldecl_done;
12256
40.1k
    }
12257
4.72M
    /*
     * Append the '\r' stripped at the top. It is only added to the buffer
     * here; no further parse call follows in this invocation.
     */
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
4.72M
        (ctxt->input->buf != NULL)) {
12259
37.6k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
37.6k
           ctxt->input);
12261
37.6k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
37.6k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
37.6k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
37.6k
            base, current);
12267
37.6k
    }
12268
4.72M
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
116k
  /* Bytes left unparsed in the input; used for the EPILOG check below. */
  int cur_avail = 0;
12273
12274
116k
  if (ctxt->input != NULL) {
12275
116k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
116k
      else
12279
116k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
116k
                    (ctxt->input->cur - ctxt->input->base);
12281
116k
  }
12282
12283
116k
  /* Terminating in any state other than EPILOG (or EOF) is reported as
   * XML_ERR_DOCUMENT_END. */
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
116k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
73.6k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
73.6k
  }
12287
116k
  /* Unparsed bytes remaining while in EPILOG are reported the same way. */
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
466
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
466
  }
12290
116k
  /* Call endDocument unless the parser is already in the EOF state. */
  if (ctxt->instate != XML_PARSER_EOF) {
12291
116k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
116k
    ctxt->sax->endDocument(ctxt->userData);
12293
116k
  }
12294
116k
  ctxt->instate = XML_PARSER_EOF;
12295
116k
    }
12296
4.72M
    /* Report the recorded error only if the document is not well-formed. */
    if (ctxt->wellFormed == 0)
12297
1.57M
  return((xmlParserErrors) ctxt->errNo);
12298
3.15M
    else
12299
3.15M
        return(0);
12300
4.72M
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
378k
                        const char *chunk, int size, const char *filename) {
12330
378k
    xmlParserCtxtPtr ctxt;
12331
378k
    xmlParserInputPtr inputStream;
12332
378k
    xmlParserInputBufferPtr buf;
12333
378k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
378k
    if ((chunk != NULL) && (size >= 4))
12339
188k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
378k
    buf = xmlAllocParserInputBuffer(enc);
12342
378k
    if (buf == NULL) return(NULL);
12343
12344
378k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
378k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
378k
    ctxt->dictNames = 1;
12351
378k
    if (filename == NULL) {
12352
189k
  ctxt->directory = NULL;
12353
189k
    } else {
12354
189k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
189k
    }
12356
12357
378k
    inputStream = xmlNewInputStream(ctxt);
12358
378k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
378k
    if (filename == NULL)
12365
189k
  inputStream->filename = NULL;
12366
189k
    else {
12367
189k
  inputStream->filename = (char *)
12368
189k
      xmlCanonicPath((const xmlChar *) filename);
12369
189k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
189k
    }
12376
378k
    inputStream->buf = buf;
12377
378k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
378k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
378k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
378k
    if ((size != 0) && (chunk != NULL) &&
12388
378k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
188k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
188k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
188k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
188k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
188k
    }
12399
12400
378k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
109k
        xmlSwitchEncoding(ctxt, enc);
12402
109k
    }
12403
12404
378k
    return(ctxt);
12405
378k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
351k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
351k
    if (ctxt == NULL)
12418
0
        return;
12419
351k
    ctxt->instate = XML_PARSER_EOF;
12420
351k
    ctxt->disableSAX = 1;
12421
371k
    while (ctxt->inputNr > 1)
12422
19.8k
        xmlFreeInputStream(inputPop(ctxt));
12423
351k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
351k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
351k
        if (ctxt->input->buf != NULL) {
12433
302k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
302k
            ctxt->input->buf = NULL;
12435
302k
        }
12436
351k
  ctxt->input->cur = BAD_CAST"";
12437
351k
        ctxt->input->length = 0;
12438
351k
  ctxt->input->base = ctxt->input->cur;
12439
351k
        ctxt->input->end = ctxt->input->cur;
12440
351k
    }
12441
351k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
189k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
189k
    if (ctxt == NULL)
12452
0
        return;
12453
189k
    xmlHaltParser(ctxt);
12454
189k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
189k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
888k
          const xmlChar *ID, xmlNodePtr *list) {
12832
888k
    xmlParserCtxtPtr ctxt;
12833
888k
    xmlDocPtr newDoc;
12834
888k
    xmlNodePtr newRoot;
12835
888k
    xmlParserErrors ret = XML_ERR_OK;
12836
888k
    xmlChar start[4];
12837
888k
    xmlCharEncoding enc;
12838
12839
888k
    if (((depth > 40) &&
12840
888k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
888k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
888k
    if (list != NULL)
12848
82.9k
        *list = NULL;
12849
888k
    if ((URL == NULL) && (ID == NULL))
12850
265
  return(XML_ERR_INTERNAL_ERROR);
12851
888k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
888k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
888k
                                             oldctxt);
12856
888k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
112k
    if (oldctxt != NULL) {
12858
112k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
112k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
112k
    }
12861
112k
    xmlDetectSAX2(ctxt);
12862
12863
112k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
112k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
112k
    newDoc->properties = XML_DOC_INTERNAL;
12869
112k
    if (doc) {
12870
112k
        newDoc->intSubset = doc->intSubset;
12871
112k
        newDoc->extSubset = doc->extSubset;
12872
112k
        if (doc->dict) {
12873
70.8k
            newDoc->dict = doc->dict;
12874
70.8k
            xmlDictReference(newDoc->dict);
12875
70.8k
        }
12876
112k
        if (doc->URL != NULL) {
12877
74.0k
            newDoc->URL = xmlStrdup(doc->URL);
12878
74.0k
        }
12879
112k
    }
12880
112k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
112k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
112k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
112k
    nodePush(ctxt, newDoc->children);
12891
112k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
112k
    } else {
12894
112k
        ctxt->myDoc = doc;
12895
112k
        newRoot->doc = doc;
12896
112k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
112k
    GROW;
12904
112k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
106k
  start[0] = RAW;
12906
106k
  start[1] = NXT(1);
12907
106k
  start[2] = NXT(2);
12908
106k
  start[3] = NXT(3);
12909
106k
  enc = xmlDetectCharEncoding(start, 4);
12910
106k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
8.85k
      xmlSwitchEncoding(ctxt, enc);
12912
8.85k
  }
12913
106k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
112k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
6.69k
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
6.69k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
6.69k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
203
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
203
                           "Version mismatch between document and entity\n");
12927
203
        }
12928
6.69k
    }
12929
12930
112k
    ctxt->instate = XML_PARSER_CONTENT;
12931
112k
    ctxt->depth = depth;
12932
112k
    if (oldctxt != NULL) {
12933
112k
  ctxt->_private = oldctxt->_private;
12934
112k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
112k
  ctxt->validate = oldctxt->validate;
12936
112k
  ctxt->valid = oldctxt->valid;
12937
112k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
112k
        if (oldctxt->validate) {
12939
46.0k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
46.0k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
46.0k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
46.0k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
46.0k
        }
12944
112k
  ctxt->external = oldctxt->external;
12945
112k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
112k
        ctxt->dict = oldctxt->dict;
12947
112k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
112k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
112k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
112k
        ctxt->dictNames = oldctxt->dictNames;
12951
112k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
112k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
112k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
112k
  ctxt->record_info = oldctxt->record_info;
12955
112k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
112k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
112k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
112k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
112k
    xmlParseContent(ctxt);
12970
12971
112k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
3.69k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
109k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
112k
    if (ctxt->node != newDoc->children) {
12977
16.9k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
16.9k
    }
12979
12980
112k
    if (!ctxt->wellFormed) {
12981
35.7k
  ret = (xmlParserErrors)ctxt->errNo;
12982
35.7k
        if (oldctxt != NULL) {
12983
35.7k
            oldctxt->errNo = ctxt->errNo;
12984
35.7k
            oldctxt->wellFormed = 0;
12985
35.7k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
35.7k
        }
12987
77.0k
    } else {
12988
77.0k
  if (list != NULL) {
12989
8.82k
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
8.82k
      cur = newDoc->children->children;
12996
8.82k
      *list = cur;
12997
845k
      while (cur != NULL) {
12998
836k
    cur->parent = NULL;
12999
836k
    cur = cur->next;
13000
836k
      }
13001
8.82k
            newDoc->children->children = NULL;
13002
8.82k
  }
13003
77.0k
  ret = XML_ERR_OK;
13004
77.0k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
112k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
112k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
112k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
112k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
112k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
112k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
112k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
112k
    }
13020
13021
112k
    if (oldctxt != NULL) {
13022
112k
        ctxt->dict = NULL;
13023
112k
        ctxt->attsDefault = NULL;
13024
112k
        ctxt->attsSpecial = NULL;
13025
112k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
112k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
112k
        oldctxt->validate = ctxt->validate;
13028
112k
        oldctxt->valid = ctxt->valid;
13029
112k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
112k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
112k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
112k
    }
13033
112k
    ctxt->node_seq.maximum = 0;
13034
112k
    ctxt->node_seq.length = 0;
13035
112k
    ctxt->node_seq.buffer = NULL;
13036
112k
    xmlFreeParserCtxt(ctxt);
13037
112k
    newDoc->intSubset = NULL;
13038
112k
    newDoc->extSubset = NULL;
13039
112k
    xmlFreeDoc(newDoc);
13040
13041
112k
    return(ret);
13042
112k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
134k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
134k
    xmlParserCtxtPtr ctxt;
13125
134k
    xmlDocPtr newDoc = NULL;
13126
134k
    xmlNodePtr newRoot;
13127
134k
    xmlSAXHandlerPtr oldsax = NULL;
13128
134k
    xmlNodePtr content = NULL;
13129
134k
    xmlNodePtr last = NULL;
13130
134k
    int size;
13131
134k
    xmlParserErrors ret = XML_ERR_OK;
13132
134k
#ifdef SAX2
13133
134k
    int i;
13134
134k
#endif
13135
13136
134k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
134k
        (oldctxt->depth >  100)) {
13138
126
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
126
                       "Maximum entity nesting depth exceeded");
13140
126
  return(XML_ERR_ENTITY_LOOP);
13141
126
    }
13142
13143
13144
134k
    if (lst != NULL)
13145
76.9k
        *lst = NULL;
13146
134k
    if (string == NULL)
13147
78
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
134k
    size = xmlStrlen(string);
13150
13151
134k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
134k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
118k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
118k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
118k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
118k
    else
13158
118k
  ctxt->userData = ctxt;
13159
118k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
118k
    ctxt->dict = oldctxt->dict;
13161
118k
    ctxt->input_id = oldctxt->input_id;
13162
118k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
118k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
118k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
118k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
119k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
1.01k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
1.01k
    }
13171
118k
#endif
13172
13173
118k
    oldsax = ctxt->sax;
13174
118k
    ctxt->sax = oldctxt->sax;
13175
118k
    xmlDetectSAX2(ctxt);
13176
118k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
118k
    ctxt->options = oldctxt->options;
13178
13179
118k
    ctxt->_private = oldctxt->_private;
13180
118k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
118k
    } else {
13193
118k
  ctxt->myDoc = oldctxt->myDoc;
13194
118k
        content = ctxt->myDoc->children;
13195
118k
  last = ctxt->myDoc->last;
13196
118k
    }
13197
118k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
118k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
118k
    ctxt->myDoc->children = NULL;
13208
118k
    ctxt->myDoc->last = NULL;
13209
118k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
118k
    nodePush(ctxt, ctxt->myDoc->children);
13211
118k
    ctxt->instate = XML_PARSER_CONTENT;
13212
118k
    ctxt->depth = oldctxt->depth;
13213
13214
118k
    ctxt->validate = 0;
13215
118k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
118k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
102k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
102k
    }
13222
118k
    ctxt->dictNames = oldctxt->dictNames;
13223
118k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
118k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
118k
    xmlParseContent(ctxt);
13227
118k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
357
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
118k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
118k
    if (ctxt->node != ctxt->myDoc->children) {
13233
1.66k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
1.66k
    }
13235
13236
118k
    if (!ctxt->wellFormed) {
13237
14.2k
  ret = (xmlParserErrors)ctxt->errNo;
13238
14.2k
        oldctxt->errNo = ctxt->errNo;
13239
14.2k
        oldctxt->wellFormed = 0;
13240
14.2k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
104k
    } else {
13242
104k
        ret = XML_ERR_OK;
13243
104k
    }
13244
13245
118k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
62.1k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
62.1k
  cur = ctxt->myDoc->children->children;
13253
62.1k
  *lst = cur;
13254
267k
  while (cur != NULL) {
13255
205k
#ifdef LIBXML_VALID_ENABLED
13256
205k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
205k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
205k
    (cur->type == XML_ELEMENT_NODE)) {
13259
26.5k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
26.5k
      oldctxt->myDoc, cur);
13261
26.5k
      }
13262
205k
#endif /* LIBXML_VALID_ENABLED */
13263
205k
      cur->parent = NULL;
13264
205k
      cur = cur->next;
13265
205k
  }
13266
62.1k
  ctxt->myDoc->children->children = NULL;
13267
62.1k
    }
13268
118k
    if (ctxt->myDoc != NULL) {
13269
118k
  xmlFreeNode(ctxt->myDoc->children);
13270
118k
        ctxt->myDoc->children = content;
13271
118k
        ctxt->myDoc->last = last;
13272
118k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
118k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
118k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
118k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
118k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
118k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
118k
    }
13285
13286
118k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
118k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
118k
    ctxt->sax = oldsax;
13289
118k
    ctxt->dict = NULL;
13290
118k
    ctxt->attsDefault = NULL;
13291
118k
    ctxt->attsSpecial = NULL;
13292
118k
    xmlFreeParserCtxt(ctxt);
13293
118k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
118k
    return(ret);
13298
118k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (NULL is only tolerated for historic reasons)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0 (values above 40 return XML_ERR_ENTITY_LOOP)
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes (may be NULL)
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 if @string is NULL or the parsing
13548
 *    context cannot be set up, else the parser error code (1 if none was recorded)
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) { /* NOTE(review): this returns before *lst is reset below */
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt; /* default user data is the context itself */
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax; /* put back before the context is freed */
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict); /* drop the context's own dict and use doc's instead */
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset; /* shared with doc; reset to NULL before newDoc is freed */
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc; /* NOTE(review): the else branch makes the same assignment */
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on a chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL; /* cleared for the duration of the parse, restored just after */
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) { /* an end tag is left over in the input */
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) { /* other input is left over after the content */
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) { /* current node is not the pseudoroot pushed above */
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * the pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc); /* re-home each returned node into doc */
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * Parse an XML external entity out of context and build a tree.
13705
 * It uses the given SAX function block to handle the parsing callbacks.
13706
 * If sax is NULL, fall back to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This corresponds to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree, or NULL if no context could be created or the entity is not well formed
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax); /* release the context's own handler before installing the caller's */
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL; /* NOTE(review): presumably so xmlFreeParserCtxt leaves the caller's handler alone - confirm */
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * Parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This corresponds to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree (the result of xmlSAXParseEntity with a NULL sax)
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
 * @sax:  SAX handler passed to xmlNewSAXParserCtxt for the new context
 * @userData:  user data passed to xmlNewSAXParserCtxt for the new context
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context whose options, _private and input_id are copied (may be NULL)
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context, or NULL if the context or its input could not be created
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
888k
        xmlParserCtxtPtr pctx) {
13783
888k
    xmlParserCtxtPtr ctxt;
13784
888k
    xmlParserInputPtr inputStream;
13785
888k
    char *directory = NULL;
13786
888k
    xmlChar *uri;
13787
13788
888k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
888k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
888k
    if (pctx != NULL) {
13794
888k
        ctxt->options = pctx->options;
13795
888k
        ctxt->_private = pctx->_private;
13796
888k
  ctxt->input_id = pctx->input_id;
13797
888k
    }
13798
13799
    /* Don't read from stdin. */
13800
888k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
888k
    uri = xmlBuildURI(URL, base);
13804
13805
888k
    if (uri == NULL) { /* no URI could be built: load from the URL as given */
13806
23.4k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
23.4k
  if (inputStream == NULL) {
13808
20.9k
      xmlFreeParserCtxt(ctxt);
13809
20.9k
      return(NULL);
13810
20.9k
  }
13811
13812
2.49k
  inputPush(ctxt, inputStream);
13813
13814
  /* NOTE(review): directory is still NULL here, so the second test is always true */
2.49k
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
2.49k
      directory = xmlParserGetDirectory((char *)URL);
13816
2.49k
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
2.49k
      ctxt->directory = directory;
13818
864k
    } else { /* same steps using the built URI, which is freed on every exit of this branch */
13819
864k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
864k
  if (inputStream == NULL) {
13821
754k
      xmlFree(uri);
13822
754k
      xmlFreeParserCtxt(ctxt);
13823
754k
      return(NULL);
13824
754k
  }
13825
13826
110k
  inputPush(ctxt, inputStream);
13827
13828
110k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
110k
      directory = xmlParserGetDirectory((char *)uri);
13830
110k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
110k
      ctxt->directory = directory;
13832
110k
  xmlFree(uri);
13833
110k
    }
13834
112k
    return(ctxt);
13835
888k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity.
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
    /* No SAX handler, user data or parent context is supplied. */
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption (0 leaves the context defaults untouched)
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context, or NULL if the context or its input could not be created
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
    /* NOTE(review): directory is still NULL here, so the second test is always true */
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content (xmlCreateURLParserCtxt with options 0).
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It uses the given SAX function block to handle the parsing callbacks.
13935
 * If sax is NULL, fall back to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree; NULL if no context could be created, or if the file is not well formed and @recovery is 0
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax); /* release the context's own handler before installing the caller's */
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9; /* compressed input: the document's compression field is set to 9 */
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL; /* NOTE(review): presumably so xmlFreeParserCtxt leaves the caller's handler alone - confirm */
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It uses the given SAX function block to handle the parsing callbacks.
14005
 * If sax is NULL, fall back to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree (the result of xmlSAXParseFileWithData with NULL data)
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * Parse an XML in-memory document and build a tree.
14023
 * In case the document is not Well Formed, an attempt to build a
14024
 * tree is made anyway (xmlSAXParseDoc is called with recovery set to 1)
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was well formed,
14044
 * NULL otherwise (xmlSAXParseFile is called with a NULL sax and recovery 0).
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * Parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In case the document is not Well Formed, it attempts to build
14061
 * a tree anyway (xmlSAXParseFile is called with recovery set to 1)
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Set up the parser context to parse a new buffer; clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be. Does nothing if @ctxt or @buffer is NULL.
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt); /* the context is cleared on this failure path too */
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer; /* the input points straight at the caller's buffer; no copy is made here */
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * Parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided.
14119
 *
14120
 * Returns 0 in case of success, otherwise the parser error number (-1 if none was recorded or no context could be created)
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) /* the static default handler is never freed */
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL; /* NOTE(review): presumably so xmlFreeParserCtxt leaves the caller's handler alone - confirm */
14150
0
    if (ctxt->myDoc != NULL) { /* any document built during the parse is freed, not returned */
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context, or NULL (also when @buffer is NULL or @size <= 0)
14174
 */
14175
xmlParserCtxtPtr
14176
323k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
323k
    xmlParserCtxtPtr ctxt;
14178
323k
    xmlParserInputPtr input;
14179
323k
    xmlParserInputBufferPtr buf;
14180
14181
323k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
323k
    if (size <= 0) /* an empty buffer is rejected as well */
14184
15.6k
  return(NULL);
14185
14186
308k
    ctxt = xmlNewParserCtxt();
14187
308k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
308k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
308k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
308k
    input = xmlNewInputStream(ctxt);
14197
308k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf); /* buf is not yet attached to an input, so it is freed separately */
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
308k
    input->filename = NULL;
14204
308k
    input->buf = buf;
14205
308k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
308k
    inputPush(ctxt, input);
14208
308k
    return(ctxt);
14209
308k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  a pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * Parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callbacks. If sax is NULL, fall back to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree; NULL if no context could be created, or if the input is not well formed and @recovery is 0
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax); /* release the context's own handler before installing the caller's */
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL; /* NOTE(review): presumably so xmlFreeParserCtxt leaves the caller's handler alone - confirm */
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  a pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * Parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callbacks. If sax is NULL, fall back to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree (the result of xmlSAXParseMemoryWithData with NULL data)
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  a pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * Parse an XML in-memory block and build a tree (xmlSAXParseMemory with a NULL sax and recovery 0).
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  a pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * Parse an XML in-memory block and build a tree.
14316
 * In case the document is not Well Formed, an attempt to
14317
 * build a tree is made anyway (xmlSAXParseMemory is called with recovery set to 1)
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * Parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success, otherwise the parser error number (-1 if none was recorded or no context could be created)
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler) /* the static default handler is never freed */
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL; /* NOTE(review): presumably so xmlFreeParserCtxt leaves the caller's handler alone - confirm */
14368
0
    if (ctxt->myDoc != NULL) { /* any document built during the parse is freed, not returned */
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document; the length is taken with xmlStrlen.
14383
 *
14384
 * Returns the new parser context or NULL (also when @cur is NULL)
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * Parse an XML in-memory document and build a tree.
14407
 * It uses the given SAX function block to handle the parsing callbacks.
14408
 * If sax is NULL, fall back to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree; NULL if @cur is NULL, no context could be created, or the input is not well formed and @recovery is 0
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax; /* kept so the context's own handler can be put back below */
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax; /* restore the original handler before the context is freed */
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * Parse an XML in-memory document and build a tree (xmlSAXParseDoc with a NULL sax and recovery 0).
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL; /* callback set by xmlSetEntityReferenceFunc; NULL until one is set */
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) { /* nothing to do when no callback is registered */
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the callback function to call when an XML entity reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
731M
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
731M
    if (xmlParserInitialized != 0)
14525
731M
  return;
14526
14527
3.70k
#ifdef LIBXML_THREAD_ENABLED
14528
3.70k
    __xmlGlobalInitMutexLock();
14529
3.70k
    if (xmlParserInitialized == 0) {
14530
3.70k
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
3.70k
  xmlInitThreadsInternal();
14537
3.70k
  xmlInitGlobalsInternal();
14538
3.70k
  xmlInitMemoryInternal();
14539
3.70k
        __xmlInitializeDict();
14540
3.70k
  xmlInitEncodingInternal();
14541
3.70k
  xmlRegisterDefaultInputCallbacks();
14542
3.70k
#ifdef LIBXML_OUTPUT_ENABLED
14543
3.70k
  xmlRegisterDefaultOutputCallbacks();
14544
3.70k
#endif /* LIBXML_OUTPUT_ENABLED */
14545
3.70k
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
3.70k
  xmlInitXPathInternal();
14547
3.70k
#endif
14548
3.70k
  xmlParserInitialized = 1;
14549
3.70k
#ifdef LIBXML_THREAD_ENABLED
14550
3.70k
    }
14551
3.70k
    __xmlGlobalInitMutexUnlock();
14552
3.70k
#endif
14553
3.70k
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs the library
 * cleanup, but only while the default deallocator is still installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL every
 * non-NULL string is freed. @str is evaluated more than once, so it
 * must not have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and "DICT_FREE(x);" stays correct inside an
 * unbraced if/else.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
568k
{
14843
568k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
568k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
568k
    if (options & XML_PARSE_RECOVER) {
14851
299k
        ctxt->recovery = 1;
14852
299k
        options -= XML_PARSE_RECOVER;
14853
299k
  ctxt->options |= XML_PARSE_RECOVER;
14854
299k
    } else
14855
268k
        ctxt->recovery = 0;
14856
568k
    if (options & XML_PARSE_DTDLOAD) {
14857
352k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
352k
        options -= XML_PARSE_DTDLOAD;
14859
352k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
352k
    } else
14861
215k
        ctxt->loadsubset = 0;
14862
568k
    if (options & XML_PARSE_DTDATTR) {
14863
225k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
225k
        options -= XML_PARSE_DTDATTR;
14865
225k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
225k
    }
14867
568k
    if (options & XML_PARSE_NOENT) {
14868
320k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
320k
        options -= XML_PARSE_NOENT;
14871
320k
  ctxt->options |= XML_PARSE_NOENT;
14872
320k
    } else
14873
247k
        ctxt->replaceEntities = 0;
14874
568k
    if (options & XML_PARSE_PEDANTIC) {
14875
73.6k
        ctxt->pedantic = 1;
14876
73.6k
        options -= XML_PARSE_PEDANTIC;
14877
73.6k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
73.6k
    } else
14879
494k
        ctxt->pedantic = 0;
14880
568k
    if (options & XML_PARSE_NOBLANKS) {
14881
219k
        ctxt->keepBlanks = 0;
14882
219k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
219k
        options -= XML_PARSE_NOBLANKS;
14884
219k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
219k
    } else
14886
348k
        ctxt->keepBlanks = 1;
14887
568k
    if (options & XML_PARSE_DTDVALID) {
14888
225k
        ctxt->validate = 1;
14889
225k
        if (options & XML_PARSE_NOWARNING)
14890
111k
            ctxt->vctxt.warning = NULL;
14891
225k
        if (options & XML_PARSE_NOERROR)
14892
171k
            ctxt->vctxt.error = NULL;
14893
225k
        options -= XML_PARSE_DTDVALID;
14894
225k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
225k
    } else
14896
342k
        ctxt->validate = 0;
14897
568k
    if (options & XML_PARSE_NOWARNING) {
14898
241k
        ctxt->sax->warning = NULL;
14899
241k
        options -= XML_PARSE_NOWARNING;
14900
241k
    }
14901
568k
    if (options & XML_PARSE_NOERROR) {
14902
329k
        ctxt->sax->error = NULL;
14903
329k
        ctxt->sax->fatalError = NULL;
14904
329k
        options -= XML_PARSE_NOERROR;
14905
329k
    }
14906
568k
#ifdef LIBXML_SAX1_ENABLED
14907
568k
    if (options & XML_PARSE_SAX1) {
14908
207k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
207k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
207k
        ctxt->sax->startElementNs = NULL;
14911
207k
        ctxt->sax->endElementNs = NULL;
14912
207k
        ctxt->sax->initialized = 1;
14913
207k
        options -= XML_PARSE_SAX1;
14914
207k
  ctxt->options |= XML_PARSE_SAX1;
14915
207k
    }
14916
568k
#endif /* LIBXML_SAX1_ENABLED */
14917
568k
    if (options & XML_PARSE_NODICT) {
14918
175k
        ctxt->dictNames = 0;
14919
175k
        options -= XML_PARSE_NODICT;
14920
175k
  ctxt->options |= XML_PARSE_NODICT;
14921
392k
    } else {
14922
392k
        ctxt->dictNames = 1;
14923
392k
    }
14924
568k
    if (options & XML_PARSE_NOCDATA) {
14925
227k
        ctxt->sax->cdataBlock = NULL;
14926
227k
        options -= XML_PARSE_NOCDATA;
14927
227k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
227k
    }
14929
568k
    if (options & XML_PARSE_NSCLEAN) {
14930
325k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
325k
        options -= XML_PARSE_NSCLEAN;
14932
325k
    }
14933
568k
    if (options & XML_PARSE_NONET) {
14934
209k
  ctxt->options |= XML_PARSE_NONET;
14935
209k
        options -= XML_PARSE_NONET;
14936
209k
    }
14937
568k
    if (options & XML_PARSE_COMPACT) {
14938
335k
  ctxt->options |= XML_PARSE_COMPACT;
14939
335k
        options -= XML_PARSE_COMPACT;
14940
335k
    }
14941
568k
    if (options & XML_PARSE_OLD10) {
14942
193k
  ctxt->options |= XML_PARSE_OLD10;
14943
193k
        options -= XML_PARSE_OLD10;
14944
193k
    }
14945
568k
    if (options & XML_PARSE_NOBASEFIX) {
14946
234k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
234k
        options -= XML_PARSE_NOBASEFIX;
14948
234k
    }
14949
568k
    if (options & XML_PARSE_HUGE) {
14950
205k
  ctxt->options |= XML_PARSE_HUGE;
14951
205k
        options -= XML_PARSE_HUGE;
14952
205k
        if (ctxt->dict != NULL)
14953
205k
            xmlDictSetLimit(ctxt->dict, 0);
14954
205k
    }
14955
568k
    if (options & XML_PARSE_OLDSAX) {
14956
188k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
188k
        options -= XML_PARSE_OLDSAX;
14958
188k
    }
14959
568k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
319k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
319k
        options -= XML_PARSE_IGNORE_ENC;
14962
319k
    }
14963
568k
    if (options & XML_PARSE_BIG_LINES) {
14964
236k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
236k
        options -= XML_PARSE_BIG_LINES;
14966
236k
    }
14967
568k
    ctxt->linenumbers = 1;
14968
568k
    return (options);
14969
568k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
378k
{
14984
378k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
378k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
189k
{
15003
189k
    xmlDocPtr ret;
15004
15005
189k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
189k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
189k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
189k
        (ctxt->input->filename == NULL))
15015
189k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
189k
    xmlParseDocument(ctxt);
15017
189k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
114k
        ret = ctxt->myDoc;
15019
74.9k
    else {
15020
74.9k
        ret = NULL;
15021
74.9k
  if (ctxt->myDoc != NULL) {
15022
70.4k
      xmlFreeDoc(ctxt->myDoc);
15023
70.4k
  }
15024
74.9k
    }
15025
189k
    ctxt->myDoc = NULL;
15026
189k
    if (!reuse) {
15027
189k
  xmlFreeParserCtxt(ctxt);
15028
189k
    }
15029
15030
189k
    return (ret);
15031
189k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
189k
{
15096
189k
    xmlParserCtxtPtr ctxt;
15097
15098
189k
    xmlInitParser();
15099
189k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
189k
    if (ctxt == NULL)
15101
50
        return (NULL);
15102
189k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
189k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387