Coverage Report

Created: 2025-03-12 04:16

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
783k
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
180
#define XML_PARSER_NON_LINEAR 10
129
130
13.4M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
14.3M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
2.03G
#define XML_PARSER_BUFFER_SIZE 100
147
106k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as inputs available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
4.73M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *const xmlW3CPIs[] = { "xml-stylesheet", "xml-model", NULL };
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
1.83k
{
215
1.83k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
1.83k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
1.83k
    if (ctxt != NULL)
219
1.83k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
1.83k
    if (prefix == NULL)
222
1.36k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
1.36k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
1.36k
                        (const char *) localname, NULL, NULL, 0, 0,
225
1.36k
                        "Attribute %s redefined\n", localname);
226
461
    else
227
461
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
461
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
461
                        (const char *) prefix, (const char *) localname,
230
461
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
461
                        localname);
232
1.83k
    if (ctxt != NULL) {
233
1.83k
  ctxt->wellFormed = 0;
234
1.83k
  if (ctxt->recovery == 0)
235
958
      ctxt->disableSAX = 1;
236
1.83k
    }
237
1.83k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
174k
{
250
174k
    const char *errmsg;
251
252
174k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
174k
        (ctxt->instate == XML_PARSER_EOF))
254
2.66k
  return;
255
172k
    switch (error) {
256
1.71k
        case XML_ERR_INVALID_HEX_CHARREF:
257
1.71k
            errmsg = "CharRef: invalid hexadecimal value";
258
1.71k
            break;
259
3.73k
        case XML_ERR_INVALID_DEC_CHARREF:
260
3.73k
            errmsg = "CharRef: invalid decimal value";
261
3.73k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
66.3k
        case XML_ERR_INTERNAL_ERROR:
266
66.3k
            errmsg = "internal error";
267
66.3k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
657
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
657
            errmsg = "PEReference: expecting ';'";
282
657
            break;
283
211
        case XML_ERR_ENTITY_LOOP:
284
211
            errmsg = "Detected an entity reference loop";
285
211
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
888
        case XML_ERR_ENTITY_PE_INTERNAL:
290
888
            errmsg = "PEReferences forbidden in internal subset";
291
888
            break;
292
531
        case XML_ERR_ENTITY_NOT_FINISHED:
293
531
            errmsg = "EntityValue: \" or ' expected";
294
531
            break;
295
4.20k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
4.20k
            errmsg = "AttValue: \" or ' expected";
297
4.20k
            break;
298
9.62k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
9.62k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
9.62k
            break;
301
921
        case XML_ERR_LITERAL_NOT_STARTED:
302
921
            errmsg = "SystemLiteral \" or ' expected";
303
921
            break;
304
1.27k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
1.27k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
1.27k
            break;
307
2.10k
        case XML_ERR_MISPLACED_CDATA_END:
308
2.10k
            errmsg = "Sequence ']]>' not allowed in content";
309
2.10k
            break;
310
816
        case XML_ERR_URI_REQUIRED:
311
816
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
816
            break;
313
108
        case XML_ERR_PUBID_REQUIRED:
314
108
            errmsg = "PUBLIC, the Public Identifier is missing";
315
108
            break;
316
4.00k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
4.00k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
4.00k
            break;
319
685
        case XML_ERR_PI_NOT_STARTED:
320
685
            errmsg = "xmlParsePI : no target name";
321
685
            break;
322
152
        case XML_ERR_RESERVED_XML_NAME:
323
152
            errmsg = "Invalid PI name";
324
152
            break;
325
89
        case XML_ERR_NOTATION_NOT_STARTED:
326
89
            errmsg = "NOTATION: Name expected here";
327
89
            break;
328
235
        case XML_ERR_NOTATION_NOT_FINISHED:
329
235
            errmsg = "'>' required to close NOTATION declaration";
330
235
            break;
331
1.06k
        case XML_ERR_VALUE_REQUIRED:
332
1.06k
            errmsg = "Entity value required";
333
1.06k
            break;
334
155
        case XML_ERR_URI_FRAGMENT:
335
155
            errmsg = "Fragment not allowed";
336
155
            break;
337
1.03k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
1.03k
            errmsg = "'(' required to start ATTLIST enumeration";
339
1.03k
            break;
340
62
        case XML_ERR_NMTOKEN_REQUIRED:
341
62
            errmsg = "NmToken expected in ATTLIST enumeration";
342
62
            break;
343
237
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
237
            errmsg = "')' required to finish ATTLIST enumeration";
345
237
            break;
346
228
        case XML_ERR_MIXED_NOT_STARTED:
347
228
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
228
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
798
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
798
            errmsg = "ContentDecl : Name or '(' expected";
354
798
            break;
355
923
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
923
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
923
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
14.5k
        case XML_ERR_GT_REQUIRED:
363
14.5k
            errmsg = "expected '>'";
364
14.5k
            break;
365
41
        case XML_ERR_CONDSEC_INVALID:
366
41
            errmsg = "XML conditional section '[' expected";
367
41
            break;
368
1.42k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
1.42k
            errmsg = "Content error in the external subset";
370
1.42k
            break;
371
257
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
257
            errmsg =
373
257
                "conditional section INCLUDE or IGNORE keyword expected";
374
257
            break;
375
258
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
258
            errmsg = "XML conditional section not closed";
377
258
            break;
378
80
        case XML_ERR_XMLDECL_NOT_STARTED:
379
80
            errmsg = "Text declaration '<?xml' required";
380
80
            break;
381
7.97k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
7.97k
            errmsg = "parsing XML declaration: '?>' expected";
383
7.97k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
8.75k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
8.75k
            errmsg = "EntityRef: expecting ';'";
389
8.75k
            break;
390
1.50k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
1.50k
            errmsg = "DOCTYPE improperly terminated";
392
1.50k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
574
        case XML_ERR_EQUAL_REQUIRED:
397
574
            errmsg = "expected '='";
398
574
            break;
399
2.39k
        case XML_ERR_STRING_NOT_CLOSED:
400
2.39k
            errmsg = "String not closed expecting \" or '";
401
2.39k
            break;
402
519
        case XML_ERR_STRING_NOT_STARTED:
403
519
            errmsg = "String not started expecting ' or \"";
404
519
            break;
405
108
        case XML_ERR_ENCODING_NAME:
406
108
            errmsg = "Invalid XML encoding name";
407
108
            break;
408
237
        case XML_ERR_STANDALONE_VALUE:
409
237
            errmsg = "standalone accepts only 'yes' or 'no'";
410
237
            break;
411
5.07k
        case XML_ERR_DOCUMENT_EMPTY:
412
5.07k
            errmsg = "Document is empty";
413
5.07k
            break;
414
19.5k
        case XML_ERR_DOCUMENT_END:
415
19.5k
            errmsg = "Extra content at the end of the document";
416
19.5k
            break;
417
1.84k
        case XML_ERR_NOT_WELL_BALANCED:
418
1.84k
            errmsg = "chunk is not well balanced";
419
1.84k
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
3.96k
        case XML_ERR_VERSION_MISSING:
424
3.96k
            errmsg = "Malformed declaration expecting version";
425
3.96k
            break;
426
3
        case XML_ERR_NAME_TOO_LONG:
427
3
            errmsg = "Name too long";
428
3
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
131
        default:
435
131
            errmsg = "Unregistered error message";
436
172k
    }
437
172k
    if (ctxt != NULL)
438
172k
  ctxt->errNo = error;
439
172k
    if (info == NULL) {
440
105k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
105k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
105k
                        errmsg);
443
105k
    } else {
444
66.3k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
66.3k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
66.3k
                        errmsg, info);
447
66.3k
    }
448
172k
    if (ctxt != NULL) {
449
172k
  ctxt->wellFormed = 0;
450
172k
  if (ctxt->recovery == 0)
451
44.8k
      ctxt->disableSAX = 1;
452
172k
    }
453
172k
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
186k
{
467
186k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
186k
        (ctxt->instate == XML_PARSER_EOF))
469
0
  return;
470
186k
    if (ctxt != NULL)
471
186k
  ctxt->errNo = error;
472
186k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
186k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
186k
    if (ctxt != NULL) {
475
186k
  ctxt->wellFormed = 0;
476
186k
  if (ctxt->recovery == 0)
477
56.8k
      ctxt->disableSAX = 1;
478
186k
    }
479
186k
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
27.8k
{
495
27.8k
    xmlStructuredErrorFunc schannel = NULL;
496
497
27.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
27.8k
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
27.8k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
27.8k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
22.3k
        schannel = ctxt->sax->serror;
503
27.8k
    if (ctxt != NULL) {
504
27.8k
        __xmlRaiseError(schannel,
505
27.8k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
27.8k
                    ctxt->userData,
507
27.8k
                    ctxt, NULL, XML_FROM_PARSER, error,
508
27.8k
                    XML_ERR_WARNING, NULL, 0,
509
27.8k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
27.8k
        msg, (const char *) str1, (const char *) str2);
511
27.8k
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
27.8k
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
2.90k
{
533
2.90k
    xmlStructuredErrorFunc schannel = NULL;
534
535
2.90k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
2.90k
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
2.90k
    if (ctxt != NULL) {
539
2.90k
  ctxt->errNo = error;
540
2.90k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
1.29k
      schannel = ctxt->sax->serror;
542
2.90k
    }
543
2.90k
    if (ctxt != NULL) {
544
2.90k
        __xmlRaiseError(schannel,
545
2.90k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
2.90k
                    ctxt, NULL, XML_FROM_DTD, error,
547
2.90k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
2.90k
        (const char *) str2, NULL, 0, 0,
549
2.90k
        msg, (const char *) str1, (const char *) str2);
550
2.90k
  ctxt->valid = 0;
551
2.90k
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
2.90k
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
109k
{
573
109k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
109k
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
109k
    if (ctxt != NULL)
577
109k
  ctxt->errNo = error;
578
109k
    __xmlRaiseError(NULL, NULL, NULL,
579
109k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
109k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
109k
    if (ctxt != NULL) {
582
109k
  ctxt->wellFormed = 0;
583
109k
  if (ctxt->recovery == 0)
584
16.1k
      ctxt->disableSAX = 1;
585
109k
    }
586
109k
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
69.7k
{
604
69.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
69.7k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
69.7k
    if (ctxt != NULL)
608
69.7k
  ctxt->errNo = error;
609
69.7k
    __xmlRaiseError(NULL, NULL, NULL,
610
69.7k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
69.7k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
69.7k
        NULL, val, 0, msg, str1, val, str2);
613
69.7k
    if (ctxt != NULL) {
614
69.7k
  ctxt->wellFormed = 0;
615
69.7k
  if (ctxt->recovery == 0)
616
19.8k
      ctxt->disableSAX = 1;
617
69.7k
    }
618
69.7k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
277k
{
633
277k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
277k
        (ctxt->instate == XML_PARSER_EOF))
635
0
  return;
636
277k
    if (ctxt != NULL)
637
277k
  ctxt->errNo = error;
638
277k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
277k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
277k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
277k
                    val);
642
277k
    if (ctxt != NULL) {
643
277k
  ctxt->wellFormed = 0;
644
277k
  if (ctxt->recovery == 0)
645
92.2k
      ctxt->disableSAX = 1;
646
277k
    }
647
277k
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
85.7k
{
662
85.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
85.7k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
85.7k
    if (ctxt != NULL)
666
85.7k
  ctxt->errNo = error;
667
85.7k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
85.7k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
85.7k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
85.7k
                    val);
671
85.7k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
48.2k
{
689
48.2k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
48.2k
        (ctxt->instate == XML_PARSER_EOF))
691
0
  return;
692
48.2k
    if (ctxt != NULL)
693
48.2k
  ctxt->errNo = error;
694
48.2k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
48.2k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
48.2k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
48.2k
                    info1, info2, info3);
698
48.2k
    if (ctxt != NULL)
699
48.2k
  ctxt->nsWellFormed = 0;
700
48.2k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
610
{
718
610
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
610
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
610
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
610
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
610
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
610
                    info1, info2, info3);
725
610
}
726
727
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned sum wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add the size_t quantity @val to *@dst, clamping the result to ULONG_MAX.
 *
 * @val is taken as size_t so that, on platforms where size_t is wider than
 * unsigned long, a large value is saturated here rather than being
 * truncated by the implicit conversion at the call site.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;  /* fits: val <= ULONG_MAX - *dst */
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
13.4M
{
770
13.4M
    unsigned long consumed;
771
13.4M
    xmlParserInputPtr input = ctxt->input;
772
13.4M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
13.4M
    consumed = input->parentConsumed;
779
13.4M
    if ((entity == NULL) ||
780
13.4M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
8.47M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
8.47M
        xmlSaturatedAdd(&consumed, input->consumed);
783
8.47M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
8.47M
    }
785
13.4M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
13.4M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
13.4M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
13.4M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
13.4M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
180
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
180
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
180
                       "Maximum entity amplification factor exceeded");
803
180
        xmlHaltParser(ctxt);
804
180
        return(1);
805
180
    }
806
807
13.4M
    return(0);
808
13.4M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exist, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
159k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
159k
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
159k
    (void) sax;
1048
1049
159k
    if (ctxt == NULL) return;
1050
159k
    sax = ctxt->sax;
1051
159k
#ifdef LIBXML_SAX1_ENABLED
1052
159k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
159k
        ((sax->startElementNs != NULL) ||
1054
99.4k
         (sax->endElementNs != NULL) ||
1055
99.4k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
99.4k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
159k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
159k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
159k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
159k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
159k
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
159k
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
13.3k
{
1103
13.3k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
14.7k
    while (*src == 0x20) src++;
1107
183k
    while (*src != 0) {
1108
169k
  if (*src == 0x20) {
1109
43.8k
      while (*src == 0x20) src++;
1110
13.7k
      if (*src != 0)
1111
13.0k
    *dst++ = 0x20;
1112
155k
  } else {
1113
155k
      *dst++ = *src++;
1114
155k
  }
1115
169k
    }
1116
13.3k
    *dst = 0;
1117
13.3k
    if (dst == src)
1118
11.6k
       return(NULL);
1119
1.67k
    return(dst);
1120
13.3k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
3.90k
{
1136
3.90k
    int i;
1137
3.90k
    int remove_head = 0;
1138
3.90k
    int need_realloc = 0;
1139
3.90k
    const xmlChar *cur;
1140
1141
3.90k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
3.90k
    i = *len;
1144
3.90k
    if (i <= 0)
1145
157
        return(NULL);
1146
1147
3.74k
    cur = src;
1148
5.07k
    while (*cur == 0x20) {
1149
1.32k
        cur++;
1150
1.32k
  remove_head++;
1151
1.32k
    }
1152
188k
    while (*cur != 0) {
1153
185k
  if (*cur == 0x20) {
1154
8.98k
      cur++;
1155
8.98k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
789
          need_realloc = 1;
1157
789
    break;
1158
789
      }
1159
8.98k
  } else
1160
176k
      cur++;
1161
185k
    }
1162
3.74k
    if (need_realloc) {
1163
789
        xmlChar *ret;
1164
1165
789
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
789
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
789
  xmlAttrNormalizeSpace(ret, ret);
1171
789
  *len = strlen((const char *)ret);
1172
789
        return(ret);
1173
2.95k
    } else if (remove_head) {
1174
69
        *len -= remove_head;
1175
69
        memmove(src, src + remove_head, 1 + *len);
1176
69
  return(src);
1177
69
    }
1178
2.88k
    return(NULL);
1179
3.74k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
18.5k
               const xmlChar *value) {
1195
18.5k
    xmlDefAttrsPtr defaults;
1196
18.5k
    int len;
1197
18.5k
    const xmlChar *name;
1198
18.5k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
18.5k
    if (ctxt->attsSpecial != NULL) {
1204
16.0k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
284
      return;
1206
16.0k
    }
1207
1208
18.2k
    if (ctxt->attsDefault == NULL) {
1209
3.47k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
3.47k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
3.47k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
18.2k
    name = xmlSplitQName3(fullname, &len);
1219
18.2k
    if (name == NULL) {
1220
17.2k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
17.2k
  prefix = NULL;
1222
17.2k
    } else {
1223
1.01k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
1.01k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
1.01k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
18.2k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
18.2k
    if (defaults == NULL) {
1232
10.0k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
10.0k
                     (4 * 5) * sizeof(const xmlChar *));
1234
10.0k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
10.0k
  defaults->nbAttrs = 0;
1237
10.0k
  defaults->maxAttrs = 4;
1238
10.0k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
10.0k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
10.0k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
270
        xmlDefAttrsPtr temp;
1245
1246
270
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
270
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
270
  if (temp == NULL)
1249
0
      goto mem_error;
1250
270
  defaults = temp;
1251
270
  defaults->maxAttrs *= 2;
1252
270
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
270
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
270
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
18.2k
    name = xmlSplitQName3(fullattr, &len);
1264
18.2k
    if (name == NULL) {
1265
15.0k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
15.0k
  prefix = NULL;
1267
15.0k
    } else {
1268
3.15k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
3.15k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
3.15k
    }
1271
1272
18.2k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
18.2k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
18.2k
    len = xmlStrlen(value);
1276
18.2k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
18.2k
    if (value == NULL)
1278
0
        goto mem_error;
1279
18.2k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
18.2k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
18.2k
    if (ctxt->external)
1282
6.90k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
11.3k
    else
1284
11.3k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
18.2k
    defaults->nbAttrs++;
1286
1287
18.2k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
18.2k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
246k
{
1309
246k
    if (ctxt->attsSpecial == NULL) {
1310
7.73k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
7.73k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
7.73k
    }
1314
1315
246k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
5.09k
        return;
1317
1318
241k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
241k
                     (void *) (ptrdiff_t) type);
1320
241k
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
246k
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
172k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
172k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
172k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
66.1k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
66.1k
    }
1341
172k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
36.1k
{
1354
36.1k
    if (ctxt->attsSpecial == NULL)
1355
30.2k
        return;
1356
1357
5.87k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
5.87k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
1.25k
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
1.25k
        ctxt->attsSpecial = NULL;
1362
1.25k
    }
1363
5.87k
    return;
1364
36.1k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
1.48k
{
1427
1.48k
    const xmlChar *cur = lang, *nxt;
1428
1429
1.48k
    if (cur == NULL)
1430
57
        return (0);
1431
1.42k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
1.42k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
1.42k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
1.42k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
11
        cur += 2;
1441
40
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
40
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
29
            cur++;
1444
11
        return(cur[0] == 0);
1445
11
    }
1446
1.41k
    nxt = cur;
1447
5.52k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
5.52k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
4.11k
           nxt++;
1450
1.41k
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
99
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
90
            return(0);
1456
9
        return(1);
1457
99
    }
1458
1.31k
    if (nxt - cur < 2)
1459
154
        return(0);
1460
    /* we got an ISO 639 code */
1461
1.16k
    if (nxt[0] == 0)
1462
757
        return(1);
1463
408
    if (nxt[0] != '-')
1464
101
        return(0);
1465
1466
307
    nxt++;
1467
307
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
307
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
72
        goto region_m49;
1471
1472
886
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
886
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
651
           nxt++;
1475
235
    if (nxt - cur == 4)
1476
31
        goto script;
1477
204
    if (nxt - cur == 2)
1478
102
        goto region;
1479
102
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
19
        goto variant;
1481
83
    if (nxt - cur != 3)
1482
56
        return(0);
1483
    /* we parsed an extlang */
1484
27
    if (nxt[0] == 0)
1485
8
        return(1);
1486
19
    if (nxt[0] != '-')
1487
12
        return(0);
1488
1489
7
    nxt++;
1490
7
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
7
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
3
        goto region_m49;
1494
1495
7
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
7
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
3
           nxt++;
1498
4
    if (nxt - cur == 2)
1499
1
        goto region;
1500
3
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
0
        goto variant;
1502
3
    if (nxt - cur != 4)
1503
3
        return(0);
1504
    /* we parsed a script */
1505
31
script:
1506
31
    if (nxt[0] == 0)
1507
4
        return(1);
1508
27
    if (nxt[0] != '-')
1509
14
        return(0);
1510
1511
13
    nxt++;
1512
13
    cur = nxt;
1513
    /* now we can have region or variant */
1514
13
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
2
        goto region_m49;
1516
1517
76
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
76
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
65
           nxt++;
1520
1521
11
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
2
        goto variant;
1523
9
    if (nxt - cur != 2)
1524
7
        return(0);
1525
    /* we parsed a region */
1526
161
region:
1527
161
    if (nxt[0] == 0)
1528
84
        return(1);
1529
77
    if (nxt[0] != '-')
1530
65
        return(0);
1531
1532
12
    nxt++;
1533
12
    cur = nxt;
1534
    /* now we can just have a variant */
1535
69
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
69
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
57
           nxt++;
1538
1539
12
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
12
        return(0);
1541
1542
    /* we parsed a variant */
1543
21
variant:
1544
21
    if (nxt[0] == 0)
1545
2
        return(1);
1546
19
    if (nxt[0] != '-')
1547
11
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
8
    return (1);
1550
1551
77
region_m49:
1552
77
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
77
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
56
        nxt += 3;
1555
56
        goto region;
1556
56
    }
1557
21
    return(0);
1558
77
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
18.2k
{
1584
18.2k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
7.67k
        int i;
1586
12.8k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
6.62k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
1.43k
          if (ctxt->nsTab[i + 1] == URL)
1590
509
        return(-2);
1591
    /* out of scope keep it */
1592
929
    break;
1593
1.43k
      }
1594
6.62k
  }
1595
7.67k
    }
1596
17.7k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
9.32k
  ctxt->nsMax = 10;
1598
9.32k
  ctxt->nsNr = 0;
1599
9.32k
  ctxt->nsTab = (const xmlChar **)
1600
9.32k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
9.32k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
9.32k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
288
        const xmlChar ** tmp;
1608
288
        ctxt->nsMax *= 2;
1609
288
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
288
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
288
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
288
  ctxt->nsTab = tmp;
1617
288
    }
1618
17.7k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
17.7k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
17.7k
    return (ctxt->nsNr);
1621
17.7k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
5.81k
{
1634
5.81k
    int i;
1635
1636
5.81k
    if (ctxt->nsTab == NULL) return(0);
1637
5.81k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
5.81k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
22.0k
    for (i = 0;i < nr;i++) {
1645
16.1k
         ctxt->nsNr--;
1646
16.1k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
16.1k
    }
1648
5.81k
    return(nr);
1649
5.81k
}
1650
#endif
1651
1652
static int
1653
19.8k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
19.8k
    const xmlChar **atts;
1655
19.8k
    int *attallocs;
1656
19.8k
    int maxatts;
1657
1658
19.8k
    if (nr + 5 > ctxt->maxatts) {
1659
19.8k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
19.8k
  atts = (const xmlChar **) xmlMalloc(
1661
19.8k
             maxatts * sizeof(const xmlChar *));
1662
19.8k
  if (atts == NULL) goto mem_error;
1663
19.8k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
19.8k
                               (maxatts / 5) * sizeof(int));
1665
19.8k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
19.8k
        if (ctxt->maxatts > 0)
1670
44
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
19.8k
        xmlFree(ctxt->atts);
1672
19.8k
  ctxt->atts = atts;
1673
19.8k
  ctxt->attallocs = attallocs;
1674
19.8k
  ctxt->maxatts = maxatts;
1675
19.8k
    }
1676
19.8k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
19.8k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
5.04M
{
1694
5.04M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
5.04M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
120
        size_t newSize = ctxt->inputMax * 2;
1698
120
        xmlParserInputPtr *tmp;
1699
1700
120
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
120
                                               newSize * sizeof(*tmp));
1702
120
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
120
        ctxt->inputTab = tmp;
1707
120
        ctxt->inputMax = newSize;
1708
120
    }
1709
5.04M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
5.04M
    ctxt->input = value;
1711
5.04M
    return (ctxt->inputNr++);
1712
5.04M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
5.46M
{
1724
5.46M
    xmlParserInputPtr ret;
1725
1726
5.46M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
5.46M
    if (ctxt->inputNr <= 0)
1729
418k
        return (NULL);
1730
5.04M
    ctxt->inputNr--;
1731
5.04M
    if (ctxt->inputNr > 0)
1732
4.92M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
114k
    else
1734
114k
        ctxt->input = NULL;
1735
5.04M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
5.04M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
5.04M
    return (ret);
1738
5.46M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
1.82M
{
1751
1.82M
    if (ctxt == NULL) return(0);
1752
1.82M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
792
        xmlNodePtr *tmp;
1754
1755
792
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
792
                                      ctxt->nodeMax * 2 *
1757
792
                                      sizeof(ctxt->nodeTab[0]));
1758
792
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
792
        ctxt->nodeTab = tmp;
1763
792
  ctxt->nodeMax *= 2;
1764
792
    }
1765
1.82M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
1.82M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
1.82M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
1.82M
    ctxt->node = value;
1775
1.82M
    return (ctxt->nodeNr++);
1776
1.82M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
1.74M
{
1789
1.74M
    xmlNodePtr ret;
1790
1791
1.74M
    if (ctxt == NULL) return(NULL);
1792
1.74M
    if (ctxt->nodeNr <= 0)
1793
15.2k
        return (NULL);
1794
1.73M
    ctxt->nodeNr--;
1795
1.73M
    if (ctxt->nodeNr > 0)
1796
1.67M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
60.4k
    else
1798
60.4k
        ctxt->node = NULL;
1799
1.73M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
1.73M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
1.73M
    return (ret);
1802
1.74M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
1.47M
{
1821
1.47M
    xmlStartTag *tag;
1822
1823
1.47M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
1.64k
        const xmlChar * *tmp;
1825
1.64k
        xmlStartTag *tmp2;
1826
1.64k
        ctxt->nameMax *= 2;
1827
1.64k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
1.64k
                                    ctxt->nameMax *
1829
1.64k
                                    sizeof(ctxt->nameTab[0]));
1830
1.64k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
1.64k
  ctxt->nameTab = tmp;
1835
1.64k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
1.64k
                                    ctxt->nameMax *
1837
1.64k
                                    sizeof(ctxt->pushTab[0]));
1838
1.64k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
1.64k
  ctxt->pushTab = tmp2;
1843
1.47M
    } else if (ctxt->pushTab == NULL) {
1844
62.3k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
62.3k
                                            sizeof(ctxt->pushTab[0]));
1846
62.3k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
62.3k
    }
1849
1.47M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
1.47M
    ctxt->name = value;
1851
1.47M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
1.47M
    tag->prefix = prefix;
1853
1.47M
    tag->URI = URI;
1854
1.47M
    tag->line = line;
1855
1.47M
    tag->nsNr = nsNr;
1856
1.47M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
1.47M
}
1861
#ifdef LIBXML_PUSH_ENABLED
1862
/**
1863
 * nameNsPop:
1864
 * @ctxt: an XML parser context
1865
 *
1866
 * Pops the top element/prefix/URI name from the name stack
1867
 *
1868
 * Returns the name just removed
1869
 */
1870
static const xmlChar *
1871
nameNsPop(xmlParserCtxtPtr ctxt)
1872
335k
{
1873
335k
    const xmlChar *ret;
1874
1875
335k
    if (ctxt->nameNr <= 0)
1876
0
        return (NULL);
1877
335k
    ctxt->nameNr--;
1878
335k
    if (ctxt->nameNr > 0)
1879
327k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1880
7.24k
    else
1881
7.24k
        ctxt->name = NULL;
1882
335k
    ret = ctxt->nameTab[ctxt->nameNr];
1883
335k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1884
335k
    return (ret);
1885
335k
}
1886
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
1.03M
{
1931
1.03M
    const xmlChar *ret;
1932
1933
1.03M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
1.03M
    ctxt->nameNr--;
1936
1.03M
    if (ctxt->nameNr > 0)
1937
987k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
48.5k
    else
1939
48.5k
        ctxt->name = NULL;
1940
1.03M
    ret = ctxt->nameTab[ctxt->nameNr];
1941
1.03M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
1.03M
    return (ret);
1943
1.03M
}
1944
1945
2.05M
/*
 * spacePush:
 * Appends @val to the space stack of @ctxt, doubling the table when it is
 * full, and points ctxt->space at the new top entry.
 *
 * Returns the index of the pushed entry, or -1 on allocation failure.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the capacity bump: the table was not resized. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1963
1964
1.98M
/*
 * spacePop:
 * Removes the top entry of the space stack of @ctxt. ctxt->space is moved
 * to the entry below, or to slot 0 when the stack becomes empty, and the
 * vacated slot is reset to -1.
 *
 * Returns the value removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = ctxt->spaceNr - 1;
    ctxt->spaceNr = top;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often need to make assumption on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
69.6M
/*
 * Raw accessors on the current input of the in-scope `ctxt`:
 *   RAW / CUR   the byte at input->cur (both expand to the same expression)
 *   NXT(val)    the byte at offset val from input->cur
 *   CUR_PTR     the input->cur pointer itself
 *   BASE_PTR    the input->base pointer
 * None of them performs any bounds check.
 */
#define RAW (*ctxt->input->cur)
2013
49.4M
#define CUR (*ctxt->input->cur)
2014
43.9M
#define NXT(val) ctxt->input->cur[(val)]
2015
4.43M
#define CUR_PTR ctxt->input->cur
2016
66.9k
#define BASE_PTR ctxt->input->base
2017
2018
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn in order. Each CMPn is built on CMP(n-1),
 * so the comparison short-circuits at the first mismatch. The argument s
 * is not parenthesized in the expansion and is evaluated once per byte
 * compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
2019
12.9M
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2020
6.53M
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2021
#define CMP5( s, c1, c2, c3, c4, c5 ) \
2022
12.0M
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2023
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2024
10.5M
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2025
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2026
9.17M
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2027
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2028
7.81M
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2029
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2030
3.58M
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2031
3.58M
    ((unsigned char *) s)[ 8 ] == c9 )
2032
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2033
24.3k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2034
24.3k
    ((unsigned char *) s)[ 9 ] == c10 )
2035
2036
18.8M
/*
 * SKIP(val): advance input->cur and input->col by val, then call
 * xmlParserInputGrow() if the byte now under the cursor is 0.
 * The line counter is not touched, so the skipped bytes must not contain
 * a newline. val is evaluated twice.
 */
#define SKIP(val) do {             \
2037
18.8M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2038
18.8M
    if (*ctxt->input->cur == 0)           \
2039
18.8M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2040
18.8M
  } while (0)
2041
2042
11.0k
/*
 * SKIPL(val): advance input->cur by val bytes one at a time, keeping
 * input->line and input->col in sync (a '\n' bumps line and resets col
 * to 1, any other byte bumps col). Afterwards xmlParserInputGrow() is
 * called if the byte under the cursor is 0.
 * val is re-evaluated on every loop iteration and is not parenthesized.
 */
#define SKIPL(val) do {             \
2043
11.0k
    int skipl;                \
2044
2.79M
    for(skipl=0; skipl<val; skipl++) {         \
2045
2.78M
  if (*(ctxt->input->cur) == '\n') {       \
2046
46.6k
  ctxt->input->line++; ctxt->input->col = 1;      \
2047
2.73M
  } else ctxt->input->col++;         \
2048
2.78M
  ctxt->input->cur++;           \
2049
2.78M
    }                  \
2050
11.0k
    if (*ctxt->input->cur == 0)           \
2051
11.0k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2052
11.0k
  } while (0)
2053
2054
19.4M
/*
 * SHRINK: call xmlSHRINK(ctxt) when ctxt->progressive is 0, more than
 * 2 * INPUT_CHUNK bytes lie between input->base and input->cur, and fewer
 * than 2 * INPUT_CHUNK bytes remain before input->end.
 * The expansion is a bare `if (...) call;` that already carries its own
 * trailing semicolon, so it is not safe as the body of an unbraced
 * if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
19.4M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
19.4M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
19.4M
  xmlSHRINK (ctxt);
2058
2059
245k
/*
 * xmlSHRINK:
 * Out-of-line part of the SHRINK macro. Shrinks the current input when it
 * has a buffer with an encoder or a read callback, then grows the input
 * again if the byte under the cursor is 0.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;

    /* Don't shrink memory buffers. */
    if (in->buf) {
        if ((in->buf->encoder) || (in->buf->readcallback))
            xmlParserInputShrink(in);
    }

    if (*in->cur == 0)
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2067
2068
68.3M
/*
 * GROW: call xmlGROW(ctxt) when ctxt->progressive is 0 and fewer than
 * INPUT_CHUNK bytes remain between input->cur and input->end.
 * Like SHRINK, the expansion is a bare `if (...) call;` ending in its own
 * semicolon, so it is not safe as the body of an unbraced if/else.
 */
#define GROW if ((ctxt->progressive == 0) &&       \
2069
68.3M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
68.3M
  xmlGROW (ctxt);
2071
2072
11.1M
/*
 * xmlGROW:
 * Out-of-line part of the GROW macro: asks the current input for
 * INPUT_CHUNK more bytes.
 *
 * Unless XML_PARSE_HUGE is set, an input that has a read callback and
 * whose cursor is more than XML_MAX_LOOKUP_LIMIT bytes from either end of
 * the buffer is refused: an internal error is raised and the parser is
 * halted. The same happens if the cursor is found outside [base, end]
 * after growing.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;

    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
         ((ctxt->input->buf) &&
          (ctxt->input->buf->readcallback != NULL)) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        /*
         * Report first, halt second, in the same order as the
         * "Huge input lookup" branch above, so the error is raised
         * while the parser state still describes the failing input.
         */
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        xmlHaltParser(ctxt);
        return;
    }
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2095
2096
18.5M
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the in-scope ctxt. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2097
2098
34.6M
/* NEXT: shorthand for xmlNextChar() on the in-scope ctxt. */
#define NEXT xmlNextChar(ctxt)
2099
2100
3.47M
/*
 * NEXT1: advance input->cur and input->col by one, then call
 * xmlParserInputGrow() if the byte under the cursor is 0. The line
 * counter is not updated. Expands to a plain { } block, not do/while(0).
 */
#define NEXT1 {               \
2101
3.47M
  ctxt->input->col++;           \
2102
3.47M
  ctxt->input->cur++;           \
2103
3.47M
  if (*ctxt->input->cur == 0)         \
2104
3.47M
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);   \
2105
3.47M
    }
2106
2107
32.7M
/*
 * NEXTL(l): account for one character in line/col (a '\n' under the
 * cursor bumps line and resets col to 1, anything else bumps col), then
 * advance input->cur by l bytes. No input growth is attempted.
 */
#define NEXTL(l) do {             \
2108
32.7M
    if (*(ctxt->input->cur) == '\n') {         \
2109
590k
  ctxt->input->line++; ctxt->input->col = 1;      \
2110
32.1M
    } else ctxt->input->col++;           \
2111
32.7M
    ctxt->input->cur += l;        \
2112
32.7M
  } while (0)
2113
2114
34.0M
/* CUR_CHAR(l): xmlCurrentChar() on ctxt; l must be an lvalue (&l is passed). */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2115
672M
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on string s; l must be an lvalue. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2116
2117
/*
 * COPY_BUF(l,b,i,v): when l is 1 store v directly at b[i++], otherwise
 * append v through xmlCopyCharMultiByte() and advance i by its return
 * value. Expands to an unbraced if/else without a trailing semicolon,
 * so it must be used as a complete statement.
 */
#define COPY_BUF(l,b,i,v)           \
2118
686M
    if (l == 1) b[i++] = v;           \
2119
686M
    else i += xmlCopyCharMultiByte(&b[i],v)
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
18.5M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
18.5M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
18.5M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
18.5M
        (ctxt->instate == XML_PARSER_START)) {
2141
9.18M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
9.18M
  cur = ctxt->input->cur;
2146
9.18M
  while (IS_BLANK_CH(*cur)) {
2147
4.23M
      if (*cur == '\n') {
2148
173k
    ctxt->input->line++; ctxt->input->col = 1;
2149
4.06M
      } else {
2150
4.06M
    ctxt->input->col++;
2151
4.06M
      }
2152
4.23M
      cur++;
2153
4.23M
      if (res < INT_MAX)
2154
4.23M
    res++;
2155
4.23M
      if (*cur == 0) {
2156
16.6k
    ctxt->input->cur = cur;
2157
16.6k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
16.6k
    cur = ctxt->input->cur;
2159
16.6k
      }
2160
4.23M
  }
2161
9.18M
  ctxt->input->cur = cur;
2162
9.40M
    } else {
2163
9.40M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
31.9M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
31.9M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
12.7M
    NEXT;
2168
19.2M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
5.01M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
71.3k
                    break;
2174
4.94M
          xmlParsePEReference(ctxt);
2175
14.2M
            } else if (CUR == 0) {
2176
4.93M
                unsigned long consumed;
2177
4.93M
                xmlEntityPtr ent;
2178
2179
4.93M
                if (ctxt->inputNr <= 1)
2180
5.68k
                    break;
2181
2182
4.92M
                consumed = ctxt->input->consumed;
2183
4.92M
                xmlSaturatedAddSizeT(&consumed,
2184
4.92M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
4.92M
                ent = ctxt->input->entity;
2191
4.92M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
4.92M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
639
                    ent->flags |= XML_ENT_PARSED;
2194
2195
639
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
639
                }
2197
2198
4.92M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
4.92M
                xmlPopInput(ctxt);
2201
9.32M
            } else {
2202
9.32M
                break;
2203
9.32M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
22.5M
      if (res < INT_MAX)
2213
22.5M
    res++;
2214
22.5M
        }
2215
9.40M
    }
2216
18.5M
    return(res);
2217
18.5M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
4.92M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
4.92M
    xmlParserInputPtr input;
2237
2238
4.92M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
4.92M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
4.92M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
4.92M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
4.92M
    input = inputPop(ctxt);
2247
4.92M
    if (input->entity != NULL)
2248
4.92M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
4.92M
    xmlFreeInputStream(input);
2250
4.92M
    if (*ctxt->input->cur == 0)
2251
2.38M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
4.92M
    return(CUR);
2253
4.92M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
4.93M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
4.93M
    int ret;
2267
4.93M
    if (input == NULL) return(-1);
2268
2269
4.93M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
4.93M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
4.93M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
4.93M
    ret = inputPush(ctxt, input);
2285
4.93M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
4.93M
    GROW;
2288
4.93M
    return(ret);
2289
4.93M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
47.7k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
47.7k
    int val = 0;
2311
47.7k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
47.7k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
47.7k
        (NXT(2) == 'x')) {
2318
14.9k
  SKIP(3);
2319
14.9k
  GROW;
2320
97.9k
  while (RAW != ';') { /* loop blocked by count */
2321
84.4k
      if (count++ > 20) {
2322
5.81k
    count = 0;
2323
5.81k
    GROW;
2324
5.81k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
5.81k
      }
2327
84.4k
      if ((RAW >= '0') && (RAW <= '9'))
2328
71.0k
          val = val * 16 + (CUR - '0');
2329
13.4k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
10.3k
          val = val * 16 + (CUR - 'a') + 10;
2331
3.09k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
1.57k
          val = val * 16 + (CUR - 'A') + 10;
2333
1.52k
      else {
2334
1.52k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
1.52k
    val = 0;
2336
1.52k
    break;
2337
1.52k
      }
2338
82.9k
      if (val > 0x110000)
2339
63.9k
          val = 0x110000;
2340
2341
82.9k
      NEXT;
2342
82.9k
      count++;
2343
82.9k
  }
2344
14.9k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
13.4k
      ctxt->input->col++;
2347
13.4k
      ctxt->input->cur++;
2348
13.4k
  }
2349
32.7k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
32.7k
  SKIP(2);
2351
32.7k
  GROW;
2352
181k
  while (RAW != ';') { /* loop blocked by count */
2353
152k
      if (count++ > 20) {
2354
6.07k
    count = 0;
2355
6.07k
    GROW;
2356
6.07k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
6.07k
      }
2359
152k
      if ((RAW >= '0') && (RAW <= '9'))
2360
148k
          val = val * 10 + (CUR - '0');
2361
3.48k
      else {
2362
3.48k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
3.48k
    val = 0;
2364
3.48k
    break;
2365
3.48k
      }
2366
148k
      if (val > 0x110000)
2367
66.1k
          val = 0x110000;
2368
2369
148k
      NEXT;
2370
148k
      count++;
2371
148k
  }
2372
32.7k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
29.2k
      ctxt->input->col++;
2375
29.2k
      ctxt->input->cur++;
2376
29.2k
  }
2377
32.7k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
47.7k
    if (val >= 0x110000) {
2389
93
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
93
                "xmlParseCharRef: character reference out of bounds\n",
2391
93
          val);
2392
47.6k
    } else if (IS_CHAR(val)) {
2393
42.2k
        return(val);
2394
42.2k
    } else {
2395
5.38k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
5.38k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
5.38k
                    val);
2398
5.38k
    }
2399
5.47k
    return(0);
2400
47.7k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
37.4k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
37.4k
    const xmlChar *ptr;
2423
37.4k
    xmlChar cur;
2424
37.4k
    int val = 0;
2425
2426
37.4k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
37.4k
    ptr = *str;
2428
37.4k
    cur = *ptr;
2429
37.4k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
2.88k
  ptr += 3;
2431
2.88k
  cur = *ptr;
2432
8.43k
  while (cur != ';') { /* Non input consuming loop */
2433
5.73k
      if ((cur >= '0') && (cur <= '9'))
2434
3.18k
          val = val * 16 + (cur - '0');
2435
2.55k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
472
          val = val * 16 + (cur - 'a') + 10;
2437
2.07k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
1.88k
          val = val * 16 + (cur - 'A') + 10;
2439
192
      else {
2440
192
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
192
    val = 0;
2442
192
    break;
2443
192
      }
2444
5.54k
      if (val > 0x110000)
2445
1.27k
          val = 0x110000;
2446
2447
5.54k
      ptr++;
2448
5.54k
      cur = *ptr;
2449
5.54k
  }
2450
2.88k
  if (cur == ';')
2451
2.69k
      ptr++;
2452
34.5k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
34.5k
  ptr += 2;
2454
34.5k
  cur = *ptr;
2455
133k
  while (cur != ';') { /* Non input consuming loops */
2456
99.6k
      if ((cur >= '0') && (cur <= '9'))
2457
99.3k
          val = val * 10 + (cur - '0');
2458
252
      else {
2459
252
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
252
    val = 0;
2461
252
    break;
2462
252
      }
2463
99.3k
      if (val > 0x110000)
2464
18.9k
          val = 0x110000;
2465
2466
99.3k
      ptr++;
2467
99.3k
      cur = *ptr;
2468
99.3k
  }
2469
34.5k
  if (cur == ';')
2470
34.3k
      ptr++;
2471
34.5k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
37.4k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
37.4k
    if (val >= 0x110000) {
2483
61
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
61
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
61
                val);
2486
37.4k
    } else if (IS_CHAR(val)) {
2487
36.8k
        return(val);
2488
36.8k
    } else {
2489
532
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
532
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
532
        val);
2492
532
    }
2493
593
    return(0);
2494
37.4k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
2588
 * Macro used to grow the current buffer.
2589
 * buffer##_size is expected to be a size_t
2590
 * mem_error: is expected to handle memory allocation failures
2591
 */
2592
460k
/*
 * growBuffer(buffer, n): reallocate `buffer` to twice its current size
 * plus n bytes. The current size is read from, and the new size written
 * back to, the variable named <buffer>_size (token pasting). Jumps to the
 * caller's `mem_error` label if the new size wraps around or if
 * xmlRealloc() fails; in both cases `buffer` and its size are unchanged.
 */
#define growBuffer(buffer, n) {           \
2593
460k
    xmlChar *tmp;             \
2594
460k
    size_t new_size = buffer##_size * 2 + n;                            \
2595
460k
    if (new_size < buffer##_size) goto mem_error;                       \
2596
460k
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
2597
460k
    if (tmp == NULL) goto mem_error;         \
2598
460k
    buffer = tmp;             \
2599
460k
    buffer##_size = new_size;                                           \
2600
460k
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
7.98M
                           int check) {
2617
7.98M
    xmlChar *buffer = NULL;
2618
7.98M
    size_t buffer_size = 0;
2619
7.98M
    size_t nbchars = 0;
2620
2621
7.98M
    xmlChar *current = NULL;
2622
7.98M
    xmlChar *rep = NULL;
2623
7.98M
    const xmlChar *last;
2624
7.98M
    xmlEntityPtr ent;
2625
7.98M
    int c,l;
2626
2627
7.98M
    if (str == NULL)
2628
5.17k
        return(NULL);
2629
7.98M
    last = str + len;
2630
2631
7.98M
    if (((ctxt->depth > 40) &&
2632
7.98M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
7.98M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
7.98M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
7.98M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
7.98M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
7.98M
    if (str < last)
2651
7.96M
  c = CUR_SCHAR(str, l);
2652
18.4k
    else
2653
18.4k
        c = 0;
2654
509M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
509M
           (c != end2) && (c != end3) &&
2656
509M
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
501M
  if (c == 0) break;
2659
501M
        if ((c == '&') && (str[1] == '#')) {
2660
37.4k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
37.4k
      if (val == 0)
2662
593
                goto int_error;
2663
36.8k
      COPY_BUF(0,buffer,nbchars,val);
2664
36.8k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
24
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
24
      }
2667
501M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
7.49M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
7.49M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
7.49M
      if ((ent != NULL) &&
2674
7.49M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
27.6k
    if (ent->content != NULL) {
2676
27.6k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
27.6k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
0
        }
2680
27.6k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
7.46M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
7.42M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
60
                    goto int_error;
2688
2689
7.42M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
60
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
60
                    xmlHaltParser(ctxt);
2692
60
                    ent->content[0] = 0;
2693
60
                    goto int_error;
2694
60
                }
2695
2696
7.42M
                ent->flags |= XML_ENT_EXPANDING;
2697
7.42M
    ctxt->depth++;
2698
7.42M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
7.42M
                        ent->length, what, 0, 0, 0, check);
2700
7.42M
    ctxt->depth--;
2701
7.42M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
7.42M
    if (rep == NULL) {
2704
1.04k
                    ent->content[0] = 0;
2705
1.04k
                    goto int_error;
2706
1.04k
                }
2707
2708
7.42M
                current = rep;
2709
1.27G
                while (*current != 0) { /* non input consuming loop */
2710
1.27G
                    buffer[nbchars++] = *current++;
2711
1.27G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
792k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
792k
                    }
2714
1.27G
                }
2715
7.42M
                xmlFree(rep);
2716
7.42M
                rep = NULL;
2717
7.42M
      } else if (ent != NULL) {
2718
10.8k
    int i = xmlStrlen(ent->name);
2719
10.8k
    const xmlChar *cur = ent->name;
2720
2721
10.8k
    buffer[nbchars++] = '&';
2722
10.8k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
0
    }
2725
21.8k
    for (;i > 0;i--)
2726
11.0k
        buffer[nbchars++] = *cur++;
2727
10.8k
    buffer[nbchars++] = ';';
2728
10.8k
      }
2729
494M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
56.2k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
56.2k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
56.2k
      if (ent != NULL) {
2735
48.8k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
455
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
455
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
455
      (ctxt->validate != 0)) {
2745
428
      xmlLoadEntityContent(ctxt, ent);
2746
428
        } else {
2747
27
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
27
      "not validating will not read content for PE entity %s\n",
2749
27
                          ent->name, NULL);
2750
27
        }
2751
455
    }
2752
2753
48.8k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
30
                    goto int_error;
2755
2756
48.8k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
39
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
39
                    xmlHaltParser(ctxt);
2759
39
                    if (ent->content != NULL)
2760
21
                        ent->content[0] = 0;
2761
39
                    goto int_error;
2762
39
                }
2763
2764
48.8k
                ent->flags |= XML_ENT_EXPANDING;
2765
48.8k
    ctxt->depth++;
2766
48.8k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
48.8k
                        ent->length, what, 0, 0, 0, check);
2768
48.8k
    ctxt->depth--;
2769
48.8k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
48.8k
    if (rep == NULL) {
2772
243
                    if (ent->content != NULL)
2773
42
                        ent->content[0] = 0;
2774
243
                    goto int_error;
2775
243
                }
2776
48.5k
                current = rep;
2777
262M
                while (*current != 0) { /* non input consuming loop */
2778
262M
                    buffer[nbchars++] = *current++;
2779
262M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
12.3k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
12.3k
                    }
2782
262M
                }
2783
48.5k
                xmlFree(rep);
2784
48.5k
                rep = NULL;
2785
48.5k
      }
2786
493M
  } else {
2787
493M
      COPY_BUF(l,buffer,nbchars,c);
2788
493M
      str += l;
2789
493M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
90.9k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
90.9k
      }
2792
493M
  }
2793
501M
  if (str < last)
2794
493M
      c = CUR_SCHAR(str, l);
2795
7.96M
  else
2796
7.96M
      c = 0;
2797
501M
    }
2798
7.98M
    buffer[nbchars] = 0;
2799
7.98M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
2.06k
int_error:
2804
2.06k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
2.06k
    if (buffer != NULL)
2807
2.06k
        xmlFree(buffer);
2808
2.06k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
1.32k
                           xmlChar end3) {
2836
1.32k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
1.32k
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
1.32k
                                      end, end2, end3, 0));
2840
1.32k
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
32.3k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
32.3k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
32.3k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
32.3k
                                      end, end2, end3, 0));
2868
32.3k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
776k
                     int blank_chars) {
2890
776k
    int i, ret;
2891
776k
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
776k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
35.0k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
740k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
740k
        (*(ctxt->space) == -2))
2905
243k
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
497k
    if (blank_chars == 0) {
2911
1.23M
  for (i = 0;i < len;i++)
2912
1.05M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
238k
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
438k
    if (ctxt->node == NULL) return(0);
2919
429k
    if (ctxt->myDoc != NULL) {
2920
429k
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
429k
        if (ret == 0) return(1);
2922
415k
        if (ret == 1) return(0);
2923
415k
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
415k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
411k
    if ((ctxt->node->children == NULL) &&
2930
411k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
411k
    lastChild = xmlGetLastChild(ctxt->node);
2933
411k
    if (lastChild == NULL) {
2934
74.7k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
74.7k
            (ctxt->node->content != NULL)) return(0);
2936
336k
    } else if (xmlNodeIsText(lastChild))
2937
4.59k
        return(0);
2938
331k
    else if ((ctxt->node->children != NULL) &&
2939
331k
             (xmlNodeIsText(ctxt->node->children)))
2940
3.54k
        return(0);
2941
403k
    return(1);
2942
411k
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
1.54M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
1.54M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
1.54M
    xmlChar *buffer = NULL;
2973
1.54M
    int len = 0;
2974
1.54M
    int max = XML_MAX_NAMELEN;
2975
1.54M
    xmlChar *ret = NULL;
2976
1.54M
    const xmlChar *cur = name;
2977
1.54M
    int c;
2978
2979
1.54M
    if (prefix == NULL) return(NULL);
2980
1.54M
    *prefix = NULL;
2981
2982
1.54M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
1.54M
    if (cur[0] == ':')
2993
350
  return(xmlStrdup(name));
2994
2995
1.54M
    c = *cur++;
2996
6.81M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
5.27M
  buf[len++] = c;
2998
5.27M
  c = *cur++;
2999
5.27M
    }
3000
1.54M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
284
  max = len * 2;
3006
3007
284
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
284
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
284
  memcpy(buffer, buf, len);
3013
780k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
780k
      if (len + 10 > max) {
3015
665
          xmlChar *tmp;
3016
3017
665
    max *= 2;
3018
665
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
665
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
665
    buffer = tmp;
3025
665
      }
3026
780k
      buffer[len++] = c;
3027
780k
      c = *cur++;
3028
780k
  }
3029
284
  buffer[len] = 0;
3030
284
    }
3031
3032
1.54M
    if ((c == ':') && (*cur == 0)) {
3033
515
        if (buffer != NULL)
3034
20
      xmlFree(buffer);
3035
515
  *prefix = NULL;
3036
515
  return(xmlStrdup(name));
3037
515
    }
3038
3039
1.54M
    if (buffer == NULL)
3040
1.54M
  ret = xmlStrndup(buf, len);
3041
264
    else {
3042
264
  ret = buffer;
3043
264
  buffer = NULL;
3044
264
  max = XML_MAX_NAMELEN;
3045
264
    }
3046
3047
3048
1.54M
    if (c == ':') {
3049
80.0k
  c = *cur;
3050
80.0k
        *prefix = ret;
3051
80.0k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
80.0k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
80.0k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
80.0k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
80.0k
        (c == '_') || (c == ':'))) {
3063
362
      int l;
3064
362
      int first = CUR_SCHAR(cur, l);
3065
3066
362
      if (!IS_LETTER(first) && (first != '_')) {
3067
168
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
168
          "Name %s is not XML Namespace compliant\n",
3069
168
          name);
3070
168
      }
3071
362
  }
3072
80.0k
  cur++;
3073
3074
502k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
422k
      buf[len++] = c;
3076
422k
      c = *cur++;
3077
422k
  }
3078
80.0k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
185
      max = len * 2;
3084
3085
185
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
185
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
185
      memcpy(buffer, buf, len);
3091
300k
      while (c != 0) { /* tested bigname2.xml */
3092
299k
    if (len + 10 > max) {
3093
365
        xmlChar *tmp;
3094
3095
365
        max *= 2;
3096
365
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
365
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
365
        buffer = tmp;
3103
365
    }
3104
299k
    buffer[len++] = c;
3105
299k
    c = *cur++;
3106
299k
      }
3107
185
      buffer[len] = 0;
3108
185
  }
3109
3110
80.0k
  if (buffer == NULL)
3111
79.8k
      ret = xmlStrndup(buf, len);
3112
185
  else {
3113
185
      ret = buffer;
3114
185
  }
3115
80.0k
    }
3116
3117
1.54M
    return(ret);
3118
1.54M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
7.69M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
7.69M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
7.42M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
7.42M
      (((c >= 'a') && (c <= 'z')) ||
3160
7.42M
       ((c >= 'A') && (c <= 'Z')) ||
3161
7.42M
       (c == '_') || (c == ':') ||
3162
7.42M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
7.42M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
7.42M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
7.42M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
7.42M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
7.42M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
7.42M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
7.42M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
7.42M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
7.42M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
7.42M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
7.42M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
7.40M
      return(1);
3175
7.42M
    } else {
3176
271k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
257k
      return(1);
3178
271k
    }
3179
39.3k
    return(0);
3180
7.69M
}
3181
3182
static int
3183
165M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
165M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
164M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
164M
      (((c >= 'a') && (c <= 'z')) ||
3191
164M
       ((c >= 'A') && (c <= 'Z')) ||
3192
164M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
164M
       (c == '_') || (c == ':') ||
3194
164M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
164M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
164M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
164M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
164M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
164M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
164M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
164M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
164M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
164M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
164M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
164M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
164M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
164M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
164M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
156M
       return(1);
3210
164M
    } else {
3211
1.29M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
1.29M
            (c == '.') || (c == '-') ||
3213
1.29M
      (c == '_') || (c == ':') ||
3214
1.29M
      (IS_COMBINING(c)) ||
3215
1.29M
      (IS_EXTENDER(c)))
3216
987k
      return(1);
3217
1.29M
    }
3218
7.78M
    return(0);
3219
165M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
86.2k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
86.2k
    int len = 0, l;
3227
86.2k
    int c;
3228
86.2k
    int count = 0;
3229
86.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
35.5k
                    XML_MAX_TEXT_LENGTH :
3231
86.2k
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
86.2k
    GROW;
3241
86.2k
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
86.2k
    c = CUR_CHAR(l);
3244
86.2k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
45.7k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
45.7k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
43.3k
         ((c >= 'A') && (c <= 'Z')) ||
3252
43.3k
         (c == '_') || (c == ':') ||
3253
43.3k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
43.3k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
43.3k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
43.3k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
43.3k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
43.3k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
43.3k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
43.3k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
43.3k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
43.3k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
43.3k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
43.3k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
27.6k
      return(NULL);
3266
27.6k
  }
3267
18.1k
  len += l;
3268
18.1k
  NEXTL(l);
3269
18.1k
  c = CUR_CHAR(l);
3270
758k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
758k
         (((c >= 'a') && (c <= 'z')) ||
3272
754k
          ((c >= 'A') && (c <= 'Z')) ||
3273
754k
          ((c >= '0') && (c <= '9')) || /* !start */
3274
754k
          (c == '_') || (c == ':') ||
3275
754k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
754k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
754k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
754k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
754k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
754k
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
754k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
754k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
754k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
754k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
754k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
754k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
754k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
754k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
754k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
754k
    )) {
3291
740k
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
6.30k
    count = 0;
3293
6.30k
    GROW;
3294
6.30k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
6.30k
      }
3297
740k
            if (len <= INT_MAX - l)
3298
740k
          len += l;
3299
740k
      NEXTL(l);
3300
740k
      c = CUR_CHAR(l);
3301
740k
  }
3302
40.4k
    } else {
3303
40.4k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
40.4k
      (!IS_LETTER(c) && (c != '_') &&
3305
38.2k
       (c != ':'))) {
3306
25.9k
      return(NULL);
3307
25.9k
  }
3308
14.5k
  len += l;
3309
14.5k
  NEXTL(l);
3310
14.5k
  c = CUR_CHAR(l);
3311
3312
814k
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
814k
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
811k
    (c == '.') || (c == '-') ||
3315
811k
    (c == '_') || (c == ':') ||
3316
811k
    (IS_COMBINING(c)) ||
3317
811k
    (IS_EXTENDER(c)))) {
3318
800k
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
7.05k
    count = 0;
3320
7.05k
    GROW;
3321
7.05k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
7.05k
      }
3324
800k
            if (len <= INT_MAX - l)
3325
800k
          len += l;
3326
800k
      NEXTL(l);
3327
800k
      c = CUR_CHAR(l);
3328
800k
  }
3329
14.5k
    }
3330
32.6k
    if (len > maxLength) {
3331
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
        return(NULL);
3333
0
    }
3334
32.6k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
32.6k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
193
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
32.4k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
32.6k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
9.25M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
9.25M
    const xmlChar *in;
3370
9.25M
    const xmlChar *ret;
3371
9.25M
    size_t count = 0;
3372
9.25M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
1.52M
                       XML_MAX_TEXT_LENGTH :
3374
9.25M
                       XML_MAX_NAME_LENGTH;
3375
3376
9.25M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
9.25M
    in = ctxt->input->cur;
3386
9.25M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
9.25M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
9.25M
  (*in == '_') || (*in == ':')) {
3389
9.19M
  in++;
3390
38.5M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
38.5M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
38.5M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
38.5M
         (*in == '_') || (*in == '-') ||
3394
38.5M
         (*in == ':') || (*in == '.'))
3395
29.3M
      in++;
3396
9.19M
  if ((*in > 0) && (*in < 0x80)) {
3397
9.17M
      count = in - ctxt->input->cur;
3398
9.17M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
9.17M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
9.17M
      ctxt->input->cur = in;
3404
9.17M
      ctxt->input->col += count;
3405
9.17M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
9.17M
      return(ret);
3408
9.17M
  }
3409
9.19M
    }
3410
    /* accelerator for special cases */
3411
86.2k
    return(xmlParseNameComplex(ctxt));
3412
9.25M
}
3413
3414
static const xmlChar *
3415
54.5k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
54.5k
    int len = 0, l;
3417
54.5k
    int c;
3418
54.5k
    int count = 0;
3419
54.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
19.6k
                    XML_MAX_TEXT_LENGTH :
3421
54.5k
                    XML_MAX_NAME_LENGTH;
3422
54.5k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
54.5k
    GROW;
3432
54.5k
    startPosition = CUR_PTR - BASE_PTR;
3433
54.5k
    c = CUR_CHAR(l);
3434
54.5k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
54.5k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
42.0k
  return(NULL);
3437
42.0k
    }
3438
3439
914k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
914k
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
901k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
7.94k
      count = 0;
3443
7.94k
      GROW;
3444
7.94k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
7.94k
  }
3447
901k
        if (len <= INT_MAX - l)
3448
901k
      len += l;
3449
901k
  NEXTL(l);
3450
901k
  c = CUR_CHAR(l);
3451
901k
  if (c == 0) {
3452
1.86k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
1.86k
      ctxt->input->cur -= l;
3459
1.86k
      GROW;
3460
1.86k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
1.86k
      ctxt->input->cur += l;
3463
1.86k
      c = CUR_CHAR(l);
3464
1.86k
  }
3465
901k
    }
3466
12.4k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
12.4k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
12.4k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
3.06M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
3.06M
    const xmlChar *in, *e;
3491
3.06M
    const xmlChar *ret;
3492
3.06M
    size_t count = 0;
3493
3.06M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
702k
                       XML_MAX_TEXT_LENGTH :
3495
3.06M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
3.06M
    in = ctxt->input->cur;
3505
3.06M
    e = ctxt->input->end;
3506
3.06M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
3.06M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
3.06M
   (*in == '_')) && (in < e)) {
3509
3.01M
  in++;
3510
11.9M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
11.9M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
11.9M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
11.9M
          (*in == '_') || (*in == '-') ||
3514
11.9M
          (*in == '.')) && (in < e))
3515
8.95M
      in++;
3516
3.01M
  if (in >= e)
3517
446
      goto complex;
3518
3.01M
  if ((*in > 0) && (*in < 0x80)) {
3519
3.00M
      count = in - ctxt->input->cur;
3520
3.00M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
3.00M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
3.00M
      ctxt->input->cur = in;
3526
3.00M
      ctxt->input->col += count;
3527
3.00M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
3.00M
      return(ret);
3531
3.00M
  }
3532
3.01M
    }
3533
54.5k
complex:
3534
54.5k
    return(xmlParseNCNameComplex(ctxt));
3535
3.06M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
938k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
938k
    register const xmlChar *cmp = other;
3551
938k
    register const xmlChar *in;
3552
938k
    const xmlChar *ret;
3553
3554
938k
    GROW;
3555
938k
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
938k
    in = ctxt->input->cur;
3559
4.81M
    while (*in != 0 && *in == *cmp) {
3560
3.87M
  ++in;
3561
3.87M
  ++cmp;
3562
3.87M
    }
3563
938k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
906k
  ctxt->input->col += in - ctxt->input->cur;
3566
906k
  ctxt->input->cur = in;
3567
906k
  return (const xmlChar*) 1;
3568
906k
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
31.9k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
31.9k
    if (ret == other) {
3573
2.33k
  return (const xmlChar*) 1;
3574
2.33k
    }
3575
29.6k
    return ret;
3576
31.9k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
7.64M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
7.64M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
7.64M
    const xmlChar *cur = *str;
3600
7.64M
    int len = 0, l;
3601
7.64M
    int c;
3602
7.64M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
293k
                    XML_MAX_TEXT_LENGTH :
3604
7.64M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
7.64M
    c = CUR_SCHAR(cur, l);
3611
7.64M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
967
  return(NULL);
3613
967
    }
3614
3615
7.64M
    COPY_BUF(l,buf,len,c);
3616
7.64M
    cur += l;
3617
7.64M
    c = CUR_SCHAR(cur, l);
3618
73.3M
    while (xmlIsNameChar(ctxt, c)) {
3619
66.1M
  COPY_BUF(l,buf,len,c);
3620
66.1M
  cur += l;
3621
66.1M
  c = CUR_SCHAR(cur, l);
3622
66.1M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
396k
      xmlChar *buffer;
3628
396k
      int max = len * 2;
3629
3630
396k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
396k
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
396k
      memcpy(buffer, buf, len);
3636
90.1M
      while (xmlIsNameChar(ctxt, c)) {
3637
89.7M
    if (len + 10 > max) {
3638
396k
        xmlChar *tmp;
3639
3640
396k
        max *= 2;
3641
396k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
396k
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
396k
        buffer = tmp;
3648
396k
    }
3649
89.7M
    COPY_BUF(l,buffer,len,c);
3650
89.7M
    cur += l;
3651
89.7M
    c = CUR_SCHAR(cur, l);
3652
89.7M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
89.7M
      }
3658
396k
      buffer[len] = 0;
3659
396k
      *str = cur;
3660
396k
      return(buffer);
3661
396k
  }
3662
66.1M
    }
3663
7.24M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
7.24M
    *str = cur;
3668
7.24M
    return(xmlStrndup(buf, len));
3669
7.24M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
133k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
133k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
133k
    int len = 0, l;
3690
133k
    int c;
3691
133k
    int count = 0;
3692
133k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
46.9k
                    XML_MAX_TEXT_LENGTH :
3694
133k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
133k
    GROW;
3701
133k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
133k
    c = CUR_CHAR(l);
3704
3705
812k
    while (xmlIsNameChar(ctxt, c)) {
3706
679k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
679k
  COPY_BUF(l,buf,len,c);
3711
679k
  NEXTL(l);
3712
679k
  c = CUR_CHAR(l);
3713
679k
  if (c == 0) {
3714
116
      count = 0;
3715
116
      GROW;
3716
116
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
116
            c = CUR_CHAR(l);
3719
116
  }
3720
679k
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
289
      xmlChar *buffer;
3726
289
      int max = len * 2;
3727
3728
289
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
289
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
289
      memcpy(buffer, buf, len);
3734
333k
      while (xmlIsNameChar(ctxt, c)) {
3735
332k
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
3.37k
        count = 0;
3737
3.37k
        GROW;
3738
3.37k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
3.37k
    }
3743
332k
    if (len + 10 > max) {
3744
448
        xmlChar *tmp;
3745
3746
448
        max *= 2;
3747
448
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
448
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
448
        buffer = tmp;
3754
448
    }
3755
332k
    COPY_BUF(l,buffer,len,c);
3756
332k
    NEXTL(l);
3757
332k
    c = CUR_CHAR(l);
3758
332k
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
332k
      }
3764
289
      buffer[len] = 0;
3765
289
      return(buffer);
3766
289
  }
3767
679k
    }
3768
133k
    if (len == 0)
3769
1.04k
        return(NULL);
3770
131k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
131k
    return(xmlStrndup(buf, len));
3775
131k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
131k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
131k
    xmlChar *buf = NULL;
3795
131k
    int len = 0;
3796
131k
    int size = XML_PARSER_BUFFER_SIZE;
3797
131k
    int c, l;
3798
131k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
35.0k
                    XML_MAX_HUGE_LENGTH :
3800
131k
                    XML_MAX_TEXT_LENGTH;
3801
131k
    xmlChar stop;
3802
131k
    xmlChar *ret = NULL;
3803
131k
    const xmlChar *cur = NULL;
3804
131k
    xmlParserInputPtr input;
3805
3806
131k
    if (RAW == '"') stop = '"';
3807
27.1k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
131k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
131k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
131k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
131k
    input = ctxt->input;
3824
131k
    GROW;
3825
131k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
131k
    NEXT;
3828
131k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
6.95M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
6.95M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
6.82M
  if (len + 5 >= size) {
3841
16.9k
      xmlChar *tmp;
3842
3843
16.9k
      size *= 2;
3844
16.9k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
16.9k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
16.9k
      buf = tmp;
3850
16.9k
  }
3851
6.82M
  COPY_BUF(l,buf,len,c);
3852
6.82M
  NEXTL(l);
3853
3854
6.82M
  GROW;
3855
6.82M
  c = CUR_CHAR(l);
3856
6.82M
  if (c == 0) {
3857
313
      GROW;
3858
313
      c = CUR_CHAR(l);
3859
313
  }
3860
3861
6.82M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
6.82M
    }
3867
131k
    buf[len] = 0;
3868
131k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
131k
    if (c != stop) {
3871
531
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
531
        goto error;
3873
531
    }
3874
130k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
130k
    cur = buf;
3882
4.94M
    while (*cur != 0) { /* non input consuming */
3883
4.81M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
94.8k
      xmlChar *name;
3885
94.8k
      xmlChar tmp = *cur;
3886
94.8k
            int nameOk = 0;
3887
3888
94.8k
      cur++;
3889
94.8k
      name = xmlParseStringName(ctxt, &cur);
3890
94.8k
            if (name != NULL) {
3891
94.4k
                nameOk = 1;
3892
94.4k
                xmlFree(name);
3893
94.4k
            }
3894
94.8k
            if ((nameOk == 0) || (*cur != ';')) {
3895
1.05k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
1.05k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
1.05k
                            tmp);
3898
1.05k
                goto error;
3899
1.05k
      }
3900
93.8k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
93.8k
    (ctxt->inputNr == 1)) {
3902
888
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
888
                goto error;
3904
888
      }
3905
92.9k
      if (*cur == 0)
3906
0
          break;
3907
92.9k
  }
3908
4.81M
  cur++;
3909
4.81M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
128k
    ++ctxt->depth;
3920
128k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
128k
                                     0, 0, 0, /* check */ 1);
3922
128k
    --ctxt->depth;
3923
3924
128k
    if (orig != NULL) {
3925
128k
        *orig = buf;
3926
128k
        buf = NULL;
3927
128k
    }
3928
3929
131k
error:
3930
131k
    if (buf != NULL)
3931
2.47k
        xmlFree(buf);
3932
131k
    return(ret);
3933
128k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
57.6k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
57.6k
    xmlChar limit = 0;
3950
57.6k
    xmlChar *buf = NULL;
3951
57.6k
    xmlChar *rep = NULL;
3952
57.6k
    size_t len = 0;
3953
57.6k
    size_t buf_size = 0;
3954
57.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
21.6k
                       XML_MAX_HUGE_LENGTH :
3956
57.6k
                       XML_MAX_TEXT_LENGTH;
3957
57.6k
    int c, l, in_space = 0;
3958
57.6k
    xmlChar *current = NULL;
3959
57.6k
    xmlEntityPtr ent;
3960
3961
57.6k
    if (NXT(0) == '"') {
3962
34.1k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
34.1k
  limit = '"';
3964
34.1k
        NEXT;
3965
34.1k
    } else if (NXT(0) == '\'') {
3966
23.4k
  limit = '\'';
3967
23.4k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
23.4k
        NEXT;
3969
23.4k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
57.6k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
57.6k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
57.6k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
57.6k
    c = CUR_CHAR(l);
3985
3.86M
    while (((NXT(0) != limit) && /* checked */
3986
3.86M
            (IS_CHAR(c)) && (c != '<')) &&
3987
3.86M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
3.80M
  if (c == '&') {
3989
606k
      in_space = 0;
3990
606k
      if (NXT(1) == '#') {
3991
21.9k
    int val = xmlParseCharRef(ctxt);
3992
3993
21.9k
    if (val == '&') {
3994
480
        if (ctxt->replaceEntities) {
3995
274
      if (len + 10 > buf_size) {
3996
12
          growBuffer(buf, 10);
3997
12
      }
3998
274
      buf[len++] = '&';
3999
274
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
206
      if (len + 10 > buf_size) {
4005
12
          growBuffer(buf, 10);
4006
12
      }
4007
206
      buf[len++] = '&';
4008
206
      buf[len++] = '#';
4009
206
      buf[len++] = '3';
4010
206
      buf[len++] = '8';
4011
206
      buf[len++] = ';';
4012
206
        }
4013
21.4k
    } else if (val != 0) {
4014
19.4k
        if (len + 10 > buf_size) {
4015
312
      growBuffer(buf, 10);
4016
312
        }
4017
19.4k
        len += xmlCopyChar(0, &buf[len], val);
4018
19.4k
    }
4019
584k
      } else {
4020
584k
    ent = xmlParseEntityRef(ctxt);
4021
584k
    if ((ent != NULL) &&
4022
584k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
7.11k
        if (len + 10 > buf_size) {
4024
12
      growBuffer(buf, 10);
4025
12
        }
4026
7.11k
        if ((ctxt->replaceEntities == 0) &&
4027
7.11k
            (ent->content[0] == '&')) {
4028
2.52k
      buf[len++] = '&';
4029
2.52k
      buf[len++] = '#';
4030
2.52k
      buf[len++] = '3';
4031
2.52k
      buf[len++] = '8';
4032
2.52k
      buf[len++] = ';';
4033
4.59k
        } else {
4034
4.59k
      buf[len++] = ent->content[0];
4035
4.59k
        }
4036
577k
    } else if ((ent != NULL) &&
4037
577k
               (ctxt->replaceEntities != 0)) {
4038
352k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
352k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
352k
      ++ctxt->depth;
4043
352k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
352k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
352k
                                /* check */ 1);
4046
352k
      --ctxt->depth;
4047
352k
      if (rep != NULL) {
4048
347k
          current = rep;
4049
70.4M
          while (*current != 0) { /* non input consuming */
4050
70.1M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
70.1M
                                    (*current == 0x9)) {
4052
53.3k
                                    buf[len++] = 0x20;
4053
53.3k
                                    current++;
4054
53.3k
                                } else
4055
70.0M
                                    buf[len++] = *current++;
4056
70.1M
        if (len + 10 > buf_size) {
4057
7.42k
            growBuffer(buf, 10);
4058
7.42k
        }
4059
70.1M
          }
4060
347k
          xmlFree(rep);
4061
347k
          rep = NULL;
4062
347k
      }
4063
352k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
352k
    } else if (ent != NULL) {
4071
166k
        int i = xmlStrlen(ent->name);
4072
166k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
166k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
166k
      (ent->content != NULL)) {
4081
161k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
1.16k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
1.16k
                            ctxt->sizeentcopy = ent->length;
4085
4086
1.16k
                            ++ctxt->depth;
4087
1.16k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
1.16k
                                    ent->content, ent->length,
4089
1.16k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
1.16k
                                    /* check */ 1);
4091
1.16k
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
1.16k
                            if (ctxt->inSubset == 0) {
4100
1.10k
                                ent->flags |= XML_ENT_CHECKED;
4101
1.10k
                                ent->expandedSize = ctxt->sizeentcopy;
4102
1.10k
                            }
4103
4104
1.16k
                            if (rep != NULL) {
4105
1.14k
                                xmlFree(rep);
4106
1.14k
                                rep = NULL;
4107
1.14k
                            } else {
4108
18
                                ent->content[0] = 0;
4109
18
                            }
4110
4111
1.16k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
0
                                goto error;
4113
160k
                        } else {
4114
160k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
160k
                        }
4117
161k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
166k
        buf[len++] = '&';
4123
167k
        while (len + i + 10 > buf_size) {
4124
1.82k
      growBuffer(buf, i + 10);
4125
1.82k
        }
4126
377k
        for (;i > 0;i--)
4127
210k
      buf[len++] = *cur++;
4128
166k
        buf[len++] = ';';
4129
166k
    }
4130
584k
      }
4131
3.20M
  } else {
4132
3.20M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
583k
          if ((len != 0) || (!normalize)) {
4134
526k
        if ((!normalize) || (!in_space)) {
4135
459k
      COPY_BUF(l,buf,len,0x20);
4136
459k
      while (len + 10 > buf_size) {
4137
1.85k
          growBuffer(buf, 10);
4138
1.85k
      }
4139
459k
        }
4140
526k
        in_space = 1;
4141
526k
    }
4142
2.61M
      } else {
4143
2.61M
          in_space = 0;
4144
2.61M
    COPY_BUF(l,buf,len,c);
4145
2.61M
    if (len + 10 > buf_size) {
4146
13.9k
        growBuffer(buf, 10);
4147
13.9k
    }
4148
2.61M
      }
4149
3.20M
      NEXTL(l);
4150
3.20M
  }
4151
3.80M
  GROW;
4152
3.80M
  c = CUR_CHAR(l);
4153
3.80M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
3.80M
    }
4159
57.6k
    if (ctxt->instate == XML_PARSER_EOF)
4160
120
        goto error;
4161
4162
57.5k
    if ((in_space) && (normalize)) {
4163
4.80k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
2.14k
    }
4165
57.5k
    buf[len] = 0;
4166
57.5k
    if (RAW == '<') {
4167
9.62k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
47.8k
    } else if (RAW != limit) {
4169
8.91k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
3.71k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
3.71k
         "invalid character in attribute value\n");
4172
5.20k
  } else {
4173
5.20k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
5.20k
         "AttValue: ' expected\n");
4175
5.20k
        }
4176
8.91k
    } else
4177
38.9k
  NEXT;
4178
4179
57.5k
    if (attlen != NULL) *attlen = len;
4180
57.5k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
120
error:
4185
120
    if (buf != NULL)
4186
120
        xmlFree(buf);
4187
120
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
120
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
662k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
662k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
662k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
662k
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
44.5k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
44.5k
    xmlChar *buf = NULL;
4250
44.5k
    int len = 0;
4251
44.5k
    int size = XML_PARSER_BUFFER_SIZE;
4252
44.5k
    int cur, l;
4253
44.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
18.0k
                    XML_MAX_TEXT_LENGTH :
4255
44.5k
                    XML_MAX_NAME_LENGTH;
4256
44.5k
    xmlChar stop;
4257
44.5k
    int state = ctxt->instate;
4258
44.5k
    int count = 0;
4259
4260
44.5k
    SHRINK;
4261
44.5k
    if (RAW == '"') {
4262
42.5k
        NEXT;
4263
42.5k
  stop = '"';
4264
42.5k
    } else if (RAW == '\'') {
4265
1.23k
        NEXT;
4266
1.23k
  stop = '\'';
4267
1.23k
    } else {
4268
813
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
813
  return(NULL);
4270
813
    }
4271
4272
43.7k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
43.7k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
43.7k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
43.7k
    cur = CUR_CHAR(l);
4279
1.83M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
1.78M
  if (len + 5 >= size) {
4281
1.52k
      xmlChar *tmp;
4282
4283
1.52k
      size *= 2;
4284
1.52k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
1.52k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
1.52k
      buf = tmp;
4292
1.52k
  }
4293
1.78M
  count++;
4294
1.78M
  if (count > 50) {
4295
22.0k
      SHRINK;
4296
22.0k
      GROW;
4297
22.0k
      count = 0;
4298
22.0k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
22.0k
  }
4303
1.78M
  COPY_BUF(l,buf,len,cur);
4304
1.78M
  NEXTL(l);
4305
1.78M
  cur = CUR_CHAR(l);
4306
1.78M
  if (cur == 0) {
4307
391
      GROW;
4308
391
      SHRINK;
4309
391
      cur = CUR_CHAR(l);
4310
391
  }
4311
1.78M
        if (len > maxLength) {
4312
3
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
3
            xmlFree(buf);
4314
3
            ctxt->instate = (xmlParserInputState) state;
4315
3
            return(NULL);
4316
3
        }
4317
1.78M
    }
4318
43.7k
    buf[len] = 0;
4319
43.7k
    ctxt->instate = (xmlParserInputState) state;
4320
43.7k
    if (!IS_CHAR(cur)) {
4321
758
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
42.9k
    } else {
4323
42.9k
  NEXT;
4324
42.9k
    }
4325
43.7k
    return(buf);
4326
43.7k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
10.4k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
10.4k
    xmlChar *buf = NULL;
4344
10.4k
    int len = 0;
4345
10.4k
    int size = XML_PARSER_BUFFER_SIZE;
4346
10.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
3.44k
                    XML_MAX_TEXT_LENGTH :
4348
10.4k
                    XML_MAX_NAME_LENGTH;
4349
10.4k
    xmlChar cur;
4350
10.4k
    xmlChar stop;
4351
10.4k
    int count = 0;
4352
10.4k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
10.4k
    SHRINK;
4355
10.4k
    if (RAW == '"') {
4356
9.33k
        NEXT;
4357
9.33k
  stop = '"';
4358
9.33k
    } else if (RAW == '\'') {
4359
1.03k
        NEXT;
4360
1.03k
  stop = '\'';
4361
1.03k
    } else {
4362
108
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
108
  return(NULL);
4364
108
    }
4365
10.3k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
10.3k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
10.3k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
10.3k
    cur = CUR;
4372
579k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
569k
  if (len + 1 >= size) {
4374
607
      xmlChar *tmp;
4375
4376
607
      size *= 2;
4377
607
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
607
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
607
      buf = tmp;
4384
607
  }
4385
569k
  buf[len++] = cur;
4386
569k
  count++;
4387
569k
  if (count > 50) {
4388
5.89k
      SHRINK;
4389
5.89k
      GROW;
4390
5.89k
      count = 0;
4391
5.89k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
5.89k
  }
4396
569k
  NEXT;
4397
569k
  cur = CUR;
4398
569k
  if (cur == 0) {
4399
88
      GROW;
4400
88
      SHRINK;
4401
88
      cur = CUR;
4402
88
  }
4403
569k
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
569k
    }
4409
10.3k
    buf[len] = 0;
4410
10.3k
    if (cur != stop) {
4411
521
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
9.84k
    } else {
4413
9.84k
  NEXT;
4414
9.84k
    }
4415
10.3k
    ctxt->instate = oldstate;
4416
10.3k
    return(buf);
4417
10.3k
}
4418
4419
/* Forward declaration: slow-path fallback called at the end of xmlParseCharData(). */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table driving the inner scanning loop of xmlParseCharData().
 *
 * An entry is non-zero (equal to its own index) when the byte can be
 * skipped over as plain character data, and zero when the scanner has
 * to stop and look at the byte more closely:
 *   - control characters other than TAB (0x09), including LF and CR,
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D),
 *   - every non-ASCII byte (0x80 - 0xFF).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB passes */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F: control characters */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ASCII bytes always stop the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
3.08M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
3.08M
    const xmlChar *in;
4482
3.08M
    int nbchar = 0;
4483
3.08M
    int line = ctxt->input->line;
4484
3.08M
    int col = ctxt->input->col;
4485
3.08M
    int ccol;
4486
4487
3.08M
    SHRINK;
4488
3.08M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
3.08M
    in = ctxt->input->cur;
4494
3.97M
    do {
4495
5.00M
get_more_space:
4496
6.74M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
5.00M
        if (*in == 0xA) {
4498
1.07M
            do {
4499
1.07M
                ctxt->input->line++; ctxt->input->col = 1;
4500
1.07M
                in++;
4501
1.07M
            } while (*in == 0xA);
4502
1.03M
            goto get_more_space;
4503
1.03M
        }
4504
3.97M
        if (*in == '<') {
4505
803k
            nbchar = in - ctxt->input->cur;
4506
803k
            if (nbchar > 0) {
4507
803k
                const xmlChar *tmp = ctxt->input->cur;
4508
803k
                ctxt->input->cur = in;
4509
4510
803k
                if ((ctxt->sax != NULL) &&
4511
803k
                    (ctxt->sax->ignorableWhitespace !=
4512
803k
                     ctxt->sax->characters)) {
4513
300k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
246k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
246k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
246k
                                                   tmp, nbchar);
4517
246k
                    } else {
4518
54.4k
                        if (ctxt->sax->characters != NULL)
4519
54.4k
                            ctxt->sax->characters(ctxt->userData,
4520
54.4k
                                                  tmp, nbchar);
4521
54.4k
                        if (*ctxt->space == -1)
4522
13.0k
                            *ctxt->space = -2;
4523
54.4k
                    }
4524
502k
                } else if ((ctxt->sax != NULL) &&
4525
502k
                           (ctxt->sax->characters != NULL)) {
4526
502k
                    ctxt->sax->characters(ctxt->userData,
4527
502k
                                          tmp, nbchar);
4528
502k
                }
4529
803k
            }
4530
803k
            return;
4531
803k
        }
4532
4533
4.04M
get_more:
4534
4.04M
        ccol = ctxt->input->col;
4535
57.3M
        while (test_char_data[*in]) {
4536
53.3M
            in++;
4537
53.3M
            ccol++;
4538
53.3M
        }
4539
4.04M
        ctxt->input->col = ccol;
4540
4.04M
        if (*in == 0xA) {
4541
840k
            do {
4542
840k
                ctxt->input->line++; ctxt->input->col = 1;
4543
840k
                in++;
4544
840k
            } while (*in == 0xA);
4545
813k
            goto get_more;
4546
813k
        }
4547
3.22M
        if (*in == ']') {
4548
55.7k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
1.28k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
1.28k
                ctxt->input->cur = in + 1;
4551
1.28k
                return;
4552
1.28k
            }
4553
54.5k
            in++;
4554
54.5k
            ctxt->input->col++;
4555
54.5k
            goto get_more;
4556
55.7k
        }
4557
3.17M
        nbchar = in - ctxt->input->cur;
4558
3.17M
        if (nbchar > 0) {
4559
2.26M
            if ((ctxt->sax != NULL) &&
4560
2.26M
                (ctxt->sax->ignorableWhitespace !=
4561
2.26M
                 ctxt->sax->characters) &&
4562
2.26M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
405k
                const xmlChar *tmp = ctxt->input->cur;
4564
405k
                ctxt->input->cur = in;
4565
4566
405k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
169k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
169k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
169k
                                                       tmp, nbchar);
4570
235k
                } else {
4571
235k
                    if (ctxt->sax->characters != NULL)
4572
235k
                        ctxt->sax->characters(ctxt->userData,
4573
235k
                                              tmp, nbchar);
4574
235k
                    if (*ctxt->space == -1)
4575
55.4k
                        *ctxt->space = -2;
4576
235k
                }
4577
405k
                line = ctxt->input->line;
4578
405k
                col = ctxt->input->col;
4579
1.86M
            } else if (ctxt->sax != NULL) {
4580
1.86M
                if (ctxt->sax->characters != NULL)
4581
1.86M
                    ctxt->sax->characters(ctxt->userData,
4582
1.86M
                                          ctxt->input->cur, nbchar);
4583
1.86M
                line = ctxt->input->line;
4584
1.86M
                col = ctxt->input->col;
4585
1.86M
            }
4586
2.26M
        }
4587
3.17M
        ctxt->input->cur = in;
4588
3.17M
        if (*in == 0xD) {
4589
898k
            in++;
4590
898k
            if (*in == 0xA) {
4591
894k
                ctxt->input->cur = in;
4592
894k
                in++;
4593
894k
                ctxt->input->line++; ctxt->input->col = 1;
4594
894k
                continue; /* while */
4595
894k
            }
4596
3.94k
            in--;
4597
3.94k
        }
4598
2.27M
        if (*in == '<') {
4599
1.95M
            return;
4600
1.95M
        }
4601
322k
        if (*in == '&') {
4602
148k
            return;
4603
148k
        }
4604
173k
        SHRINK;
4605
173k
        GROW;
4606
173k
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
173k
        in = ctxt->input->cur;
4609
1.06M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
1.06M
             (*in == 0x09) || (*in == 0x0a));
4611
176k
    ctxt->input->line = line;
4612
176k
    ctxt->input->col = col;
4613
176k
    xmlParseCharDataComplex(ctxt);
4614
176k
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
176k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
176k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
176k
    int nbchar = 0;
4631
176k
    int cur, l;
4632
176k
    int count = 0;
4633
4634
176k
    SHRINK;
4635
176k
    GROW;
4636
176k
    cur = CUR_CHAR(l);
4637
4.54M
    while ((cur != '<') && /* checked */
4638
4.54M
           (cur != '&') &&
4639
4.54M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
4.36M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
818
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
818
  }
4643
4.36M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
4.36M
  NEXTL(l);
4646
4.36M
  cur = CUR_CHAR(l);
4647
4.36M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
10.6k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
10.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
8.59k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
0
                                     buf, nbchar);
4658
8.59k
    } else {
4659
8.59k
        if (ctxt->sax->characters != NULL)
4660
8.59k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
8.59k
        if ((ctxt->sax->characters !=
4662
8.59k
             ctxt->sax->ignorableWhitespace) &&
4663
8.59k
      (*ctxt->space == -1))
4664
345
      *ctxt->space = -2;
4665
8.59k
    }
4666
8.59k
      }
4667
10.6k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
10.6k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
10.6k
  }
4672
4.36M
  count++;
4673
4.36M
  if (count > 50) {
4674
66.6k
      SHRINK;
4675
66.6k
      GROW;
4676
66.6k
      count = 0;
4677
66.6k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
66.6k
  }
4680
4.36M
    }
4681
176k
    if (nbchar != 0) {
4682
70.6k
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
70.6k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
60.8k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
452
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
452
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
60.3k
      } else {
4691
60.3k
    if (ctxt->sax->characters != NULL)
4692
60.3k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
60.3k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
60.3k
        (*ctxt->space == -1))
4695
11.9k
        *ctxt->space = -2;
4696
60.3k
      }
4697
60.8k
  }
4698
70.6k
    }
4699
176k
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
99.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
99.9k
                          "PCDATA invalid Char value %d\n",
4703
99.9k
                    cur ? cur : CUR);
4704
99.9k
  NEXT;
4705
99.9k
    }
4706
176k
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
73.9k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
73.9k
    xmlChar *URI = NULL;
4735
4736
73.9k
    SHRINK;
4737
4738
73.9k
    *publicID = NULL;
4739
73.9k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
34.5k
        SKIP(6);
4741
34.5k
  if (SKIP_BLANKS == 0) {
4742
58
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
58
                     "Space required after 'SYSTEM'\n");
4744
58
  }
4745
34.5k
  URI = xmlParseSystemLiteral(ctxt);
4746
34.5k
  if (URI == NULL) {
4747
91
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
91
        }
4749
39.4k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
10.4k
        SKIP(6);
4751
10.4k
  if (SKIP_BLANKS == 0) {
4752
72
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
72
        "Space required after 'PUBLIC'\n");
4754
72
  }
4755
10.4k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
10.4k
  if (*publicID == NULL) {
4757
108
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
108
  }
4759
10.4k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
10.0k
      if (SKIP_BLANKS == 0) {
4764
688
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
688
      "Space required after the Public Identifier\n");
4766
688
      }
4767
10.0k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
431
      if (SKIP_BLANKS == 0) return(NULL);
4775
39
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
39
  }
4777
10.0k
  URI = xmlParseSystemLiteral(ctxt);
4778
10.0k
  if (URI == NULL) {
4779
725
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
725
        }
4781
10.0k
    }
4782
73.4k
    return(URI);
4783
73.9k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
9.24k
                       size_t len, size_t size) {
4802
9.24k
    int q, ql;
4803
9.24k
    int r, rl;
4804
9.24k
    int cur, l;
4805
9.24k
    size_t count = 0;
4806
9.24k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
3.18k
                       XML_MAX_HUGE_LENGTH :
4808
9.24k
                       XML_MAX_TEXT_LENGTH;
4809
9.24k
    int inputid;
4810
4811
9.24k
    inputid = ctxt->input->id;
4812
4813
9.24k
    if (buf == NULL) {
4814
391
        len = 0;
4815
391
  size = XML_PARSER_BUFFER_SIZE;
4816
391
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
391
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
391
    }
4822
9.24k
    GROW; /* Assure there's enough input data */
4823
9.24k
    q = CUR_CHAR(ql);
4824
9.24k
    if (q == 0)
4825
1.20k
        goto not_terminated;
4826
8.03k
    if (!IS_CHAR(q)) {
4827
957
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
957
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
957
                    q);
4830
957
  xmlFree (buf);
4831
957
  return;
4832
957
    }
4833
7.07k
    NEXTL(ql);
4834
7.07k
    r = CUR_CHAR(rl);
4835
7.07k
    if (r == 0)
4836
127
        goto not_terminated;
4837
6.95k
    if (!IS_CHAR(r)) {
4838
105
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
105
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
105
                    r);
4841
105
  xmlFree (buf);
4842
105
  return;
4843
105
    }
4844
6.84k
    NEXTL(rl);
4845
6.84k
    cur = CUR_CHAR(l);
4846
6.84k
    if (cur == 0)
4847
92
        goto not_terminated;
4848
3.30M
    while (IS_CHAR(cur) && /* checked */
4849
3.30M
           ((cur != '>') ||
4850
3.29M
      (r != '-') || (q != '-'))) {
4851
3.29M
  if ((r == '-') && (q == '-')) {
4852
4.00k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
4.00k
  }
4854
3.29M
  if (len + 5 >= size) {
4855
3.81k
      xmlChar *new_buf;
4856
3.81k
            size_t new_size;
4857
4858
3.81k
      new_size = size * 2;
4859
3.81k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
3.81k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
3.81k
      buf = new_buf;
4866
3.81k
            size = new_size;
4867
3.81k
  }
4868
3.29M
  COPY_BUF(ql,buf,len,q);
4869
3.29M
  q = r;
4870
3.29M
  ql = rl;
4871
3.29M
  r = cur;
4872
3.29M
  rl = l;
4873
4874
3.29M
  count++;
4875
3.29M
  if (count > 50) {
4876
62.4k
      SHRINK;
4877
62.4k
      GROW;
4878
62.4k
      count = 0;
4879
62.4k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
62.4k
  }
4884
3.29M
  NEXTL(l);
4885
3.29M
  cur = CUR_CHAR(l);
4886
3.29M
  if (cur == 0) {
4887
896
      SHRINK;
4888
896
      GROW;
4889
896
      cur = CUR_CHAR(l);
4890
896
  }
4891
4892
3.29M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
3.29M
    }
4899
6.75k
    buf[len] = 0;
4900
6.75k
    if (cur == 0) {
4901
896
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
896
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
5.85k
    } else if (!IS_CHAR(cur)) {
4904
786
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
786
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
786
                    cur);
4907
5.07k
    } else {
4908
5.07k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
5.07k
        NEXT;
4914
5.07k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
5.07k
      (!ctxt->disableSAX))
4916
4.01k
      ctxt->sax->comment(ctxt->userData, buf);
4917
5.07k
    }
4918
6.75k
    xmlFree(buf);
4919
6.75k
    return;
4920
1.42k
not_terminated:
4921
1.42k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
1.42k
       "Comment not terminated\n", NULL);
4923
1.42k
    xmlFree(buf);
4924
1.42k
    return;
4925
6.75k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
4.93M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
4.93M
    xmlChar *buf = NULL;
4943
4.93M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
4.93M
    size_t len = 0;
4945
4.93M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
40.9k
                       XML_MAX_HUGE_LENGTH :
4947
4.93M
                       XML_MAX_TEXT_LENGTH;
4948
4.93M
    xmlParserInputState state;
4949
4.93M
    const xmlChar *in;
4950
4.93M
    size_t nbchar = 0;
4951
4.93M
    int ccol;
4952
4.93M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
4.93M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
4.93M
    SKIP(2);
4960
4.93M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
33
        return;
4962
4.93M
    state = ctxt->instate;
4963
4.93M
    ctxt->instate = XML_PARSER_COMMENT;
4964
4.93M
    inputid = ctxt->input->id;
4965
4.93M
    SKIP(2);
4966
4.93M
    SHRINK;
4967
4.93M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
4.93M
    in = ctxt->input->cur;
4974
4.93M
    do {
4975
4.93M
  if (*in == 0xA) {
4976
21.7k
      do {
4977
21.7k
    ctxt->input->line++; ctxt->input->col = 1;
4978
21.7k
    in++;
4979
21.7k
      } while (*in == 0xA);
4980
21.0k
  }
4981
5.54M
get_more:
4982
5.54M
        ccol = ctxt->input->col;
4983
30.3M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
30.3M
         ((*in >= 0x20) && (*in < '-')) ||
4985
30.3M
         (*in == 0x09)) {
4986
24.7M
        in++;
4987
24.7M
        ccol++;
4988
24.7M
  }
4989
5.54M
  ctxt->input->col = ccol;
4990
5.54M
  if (*in == 0xA) {
4991
216k
      do {
4992
216k
    ctxt->input->line++; ctxt->input->col = 1;
4993
216k
    in++;
4994
216k
      } while (*in == 0xA);
4995
206k
      goto get_more;
4996
206k
  }
4997
5.33M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
5.33M
  if (nbchar > 0) {
5002
556k
      if ((ctxt->sax != NULL) &&
5003
556k
    (ctxt->sax->comment != NULL)) {
5004
556k
    if (buf == NULL) {
5005
151k
        if ((*in == '-') && (in[1] == '-'))
5006
84.7k
            size = nbchar + 1;
5007
66.5k
        else
5008
66.5k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
151k
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
151k
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
151k
        len = 0;
5016
404k
    } else if (len + nbchar + 1 >= size) {
5017
49.7k
        xmlChar *new_buf;
5018
49.7k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
49.7k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
49.7k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
49.7k
        buf = new_buf;
5027
49.7k
    }
5028
556k
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
556k
    len += nbchar;
5030
556k
    buf[len] = 0;
5031
556k
      }
5032
556k
  }
5033
5.33M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
5.33M
  ctxt->input->cur = in;
5040
5.33M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
5.33M
  if (*in == 0xD) {
5045
220k
      in++;
5046
220k
      if (*in == 0xA) {
5047
220k
    ctxt->input->cur = in;
5048
220k
    in++;
5049
220k
    ctxt->input->line++; ctxt->input->col = 1;
5050
220k
    goto get_more;
5051
220k
      }
5052
400
      in--;
5053
400
  }
5054
5.11M
  SHRINK;
5055
5.11M
  GROW;
5056
5.11M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
5.11M
  in = ctxt->input->cur;
5061
5.11M
  if (*in == '-') {
5062
5.10M
      if (in[1] == '-') {
5063
4.92M
          if (in[2] == '>') {
5064
4.92M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
4.92M
        SKIP(3);
5070
4.92M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
4.92M
            (!ctxt->disableSAX)) {
5072
4.90M
      if (buf != NULL)
5073
127k
          ctxt->sax->comment(ctxt->userData, buf);
5074
4.77M
      else
5075
4.77M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
4.90M
        }
5077
4.92M
        if (buf != NULL)
5078
142k
            xmlFree(buf);
5079
4.92M
        if (ctxt->instate != XML_PARSER_EOF)
5080
4.92M
      ctxt->instate = state;
5081
4.92M
        return;
5082
4.92M
    }
5083
2.47k
    if (buf != NULL) {
5084
2.38k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
2.38k
                          "Double hyphen within comment: "
5086
2.38k
                                      "<!--%.50s\n",
5087
2.38k
              buf);
5088
2.38k
    } else
5089
82
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
82
                          "Double hyphen within comment\n", NULL);
5091
2.47k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
2.47k
    in++;
5096
2.47k
    ctxt->input->col++;
5097
2.47k
      }
5098
187k
      in++;
5099
187k
      ctxt->input->col++;
5100
187k
      goto get_more;
5101
5.10M
  }
5102
5.11M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
9.24k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
9.24k
    ctxt->instate = state;
5105
9.24k
    return;
5106
4.93M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
19.0k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
19.0k
    const xmlChar *name;
5125
5126
19.0k
    name = xmlParseName(ctxt);
5127
19.0k
    if ((name != NULL) &&
5128
19.0k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
19.0k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
19.0k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
4.28k
  int i;
5132
4.28k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
4.28k
      (name[2] == 'l') && (name[3] == 0)) {
5134
1.04k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
1.04k
     "XML declaration allowed only at the start of the document\n");
5136
1.04k
      return(name);
5137
3.24k
  } else if (name[3] == 0) {
5138
152
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
152
      return(name);
5140
152
  }
5141
4.99k
  for (i = 0;;i++) {
5142
4.99k
      if (xmlW3CPIs[i] == NULL) break;
5143
4.05k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
2.14k
          return(name);
5145
4.05k
  }
5146
947
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
947
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
947
          NULL, NULL);
5149
947
    }
5150
15.7k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
716
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
716
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
716
    }
5154
15.7k
    return(name);
5155
19.0k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The expected value is: catalog = "URL" (single or double quotes),
 * with optional blanks around each token and nothing after the
 * closing quote. Anything else produces an XML_WAR_CATALOG_PI warning,
 * except a missing '=' which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=') {
        /* NOTE(review): no warning on this path, unlike the other
         * syntax errors - confirm this is intended. */
        return;
    }
    p++;

    /* opening quote */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* quoted value, up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
19.0k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
19.0k
    xmlChar *buf = NULL;
5235
19.0k
    size_t len = 0;
5236
19.0k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
19.0k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
7.21k
                       XML_MAX_HUGE_LENGTH :
5239
19.0k
                       XML_MAX_TEXT_LENGTH;
5240
19.0k
    int cur, l;
5241
19.0k
    const xmlChar *target;
5242
19.0k
    xmlParserInputState state;
5243
19.0k
    int count = 0;
5244
5245
19.0k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
19.0k
  int inputid = ctxt->input->id;
5247
19.0k
  state = ctxt->instate;
5248
19.0k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
19.0k
  SKIP(2);
5253
19.0k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
19.0k
        target = xmlParsePITarget(ctxt);
5260
19.0k
  if (target != NULL) {
5261
18.4k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
761
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
761
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
761
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
761
        (ctxt->sax->processingInstruction != NULL))
5274
633
        ctxt->sax->processingInstruction(ctxt->userData,
5275
633
                                         target, NULL);
5276
761
    if (ctxt->instate != XML_PARSER_EOF)
5277
761
        ctxt->instate = state;
5278
761
    return;
5279
761
      }
5280
17.6k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
17.6k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
17.6k
      if (SKIP_BLANKS == 0) {
5287
3.03k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
3.03k
        "ParsePI: PI %s space expected\n", target);
5289
3.03k
      }
5290
17.6k
      cur = CUR_CHAR(l);
5291
6.05M
      while (IS_CHAR(cur) && /* checked */
5292
6.05M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
6.03M
    if (len + 5 >= size) {
5294
4.00k
        xmlChar *tmp;
5295
4.00k
                    size_t new_size = size * 2;
5296
4.00k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
4.00k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
4.00k
        buf = tmp;
5304
4.00k
                    size = new_size;
5305
4.00k
    }
5306
6.03M
    count++;
5307
6.03M
    if (count > 50) {
5308
113k
        SHRINK;
5309
113k
        GROW;
5310
113k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
113k
        count = 0;
5315
113k
    }
5316
6.03M
    COPY_BUF(l,buf,len,cur);
5317
6.03M
    NEXTL(l);
5318
6.03M
    cur = CUR_CHAR(l);
5319
6.03M
    if (cur == 0) {
5320
1.17k
        SHRINK;
5321
1.17k
        GROW;
5322
1.17k
        cur = CUR_CHAR(l);
5323
1.17k
    }
5324
6.03M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
6.03M
      }
5332
17.6k
      buf[len] = 0;
5333
17.6k
      if (cur != '?') {
5334
2.50k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
2.50k
          "ParsePI: PI %s never end ...\n", target);
5336
15.1k
      } else {
5337
15.1k
    if (inputid != ctxt->input->id) {
5338
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
0
                             "PI declaration doesn't start and stop in"
5340
0
                                   " the same entity\n");
5341
0
    }
5342
15.1k
    SKIP(2);
5343
5344
15.1k
#ifdef LIBXML_CATALOG_ENABLED
5345
15.1k
    if (((state == XML_PARSER_MISC) ||
5346
15.1k
               (state == XML_PARSER_START)) &&
5347
15.1k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
3
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
3
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
3
      (allow == XML_CATA_ALLOW_ALL))
5351
3
      xmlParseCatalogPI(ctxt, buf);
5352
3
    }
5353
15.1k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
15.1k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
15.1k
        (ctxt->sax->processingInstruction != NULL))
5361
13.2k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
13.2k
                                         target, buf);
5363
15.1k
      }
5364
17.6k
      xmlFree(buf);
5365
17.6k
  } else {
5366
685
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
685
  }
5368
18.3k
  if (ctxt->instate != XML_PARSER_EOF)
5369
18.3k
      ctxt->instate = state;
5370
18.3k
    }
5371
19.0k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
1.11k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
1.11k
    const xmlChar *name;
5394
1.11k
    xmlChar *Pubid;
5395
1.11k
    xmlChar *Systemid;
5396
5397
1.11k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
1.11k
    SKIP(2);
5400
5401
1.11k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
971
  int inputid = ctxt->input->id;
5403
971
  SHRINK;
5404
971
  SKIP(8);
5405
971
  if (SKIP_BLANKS == 0) {
5406
64
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
64
         "Space required after '<!NOTATION'\n");
5408
64
      return;
5409
64
  }
5410
5411
907
        name = xmlParseName(ctxt);
5412
907
  if (name == NULL) {
5413
51
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
51
      return;
5415
51
  }
5416
856
  if (xmlStrchr(name, ':') != NULL) {
5417
18
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
18
         "colons are forbidden from notation names '%s'\n",
5419
18
         name, NULL, NULL);
5420
18
  }
5421
856
  if (SKIP_BLANKS == 0) {
5422
55
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
55
         "Space required after the NOTATION name'\n");
5424
55
      return;
5425
55
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
801
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
801
  SKIP_BLANKS;
5432
5433
801
  if (RAW == '>') {
5434
636
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
636
      NEXT;
5440
636
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
636
    (ctxt->sax->notationDecl != NULL))
5442
515
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
636
  } else {
5444
165
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
165
  }
5446
801
  if (Systemid != NULL) xmlFree(Systemid);
5447
801
  if (Pubid != NULL) xmlFree(Pubid);
5448
801
    }
5449
1.11k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
157k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
157k
    const xmlChar *name = NULL;
5478
157k
    xmlChar *value = NULL;
5479
157k
    xmlChar *URI = NULL, *literal = NULL;
5480
157k
    const xmlChar *ndata = NULL;
5481
157k
    int isParameter = 0;
5482
157k
    xmlChar *orig = NULL;
5483
5484
157k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
157k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
157k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
156k
  int inputid = ctxt->input->id;
5491
156k
  SHRINK;
5492
156k
  SKIP(6);
5493
156k
  if (SKIP_BLANKS == 0) {
5494
417
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
417
         "Space required after '<!ENTITY'\n");
5496
417
  }
5497
5498
156k
  if (RAW == '%') {
5499
65.1k
      NEXT;
5500
65.1k
      if (SKIP_BLANKS == 0) {
5501
67
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
67
             "Space required after '%%'\n");
5503
67
      }
5504
65.1k
      isParameter = 1;
5505
65.1k
  }
5506
5507
156k
        name = xmlParseName(ctxt);
5508
156k
  if (name == NULL) {
5509
438
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
438
                     "xmlParseEntityDecl: no name\n");
5511
438
            return;
5512
438
  }
5513
156k
  if (xmlStrchr(name, ':') != NULL) {
5514
161
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
161
         "colons are forbidden from entities names '%s'\n",
5516
161
         name, NULL, NULL);
5517
161
  }
5518
156k
  if (SKIP_BLANKS == 0) {
5519
584
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
584
         "Space required after the entity name\n");
5521
584
  }
5522
5523
156k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
156k
  if (isParameter) {
5528
65.1k
      if ((RAW == '"') || (RAW == '\'')) {
5529
61.0k
          value = xmlParseEntityValue(ctxt, &orig);
5530
61.0k
    if (value) {
5531
59.4k
        if ((ctxt->sax != NULL) &&
5532
59.4k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
54.3k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
54.3k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
54.3k
            NULL, NULL, value);
5536
59.4k
    }
5537
61.0k
      } else {
5538
4.12k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
4.12k
    if ((URI == NULL) && (literal == NULL)) {
5540
220
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
220
    }
5542
4.12k
    if (URI) {
5543
3.86k
        xmlURIPtr uri;
5544
5545
3.86k
        uri = xmlParseURI((const char *) URI);
5546
3.86k
        if (uri == NULL) {
5547
167
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
167
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
3.70k
        } else {
5555
3.70k
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
15
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
3.68k
      } else {
5562
3.68k
          if ((ctxt->sax != NULL) &&
5563
3.68k
        (!ctxt->disableSAX) &&
5564
3.68k
        (ctxt->sax->entityDecl != NULL))
5565
3.64k
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
3.64k
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
3.64k
              literal, URI, NULL);
5568
3.68k
      }
5569
3.70k
      xmlFreeURI(uri);
5570
3.70k
        }
5571
3.86k
    }
5572
4.12k
      }
5573
91.2k
  } else {
5574
91.2k
      if ((RAW == '"') || (RAW == '\'')) {
5575
70.2k
          value = xmlParseEntityValue(ctxt, &orig);
5576
70.2k
    if ((ctxt->sax != NULL) &&
5577
70.2k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
64.3k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
64.3k
        XML_INTERNAL_GENERAL_ENTITY,
5580
64.3k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
70.2k
    if ((ctxt->myDoc == NULL) ||
5585
70.2k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
803
        if (ctxt->myDoc == NULL) {
5587
111
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
111
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
111
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
111
        }
5594
803
        if (ctxt->myDoc->intSubset == NULL)
5595
111
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
111
              BAD_CAST "fake", NULL, NULL);
5597
5598
803
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
803
                    NULL, NULL, value);
5600
803
    }
5601
70.2k
      } else {
5602
20.9k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
20.9k
    if ((URI == NULL) && (literal == NULL)) {
5604
845
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
845
    }
5606
20.9k
    if (URI) {
5607
19.8k
        xmlURIPtr uri;
5608
5609
19.8k
        uri = xmlParseURI((const char *)URI);
5610
19.8k
        if (uri == NULL) {
5611
677
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
677
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
19.1k
        } else {
5619
19.1k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
140
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
140
      }
5626
19.1k
      xmlFreeURI(uri);
5627
19.1k
        }
5628
19.8k
    }
5629
20.9k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
1.27k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
1.27k
           "Space required before 'NDATA'\n");
5632
1.27k
    }
5633
20.9k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
1.39k
        SKIP(5);
5635
1.39k
        if (SKIP_BLANKS == 0) {
5636
59
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
59
               "Space required after 'NDATA'\n");
5638
59
        }
5639
1.39k
        ndata = xmlParseName(ctxt);
5640
1.39k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
1.39k
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
1.29k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
1.29k
            literal, URI, ndata);
5644
19.5k
    } else {
5645
19.5k
        if ((ctxt->sax != NULL) &&
5646
19.5k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
18.5k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
18.5k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
18.5k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
19.5k
        if ((ctxt->replaceEntities != 0) &&
5655
19.5k
      ((ctxt->myDoc == NULL) ||
5656
10.7k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
110
      if (ctxt->myDoc == NULL) {
5658
37
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
37
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
37
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
37
      }
5665
5666
110
      if (ctxt->myDoc->intSubset == NULL)
5667
37
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
37
            BAD_CAST "fake", NULL, NULL);
5669
110
      xmlSAX2EntityDecl(ctxt, name,
5670
110
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
110
                  literal, URI, NULL);
5672
110
        }
5673
19.5k
    }
5674
20.9k
      }
5675
91.2k
  }
5676
156k
  if (ctxt->instate == XML_PARSER_EOF)
5677
69
      goto done;
5678
156k
  SKIP_BLANKS;
5679
156k
  if (RAW != '>') {
5680
2.96k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
2.96k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
2.96k
      xmlHaltParser(ctxt);
5683
153k
  } else {
5684
153k
      if (inputid != ctxt->input->id) {
5685
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
0
                         "Entity declaration doesn't start and stop in"
5687
0
                               " the same entity\n");
5688
0
      }
5689
153k
      NEXT;
5690
153k
  }
5691
156k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
128k
      xmlEntityPtr cur = NULL;
5696
5697
128k
      if (isParameter) {
5698
59.6k
          if ((ctxt->sax != NULL) &&
5699
59.6k
        (ctxt->sax->getParameterEntity != NULL))
5700
59.6k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
69.0k
      } else {
5702
69.0k
          if ((ctxt->sax != NULL) &&
5703
69.0k
        (ctxt->sax->getEntity != NULL))
5704
69.0k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
69.0k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
4.03k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
4.03k
    }
5708
69.0k
      }
5709
128k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
116k
    cur->orig = orig;
5711
116k
                orig = NULL;
5712
116k
      }
5713
128k
  }
5714
5715
156k
done:
5716
156k
  if (value != NULL) xmlFree(value);
5717
156k
  if (URI != NULL) xmlFree(URI);
5718
156k
  if (literal != NULL) xmlFree(literal);
5719
156k
        if (orig != NULL) xmlFree(orig);
5720
156k
    }
5721
157k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
398k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
398k
    int val;
5757
398k
    xmlChar *ret;
5758
5759
398k
    *value = NULL;
5760
398k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
30.4k
  SKIP(9);
5762
30.4k
  return(XML_ATTRIBUTE_REQUIRED);
5763
30.4k
    }
5764
368k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
339k
  SKIP(8);
5766
339k
  return(XML_ATTRIBUTE_IMPLIED);
5767
339k
    }
5768
29.1k
    val = XML_ATTRIBUTE_NONE;
5769
29.1k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
13.2k
  SKIP(6);
5771
13.2k
  val = XML_ATTRIBUTE_FIXED;
5772
13.2k
  if (SKIP_BLANKS == 0) {
5773
42
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
42
         "Space required after '#FIXED'\n");
5775
42
  }
5776
13.2k
    }
5777
29.1k
    ret = xmlParseAttValue(ctxt);
5778
29.1k
    ctxt->instate = XML_PARSER_DTD;
5779
29.1k
    if (ret == NULL) {
5780
683
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
683
           "Attribute default value declaration error\n");
5782
683
    } else
5783
28.5k
        *value = ret;
5784
29.1k
    return(val);
5785
368k
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
311
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
311
    const xmlChar *name;
5809
311
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
311
    if (RAW != '(') {
5812
38
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
38
  return(NULL);
5814
38
    }
5815
273
    SHRINK;
5816
405
    do {
5817
405
        NEXT;
5818
405
  SKIP_BLANKS;
5819
405
        name = xmlParseName(ctxt);
5820
405
  if (name == NULL) {
5821
44
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
44
         "Name expected in NOTATION declaration\n");
5823
44
            xmlFreeEnumeration(ret);
5824
44
      return(NULL);
5825
44
  }
5826
361
  tmp = ret;
5827
477
  while (tmp != NULL) {
5828
143
      if (xmlStrEqual(name, tmp->name)) {
5829
27
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
27
    "standalone: attribute notation value token %s duplicated\n",
5831
27
         name, NULL);
5832
27
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
27
    break;
5835
27
      }
5836
116
      tmp = tmp->next;
5837
116
  }
5838
361
  if (tmp == NULL) {
5839
334
      cur = xmlCreateEnumeration(name);
5840
334
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
334
      if (last == NULL) ret = last = cur;
5845
79
      else {
5846
79
    last->next = cur;
5847
79
    last = cur;
5848
79
      }
5849
334
  }
5850
361
  SKIP_BLANKS;
5851
361
    } while (RAW == '|');
5852
229
    if (RAW != ')') {
5853
70
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
70
        xmlFreeEnumeration(ret);
5855
70
  return(NULL);
5856
70
    }
5857
159
    NEXT;
5858
159
    return(ret);
5859
229
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
43.1k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
43.1k
    xmlChar *name;
5881
43.1k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
43.1k
    if (RAW != '(') {
5884
1.03k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
1.03k
  return(NULL);
5886
1.03k
    }
5887
42.0k
    SHRINK;
5888
131k
    do {
5889
131k
        NEXT;
5890
131k
  SKIP_BLANKS;
5891
131k
        name = xmlParseNmtoken(ctxt);
5892
131k
  if (name == NULL) {
5893
62
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
62
      return(ret);
5895
62
  }
5896
131k
  tmp = ret;
5897
357k
  while (tmp != NULL) {
5898
226k
      if (xmlStrEqual(name, tmp->name)) {
5899
54
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
54
    "standalone: attribute enumeration value token %s duplicated\n",
5901
54
         name, NULL);
5902
54
    if (!xmlDictOwns(ctxt->dict, name))
5903
54
        xmlFree(name);
5904
54
    break;
5905
54
      }
5906
226k
      tmp = tmp->next;
5907
226k
  }
5908
131k
  if (tmp == NULL) {
5909
131k
      cur = xmlCreateEnumeration(name);
5910
131k
      if (!xmlDictOwns(ctxt->dict, name))
5911
131k
    xmlFree(name);
5912
131k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
131k
      if (last == NULL) ret = last = cur;
5917
89.5k
      else {
5918
89.5k
    last->next = cur;
5919
89.5k
    last = cur;
5920
89.5k
      }
5921
131k
  }
5922
131k
  SKIP_BLANKS;
5923
131k
    } while (RAW == '|');
5924
42.0k
    if (RAW != ')') {
5925
237
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
237
  return(ret);
5927
237
    }
5928
41.7k
    NEXT;
5929
41.7k
    return(ret);
5930
42.0k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
43.4k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
43.4k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
344
  SKIP(8);
5953
344
  if (SKIP_BLANKS == 0) {
5954
33
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
33
         "Space required after 'NOTATION'\n");
5956
33
      return(0);
5957
33
  }
5958
311
  *tree = xmlParseNotationType(ctxt);
5959
311
  if (*tree == NULL) return(0);
5960
159
  return(XML_ATTRIBUTE_NOTATION);
5961
311
    }
5962
43.1k
    *tree = xmlParseEnumerationType(ctxt);
5963
43.1k
    if (*tree == NULL) return(0);
5964
42.0k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
43.1k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
400k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
400k
    SHRINK;
6017
400k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
161k
  SKIP(5);
6019
161k
  return(XML_ATTRIBUTE_CDATA);
6020
239k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
1.03k
  SKIP(6);
6022
1.03k
  return(XML_ATTRIBUTE_IDREFS);
6023
238k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
4.52k
  SKIP(5);
6025
4.52k
  return(XML_ATTRIBUTE_IDREF);
6026
233k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
100k
        SKIP(2);
6028
100k
  return(XML_ATTRIBUTE_ID);
6029
133k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
1.40k
  SKIP(6);
6031
1.40k
  return(XML_ATTRIBUTE_ENTITY);
6032
131k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
80
  SKIP(8);
6034
80
  return(XML_ATTRIBUTE_ENTITIES);
6035
131k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
21.3k
  SKIP(8);
6037
21.3k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
110k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
66.7k
  SKIP(7);
6040
66.7k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
66.7k
     }
6042
43.4k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
400k
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
134k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
134k
    const xmlChar *elemName;
6061
134k
    const xmlChar *attrName;
6062
134k
    xmlEnumerationPtr tree;
6063
6064
134k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
134k
    SKIP(2);
6067
6068
134k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
133k
  int inputid = ctxt->input->id;
6070
6071
133k
  SKIP(7);
6072
133k
  if (SKIP_BLANKS == 0) {
6073
276
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
276
                     "Space required after '<!ATTLIST'\n");
6075
276
  }
6076
133k
        elemName = xmlParseName(ctxt);
6077
133k
  if (elemName == NULL) {
6078
167
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
167
         "ATTLIST: no name for Element\n");
6080
167
      return;
6081
167
  }
6082
133k
  SKIP_BLANKS;
6083
133k
  GROW;
6084
531k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
401k
      int type;
6086
401k
      int def;
6087
401k
      xmlChar *defaultValue = NULL;
6088
6089
401k
      GROW;
6090
401k
            tree = NULL;
6091
401k
      attrName = xmlParseName(ctxt);
6092
401k
      if (attrName == NULL) {
6093
769
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
769
             "ATTLIST: no name for Attribute\n");
6095
769
    break;
6096
769
      }
6097
401k
      GROW;
6098
401k
      if (SKIP_BLANKS == 0) {
6099
571
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
571
            "Space required after the attribute name\n");
6101
571
    break;
6102
571
      }
6103
6104
400k
      type = xmlParseAttributeType(ctxt, &tree);
6105
400k
      if (type <= 0) {
6106
1.26k
          break;
6107
1.26k
      }
6108
6109
399k
      GROW;
6110
399k
      if (SKIP_BLANKS == 0) {
6111
487
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
487
             "Space required after the attribute type\n");
6113
487
          if (tree != NULL)
6114
283
        xmlFreeEnumeration(tree);
6115
487
    break;
6116
487
      }
6117
6118
398k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
398k
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
398k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
12.5k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
398k
      GROW;
6130
398k
            if (RAW != '>') {
6131
375k
    if (SKIP_BLANKS == 0) {
6132
1.41k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
1.41k
      "Space required after the attribute default value\n");
6134
1.41k
        if (defaultValue != NULL)
6135
719
      xmlFree(defaultValue);
6136
1.41k
        if (tree != NULL)
6137
132
      xmlFreeEnumeration(tree);
6138
1.41k
        break;
6139
1.41k
    }
6140
375k
      }
6141
397k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
397k
    (ctxt->sax->attributeDecl != NULL))
6143
369k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
369k
                          type, def, defaultValue, tree);
6145
28.0k
      else if (tree != NULL)
6146
3.05k
    xmlFreeEnumeration(tree);
6147
6148
397k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
397k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
397k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
18.5k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
18.5k
      }
6153
397k
      if (ctxt->sax2) {
6154
246k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
246k
      }
6156
397k
      if (defaultValue != NULL)
6157
27.7k
          xmlFree(defaultValue);
6158
397k
      GROW;
6159
397k
  }
6160
133k
  if (RAW == '>') {
6161
129k
      if (inputid != ctxt->input->id) {
6162
12
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
12
                               "Attribute list declaration doesn't start and"
6164
12
                               " stop in the same entity\n");
6165
12
      }
6166
129k
      NEXT;
6167
129k
  }
6168
133k
    }
6169
134k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
64.9k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
64.9k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
64.9k
    const xmlChar *elem = NULL;
6196
6197
64.9k
    GROW;
6198
64.9k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
64.9k
  SKIP(7);
6200
64.9k
  SKIP_BLANKS;
6201
64.9k
  SHRINK;
6202
64.9k
  if (RAW == ')') {
6203
40.7k
      if (ctxt->input->id != inputchk) {
6204
3
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
3
                               "Element content declaration doesn't start and"
6206
3
                               " stop in the same entity\n");
6207
3
      }
6208
40.7k
      NEXT;
6209
40.7k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
40.7k
      if (ret == NULL)
6211
0
          return(NULL);
6212
40.7k
      if (RAW == '*') {
6213
27
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
27
    NEXT;
6215
27
      }
6216
40.7k
      return(ret);
6217
40.7k
  }
6218
24.2k
  if ((RAW == '(') || (RAW == '|')) {
6219
24.1k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
24.1k
      if (ret == NULL) return(NULL);
6221
24.1k
  }
6222
265k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
241k
      NEXT;
6224
241k
      if (elem == NULL) {
6225
24.1k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
24.1k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
24.1k
    ret->c1 = cur;
6231
24.1k
    if (cur != NULL)
6232
24.1k
        cur->parent = ret;
6233
24.1k
    cur = ret;
6234
217k
      } else {
6235
217k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
217k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
217k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
217k
    if (n->c1 != NULL)
6242
217k
        n->c1->parent = n;
6243
217k
          cur->c2 = n;
6244
217k
    if (n != NULL)
6245
217k
        n->parent = cur;
6246
217k
    cur = n;
6247
217k
      }
6248
241k
      SKIP_BLANKS;
6249
241k
      elem = xmlParseName(ctxt);
6250
241k
      if (elem == NULL) {
6251
43
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
43
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
43
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
43
    return(NULL);
6255
43
      }
6256
241k
      SKIP_BLANKS;
6257
241k
      GROW;
6258
241k
  }
6259
24.2k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
24.0k
      if (elem != NULL) {
6261
24.0k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
24.0k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
24.0k
    if (cur->c2 != NULL)
6264
24.0k
        cur->c2->parent = cur;
6265
24.0k
            }
6266
24.0k
            if (ret != NULL)
6267
24.0k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
24.0k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
24.0k
      SKIP(2);
6274
24.0k
  } else {
6275
228
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
228
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
228
      return(NULL);
6278
228
  }
6279
6280
24.2k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
24.0k
    return(ret);
6284
64.9k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
71.8k
                                       int depth) {
6321
71.8k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
71.8k
    const xmlChar *elem;
6323
71.8k
    xmlChar type = 0;
6324
6325
71.8k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
71.8k
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
71.8k
    SKIP_BLANKS;
6333
71.8k
    GROW;
6334
71.8k
    if (RAW == '(') {
6335
3.66k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
3.66k
  NEXT;
6339
3.66k
  SKIP_BLANKS;
6340
3.66k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
3.66k
                                                           depth + 1);
6342
3.66k
        if (cur == NULL)
6343
453
            return(NULL);
6344
3.20k
  SKIP_BLANKS;
6345
3.20k
  GROW;
6346
68.1k
    } else {
6347
68.1k
  elem = xmlParseName(ctxt);
6348
68.1k
  if (elem == NULL) {
6349
650
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
650
      return(NULL);
6351
650
  }
6352
67.5k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
67.5k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
67.5k
  GROW;
6358
67.5k
  if (RAW == '?') {
6359
6.57k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
6.57k
      NEXT;
6361
60.9k
  } else if (RAW == '*') {
6362
7.03k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
7.03k
      NEXT;
6364
53.9k
  } else if (RAW == '+') {
6365
10.4k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
10.4k
      NEXT;
6367
43.5k
  } else {
6368
43.5k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
43.5k
  }
6370
67.5k
  GROW;
6371
67.5k
    }
6372
70.7k
    SKIP_BLANKS;
6373
70.7k
    SHRINK;
6374
282k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
213k
        if (RAW == ',') {
6379
62.6k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
37.8k
      else if (type != CUR) {
6385
24
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
24
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
24
                      type);
6388
24
    if ((last != NULL) && (last != ret))
6389
24
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
24
    if (ret != NULL)
6391
24
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
24
    return(NULL);
6393
24
      }
6394
62.6k
      NEXT;
6395
6396
62.6k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
62.6k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
62.6k
      if (last == NULL) {
6404
24.8k
    op->c1 = ret;
6405
24.8k
    if (ret != NULL)
6406
24.8k
        ret->parent = op;
6407
24.8k
    ret = cur = op;
6408
37.7k
      } else {
6409
37.7k
          cur->c2 = op;
6410
37.7k
    if (op != NULL)
6411
37.7k
        op->parent = cur;
6412
37.7k
    op->c1 = last;
6413
37.7k
    if (last != NULL)
6414
37.7k
        last->parent = op;
6415
37.7k
    cur =op;
6416
37.7k
    last = NULL;
6417
37.7k
      }
6418
150k
  } else if (RAW == '|') {
6419
149k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
129k
      else if (type != CUR) {
6425
18
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
18
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
18
          type);
6428
18
    if ((last != NULL) && (last != ret))
6429
18
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
18
    if (ret != NULL)
6431
18
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
18
    return(NULL);
6433
18
      }
6434
149k
      NEXT;
6435
6436
149k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
149k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
149k
      if (last == NULL) {
6445
19.7k
    op->c1 = ret;
6446
19.7k
    if (ret != NULL)
6447
19.7k
        ret->parent = op;
6448
19.7k
    ret = cur = op;
6449
129k
      } else {
6450
129k
          cur->c2 = op;
6451
129k
    if (op != NULL)
6452
129k
        op->parent = cur;
6453
129k
    op->c1 = last;
6454
129k
    if (last != NULL)
6455
129k
        last->parent = op;
6456
129k
    cur =op;
6457
129k
    last = NULL;
6458
129k
      }
6459
149k
  } else {
6460
923
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
923
      if ((last != NULL) && (last != ret))
6462
285
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
923
      if (ret != NULL)
6464
923
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
923
      return(NULL);
6466
923
  }
6467
212k
  GROW;
6468
212k
  SKIP_BLANKS;
6469
212k
  GROW;
6470
212k
  if (RAW == '(') {
6471
9.45k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
9.45k
      NEXT;
6474
9.45k
      SKIP_BLANKS;
6475
9.45k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
9.45k
                                                          depth + 1);
6477
9.45k
            if (last == NULL) {
6478
139
    if (ret != NULL)
6479
139
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
139
    return(NULL);
6481
139
            }
6482
9.31k
      SKIP_BLANKS;
6483
202k
  } else {
6484
202k
      elem = xmlParseName(ctxt);
6485
202k
      if (elem == NULL) {
6486
148
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
148
    if (ret != NULL)
6488
148
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
148
    return(NULL);
6490
148
      }
6491
202k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
202k
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
202k
      if (RAW == '?') {
6498
25.4k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
25.4k
    NEXT;
6500
177k
      } else if (RAW == '*') {
6501
16.0k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
16.0k
    NEXT;
6503
161k
      } else if (RAW == '+') {
6504
3.21k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
3.21k
    NEXT;
6506
157k
      } else {
6507
157k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
157k
      }
6509
202k
  }
6510
211k
  SKIP_BLANKS;
6511
211k
  GROW;
6512
211k
    }
6513
69.4k
    if ((cur != NULL) && (last != NULL)) {
6514
44.0k
        cur->c2 = last;
6515
44.0k
  if (last != NULL)
6516
44.0k
      last->parent = cur;
6517
44.0k
    }
6518
69.4k
    if (ctxt->input->id != inputchk) {
6519
9
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
9
                       "Element content declaration doesn't start and stop in"
6521
9
                       " the same entity\n");
6522
9
    }
6523
69.4k
    NEXT;
6524
69.4k
    if (RAW == '?') {
6525
2.24k
  if (ret != NULL) {
6526
2.24k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
2.24k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
36
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
2.21k
      else
6530
2.21k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
2.24k
  }
6532
2.24k
  NEXT;
6533
67.2k
    } else if (RAW == '*') {
6534
18.0k
  if (ret != NULL) {
6535
18.0k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
18.0k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
110k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
92.2k
    if ((cur->c1 != NULL) &&
6543
92.2k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
92.2k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
2.36k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
92.2k
    if ((cur->c2 != NULL) &&
6547
92.2k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
92.2k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
380
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
92.2k
    cur = cur->c2;
6551
92.2k
      }
6552
18.0k
  }
6553
18.0k
  NEXT;
6554
49.1k
    } else if (RAW == '+') {
6555
9.18k
  if (ret != NULL) {
6556
9.18k
      int found = 0;
6557
6558
9.18k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
9.18k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
3
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
9.17k
      else
6562
9.17k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
15.0k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
5.87k
    if ((cur->c1 != NULL) &&
6570
5.87k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
5.87k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
13
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
13
        found = 1;
6574
13
    }
6575
5.87k
    if ((cur->c2 != NULL) &&
6576
5.87k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
5.87k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
12
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
12
        found = 1;
6580
12
    }
6581
5.87k
    cur = cur->c2;
6582
5.87k
      }
6583
9.18k
      if (found)
6584
16
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
9.18k
  }
6586
9.18k
  NEXT;
6587
9.18k
    }
6588
69.4k
    return(ret);
6589
70.7k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
123k
                           xmlElementContentPtr *result) {
6648
6649
123k
    xmlElementContentPtr tree = NULL;
6650
123k
    int inputid = ctxt->input->id;
6651
123k
    int res;
6652
6653
123k
    *result = NULL;
6654
6655
123k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
123k
    NEXT;
6661
123k
    GROW;
6662
123k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
123k
    SKIP_BLANKS;
6665
123k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
64.9k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
64.9k
  res = XML_ELEMENT_TYPE_MIXED;
6668
64.9k
    } else {
6669
58.7k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
58.7k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
58.7k
    }
6672
123k
    SKIP_BLANKS;
6673
123k
    *result = tree;
6674
123k
    return(res);
6675
123k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
165k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
165k
    const xmlChar *name;
6695
165k
    int ret = -1;
6696
165k
    xmlElementContentPtr content  = NULL;
6697
6698
165k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
165k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
165k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
165k
  int inputid = ctxt->input->id;
6705
6706
165k
  SKIP(7);
6707
165k
  if (SKIP_BLANKS == 0) {
6708
221
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
221
               "Space required after 'ELEMENT'\n");
6710
221
      return(-1);
6711
221
  }
6712
165k
        name = xmlParseName(ctxt);
6713
165k
  if (name == NULL) {
6714
151
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
151
         "xmlParseElementDecl: no name for Element\n");
6716
151
      return(-1);
6717
151
  }
6718
165k
  if (SKIP_BLANKS == 0) {
6719
427
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
427
         "Space required after the element name\n");
6721
427
  }
6722
165k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
40.0k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
40.0k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
125k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
125k
             (NXT(2) == 'Y')) {
6730
868
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
868
      ret = XML_ELEMENT_TYPE_ANY;
6735
124k
  } else if (RAW == '(') {
6736
123k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
123k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
645
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
645
          (ctxt->inputNr == 1)) {
6743
33
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
33
    "PEReference: forbidden within markup decl in internal subset\n");
6745
612
      } else {
6746
612
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
612
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
612
            }
6749
645
      return(-1);
6750
645
  }
6751
6752
164k
  SKIP_BLANKS;
6753
6754
164k
  if (RAW != '>') {
6755
2.09k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
2.09k
      if (content != NULL) {
6757
317
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
317
      }
6759
162k
  } else {
6760
162k
      if (inputid != ctxt->input->id) {
6761
3
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
3
                               "Element declaration doesn't start and stop in"
6763
3
                               " the same entity\n");
6764
3
      }
6765
6766
162k
      NEXT;
6767
162k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
162k
    (ctxt->sax->elementDecl != NULL)) {
6769
148k
    if (content != NULL)
6770
111k
        content->parent = NULL;
6771
148k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
148k
                           content);
6773
148k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
1.43k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
1.43k
    }
6782
148k
      } else if (content != NULL) {
6783
9.86k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
9.86k
      }
6785
162k
  }
6786
164k
    }
6787
164k
    return(ret);
6788
165k
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
1.43k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
1.43k
    int *inputIds = NULL;
6806
1.43k
    size_t inputIdsSize = 0;
6807
1.43k
    size_t depth = 0;
6808
6809
8.17k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
8.15k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
4.33k
            int id = ctxt->input->id;
6812
6813
4.33k
            SKIP(3);
6814
4.33k
            SKIP_BLANKS;
6815
6816
4.33k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
3.64k
                SKIP(7);
6818
3.64k
                SKIP_BLANKS;
6819
3.64k
                if (RAW != '[') {
6820
18
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
18
                    xmlHaltParser(ctxt);
6822
18
                    goto error;
6823
18
                }
6824
3.62k
                if (ctxt->input->id != id) {
6825
6
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
6
                                   "All markup of the conditional section is"
6827
6
                                   " not in the same entity\n");
6828
6
                }
6829
3.62k
                NEXT;
6830
6831
3.62k
                if (inputIdsSize <= depth) {
6832
1.05k
                    int *tmp;
6833
6834
1.05k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
1.05k
                    tmp = (int *) xmlRealloc(inputIds,
6836
1.05k
                            inputIdsSize * sizeof(int));
6837
1.05k
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
1.05k
                    inputIds = tmp;
6842
1.05k
                }
6843
3.62k
                inputIds[depth] = id;
6844
3.62k
                depth++;
6845
3.62k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
433
                size_t ignoreDepth = 0;
6847
6848
433
                SKIP(6);
6849
433
                SKIP_BLANKS;
6850
433
                if (RAW != '[') {
6851
23
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
23
                    xmlHaltParser(ctxt);
6853
23
                    goto error;
6854
23
                }
6855
410
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
410
                NEXT;
6861
6862
2.02M
                while (RAW != 0) {
6863
2.02M
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
1.04k
                        SKIP(3);
6865
1.04k
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
1.04k
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
2.02M
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
2.02M
                               (NXT(2) == '>')) {
6873
803
                        if (ignoreDepth == 0)
6874
152
                            break;
6875
651
                        SKIP(3);
6876
651
                        ignoreDepth--;
6877
2.01M
                    } else {
6878
2.01M
                        NEXT;
6879
2.01M
                    }
6880
2.02M
                }
6881
6882
410
    if (RAW == 0) {
6883
258
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
258
                    goto error;
6885
258
    }
6886
152
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
152
                SKIP(3);
6892
257
            } else {
6893
257
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
257
                xmlHaltParser(ctxt);
6895
257
                goto error;
6896
257
            }
6897
4.33k
        } else if ((depth > 0) &&
6898
3.82k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
2.10k
            depth--;
6900
2.10k
            if (ctxt->input->id != inputIds[depth]) {
6901
45
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
45
                               "All markup of the conditional section is not"
6903
45
                               " in the same entity\n");
6904
45
            }
6905
2.10k
            SKIP(3);
6906
2.10k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
1.38k
            xmlParseMarkupDecl(ctxt);
6908
1.38k
        } else {
6909
327
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
327
            xmlHaltParser(ctxt);
6911
327
            goto error;
6912
327
        }
6913
6914
7.27k
        if (depth == 0)
6915
527
            break;
6916
6917
6.74k
        SKIP_BLANKS;
6918
6.74k
        GROW;
6919
6.74k
    }
6920
6921
1.43k
error:
6922
1.43k
    xmlFree(inputIds);
6923
1.43k
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
5.32M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
5.32M
    GROW;
6952
5.32M
    if (CUR == '<') {
6953
5.32M
        if (NXT(1) == '!') {
6954
5.32M
      switch (NXT(2)) {
6955
323k
          case 'E':
6956
323k
        if (NXT(3) == 'L')
6957
165k
      xmlParseElementDecl(ctxt);
6958
157k
        else if (NXT(3) == 'N')
6959
157k
      xmlParseEntityDecl(ctxt);
6960
169
                    else
6961
169
                        SKIP(2);
6962
323k
        break;
6963
134k
          case 'A':
6964
134k
        xmlParseAttributeListDecl(ctxt);
6965
134k
        break;
6966
1.11k
          case 'N':
6967
1.11k
        xmlParseNotationDecl(ctxt);
6968
1.11k
        break;
6969
4.86M
          case '-':
6970
4.86M
        xmlParseComment(ctxt);
6971
4.86M
        break;
6972
990
    default:
6973
        /* there is an error but it will be detected later */
6974
990
                    SKIP(2);
6975
990
        break;
6976
5.32M
      }
6977
5.32M
  } else if (NXT(1) == '?') {
6978
656
      xmlParsePI(ctxt);
6979
656
  }
6980
5.32M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
5.32M
    if (ctxt->instate == XML_PARSER_EOF)
6987
3.03k
        return;
6988
6989
5.32M
    ctxt->instate = XML_PARSER_DTD;
6990
5.32M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
1.02k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
1.02k
    xmlChar *version;
7006
1.02k
    const xmlChar *encoding;
7007
1.02k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
1.02k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
945
  SKIP(5);
7014
945
    } else {
7015
80
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
80
  return;
7017
80
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
945
    oldstate = ctxt->instate;
7021
945
    ctxt->instate = XML_PARSER_START;
7022
7023
945
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
945
    version = xmlParseVersionInfo(ctxt);
7032
945
    if (version == NULL)
7033
223
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
722
    else {
7035
722
  if (SKIP_BLANKS == 0) {
7036
31
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
31
               "Space needed here\n");
7038
31
  }
7039
722
    }
7040
945
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
945
    encoding = xmlParseEncodingDecl(ctxt);
7046
945
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
945
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
27
        ctxt->instate = oldstate;
7053
27
        return;
7054
27
    }
7055
918
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
387
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
387
           "Missing encoding in text declaration\n");
7058
387
    }
7059
7060
918
    SKIP_BLANKS;
7061
918
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
573
        SKIP(2);
7063
573
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
18
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
18
  NEXT;
7067
327
    } else {
7068
327
        int c;
7069
7070
327
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
39.7k
        while ((c = CUR) != 0) {
7072
39.6k
            NEXT;
7073
39.6k
            if (c == '>')
7074
241
                break;
7075
39.6k
        }
7076
327
    }
7077
7078
918
    ctxt->instate = oldstate;
7079
918
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
5.22k
                       const xmlChar *SystemID) {
7096
5.22k
    xmlDetectSAX2(ctxt);
7097
5.22k
    GROW;
7098
7099
5.22k
    if ((ctxt->encoding == NULL) &&
7100
5.22k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
5.20k
        xmlChar start[4];
7102
5.20k
  xmlCharEncoding enc;
7103
7104
5.20k
  start[0] = RAW;
7105
5.20k
  start[1] = NXT(1);
7106
5.20k
  start[2] = NXT(2);
7107
5.20k
  start[3] = NXT(3);
7108
5.20k
  enc = xmlDetectCharEncoding(start, 4);
7109
5.20k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
909
      xmlSwitchEncoding(ctxt, enc);
7111
5.20k
    }
7112
7113
5.22k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
836
  xmlParseTextDecl(ctxt);
7115
836
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
18
      xmlHaltParser(ctxt);
7120
18
      return;
7121
18
  }
7122
836
    }
7123
5.21k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
5.21k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
5.21k
    ctxt->instate = XML_PARSER_DTD;
7135
5.21k
    ctxt->external = 1;
7136
5.21k
    SKIP_BLANKS;
7137
213k
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
209k
  GROW;
7139
209k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
1.43k
            xmlParseConditionalSections(ctxt);
7141
208k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
207k
            xmlParseMarkupDecl(ctxt);
7143
207k
        } else {
7144
1.09k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
1.09k
            xmlHaltParser(ctxt);
7146
1.09k
            return;
7147
1.09k
        }
7148
208k
        SKIP_BLANKS;
7149
208k
    }
7150
7151
4.11k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
4.11k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
923k
xmlParseReference(xmlParserCtxtPtr ctxt) {
    /*
     * Handle one reference starting at '&'. Character references are
     * delivered through the SAX characters() or reference() callbacks.
     * Entity references are parsed on first use, then either reported
     * through reference() or copied into the tree under ctxt->node.
     */
7174
923k
    xmlEntityPtr ent;
7175
923k
    xmlChar *val;
7176
923k
    int was_checked;
7177
923k
    xmlNodePtr list = NULL;
7178
923k
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
923k
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
923k
    if (NXT(1) == '#') {
7188
25.8k
  int i = 0;
7189
25.8k
  xmlChar out[16];
  /*
   * Character following '&#', read before xmlParseCharRef() is called;
   * used below to pick the hexadecimal or decimal textual form.
   */
7190
25.8k
  int hex = NXT(2);
7191
25.8k
  int value = xmlParseCharRef(ctxt);
7192
7193
  /* A zero value is not forwarded to any callback. */
25.8k
  if (value == 0)
7194
3.45k
      return;
7195
22.3k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
13.6k
      if (value <= 0xFF) {
7202
13.3k
    out[0] = value;
7203
13.3k
    out[1] = 0;
7204
13.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
13.3k
        (!ctxt->disableSAX))
7206
10.7k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
13.3k
      } else {
    /* Rebuild the reference text and report it via reference(). */
7208
338
    if ((hex == 'x') || (hex == 'X'))
7209
148
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
190
    else
7211
190
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
338
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
338
        (!ctxt->disableSAX))
7214
205
        ctxt->sax->reference(ctxt->userData, out);
7215
338
      }
7216
13.6k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
8.67k
      COPY_BUF(0 ,out, i, value);
7221
8.67k
      out[i] = 0;
7222
8.67k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
8.67k
    (!ctxt->disableSAX))
7224
6.75k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
8.67k
  }
7226
22.3k
  return;
7227
25.8k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
897k
    ent = xmlParseEntityRef(ctxt);
7233
897k
    if (ent == NULL) return;
7234
    /* Nothing more is done once the document is no longer well-formed. */
732k
    if (!ctxt->wellFormed)
7235
199k
  return;
7236
    /* Non-zero if the entity already carried XML_ENT_PARSED on entry. */
532k
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
532k
    if ((ent->name == NULL) ||
7240
532k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
30.3k
  val = ent->content;
7242
30.3k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
30.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
30.3k
      (!ctxt->disableSAX))
7248
30.3k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
30.3k
  return;
7250
30.3k
    }
7251
7252
    /*
7253
     * The first reference to the entity triggers a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
502k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
502k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
20.0k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
  /* Saved so it can be handed to xmlParserEntityCheck() below. */
7265
19.0k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
19.0k
  void *user_data;
7273
19.0k
  if (ctxt->userData == ctxt)
7274
19.0k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
19.0k
        ctxt->sizeentcopy = 0;
7280
7281
        /* The entity is already being expanded: it references itself. */
19.0k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
79
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
79
            xmlHaltParser(ctxt);
7284
79
            return;
7285
79
        }
7286
7287
18.9k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
18.9k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
10.9k
      ctxt->depth++;
7297
10.9k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
10.9k
                                                user_data, &list);
7299
10.9k
      ctxt->depth--;
7300
7301
10.9k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
8.02k
      ctxt->depth++;
7303
8.02k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
8.02k
                                     user_data, ctxt->depth, ent->URI,
7305
8.02k
             ent->ExternalID, &list);
7306
8.02k
      ctxt->depth--;
7307
8.02k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
  /* Expansion finished: clear the in-progress mark, record the result. */
18.9k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
18.9k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
18.9k
        ent->expandedSize = ctxt->sizeentcopy;
7316
18.9k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
1.46k
            xmlHaltParser(ctxt);
7318
1.46k
      xmlFreeNodeList(list);
7319
1.46k
      return;
7320
1.46k
  }
7321
17.5k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
17.5k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
9.61k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
9.61k
            if ((ctxt->replaceEntities == 0) ||
7333
9.61k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
9.61k
                ((list->type == XML_TEXT_NODE) &&
7335
8.64k
                 (list->next == NULL))) {
                /* The entity keeps the parsed nodes as its own children. */
7336
8.64k
                ent->owner = 1;
7337
33.2k
                while (list != NULL) {
7338
24.5k
                    list->parent = (xmlNodePtr) ent;
7339
24.5k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
24.5k
                    if (list->next == NULL)
7342
8.64k
                        ent->last = list;
7343
24.5k
                    list = list->next;
7344
24.5k
                }
7345
8.64k
                list = NULL;
7346
8.64k
            } else {
                /*
                 * The nodes are re-parented to ctxt->node; list is reset
                 * to the head so the code further down can tell this is
                 * the first occurrence.
                 */
7347
968
                ent->owner = 0;
7348
6.03k
                while (list != NULL) {
7349
5.06k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
5.06k
                    list->doc = ctxt->myDoc;
7351
5.06k
                    if (list->next == NULL)
7352
968
                        ent->last = list;
7353
5.06k
                    list = list->next;
7354
5.06k
                }
7355
968
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
968
            }
7361
9.61k
  } else if ((ret != XML_ERR_OK) &&
7362
7.90k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
3.96k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
3.96k
         "Entity '%s' failed to parse\n", ent->name);
      /* Truncate the stored content of the entity that failed. */
7365
3.96k
            if (ent->content != NULL)
7366
696
                ent->content[0] = 0;
7367
3.96k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
17.5k
        was_checked = 0;
7374
17.5k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
500k
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * through parsing for first checking go through the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
107k
  if (was_checked != 0) {
7389
98.6k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
98.6k
      if (ctxt->userData == ctxt)
7396
98.6k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
      /* Re-parse without collecting a node list (last argument NULL). */
98.6k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
3
    ctxt->depth++;
7402
3
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
3
           ent->content, user_data, NULL);
7404
3
    ctxt->depth--;
7405
98.6k
      } else if (ent->etype ==
7406
98.6k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
98.6k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
98.6k
    ctxt->depth++;
7410
98.6k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
98.6k
         ctxt->sax, user_data, ctxt->depth,
7412
98.6k
         ent->URI, ent->ExternalID, NULL);
7413
98.6k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
98.6k
                ctxt->sizeentities = oldsizeentities;
7417
98.6k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
98.6k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
98.6k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
98.6k
  }
7429
107k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
107k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
22.8k
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
22.8k
  }
7437
107k
  return;
7438
107k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
393k
    if ((was_checked != 0) &&
7445
393k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
60
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
392k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
392k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
86.5k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
86.5k
  return;
7458
86.5k
    }
7459
7460
306k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
306k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
306k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
306k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
99.0k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
    /* Append a copy of each entity child to ctxt->node. */
99.0k
    cur = ent->children;
7492
103k
    while (cur != NULL) {
7493
103k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
103k
        if (nw != NULL) {
7495
103k
      if (nw->_private == NULL)
7496
103k
          nw->_private = cur->_private;
7497
103k
      if (firstChild == NULL){
7498
99.0k
          firstChild = nw;
7499
99.0k
      }
7500
103k
      nw = xmlAddChild(ctxt->node, nw);
7501
103k
        }
7502
103k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
99.0k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
99.0k
          (nw != NULL) &&
7509
99.0k
          (nw->type == XML_ELEMENT_NODE) &&
7510
99.0k
          (nw->children == NULL))
7511
335
          nw->extra = 1;
7512
7513
99.0k
      break;
7514
99.0k
        }
7515
4.76k
        cur = cur->next;
7516
4.76k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
207k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
207k
    xmlNodePtr nw = NULL, cur, next, last,
7523
207k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
207k
    cur = ent->children;
7532
207k
    ent->children = NULL;
7533
207k
    last = ent->last;
7534
207k
    ent->last = NULL;
7535
217k
    while (cur != NULL) {
7536
217k
        next = cur->next;
7537
217k
        cur->next = NULL;
7538
217k
        cur->parent = NULL;
7539
217k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
217k
        if (nw != NULL) {
7541
217k
      if (nw->_private == NULL)
7542
217k
          nw->_private = cur->_private;
      /*
       * Here firstChild records the original node (cur), whereas
       * the branch above records the copy (nw). It is only read
       * under LIBXML_LEGACY_ENABLED.
       */
7543
217k
      if (firstChild == NULL){
7544
207k
          firstChild = cur;
7545
207k
      }
      /* The copy goes back to the entity ... */
7546
217k
      xmlAddChild((xmlNodePtr) ent, nw);
7547
217k
        }
        /* ... and the original node is added to the document. */
7548
217k
        xmlAddChild(ctxt->node, cur);
7549
217k
        if (cur == last)
7550
207k
      break;
7551
9.83k
        cur = next;
7552
9.83k
    }
7553
207k
    if (ent->owner == 0)
7554
968
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
207k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
306k
      ctxt->nodemem = 0;
7582
306k
      ctxt->nodelen = 0;
7583
306k
      return;
7584
306k
  }
7585
306k
    }
7586
306k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entity reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
1.48M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
1.48M
    const xmlChar *name;
7621
1.48M
    xmlEntityPtr ent = NULL;
7622
7623
1.48M
    GROW;
7624
1.48M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
1.48M
    if (RAW != '&')
7628
0
        return(NULL);
7629
1.48M
    NEXT;
7630
1.48M
    name = xmlParseName(ctxt);
7631
1.48M
    if (name == NULL) {
7632
8.28k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
8.28k
           "xmlParseEntityRef: no name\n");
7634
8.28k
        return(NULL);
7635
8.28k
    }
7636
1.47M
    if (RAW != ';') {
7637
8.63k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
8.63k
  return(NULL);
7639
8.63k
    }
7640
1.46M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
1.46M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
1.13M
        ent = xmlGetPredefinedEntity(name);
7647
1.13M
        if (ent != NULL)
7648
45.4k
            return(ent);
7649
1.13M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have been stored in the parser context.
7654
     */
7655
1.42M
    if (ctxt->sax != NULL) {
7656
1.42M
  if (ctxt->sax->getEntity != NULL)
7657
1.42M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
1.42M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
1.42M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
15.2k
      ent = xmlGetPredefinedEntity(name);
7661
1.42M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
1.42M
      (ctxt->userData==ctxt)) {
7663
35.4k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
35.4k
  }
7665
1.42M
    }
7666
1.42M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
1.42M
    if (ent == NULL) {
7690
207k
  if ((ctxt->standalone == 1) ||
7691
207k
      ((ctxt->hasExternalSubset == 0) &&
7692
199k
       (ctxt->hasPErefs == 0))) {
7693
124k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
124k
         "Entity '%s' not defined\n", name);
7695
124k
  } else {
7696
82.4k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
82.4k
         "Entity '%s' not defined\n", name);
7698
82.4k
      if ((ctxt->inSubset == 0) &&
7699
82.4k
    (ctxt->sax != NULL) &&
7700
82.4k
    (ctxt->sax->reference != NULL)) {
7701
79.4k
    ctxt->sax->reference(ctxt->userData, name);
7702
79.4k
      }
7703
82.4k
  }
7704
207k
  ctxt->valid = 0;
7705
207k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
1.21M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
58
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
58
     "Entity reference to unparsed entity %s\n", name);
7715
58
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
1.21M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
1.21M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
2.12k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
2.12k
       "Attribute references external entity '%s'\n", name);
7726
2.12k
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
1.21M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
1.21M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
517k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
3.70k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
88
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
3.70k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
3.70k
        }
7740
517k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
2.15k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
2.15k
                    "'<' in entity '%s' is not allowed in attributes "
7743
2.15k
                    "values\n", name);
7744
517k
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
693k
    else {
7750
693k
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
693k
      default:
7758
693k
      break;
7759
693k
  }
7760
693k
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
1.42M
    return(ent);
7769
1.42M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * Parse an entity reference, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
7.49M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
7.49M
    xmlChar *name;
7805
7.49M
    const xmlChar *ptr;
7806
7.49M
    xmlChar cur;
7807
7.49M
    xmlEntityPtr ent = NULL;
7808
7809
7.49M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
7.49M
    ptr = *str;
7812
7.49M
    cur = *ptr;
7813
7.49M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
7.49M
    ptr++;
7817
7.49M
    name = xmlParseStringName(ctxt, &ptr);
7818
7.49M
    if (name == NULL) {
7819
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
0
           "xmlParseStringEntityRef: no name\n");
7821
0
  *str = ptr;
7822
0
  return(NULL);
7823
0
    }
7824
7.49M
    if (*ptr != ';') {
7825
14
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
14
        xmlFree(name);
7827
14
  *str = ptr;
7828
14
  return(NULL);
7829
14
    }
7830
7.49M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
7.49M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
7.22M
        ent = xmlGetPredefinedEntity(name);
7838
7.22M
        if (ent != NULL) {
7839
11.5k
            xmlFree(name);
7840
11.5k
            *str = ptr;
7841
11.5k
            return(ent);
7842
11.5k
        }
7843
7.22M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have been stored in the parser context.
7848
     */
7849
7.48M
    if (ctxt->sax != NULL) {
7850
7.48M
  if (ctxt->sax->getEntity != NULL)
7851
7.48M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
7.48M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
8.94k
      ent = xmlGetPredefinedEntity(name);
7854
7.48M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
31.2k
      ent = xmlSAX2GetEntity(ctxt, name);
7856
31.2k
  }
7857
7.48M
    }
7858
7.48M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
7.48M
    if (ent == NULL) {
7885
31.2k
  if ((ctxt->standalone == 1) ||
7886
31.2k
      ((ctxt->hasExternalSubset == 0) &&
7887
31.1k
       (ctxt->hasPErefs == 0))) {
7888
28.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
28.7k
         "Entity '%s' not defined\n", name);
7890
28.7k
  } else {
7891
2.52k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
2.52k
        "Entity '%s' not defined\n",
7893
2.52k
        name);
7894
2.52k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
31.2k
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
7.45M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
3
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
3
     "Entity reference to unparsed entity %s\n", name);
7906
3
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
7.45M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
7.45M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
1.03k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
1.03k
   "Attribute references external entity '%s'\n", name);
7917
1.03k
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
7.45M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
7.45M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
7.42M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
2.99k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
123
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
2.99k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
2.99k
        }
7931
7.42M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
39.0k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
39.0k
                    "'<' in entity '%s' is not allowed in attributes "
7934
39.0k
                    "values\n", name);
7935
7.42M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
24.8k
    else {
7941
24.8k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
24.8k
      default:
7949
24.8k
      break;
7950
24.8k
  }
7951
24.8k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
7.48M
    xmlFree(name);
7961
7.48M
    *str = ptr;
7962
7.48M
    return(ent);
7963
7.48M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing its content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
4.95M
{
8000
4.95M
    const xmlChar *name;
8001
4.95M
    xmlEntityPtr entity = NULL;
8002
4.95M
    xmlParserInputPtr input;
8003
8004
4.95M
    if (RAW != '%')
8005
0
        return;
8006
4.95M
    NEXT;
8007
4.95M
    name = xmlParseName(ctxt);
8008
4.95M
    if (name == NULL) {
8009
1.39k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
1.39k
  return;
8011
1.39k
    }
8012
4.95M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
4.95M
    if (RAW != ';') {
8016
657
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
657
        return;
8018
657
    }
8019
8020
4.95M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
4.95M
    if ((ctxt->sax != NULL) &&
8026
4.95M
  (ctxt->sax->getParameterEntity != NULL))
8027
4.95M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
4.95M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
4.95M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
22.3k
  if ((ctxt->standalone == 1) ||
8040
22.3k
      ((ctxt->hasExternalSubset == 0) &&
8041
22.2k
       (ctxt->hasPErefs == 0))) {
8042
283
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
283
            "PEReference: %%%s; not found\n",
8044
283
            name);
8045
22.0k
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
22.0k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
2.82k
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
2.82k
                                 "PEReference: %%%s; not found\n",
8056
2.82k
                                 name, NULL);
8057
2.82k
            } else
8058
19.2k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
19.2k
                              "PEReference: %%%s; not found\n",
8060
19.2k
                              name, NULL);
8061
22.0k
            ctxt->valid = 0;
8062
22.0k
  }
8063
4.92M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
4.92M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
4.92M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
4.92M
  } else {
8073
4.92M
            xmlChar start[4];
8074
4.92M
            xmlCharEncoding enc;
8075
4.92M
            unsigned long parentConsumed;
8076
4.92M
            xmlEntityPtr oldEnt;
8077
8078
4.92M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
4.92M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
4.92M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
4.92M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
4.92M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
4.92M
    (ctxt->replaceEntities == 0) &&
8084
4.92M
    (ctxt->validate == 0))
8085
108
    return;
8086
8087
4.92M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
33
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
33
                xmlHaltParser(ctxt);
8090
33
                return;
8091
33
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
4.92M
            parentConsumed = ctxt->input->parentConsumed;
8095
4.92M
            oldEnt = ctxt->input->entity;
8096
4.92M
            if ((oldEnt == NULL) ||
8097
4.92M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
4.85M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
151k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
151k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
151k
                                     ctxt->input->cur - ctxt->input->base);
8102
151k
            }
8103
8104
4.92M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
4.92M
      if (xmlPushInput(ctxt, input) < 0) {
8106
1.32k
                xmlFreeInputStream(input);
8107
1.32k
    return;
8108
1.32k
            }
8109
8110
4.92M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
4.92M
            input->parentConsumed = parentConsumed;
8113
8114
4.92M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
1.58k
                GROW
8125
1.58k
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
1.58k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
1.56k
                    start[0] = RAW;
8129
1.56k
                    start[1] = NXT(1);
8130
1.56k
                    start[2] = NXT(2);
8131
1.56k
                    start[3] = NXT(3);
8132
1.56k
                    enc = xmlDetectCharEncoding(start, 4);
8133
1.56k
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
3
                        xmlSwitchEncoding(ctxt, enc);
8135
3
                    }
8136
1.56k
                }
8137
8138
1.58k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
1.58k
                    (IS_BLANK_CH(NXT(5)))) {
8140
3
                    xmlParseTextDecl(ctxt);
8141
3
                }
8142
1.58k
            }
8143
4.92M
  }
8144
4.92M
    }
8145
4.94M
    ctxt->hasPErefs = 1;
8146
4.94M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
428
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
428
    xmlParserInputPtr input;
8162
428
    xmlBufferPtr buf;
8163
428
    int l, c;
8164
428
    int count = 0;
8165
8166
428
    if ((ctxt == NULL) || (entity == NULL) ||
8167
428
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
428
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
428
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
428
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
428
    buf = xmlBufferCreate();
8180
428
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
428
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
428
    input = xmlNewEntityInputStream(ctxt, entity);
8188
428
    if (input == NULL) {
8189
78
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
78
              "xmlLoadEntityContent input error");
8191
78
  xmlBufferFree(buf);
8192
78
        return(-1);
8193
78
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
350
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
350
    GROW;
8206
350
    c = CUR_CHAR(l);
8207
1.28M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
1.28M
           (IS_CHAR(c))) {
8209
1.28M
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
1.28M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
12.3k
      count = 0;
8212
12.3k
      GROW;
8213
12.3k
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
12.3k
  }
8218
1.28M
  NEXTL(l);
8219
1.28M
  c = CUR_CHAR(l);
8220
1.28M
  if (c == 0) {
8221
299
      count = 0;
8222
299
      GROW;
8223
299
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
299
      c = CUR_CHAR(l);
8228
299
  }
8229
1.28M
    }
8230
8231
350
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
236
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
236
        xmlPopInput(ctxt);
8234
236
    } else if (!IS_CHAR(c)) {
8235
114
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
114
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
114
                    c);
8238
114
  xmlBufferFree(buf);
8239
114
  return(-1);
8240
114
    }
8241
236
    entity->content = buf->content;
8242
236
    entity->length = buf->use;
8243
236
    buf->content = NULL;
8244
236
    xmlBufferFree(buf);
8245
8246
236
    return(0);
8247
350
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * Parse a parameter entity reference from a string value.
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr for the referenced parameter entity, or NULL on error.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
56.2k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
56.2k
    const xmlChar *ptr;
8283
56.2k
    xmlChar cur;
8284
56.2k
    xmlChar *name;
8285
56.2k
    xmlEntityPtr entity = NULL;
8286
8287
56.2k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
56.2k
    ptr = *str;
8289
56.2k
    cur = *ptr;
8290
56.2k
    if (cur != '%')
8291
0
        return(NULL);
8292
56.2k
    ptr++;
8293
56.2k
    name = xmlParseStringName(ctxt, &ptr);
8294
56.2k
    if (name == NULL) {
8295
586
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
586
           "xmlParseStringPEReference: no name\n");
8297
586
  *str = ptr;
8298
586
  return(NULL);
8299
586
    }
8300
55.6k
    cur = *ptr;
8301
55.6k
    if (cur != ';') {
8302
108
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
108
  xmlFree(name);
8304
108
  *str = ptr;
8305
108
  return(NULL);
8306
108
    }
8307
55.5k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
55.5k
    if ((ctxt->sax != NULL) &&
8313
55.5k
  (ctxt->sax->getParameterEntity != NULL))
8314
55.5k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
55.5k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
55.5k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
6.68k
  if ((ctxt->standalone == 1) ||
8330
6.68k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
6.68k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
6.68k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
6.68k
        "PEReference: %%%s; not found\n",
8343
6.68k
        name, NULL);
8344
6.68k
      ctxt->valid = 0;
8345
6.68k
  }
8346
48.8k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
48.8k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
48.8k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
48.8k
    }
8357
55.5k
    ctxt->hasPErefs = 1;
8358
55.5k
    xmlFree(name);
8359
55.5k
    *str = ptr;
8360
55.5k
    return(entity);
8361
55.5k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
47.9k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
47.9k
    const xmlChar *name = NULL;
8382
47.9k
    xmlChar *ExternalID = NULL;
8383
47.9k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
47.9k
    SKIP(9);
8389
8390
47.9k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
47.9k
    name = xmlParseName(ctxt);
8396
47.9k
    if (name == NULL) {
8397
91
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
91
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
91
    }
8400
47.9k
    ctxt->intSubName = name;
8401
8402
47.9k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
47.9k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
47.9k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
20.0k
        ctxt->hasExternalSubset = 1;
8411
20.0k
    }
8412
47.9k
    ctxt->extSubURI = URI;
8413
47.9k
    ctxt->extSubSystem = ExternalID;
8414
8415
47.9k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
47.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
47.9k
  (!ctxt->disableSAX))
8422
47.0k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
47.9k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
47.9k
    if (RAW == '[')
8431
36.6k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
11.3k
    if (RAW != '>') {
8437
1.28k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
1.28k
    }
8439
11.3k
    NEXT;
8440
11.3k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
36.5k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
36.5k
    if (RAW == '[') {
8457
36.5k
        int baseInputNr = ctxt->inputNr;
8458
36.5k
        ctxt->instate = XML_PARSER_DTD;
8459
36.5k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
36.5k
  SKIP_BLANKS;
8466
5.15M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
5.15M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
5.12M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
5.12M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
5.12M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
5.11M
          xmlParseMarkupDecl(ctxt);
8478
5.11M
            } else if (RAW == '%') {
8479
5.43k
          xmlParsePEReference(ctxt);
8480
8.34k
            } else {
8481
8.34k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
8.34k
                        "xmlParseInternalSubset: error detected in"
8483
8.34k
                        " Markup declaration\n");
8484
8.34k
                xmlHaltParser(ctxt);
8485
8.34k
                return;
8486
8.34k
            }
8487
5.11M
      SKIP_BLANKS;
8488
5.11M
  }
8489
28.2k
  if (RAW == ']') {
8490
25.7k
      NEXT;
8491
25.7k
      SKIP_BLANKS;
8492
25.7k
  }
8493
28.2k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
28.2k
    if (RAW != '>') {
8499
2.72k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
2.72k
  return;
8501
2.72k
    }
8502
25.5k
    NEXT;
8503
25.5k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
667k
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
667k
    const xmlChar *name;
8544
667k
    xmlChar *val;
8545
8546
667k
    *value = NULL;
8547
667k
    GROW;
8548
667k
    name = xmlParseName(ctxt);
8549
667k
    if (name == NULL) {
8550
20.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
20.7k
                 "error parsing attribute name\n");
8552
20.7k
        return(NULL);
8553
20.7k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
647k
    SKIP_BLANKS;
8559
647k
    if (RAW == '=') {
8560
633k
        NEXT;
8561
633k
  SKIP_BLANKS;
8562
633k
  val = xmlParseAttValue(ctxt);
8563
633k
  ctxt->instate = XML_PARSER_CONTENT;
8564
633k
    } else {
8565
13.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
13.3k
         "Specification mandates value for attribute %s\n", name);
8567
13.3k
  return(name);
8568
13.3k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
633k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
731
  if (!xmlCheckLanguageID(val)) {
8577
365
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
365
              "Malformed value for xml:lang : %s\n",
8579
365
        val, NULL);
8580
365
  }
8581
731
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
633k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
118
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
4
      *(ctxt->space) = 0;
8589
114
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
38
      *(ctxt->space) = 1;
8591
76
  else {
8592
76
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
76
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
76
                                 val, NULL);
8595
76
  }
8596
118
    }
8597
8598
633k
    *value = val;
8599
633k
    return(name);
8600
647k
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
679k
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
679k
    const xmlChar *name;
8634
679k
    const xmlChar *attname;
8635
679k
    xmlChar *attvalue;
8636
679k
    const xmlChar **atts = ctxt->atts;
8637
679k
    int nbatts = 0;
8638
679k
    int maxatts = ctxt->maxatts;
8639
679k
    int i;
8640
8641
679k
    if (RAW != '<') return(NULL);
8642
679k
    NEXT1;
8643
8644
679k
    name = xmlParseName(ctxt);
8645
679k
    if (name == NULL) {
8646
18.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
18.4k
       "xmlParseStartTag: invalid element name\n");
8648
18.4k
        return(NULL);
8649
18.4k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
661k
    SKIP_BLANKS;
8657
661k
    GROW;
8658
8659
928k
    while (((RAW != '>') &&
8660
928k
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
928k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
667k
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
667k
        if (attname == NULL) {
8664
20.7k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
20.7k
         "xmlParseStartTag: problem parsing attributes\n");
8666
20.7k
      break;
8667
20.7k
  }
8668
647k
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
880k
      for (i = 0; i < nbatts;i += 2) {
8675
249k
          if (xmlStrEqual(atts[i], attname)) {
8676
559
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
559
        xmlFree(attvalue);
8678
559
        goto failed;
8679
559
    }
8680
249k
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
631k
      if (atts == NULL) {
8685
13.2k
          maxatts = 22; /* allow for 10 attrs by default */
8686
13.2k
          atts = (const xmlChar **)
8687
13.2k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
13.2k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
13.2k
    ctxt->atts = atts;
8695
13.2k
    ctxt->maxatts = maxatts;
8696
618k
      } else if (nbatts + 4 > maxatts) {
8697
29
          const xmlChar **n;
8698
8699
29
          maxatts *= 2;
8700
29
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
29
               maxatts * sizeof(const xmlChar *));
8702
29
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
29
    atts = n;
8709
29
    ctxt->atts = atts;
8710
29
    ctxt->maxatts = maxatts;
8711
29
      }
8712
631k
      atts[nbatts++] = attname;
8713
631k
      atts[nbatts++] = attvalue;
8714
631k
      atts[nbatts] = NULL;
8715
631k
      atts[nbatts + 1] = NULL;
8716
631k
  } else {
8717
14.8k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
14.8k
  }
8720
8721
647k
failed:
8722
8723
647k
  GROW
8724
647k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
380k
      break;
8726
266k
  if (SKIP_BLANKS == 0) {
8727
25.3k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
25.3k
         "attributes construct error\n");
8729
25.3k
  }
8730
266k
  SHRINK;
8731
266k
        GROW;
8732
266k
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
661k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
661k
  (!ctxt->disableSAX)) {
8739
604k
  if (nbatts > 0)
8740
355k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
248k
  else
8742
248k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
604k
    }
8744
8745
661k
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
1.24M
        for (i = 1;i < nbatts;i+=2)
8748
631k
      if (atts[i] != NULL)
8749
631k
         xmlFree((xmlChar *) atts[i]);
8750
618k
    }
8751
661k
    return(name);
8752
661k
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
364k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
364k
    const xmlChar *name;
8772
8773
364k
    GROW;
8774
364k
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
364k
    SKIP(2);
8780
8781
364k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
364k
    GROW;
8787
364k
    SKIP_BLANKS;
8788
364k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
5.42k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
5.42k
    } else
8791
358k
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
364k
    if (name != (xmlChar*)1) {
8800
15.0k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
15.0k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
15.0k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
15.0k
                    ctxt->name, line, name);
8804
15.0k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
364k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
364k
  (!ctxt->disableSAX))
8811
338k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
364k
    namePop(ctxt);
8814
364k
    spacePop(ctxt);
8815
364k
    return;
8816
364k
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
1.38M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
1.38M
    int i;
8858
8859
1.38M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
1.43M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
158k
        if (ctxt->nsTab[i] == prefix) {
8862
94.1k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
1.86k
          return(NULL);
8864
92.2k
      return(ctxt->nsTab[i + 1]);
8865
94.1k
  }
8866
1.27M
    return(NULL);
8867
1.36M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
2.94M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
2.94M
    const xmlChar *l, *p;
8886
8887
2.94M
    GROW;
8888
8889
2.94M
    l = xmlParseNCName(ctxt);
8890
2.94M
    if (l == NULL) {
8891
40.4k
        if (CUR == ':') {
8892
768
      l = xmlParseName(ctxt);
8893
768
      if (l != NULL) {
8894
768
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
768
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
768
    *prefix = NULL;
8897
768
    return(l);
8898
768
      }
8899
768
  }
8900
39.6k
        return(NULL);
8901
40.4k
    }
8902
2.90M
    if (CUR == ':') {
8903
117k
        NEXT;
8904
117k
  p = l;
8905
117k
  l = xmlParseNCName(ctxt);
8906
117k
  if (l == NULL) {
8907
1.63k
      xmlChar *tmp;
8908
8909
1.63k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
1.63k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
1.63k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
1.63k
      l = xmlParseNmtoken(ctxt);
8914
1.63k
      if (l == NULL) {
8915
987
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
987
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
987
            } else {
8919
648
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
648
    xmlFree((char *)l);
8921
648
      }
8922
1.63k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
1.63k
      if (tmp != NULL) xmlFree(tmp);
8924
1.63k
      *prefix = NULL;
8925
1.63k
      return(p);
8926
1.63k
  }
8927
115k
  if (CUR == ':') {
8928
999
      xmlChar *tmp;
8929
8930
999
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
999
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
999
      NEXT;
8933
999
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
999
      if (tmp != NULL) {
8935
825
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
825
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
825
    if (tmp != NULL) xmlFree(tmp);
8938
825
    *prefix = p;
8939
825
    return(l);
8940
825
      }
8941
174
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
174
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
174
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
174
      if (tmp != NULL) xmlFree(tmp);
8946
174
      *prefix = p;
8947
174
      return(l);
8948
174
  }
8949
114k
  *prefix = p;
8950
114k
    } else
8951
2.78M
        *prefix = NULL;
8952
2.90M
    return(l);
8953
2.90M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
38.7k
                        xmlChar const *prefix) {
8971
38.7k
    const xmlChar *cmp;
8972
38.7k
    const xmlChar *in;
8973
38.7k
    const xmlChar *ret;
8974
38.7k
    const xmlChar *prefix2;
8975
8976
38.7k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
38.7k
    GROW;
8979
38.7k
    in = ctxt->input->cur;
8980
8981
38.7k
    cmp = prefix;
8982
133k
    while (*in != 0 && *in == *cmp) {
8983
94.8k
  ++in;
8984
94.8k
  ++cmp;
8985
94.8k
    }
8986
38.7k
    if ((*cmp == 0) && (*in == ':')) {
8987
35.4k
        in++;
8988
35.4k
  cmp = name;
8989
282k
  while (*in != 0 && *in == *cmp) {
8990
246k
      ++in;
8991
246k
      ++cmp;
8992
246k
  }
8993
35.4k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
31.9k
            ctxt->input->col += in - ctxt->input->cur;
8996
31.9k
      ctxt->input->cur = in;
8997
31.9k
      return((const xmlChar*) 1);
8998
31.9k
  }
8999
35.4k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
6.73k
    ret = xmlParseQName (ctxt, &prefix2);
9004
6.73k
    if ((ret == name) && (prefix == prefix2))
9005
188
  return((const xmlChar*) 1);
9006
6.55k
    return ret;
9007
6.73k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
838
    const xmlChar *oldbase = ctxt->input->base;\
9045
838
    GROW;\
9046
838
    if (ctxt->instate == XML_PARSER_EOF)\
9047
838
        return(NULL);\
9048
838
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
838
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
2.20M
{
9059
2.20M
    xmlChar limit = 0;
9060
2.20M
    const xmlChar *in = NULL, *start, *end, *last;
9061
2.20M
    xmlChar *ret = NULL;
9062
2.20M
    int line, col;
9063
2.20M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
674k
                    XML_MAX_HUGE_LENGTH :
9065
2.20M
                    XML_MAX_TEXT_LENGTH;
9066
9067
2.20M
    GROW;
9068
2.20M
    in = (xmlChar *) CUR_PTR;
9069
2.20M
    line = ctxt->input->line;
9070
2.20M
    col = ctxt->input->col;
9071
2.20M
    if (*in != '"' && *in != '\'') {
9072
4.20k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
4.20k
        return (NULL);
9074
4.20k
    }
9075
2.20M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
2.20M
    limit = *in++;
9083
2.20M
    col++;
9084
2.20M
    end = ctxt->input->end;
9085
2.20M
    start = in;
9086
2.20M
    if (in >= end) {
9087
154
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
154
    }
9089
2.20M
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
127k
  while ((in < end) && (*in != limit) &&
9094
127k
         ((*in == 0x20) || (*in == 0x9) ||
9095
126k
          (*in == 0xA) || (*in == 0xD))) {
9096
61.2k
      if (*in == 0xA) {
9097
2.90k
          line++; col = 1;
9098
58.3k
      } else {
9099
58.3k
          col++;
9100
58.3k
      }
9101
61.2k
      in++;
9102
61.2k
      start = in;
9103
61.2k
      if (in >= end) {
9104
27
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
27
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
27
      }
9111
61.2k
  }
9112
662k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
662k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
596k
      col++;
9115
596k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
596k
      if (in >= end) {
9117
54
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
54
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
54
      }
9124
596k
  }
9125
66.0k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
66.8k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
105k
  while ((in < end) && (*in != limit) &&
9131
105k
         ((*in == 0x20) || (*in == 0x9) ||
9132
42.7k
          (*in == 0xA) || (*in == 0xD))) {
9133
38.9k
      if (*in == 0xA) {
9134
3.05k
          line++, col = 1;
9135
35.9k
      } else {
9136
35.9k
          col++;
9137
35.9k
      }
9138
38.9k
      in++;
9139
38.9k
      if (in >= end) {
9140
53
    const xmlChar *oldbase = ctxt->input->base;
9141
53
    GROW;
9142
53
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
53
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
53
    end = ctxt->input->end;
9151
53
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
53
      }
9157
38.9k
  }
9158
66.0k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
66.0k
  if (*in != limit) goto need_complex;
9164
2.13M
    } else {
9165
27.0M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
27.0M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
24.9M
      in++;
9168
24.9M
      col++;
9169
24.9M
      if (in >= end) {
9170
603
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
603
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
603
      }
9177
24.9M
  }
9178
2.13M
  last = in;
9179
2.13M
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
2.13M
  if (*in != limit) goto need_complex;
9185
2.13M
    }
9186
2.14M
    in++;
9187
2.14M
    col++;
9188
2.14M
    if (len != NULL) {
9189
1.51M
        if (alloc) *alloc = 0;
9190
1.51M
        *len = last - start;
9191
1.51M
        ret = (xmlChar *) start;
9192
1.51M
    } else {
9193
635k
        if (alloc) *alloc = 1;
9194
635k
        ret = xmlStrndup(start, last - start);
9195
635k
    }
9196
2.14M
    CUR_PTR = in;
9197
2.14M
    ctxt->input->line = line;
9198
2.14M
    ctxt->input->col = col;
9199
2.14M
    return ret;
9200
57.6k
need_complex:
9201
57.6k
    if (alloc) *alloc = 1;
9202
57.6k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
2.20M
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
1.56M
{
9226
1.56M
    const xmlChar *name;
9227
1.56M
    xmlChar *val, *internal_val = NULL;
9228
1.56M
    int normalize = 0;
9229
9230
1.56M
    *value = NULL;
9231
1.56M
    GROW;
9232
1.56M
    name = xmlParseQName(ctxt, prefix);
9233
1.56M
    if (name == NULL) {
9234
11.7k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
11.7k
                       "error parsing attribute name\n");
9236
11.7k
        return (NULL);
9237
11.7k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
1.55M
    if (ctxt->attsSpecial != NULL) {
9243
139k
        int type;
9244
9245
139k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
139k
                                                 pref, elem, *prefix, name);
9247
139k
        if (type != 0)
9248
66.2k
            normalize = 1;
9249
139k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
1.55M
    SKIP_BLANKS;
9255
1.55M
    if (RAW == '=') {
9256
1.54M
        NEXT;
9257
1.54M
        SKIP_BLANKS;
9258
1.54M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
1.54M
        if (val == NULL)
9260
2.15k
            return (NULL);
9261
1.54M
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
66.0k
      if (*alloc) {
9269
3.90k
          const xmlChar *val2;
9270
9271
3.90k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
3.90k
    if ((val2 != NULL) && (val2 != val)) {
9273
789
        xmlFree(val);
9274
789
        val = (xmlChar *) val2;
9275
789
    }
9276
3.90k
      }
9277
66.0k
  }
9278
1.54M
        ctxt->instate = XML_PARSER_CONTENT;
9279
1.54M
    } else {
9280
6.98k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
6.98k
                          "Specification mandates value for attribute %s\n",
9282
6.98k
                          name);
9283
6.98k
        return (name);
9284
6.98k
    }
9285
9286
1.54M
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
3.72k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
755
            internal_val = xmlStrndup(val, *len);
9294
755
            if (!xmlCheckLanguageID(internal_val)) {
9295
249
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
249
                              "Malformed value for xml:lang : %s\n",
9297
249
                              internal_val, NULL);
9298
249
            }
9299
755
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
3.72k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
130
            internal_val = xmlStrndup(val, *len);
9306
130
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
4
                *(ctxt->space) = 0;
9308
126
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
56
                *(ctxt->space) = 1;
9310
70
            else {
9311
70
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
70
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
70
                              internal_val, NULL);
9314
70
            }
9315
130
        }
9316
3.72k
        if (internal_val) {
9317
885
            xmlFree(internal_val);
9318
885
        }
9319
3.72k
    }
9320
9321
1.54M
    *value = val;
9322
1.54M
    return (name);
9323
1.55M
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
1.37M
                  const xmlChar **URI, int *tlen) {
9356
1.37M
    const xmlChar *localname;
9357
1.37M
    const xmlChar *prefix;
9358
1.37M
    const xmlChar *attname;
9359
1.37M
    const xmlChar *aprefix;
9360
1.37M
    const xmlChar *nsname;
9361
1.37M
    xmlChar *attvalue;
9362
1.37M
    const xmlChar **atts = ctxt->atts;
9363
1.37M
    int maxatts = ctxt->maxatts;
9364
1.37M
    int nratts, nbatts, nbdef, inputid;
9365
1.37M
    int i, j, nbNs, attval;
9366
1.37M
    unsigned long cur;
9367
1.37M
    int nsNr = ctxt->nsNr;
9368
9369
1.37M
    if (RAW != '<') return(NULL);
9370
1.37M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
1.37M
    SHRINK;
9380
1.37M
    cur = ctxt->input->cur - ctxt->input->base;
9381
1.37M
    inputid = ctxt->input->id;
9382
1.37M
    nbatts = 0;
9383
1.37M
    nratts = 0;
9384
1.37M
    nbdef = 0;
9385
1.37M
    nbNs = 0;
9386
1.37M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
1.37M
    ctxt->nsNr = nsNr;
9389
9390
1.37M
    localname = xmlParseQName(ctxt, &prefix);
9391
1.37M
    if (localname == NULL) {
9392
27.6k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
27.6k
           "StartTag: invalid element name\n");
9394
27.6k
        return(NULL);
9395
27.6k
    }
9396
1.34M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
1.34M
    SKIP_BLANKS;
9404
1.34M
    GROW;
9405
9406
1.98M
    while (((RAW != '>') &&
9407
1.98M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
1.98M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
1.56M
  int len = -1, alloc = 0;
9410
9411
1.56M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
1.56M
                               &aprefix, &attvalue, &len, &alloc);
9413
1.56M
        if (attname == NULL) {
9414
13.8k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
13.8k
           "xmlParseStartTag: problem parsing attributes\n");
9416
13.8k
      break;
9417
13.8k
  }
9418
1.55M
        if (attvalue == NULL)
9419
6.98k
            goto next_attr;
9420
1.54M
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
1.54M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
5.47k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
5.47k
            xmlURIPtr uri;
9425
9426
5.47k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
5.47k
            if (*URL != 0) {
9434
5.30k
                uri = xmlParseURI((const char *) URL);
9435
5.30k
                if (uri == NULL) {
9436
1.44k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
1.44k
                             "xmlns: '%s' is not a valid URI\n",
9438
1.44k
                                       URL, NULL, NULL);
9439
3.85k
                } else {
9440
3.85k
                    if (uri->scheme == NULL) {
9441
481
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
481
                                  "xmlns: URI %s is not absolute\n",
9443
481
                                  URL, NULL, NULL);
9444
481
                    }
9445
3.85k
                    xmlFreeURI(uri);
9446
3.85k
                }
9447
5.30k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
5.30k
                if ((len == 29) &&
9456
5.30k
                    (xmlStrEqual(URL,
9457
53
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
6
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
6
                         "reuse of the xmlns namespace name is forbidden\n",
9460
6
                             NULL, NULL, NULL);
9461
6
                    goto next_attr;
9462
6
                }
9463
5.30k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
7.96k
            for (j = 1;j <= nbNs;j++)
9468
2.83k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
334
                    break;
9470
5.46k
            if (j <= nbNs)
9471
334
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
5.13k
            else
9473
5.13k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
1.53M
        } else if (aprefix == ctxt->str_xmlns) {
9476
13.1k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
13.1k
            xmlURIPtr uri;
9478
9479
13.1k
            if (attname == ctxt->str_xml) {
9480
92
                if (URL != ctxt->str_xml_ns) {
9481
92
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
92
                             "xml namespace prefix mapped to wrong URI\n",
9483
92
                             NULL, NULL, NULL);
9484
92
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
92
                goto next_attr;
9489
92
            }
9490
13.0k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
13.0k
            if (attname == ctxt->str_xmlns) {
9499
16
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
16
                         "redefinition of the xmlns prefix is forbidden\n",
9501
16
                         NULL, NULL, NULL);
9502
16
                goto next_attr;
9503
16
            }
9504
13.0k
            if ((len == 29) &&
9505
13.0k
                (xmlStrEqual(URL,
9506
432
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
9
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
9
                         "reuse of the xmlns namespace name is forbidden\n",
9509
9
                         NULL, NULL, NULL);
9510
9
                goto next_attr;
9511
9
            }
9512
12.9k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
92
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
92
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
92
                              attname, NULL, NULL);
9516
92
                goto next_attr;
9517
12.9k
            } else {
9518
12.9k
                uri = xmlParseURI((const char *) URL);
9519
12.9k
                if (uri == NULL) {
9520
2.24k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
2.24k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
2.24k
                                       attname, URL, NULL);
9523
10.6k
                } else {
9524
10.6k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
129
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
129
                                  "xmlns:%s: URI %s is not absolute\n",
9527
129
                                  attname, URL, NULL);
9528
129
                    }
9529
10.6k
                    xmlFreeURI(uri);
9530
10.6k
                }
9531
12.9k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
18.3k
            for (j = 1;j <= nbNs;j++)
9537
5.64k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
227
                    break;
9539
12.9k
            if (j <= nbNs)
9540
227
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
12.6k
            else
9542
12.6k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
1.52M
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
1.52M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
19.4k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
19.4k
                maxatts = ctxt->maxatts;
9553
19.4k
                atts = ctxt->atts;
9554
19.4k
            }
9555
1.52M
            ctxt->attallocs[nratts++] = alloc;
9556
1.52M
            atts[nbatts++] = attname;
9557
1.52M
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
1.52M
            if (alloc)
9565
28.3k
                atts[nbatts++] = NULL;
9566
1.49M
            else
9567
1.49M
                atts[nbatts++] = ctxt->input->base;
9568
1.52M
            atts[nbatts++] = attvalue;
9569
1.52M
            attvalue += len;
9570
1.52M
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
1.52M
            if (alloc != 0) attval = 1;
9575
1.52M
            attvalue = NULL; /* moved into atts */
9576
1.52M
        }
9577
9578
1.55M
next_attr:
9579
1.55M
        if ((attvalue != NULL) && (alloc != 0)) {
9580
4.15k
            xmlFree(attvalue);
9581
4.15k
            attvalue = NULL;
9582
4.15k
        }
9583
9584
1.55M
  GROW
9585
1.55M
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
1.55M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
884k
      break;
9589
666k
  if (SKIP_BLANKS == 0) {
9590
21.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
21.6k
         "attributes construct error\n");
9592
21.6k
      break;
9593
21.6k
  }
9594
645k
        GROW;
9595
645k
    }
9596
9597
1.34M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
2.86M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
1.52M
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
1.49M
            const xmlChar *old = atts[i+2];
9612
1.49M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
1.49M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
1.49M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
1.49M
        }
9616
1.52M
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
1.34M
    if (ctxt->attsDefault != NULL) {
9622
194k
        xmlDefAttrsPtr defaults;
9623
9624
194k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
194k
  if (defaults != NULL) {
9626
43.8k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
30.7k
          attname = defaults->values[5 * i];
9628
30.7k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
30.7k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
346
        for (j = 1;j <= nbNs;j++)
9638
198
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
99
          break;
9640
247
              if (j <= nbNs) continue;
9641
9642
148
        nsname = xmlGetNamespace(ctxt, NULL);
9643
148
        if (nsname != defaults->values[5 * i + 2]) {
9644
115
      if (nsPush(ctxt, NULL,
9645
115
                 defaults->values[5 * i + 2]) > 0)
9646
115
          nbNs++;
9647
115
        }
9648
30.4k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
879
        for (j = 1;j <= nbNs;j++)
9653
426
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
358
          break;
9655
811
              if (j <= nbNs) continue;
9656
9657
453
        nsname = xmlGetNamespace(ctxt, attname);
9658
453
        if (nsname != defaults->values[5 * i + 2]) {
9659
340
      if (nsPush(ctxt, attname,
9660
340
                 defaults->values[5 * i + 2]) > 0)
9661
340
          nbNs++;
9662
340
        }
9663
29.6k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
83.5k
        for (j = 0;j < nbatts;j+=5) {
9668
54.3k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
481
          break;
9670
54.3k
        }
9671
29.6k
        if (j < nbatts) continue;
9672
9673
29.2k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
382
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
382
      maxatts = ctxt->maxatts;
9679
382
      atts = ctxt->atts;
9680
382
        }
9681
29.2k
        atts[nbatts++] = attname;
9682
29.2k
        atts[nbatts++] = aprefix;
9683
29.2k
        if (aprefix == NULL)
9684
23.2k
      atts[nbatts++] = NULL;
9685
5.99k
        else
9686
5.99k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
29.2k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
29.2k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
29.2k
        if ((ctxt->standalone == 1) &&
9690
29.2k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
29.2k
        nbdef++;
9696
29.2k
    }
9697
30.7k
      }
9698
13.1k
  }
9699
194k
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
2.89M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
1.55M
  if (atts[i + 1] != NULL) {
9709
32.1k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
32.1k
      if (nsname == NULL) {
9711
12.1k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
12.1k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
12.1k
        atts[i + 1], atts[i], localname);
9714
12.1k
      }
9715
32.1k
      atts[i + 2] = nsname;
9716
32.1k
  } else
9717
1.52M
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
2.25M
        for (j = 0; j < i;j += 5) {
9725
699k
      if (atts[i] == atts[j]) {
9726
2.12k
          if (atts[i+1] == atts[j+1]) {
9727
710
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
710
        break;
9729
710
    }
9730
1.41k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
72
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
72
           "Namespaced Attribute %s in '%s' redefined\n",
9733
72
           atts[i], nsname, NULL);
9734
72
        break;
9735
72
    }
9736
1.41k
      }
9737
699k
  }
9738
1.55M
    }
9739
9740
1.34M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
1.34M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
27.8k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
27.8k
           "Namespace prefix %s on %s is not defined\n",
9744
27.8k
     prefix, localname, NULL);
9745
27.8k
    }
9746
1.34M
    *pref = prefix;
9747
1.34M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
1.34M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
1.34M
  (!ctxt->disableSAX)) {
9754
1.19M
  if (nbNs > 0)
9755
9.96k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
9.96k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
9.96k
        nbatts / 5, nbdef, atts);
9758
1.18M
  else
9759
1.18M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
1.18M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
1.19M
    }
9762
9763
1.34M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
1.34M
    if (attval != 0) {
9768
62.0k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
34.8k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
28.3k
          xmlFree((xmlChar *) atts[i]);
9771
27.2k
    }
9772
9773
1.34M
    return(localname);
9774
1.34M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
613k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
613k
    const xmlChar *name;
9794
9795
613k
    GROW;
9796
613k
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
613k
    SKIP(2);
9801
9802
613k
    if (tag->prefix == NULL)
9803
574k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
38.7k
    else
9805
38.7k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
613k
    GROW;
9811
613k
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
613k
    SKIP_BLANKS;
9814
613k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
7.03k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
7.03k
    } else
9817
606k
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
613k
    if (name != (xmlChar*)1) {
9826
21.1k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
21.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
21.1k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
21.1k
                    ctxt->name, tag->line, name);
9830
21.1k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
613k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
613k
  (!ctxt->disableSAX))
9837
527k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
527k
                                tag->URI);
9839
9840
613k
    spacePop(ctxt);
9841
613k
    if (tag->nsNr != 0)
9842
3.27k
  nsPop(ctxt, tag->nsNr);
9843
613k
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
3.48k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
3.48k
    xmlChar *buf = NULL;
9864
3.48k
    int len = 0;
9865
3.48k
    int size = XML_PARSER_BUFFER_SIZE;
9866
3.48k
    int r, rl;
9867
3.48k
    int s, sl;
9868
3.48k
    int cur, l;
9869
3.48k
    int count = 0;
9870
3.48k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
1.07k
                    XML_MAX_HUGE_LENGTH :
9872
3.48k
                    XML_MAX_TEXT_LENGTH;
9873
9874
3.48k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
3.48k
    SKIP(3);
9877
9878
3.48k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
3.48k
    SKIP(6);
9881
9882
3.48k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
3.48k
    r = CUR_CHAR(rl);
9884
3.48k
    if (!IS_CHAR(r)) {
9885
68
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
68
        goto out;
9887
68
    }
9888
3.41k
    NEXTL(rl);
9889
3.41k
    s = CUR_CHAR(sl);
9890
3.41k
    if (!IS_CHAR(s)) {
9891
63
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
63
        goto out;
9893
63
    }
9894
3.35k
    NEXTL(sl);
9895
3.35k
    cur = CUR_CHAR(l);
9896
3.35k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
3.35k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
2.40M
    while (IS_CHAR(cur) &&
9902
2.40M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
2.39M
  if (len + 5 >= size) {
9904
4.98k
      xmlChar *tmp;
9905
9906
4.98k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
4.98k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
4.98k
      buf = tmp;
9912
4.98k
      size *= 2;
9913
4.98k
  }
9914
2.39M
  COPY_BUF(rl,buf,len,r);
9915
2.39M
  r = s;
9916
2.39M
  rl = sl;
9917
2.39M
  s = cur;
9918
2.39M
  sl = l;
9919
2.39M
  count++;
9920
2.39M
  if (count > 50) {
9921
45.4k
      SHRINK;
9922
45.4k
      GROW;
9923
45.4k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
45.4k
      count = 0;
9927
45.4k
  }
9928
2.39M
  NEXTL(l);
9929
2.39M
  cur = CUR_CHAR(l);
9930
2.39M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
2.39M
    }
9936
3.35k
    buf[len] = 0;
9937
3.35k
    if (cur != '>') {
9938
640
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
640
                       "CData section not finished\n%.50s\n", buf);
9940
640
        goto out;
9941
640
    }
9942
2.71k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
2.71k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
2.02k
  if (ctxt->sax->cdataBlock != NULL)
9949
1.36k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
666
  else if (ctxt->sax->characters != NULL)
9951
666
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
2.02k
    }
9953
9954
3.48k
out:
9955
3.48k
    if (ctxt->instate != XML_PARSER_EOF)
9956
3.48k
        ctxt->instate = XML_PARSER_CONTENT;
9957
3.48k
    xmlFree(buf);
9958
3.48k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
37.7k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
37.7k
    int nameNr = ctxt->nameNr;
9971
9972
37.7k
    GROW;
9973
3.13M
    while ((RAW != 0) &&
9974
3.13M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
3.10M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
3.10M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
3.22k
      xmlParsePI(ctxt);
9982
3.22k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
3.10M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
3.48k
      xmlParseCDSect(ctxt);
9990
3.48k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
3.10M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
3.10M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
25.0k
      xmlParseComment(ctxt);
9998
25.0k
      ctxt->instate = XML_PARSER_CONTENT;
9999
25.0k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
3.07M
  else if (*cur == '<') {
10005
1.30M
            if (NXT(1) == '/') {
10006
433k
                if (ctxt->nameNr <= nameNr)
10007
7.57k
                    break;
10008
426k
          xmlParseElementEnd(ctxt);
10009
870k
            } else {
10010
870k
          xmlParseElementStart(ctxt);
10011
870k
            }
10012
1.30M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
1.77M
  else if (*cur == '&') {
10020
457k
      xmlParseReference(ctxt);
10021
457k
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
1.31M
  else {
10027
1.31M
      xmlParseCharData(ctxt, 0);
10028
1.31M
  }
10029
10030
3.09M
  GROW;
10031
3.09M
  SHRINK;
10032
3.09M
    }
10033
37.7k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
22.5k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
22.5k
    int nameNr = ctxt->nameNr;
10047
10048
22.5k
    xmlParseContentInternal(ctxt);
10049
10050
22.5k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
554
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
554
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
554
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
554
                "Premature end of data in tag %s line %d\n",
10055
554
    name, line, NULL);
10056
554
    }
10057
22.5k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
21.9k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
21.9k
    if (xmlParseElementStart(ctxt) != 0)
10078
6.75k
        return;
10079
10080
15.1k
    xmlParseContentInternal(ctxt);
10081
15.1k
    if (ctxt->instate == XML_PARSER_EOF)
10082
57
  return;
10083
10084
15.0k
    if (CUR == 0) {
10085
8.04k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
8.04k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
8.04k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
8.04k
                "Premature end of data in tag %s line %d\n",
10089
8.04k
    name, line, NULL);
10090
8.04k
        return;
10091
8.04k
    }
10092
10093
7.05k
    xmlParseElementEnd(ctxt);
10094
7.05k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
892k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
892k
    const xmlChar *name;
10108
892k
    const xmlChar *prefix = NULL;
10109
892k
    const xmlChar *URI = NULL;
10110
892k
    xmlParserNodeInfo node_info;
10111
892k
    int line, tlen = 0;
10112
892k
    xmlNodePtr ret;
10113
892k
    int nsNr = ctxt->nsNr;
10114
10115
892k
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
892k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
892k
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
892k
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
892k
    else if (*ctxt->space == -2)
10134
125k
  spacePush(ctxt, -1);
10135
767k
    else
10136
767k
  spacePush(ctxt, *ctxt->space);
10137
10138
892k
    line = ctxt->input->line;
10139
892k
#ifdef LIBXML_SAX1_ENABLED
10140
892k
    if (ctxt->sax2)
10141
594k
#endif /* LIBXML_SAX1_ENABLED */
10142
594k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
297k
#ifdef LIBXML_SAX1_ENABLED
10144
297k
    else
10145
297k
  name = xmlParseStartTag(ctxt);
10146
892k
#endif /* LIBXML_SAX1_ENABLED */
10147
892k
    if (ctxt->instate == XML_PARSER_EOF)
10148
35
  return(-1);
10149
892k
    if (name == NULL) {
10150
42.0k
  spacePop(ctxt);
10151
42.0k
        return(-1);
10152
42.0k
    }
10153
850k
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
850k
    ret = ctxt->node;
10155
10156
850k
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
850k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
850k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
850k
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
850k
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
368k
        SKIP(2);
10172
368k
  if (ctxt->sax2) {
10173
262k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
262k
    (!ctxt->disableSAX))
10175
210k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
262k
#ifdef LIBXML_SAX1_ENABLED
10177
262k
  } else {
10178
106k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
106k
    (!ctxt->disableSAX))
10180
79.4k
    ctxt->sax->endElement(ctxt->userData, name);
10181
106k
#endif /* LIBXML_SAX1_ENABLED */
10182
106k
  }
10183
368k
  namePop(ctxt);
10184
368k
  spacePop(ctxt);
10185
368k
  if (nsNr != ctxt->nsNr)
10186
475
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
368k
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
368k
  return(1);
10195
368k
    }
10196
481k
    if (RAW == '>') {
10197
456k
        NEXT1;
10198
456k
    } else {
10199
24.9k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
24.9k
         "Couldn't find end of Start Tag %s line %d\n",
10201
24.9k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
24.9k
  nodePop(ctxt);
10207
24.9k
  namePop(ctxt);
10208
24.9k
  spacePop(ctxt);
10209
24.9k
  if (nsNr != ctxt->nsNr)
10210
1.35k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
24.9k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
24.9k
  return(-1);
10223
24.9k
    }
10224
10225
456k
    return(0);
10226
481k
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
433k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
433k
    xmlParserNodeInfo node_info;
10237
433k
    xmlNodePtr ret = ctxt->node;
10238
10239
433k
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
433k
    if (ctxt->sax2) {
10249
278k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
278k
  namePop(ctxt);
10251
278k
    }
10252
155k
#ifdef LIBXML_SAX1_ENABLED
10253
155k
    else
10254
155k
  xmlParseEndTag1(ctxt, 0);
10255
433k
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
433k
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
433k
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
44.0k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
44.0k
    xmlChar *buf = NULL;
10286
44.0k
    int len = 0;
10287
44.0k
    int size = 10;
10288
44.0k
    xmlChar cur;
10289
10290
44.0k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
44.0k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
44.0k
    cur = CUR;
10296
44.0k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
267
  xmlFree(buf);
10298
267
  return(NULL);
10299
267
    }
10300
43.8k
    buf[len++] = cur;
10301
43.8k
    NEXT;
10302
43.8k
    cur=CUR;
10303
43.8k
    if (cur != '.') {
10304
557
  xmlFree(buf);
10305
557
  return(NULL);
10306
557
    }
10307
43.2k
    buf[len++] = cur;
10308
43.2k
    NEXT;
10309
43.2k
    cur=CUR;
10310
142k
    while ((cur >= '0') && (cur <= '9')) {
10311
98.7k
  if (len + 1 >= size) {
10312
255
      xmlChar *tmp;
10313
10314
255
      size *= 2;
10315
255
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
255
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
255
      buf = tmp;
10322
255
  }
10323
98.7k
  buf[len++] = cur;
10324
98.7k
  NEXT;
10325
98.7k
  cur=CUR;
10326
98.7k
    }
10327
43.2k
    buf[len] = 0;
10328
43.2k
    return(buf);
10329
43.2k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
47.4k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
47.4k
    xmlChar *version = NULL;
10349
10350
47.4k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
44.9k
  SKIP(7);
10352
44.9k
  SKIP_BLANKS;
10353
44.9k
  if (RAW != '=') {
10354
457
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
457
      return(NULL);
10356
457
        }
10357
44.5k
  NEXT;
10358
44.5k
  SKIP_BLANKS;
10359
44.5k
  if (RAW == '"') {
10360
40.6k
      NEXT;
10361
40.6k
      version = xmlParseVersionNum(ctxt);
10362
40.6k
      if (RAW != '"') {
10363
1.31k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
1.31k
      } else
10365
39.2k
          NEXT;
10366
40.6k
  } else if (RAW == '\''){
10367
3.49k
      NEXT;
10368
3.49k
      version = xmlParseVersionNum(ctxt);
10369
3.49k
      if (RAW != '\'') {
10370
120
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
120
      } else
10372
3.37k
          NEXT;
10373
3.49k
  } else {
10374
422
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
422
  }
10376
44.5k
    }
10377
47.0k
    return(version);
10378
47.4k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
17.6k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
17.6k
    xmlChar *buf = NULL;
10395
17.6k
    int len = 0;
10396
17.6k
    int size = 10;
10397
17.6k
    xmlChar cur;
10398
10399
17.6k
    cur = CUR;
10400
17.6k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
17.6k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
17.5k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
17.5k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
17.5k
  buf[len++] = cur;
10409
17.5k
  NEXT;
10410
17.5k
  cur = CUR;
10411
208k
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
208k
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
208k
         ((cur >= '0') && (cur <= '9')) ||
10414
208k
         (cur == '.') || (cur == '_') ||
10415
208k
         (cur == '-')) {
10416
190k
      if (len + 1 >= size) {
10417
7.35k
          xmlChar *tmp;
10418
10419
7.35k
    size *= 2;
10420
7.35k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
7.35k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
7.35k
    buf = tmp;
10427
7.35k
      }
10428
190k
      buf[len++] = cur;
10429
190k
      NEXT;
10430
190k
      cur = CUR;
10431
190k
      if (cur == 0) {
10432
106
          SHRINK;
10433
106
    GROW;
10434
106
    cur = CUR;
10435
106
      }
10436
190k
        }
10437
17.5k
  buf[len] = 0;
10438
17.5k
    } else {
10439
108
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
108
    }
10441
17.6k
    return(buf);
10442
17.6k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
28.5k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
28.5k
    xmlChar *encoding = NULL;
10462
10463
28.5k
    SKIP_BLANKS;
10464
28.5k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
17.8k
  SKIP(8);
10466
17.8k
  SKIP_BLANKS;
10467
17.8k
  if (RAW != '=') {
10468
81
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
81
      return(NULL);
10470
81
        }
10471
17.7k
  NEXT;
10472
17.7k
  SKIP_BLANKS;
10473
17.7k
  if (RAW == '"') {
10474
15.8k
      NEXT;
10475
15.8k
      encoding = xmlParseEncName(ctxt);
10476
15.8k
      if (RAW != '"') {
10477
612
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
612
    xmlFree((xmlChar *) encoding);
10479
612
    return(NULL);
10480
612
      } else
10481
15.2k
          NEXT;
10482
15.8k
  } else if (RAW == '\''){
10483
1.80k
      NEXT;
10484
1.80k
      encoding = xmlParseEncName(ctxt);
10485
1.80k
      if (RAW != '\'') {
10486
30
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
30
    xmlFree((xmlChar *) encoding);
10488
30
    return(NULL);
10489
30
      } else
10490
1.77k
          NEXT;
10491
1.80k
  } else {
10492
73
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
73
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
17.1k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
7.79k
      xmlFree((xmlChar *) encoding);
10500
7.79k
            return(NULL);
10501
7.79k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
9.30k
        if ((encoding != NULL) &&
10508
9.30k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
9.24k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
381
      if ((ctxt->encoding == NULL) &&
10517
381
          (ctxt->input->buf != NULL) &&
10518
381
          (ctxt->input->buf->encoder == NULL)) {
10519
381
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
381
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
381
      }
10522
381
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
381
      ctxt->encoding = encoding;
10525
381
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
8.92k
        else if ((encoding != NULL) &&
10530
8.92k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
8.86k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
3.96k
      if (ctxt->encoding != NULL)
10533
0
    xmlFree((xmlChar *) ctxt->encoding);
10534
3.96k
      ctxt->encoding = encoding;
10535
3.96k
  }
10536
4.95k
  else if (encoding != NULL) {
10537
4.89k
      xmlCharEncodingHandlerPtr handler;
10538
10539
4.89k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
4.89k
      ctxt->input->encoding = encoding;
10542
10543
4.89k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
4.89k
      if (handler != NULL) {
10545
4.76k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
45
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
45
        return(NULL);
10549
45
    }
10550
4.76k
      } else {
10551
133
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
133
      "Unsupported encoding %s\n", encoding);
10553
133
    return(NULL);
10554
133
      }
10555
4.89k
  }
10556
9.30k
    }
10557
19.8k
    return(encoding);
10558
28.5k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
24.3k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
24.3k
    int standalone = -2;
10596
10597
24.3k
    SKIP_BLANKS;
10598
24.3k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
5.61k
  SKIP(10);
10600
5.61k
        SKIP_BLANKS;
10601
5.61k
  if (RAW != '=') {
10602
36
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
36
      return(standalone);
10604
36
        }
10605
5.57k
  NEXT;
10606
5.57k
  SKIP_BLANKS;
10607
5.57k
        if (RAW == '\''){
10608
2.32k
      NEXT;
10609
2.32k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
1.55k
          standalone = 0;
10611
1.55k
                SKIP(2);
10612
1.55k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
771
                 (NXT(2) == 's')) {
10614
600
          standalone = 1;
10615
600
    SKIP(3);
10616
600
            } else {
10617
171
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
171
      }
10619
2.32k
      if (RAW != '\'') {
10620
210
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
210
      } else
10622
2.11k
          NEXT;
10623
3.24k
  } else if (RAW == '"'){
10624
3.22k
      NEXT;
10625
3.22k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
1.72k
          standalone = 0;
10627
1.72k
    SKIP(2);
10628
1.72k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
1.50k
                 (NXT(2) == 's')) {
10630
1.43k
          standalone = 1;
10631
1.43k
                SKIP(3);
10632
1.43k
            } else {
10633
66
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
66
      }
10635
3.22k
      if (RAW != '"') {
10636
111
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
111
      } else
10638
3.11k
          NEXT;
10639
3.22k
  } else {
10640
24
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
24
        }
10642
5.57k
    }
10643
24.2k
    return(standalone);
10644
24.3k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
46.5k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
46.5k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
46.5k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
46.5k
    SKIP(5);
10672
10673
46.5k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
46.5k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
46.5k
    version = xmlParseVersionInfo(ctxt);
10683
46.5k
    if (version == NULL) {
10684
3.96k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
42.5k
    } else {
10686
42.5k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
471
      if (ctxt->options & XML_PARSE_OLD10) {
10691
180
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
180
                "Unsupported version '%s'\n",
10693
180
                version);
10694
291
      } else {
10695
291
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
240
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
240
                      "Unsupported version '%s'\n",
10698
240
          version, NULL);
10699
240
    } else {
10700
51
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
51
              "Unsupported version '%s'\n",
10702
51
              version);
10703
51
    }
10704
291
      }
10705
471
  }
10706
42.5k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
42.5k
  ctxt->version = version;
10709
42.5k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
46.5k
    if (!IS_BLANK_CH(RAW)) {
10715
23.6k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
18.8k
      SKIP(2);
10717
18.8k
      return;
10718
18.8k
  }
10719
4.78k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
4.78k
    }
10721
27.6k
    xmlParseEncodingDecl(ctxt);
10722
27.6k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
27.6k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
151
        return;
10728
151
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
27.4k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
3.44k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
3.17k
      SKIP(2);
10736
3.17k
      return;
10737
3.17k
  }
10738
270
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
270
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
24.3k
    GROW;
10745
10746
24.3k
    SKIP_BLANKS;
10747
24.3k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
24.3k
    SKIP_BLANKS;
10750
24.3k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
16.6k
        SKIP(2);
10752
16.6k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
141
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
141
  NEXT;
10756
7.49k
    } else {
10757
7.49k
        int c;
10758
10759
7.49k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
574k
        while ((c = CUR) != 0) {
10761
573k
            NEXT;
10762
573k
            if (c == '>')
10763
6.43k
                break;
10764
573k
        }
10765
7.49k
    }
10766
24.3k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
64.0k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
71.6k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
71.6k
        SKIP_BLANKS;
10783
71.6k
        GROW;
10784
71.6k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
4.02k
      xmlParsePI(ctxt);
10786
67.6k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
3.52k
      xmlParseComment(ctxt);
10788
64.0k
        } else {
10789
64.0k
            break;
10790
64.0k
        }
10791
71.6k
    }
10792
64.0k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
30.6k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
30.6k
    xmlChar start[4];
10812
30.6k
    xmlCharEncoding enc;
10813
10814
30.6k
    xmlInitParser();
10815
10816
30.6k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
30.6k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
30.6k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
30.6k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
30.6k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
30.6k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
30.6k
    if ((ctxt->encoding == NULL) &&
10835
30.6k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
30.4k
  start[0] = RAW;
10842
30.4k
  start[1] = NXT(1);
10843
30.4k
  start[2] = NXT(2);
10844
30.4k
  start[3] = NXT(3);
10845
30.4k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
30.4k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
16.6k
      xmlSwitchEncoding(ctxt, enc);
10848
16.6k
  }
10849
30.4k
    }
10850
10851
10852
30.6k
    if (CUR == 0) {
10853
74
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
74
  return(-1);
10855
74
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
30.5k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
1.83k
       GROW;
10865
1.83k
    }
10866
30.5k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
15.5k
  xmlParseXMLDecl(ctxt);
10872
15.5k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
15.5k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
51
      return(-1);
10878
51
  }
10879
15.4k
  ctxt->standalone = ctxt->input->standalone;
10880
15.4k
  SKIP_BLANKS;
10881
15.4k
    } else {
10882
15.0k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
15.0k
    }
10884
30.4k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
29.4k
        ctxt->sax->startDocument(ctxt->userData);
10886
30.4k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
30.4k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
30.4k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
30.4k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
30.4k
    GROW;
10903
30.4k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
16.3k
  ctxt->inSubset = 1;
10906
16.3k
  xmlParseDocTypeDecl(ctxt);
10907
16.3k
  if (RAW == '[') {
10908
12.4k
      ctxt->instate = XML_PARSER_DTD;
10909
12.4k
      xmlParseInternalSubset(ctxt);
10910
12.4k
      if (ctxt->instate == XML_PARSER_EOF)
10911
3.82k
    return(-1);
10912
12.4k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
12.4k
  ctxt->inSubset = 2;
10918
12.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
12.4k
      (!ctxt->disableSAX))
10920
11.8k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
11.8k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
12.4k
  if (ctxt->instate == XML_PARSER_EOF)
10923
786
      return(-1);
10924
11.7k
  ctxt->inSubset = 0;
10925
10926
11.7k
        xmlCleanSpecialAttr(ctxt);
10927
10928
11.7k
  ctxt->instate = XML_PARSER_PROLOG;
10929
11.7k
  xmlParseMisc(ctxt);
10930
11.7k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
25.8k
    GROW;
10936
25.8k
    if (RAW != '<') {
10937
3.97k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
3.97k
           "Start tag expected, '<' not found\n");
10939
21.9k
    } else {
10940
21.9k
  ctxt->instate = XML_PARSER_CONTENT;
10941
21.9k
  xmlParseElement(ctxt);
10942
21.9k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
21.9k
  xmlParseMisc(ctxt);
10949
10950
21.9k
  if (RAW != 0) {
10951
6.48k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
6.48k
  }
10953
21.9k
  ctxt->instate = XML_PARSER_EOF;
10954
21.9k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
25.8k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
25.8k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
25.8k
    if ((ctxt->myDoc != NULL) &&
10966
25.8k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
77
  xmlFreeDoc(ctxt->myDoc);
10968
77
  ctxt->myDoc = NULL;
10969
77
    }
10970
10971
25.8k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
3.91k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
3.91k
  if (ctxt->valid)
10974
3.38k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
3.91k
  if (ctxt->nsWellFormed)
10976
3.66k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
3.91k
  if (ctxt->options & XML_PARSE_OLD10)
10978
352
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
3.91k
    }
10980
25.8k
    if (! ctxt->wellFormed) {
10981
21.9k
  ctxt->valid = 0;
10982
21.9k
  return(-1);
10983
21.9k
    }
10984
3.91k
    return(0);
10985
25.8k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
938k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
938k
    const xmlChar *cur;
11110
11111
938k
    if (ctxt->checkIndex == 0) {
11112
909k
        cur = ctxt->input->cur + 1;
11113
909k
    } else {
11114
29.7k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
29.7k
    }
11116
11117
938k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
31.7k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
31.7k
        return(0);
11120
907k
    } else {
11121
907k
        ctxt->checkIndex = 0;
11122
907k
        return(1);
11123
907k
    }
11124
938k
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
215k
                     const char *str, size_t strLen) {
11138
215k
    const xmlChar *cur, *term;
11139
11140
215k
    if (ctxt->checkIndex == 0) {
11141
120k
        cur = ctxt->input->cur + startDelta;
11142
120k
    } else {
11143
95.2k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
95.2k
    }
11145
11146
215k
    term = BAD_CAST strstr((const char *) cur, str);
11147
215k
    if (term == NULL) {
11148
123k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
123k
        if ((size_t) (end - cur) < strLen)
11152
2.19k
            end = cur;
11153
120k
        else
11154
120k
            end -= strLen - 1;
11155
123k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
123k
    } else {
11157
92.4k
        ctxt->checkIndex = 0;
11158
92.4k
    }
11159
11160
215k
    return(term);
11161
215k
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
1.37M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
1.37M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
1.37M
    const xmlChar *end = ctxt->input->end;
11173
11174
24.1M
    while (cur < end) {
11175
24.0M
        if ((*cur == '<') || (*cur == '&')) {
11176
1.22M
            ctxt->checkIndex = 0;
11177
1.22M
            return(1);
11178
1.22M
        }
11179
22.7M
        cur++;
11180
22.7M
    }
11181
11182
150k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
150k
    return(0);
11184
1.37M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
1.35M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
1.35M
    const xmlChar *cur;
11196
1.35M
    const xmlChar *end = ctxt->input->end;
11197
1.35M
    int state = ctxt->endCheckState;
11198
11199
1.35M
    if (ctxt->checkIndex == 0)
11200
1.10M
        cur = ctxt->input->cur + 1;
11201
254k
    else
11202
254k
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
54.0M
    while (cur < end) {
11205
53.8M
        if (state) {
11206
30.3M
            if (*cur == state)
11207
1.40M
                state = 0;
11208
30.3M
        } else if (*cur == '\'' || *cur == '"') {
11209
1.41M
            state = *cur;
11210
22.0M
        } else if (*cur == '>') {
11211
1.08M
            ctxt->checkIndex = 0;
11212
1.08M
            ctxt->endCheckState = 0;
11213
1.08M
            return(1);
11214
1.08M
        }
11215
52.7M
        cur++;
11216
52.7M
    }
11217
11218
267k
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
267k
    ctxt->endCheckState = state;
11220
267k
    return(0);
11221
1.35M
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
84.8k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
84.8k
    const xmlChar *cur, *start;
11240
84.8k
    const xmlChar *end = ctxt->input->end;
11241
84.8k
    int state = ctxt->endCheckState;
11242
11243
84.8k
    if (ctxt->checkIndex == 0) {
11244
23.6k
        cur = ctxt->input->cur + 1;
11245
61.1k
    } else {
11246
61.1k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
61.1k
    }
11248
84.8k
    start = cur;
11249
11250
14.3M
    while (cur < end) {
11251
14.2M
        if (state == '-') {
11252
1.87M
            if ((*cur == '-') &&
11253
1.87M
                (cur[1] == '-') &&
11254
1.87M
                (cur[2] == '>')) {
11255
15.1k
                state = 0;
11256
15.1k
                cur += 3;
11257
15.1k
                start = cur;
11258
15.1k
                continue;
11259
15.1k
            }
11260
1.87M
        }
11261
12.4M
        else if (state == ']') {
11262
24.8k
            if (*cur == '>') {
11263
18.9k
                ctxt->checkIndex = 0;
11264
18.9k
                ctxt->endCheckState = 0;
11265
18.9k
                return(1);
11266
18.9k
            }
11267
5.87k
            if (IS_BLANK_CH(*cur)) {
11268
4.35k
                state = ' ';
11269
4.35k
            } else if (*cur != ']') {
11270
864
                state = 0;
11271
864
                start = cur;
11272
864
                continue;
11273
864
            }
11274
5.87k
        }
11275
12.3M
        else if (state == ' ') {
11276
19.4k
            if (*cur == '>') {
11277
306
                ctxt->checkIndex = 0;
11278
306
                ctxt->endCheckState = 0;
11279
306
                return(1);
11280
306
            }
11281
19.1k
            if (!IS_BLANK_CH(*cur)) {
11282
4.04k
                state = 0;
11283
4.04k
                start = cur;
11284
4.04k
                continue;
11285
4.04k
            }
11286
19.1k
        }
11287
12.3M
        else if (state != 0) {
11288
5.74M
            if (*cur == state) {
11289
126k
                state = 0;
11290
126k
                start = cur + 1;
11291
126k
            }
11292
5.74M
        }
11293
6.62M
        else if (*cur == '<') {
11294
190k
            if ((cur[1] == '!') &&
11295
190k
                (cur[2] == '-') &&
11296
190k
                (cur[3] == '-')) {
11297
15.4k
                state = '-';
11298
15.4k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
15.4k
                start = cur;
11301
15.4k
                continue;
11302
15.4k
            }
11303
190k
        }
11304
6.43M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
152k
            state = *cur;
11306
152k
        }
11307
11308
14.2M
        cur++;
11309
14.2M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
65.5k
    if ((state == 0) || (state == '-')) {
11316
35.7k
        if (cur - start < 3)
11317
2.64k
            cur = start;
11318
33.0k
        else
11319
33.0k
            cur -= 3;
11320
35.7k
    }
11321
65.5k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
65.5k
    ctxt->endCheckState = state;
11323
65.5k
    return(0);
11324
84.8k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
46.0k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
46.0k
    int ix;
11340
46.0k
    unsigned char c;
11341
46.0k
    int codepoint;
11342
11343
46.0k
    if ((utf == NULL) || (len <= 0))
11344
231
        return(0);
11345
11346
2.41M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
2.40M
        c = utf[ix];
11348
2.40M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
2.13M
      if (c >= 0x20)
11350
1.99M
    ix++;
11351
139k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
136k
          ix++;
11353
2.37k
      else
11354
2.37k
          return(-ix);
11355
2.13M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
71.2k
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
71.2k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
13.3k
          return(-ix);
11359
57.9k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
57.9k
      codepoint |= utf[ix+1] & 0x3f;
11361
57.9k
      if (!xmlIsCharQ(codepoint))
11362
1.83k
          return(-ix);
11363
56.0k
      ix += 2;
11364
200k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
68.8k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
67.8k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
67.8k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
3.89k
        return(-ix);
11369
63.9k
      codepoint = (utf[ix] & 0xf) << 12;
11370
63.9k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
63.9k
      codepoint |= utf[ix+2] & 0x3f;
11372
63.9k
      if (!xmlIsCharQ(codepoint))
11373
236
          return(-ix);
11374
63.6k
      ix += 3;
11375
131k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
128k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
126k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
126k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
126k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
6.49k
        return(-ix);
11381
120k
      codepoint = (utf[ix] & 0x7) << 18;
11382
120k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
120k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
120k
      codepoint |= utf[ix+3] & 0x3f;
11385
120k
      if (!xmlIsCharQ(codepoint))
11386
1.64k
          return(-ix);
11387
118k
      ix += 4;
11388
118k
  } else       /* unknown encoding */
11389
3.28k
      return(-ix);
11390
2.40M
      }
11391
10.4k
      return(ix);
11392
45.7k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
763k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
763k
    int ret = 0;
11406
763k
    int avail, tlen;
11407
763k
    xmlChar cur, next;
11408
11409
763k
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
763k
    if ((ctxt->input != NULL) &&
11466
763k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
13.3k
        xmlParserInputShrink(ctxt->input);
11468
13.3k
    }
11469
11470
6.67M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
6.67M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
15.3k
      return(0);
11473
11474
6.66M
  if (ctxt->input == NULL) break;
11475
6.66M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
6.66M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
6.66M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
6.66M
          (ctxt->input->buf->raw != NULL) &&
11488
6.66M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
13.3k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
13.3k
                                                 ctxt->input);
11491
13.3k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
13.3k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
13.3k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
13.3k
                                      base, current);
11496
13.3k
      }
11497
6.66M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
6.66M
        (ctxt->input->cur - ctxt->input->base);
11499
6.66M
  }
11500
6.66M
        if (avail < 1)
11501
47.2k
      goto done;
11502
6.61M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
148k
            case XML_PARSER_START:
11509
148k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
45.0k
        xmlChar start[4];
11511
45.0k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
45.0k
        if (avail < 4)
11517
902
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
44.1k
        start[0] = RAW;
11527
44.1k
        start[1] = NXT(1);
11528
44.1k
        start[2] = NXT(2);
11529
44.1k
        start[3] = NXT(3);
11530
44.1k
        enc = xmlDetectCharEncoding(start, 4);
11531
44.1k
        xmlSwitchEncoding(ctxt, enc);
11532
44.1k
        break;
11533
45.0k
    }
11534
11535
103k
    if (avail < 2)
11536
31
        goto done;
11537
103k
    cur = ctxt->input->cur[0];
11538
103k
    next = ctxt->input->cur[1];
11539
103k
    if (cur == 0) {
11540
98
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
98
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
98
                  &xmlDefaultSAXLocator);
11543
98
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
98
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
98
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
98
      ctxt->sax->endDocument(ctxt->userData);
11551
98
        goto done;
11552
98
    }
11553
102k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
76.0k
        if (avail < 5) goto done;
11556
75.9k
        if ((!terminate) &&
11557
75.9k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
42.3k
      goto done;
11559
33.6k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
33.6k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
33.6k
                  &xmlDefaultSAXLocator);
11562
33.6k
        if ((ctxt->input->cur[2] == 'x') &&
11563
33.6k
      (ctxt->input->cur[3] == 'm') &&
11564
33.6k
      (ctxt->input->cur[4] == 'l') &&
11565
33.6k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
30.9k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
30.9k
      xmlParseXMLDecl(ctxt);
11572
30.9k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
100
          xmlHaltParser(ctxt);
11578
100
          return(0);
11579
100
      }
11580
30.8k
      ctxt->standalone = ctxt->input->standalone;
11581
30.8k
      if ((ctxt->encoding == NULL) &&
11582
30.8k
          (ctxt->input->encoding != NULL))
11583
3.14k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
30.8k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
30.8k
          (!ctxt->disableSAX))
11586
28.8k
          ctxt->sax->startDocument(ctxt->userData);
11587
30.8k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
30.8k
        } else {
11593
2.67k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
2.67k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
2.67k
          (!ctxt->disableSAX))
11596
2.67k
          ctxt->sax->startDocument(ctxt->userData);
11597
2.67k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
2.67k
        }
11603
33.6k
    } else {
11604
26.9k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
26.9k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
26.9k
                  &xmlDefaultSAXLocator);
11607
26.9k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
26.9k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
26.9k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
26.9k
            (!ctxt->disableSAX))
11614
26.9k
      ctxt->sax->startDocument(ctxt->userData);
11615
26.9k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
26.9k
    }
11621
60.4k
    break;
11622
1.41M
            case XML_PARSER_START_TAG: {
11623
1.41M
          const xmlChar *name;
11624
1.41M
    const xmlChar *prefix = NULL;
11625
1.41M
    const xmlChar *URI = NULL;
11626
1.41M
                int line = ctxt->input->line;
11627
1.41M
    int nsNr = ctxt->nsNr;
11628
11629
1.41M
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
1.41M
    cur = ctxt->input->cur[0];
11632
1.41M
          if (cur != '<') {
11633
4.92k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
4.92k
        xmlHaltParser(ctxt);
11635
4.92k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
4.92k
      ctxt->sax->endDocument(ctxt->userData);
11637
4.92k
        goto done;
11638
4.92k
    }
11639
1.41M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
251k
                    goto done;
11641
1.15M
    if (ctxt->spaceNr == 0)
11642
9.38k
        spacePush(ctxt, -1);
11643
1.14M
    else if (*ctxt->space == -2)
11644
108k
        spacePush(ctxt, -1);
11645
1.04M
    else
11646
1.04M
        spacePush(ctxt, *ctxt->space);
11647
1.15M
#ifdef LIBXML_SAX1_ENABLED
11648
1.15M
    if (ctxt->sax2)
11649
776k
#endif /* LIBXML_SAX1_ENABLED */
11650
776k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
382k
#ifdef LIBXML_SAX1_ENABLED
11652
382k
    else
11653
382k
        name = xmlParseStartTag(ctxt);
11654
1.15M
#endif /* LIBXML_SAX1_ENABLED */
11655
1.15M
    if (ctxt->instate == XML_PARSER_EOF)
11656
64
        goto done;
11657
1.15M
    if (name == NULL) {
11658
3.98k
        spacePop(ctxt);
11659
3.98k
        xmlHaltParser(ctxt);
11660
3.98k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
3.98k
      ctxt->sax->endDocument(ctxt->userData);
11662
3.98k
        goto done;
11663
3.98k
    }
11664
1.15M
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
1.15M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
1.15M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
1.15M
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
1.15M
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
529k
        SKIP(2);
11680
11681
529k
        if (ctxt->sax2) {
11682
391k
      if ((ctxt->sax != NULL) &&
11683
391k
          (ctxt->sax->endElementNs != NULL) &&
11684
391k
          (!ctxt->disableSAX))
11685
391k
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
391k
                                  prefix, URI);
11687
391k
      if (ctxt->nsNr - nsNr > 0)
11688
715
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
391k
#ifdef LIBXML_SAX1_ENABLED
11690
391k
        } else {
11691
137k
      if ((ctxt->sax != NULL) &&
11692
137k
          (ctxt->sax->endElement != NULL) &&
11693
137k
          (!ctxt->disableSAX))
11694
137k
          ctxt->sax->endElement(ctxt->userData, name);
11695
137k
#endif /* LIBXML_SAX1_ENABLED */
11696
137k
        }
11697
529k
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
529k
        spacePop(ctxt);
11700
529k
        if (ctxt->nameNr == 0) {
11701
2.18k
      ctxt->instate = XML_PARSER_EPILOG;
11702
526k
        } else {
11703
526k
      ctxt->instate = XML_PARSER_CONTENT;
11704
526k
        }
11705
529k
        break;
11706
529k
    }
11707
626k
    if (RAW == '>') {
11708
585k
        NEXT;
11709
585k
    } else {
11710
40.6k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
40.6k
           "Couldn't find end of Start Tag %s\n",
11712
40.6k
           name);
11713
40.6k
        nodePop(ctxt);
11714
40.6k
        spacePop(ctxt);
11715
40.6k
    }
11716
626k
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
626k
    ctxt->instate = XML_PARSER_CONTENT;
11719
626k
                break;
11720
1.15M
      }
11721
4.19M
            case XML_PARSER_CONTENT: {
11722
4.19M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
17.1k
        goto done;
11724
4.17M
    cur = ctxt->input->cur[0];
11725
4.17M
    next = ctxt->input->cur[1];
11726
11727
4.17M
    if ((cur == '<') && (next == '/')) {
11728
544k
        ctxt->instate = XML_PARSER_END_TAG;
11729
544k
        break;
11730
3.63M
          } else if ((cur == '<') && (next == '?')) {
11731
6.61k
        if ((!terminate) &&
11732
6.61k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
2.67k
      goto done;
11734
3.93k
        xmlParsePI(ctxt);
11735
3.93k
        ctxt->instate = XML_PARSER_CONTENT;
11736
3.62M
    } else if ((cur == '<') && (next != '!')) {
11737
1.12M
        ctxt->instate = XML_PARSER_START_TAG;
11738
1.12M
        break;
11739
2.50M
    } else if ((cur == '<') && (next == '!') &&
11740
2.50M
               (ctxt->input->cur[2] == '-') &&
11741
2.50M
         (ctxt->input->cur[3] == '-')) {
11742
50.3k
        if ((!terminate) &&
11743
50.3k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
17.6k
      goto done;
11745
32.7k
        xmlParseComment(ctxt);
11746
32.7k
        ctxt->instate = XML_PARSER_CONTENT;
11747
2.45M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
2.45M
        (ctxt->input->cur[2] == '[') &&
11749
2.45M
        (ctxt->input->cur[3] == 'C') &&
11750
2.45M
        (ctxt->input->cur[4] == 'D') &&
11751
2.45M
        (ctxt->input->cur[5] == 'A') &&
11752
2.45M
        (ctxt->input->cur[6] == 'T') &&
11753
2.45M
        (ctxt->input->cur[7] == 'A') &&
11754
2.45M
        (ctxt->input->cur[8] == '[')) {
11755
4.33k
        SKIP(9);
11756
4.33k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
4.33k
        break;
11758
2.45M
    } else if ((cur == '<') && (next == '!') &&
11759
2.45M
               (avail < 9)) {
11760
1.83k
        goto done;
11761
2.44M
    } else if (cur == '<') {
11762
44.1k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
44.1k
                    "detected an error in element content\n");
11764
44.1k
                    SKIP(1);
11765
2.40M
    } else if (cur == '&') {
11766
481k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
15.7k
      goto done;
11768
465k
        xmlParseReference(ctxt);
11769
1.92M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
1.92M
        if ((ctxt->inputNr == 1) &&
11783
1.92M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
1.42M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
150k
          goto done;
11786
1.42M
                    }
11787
1.77M
                    ctxt->checkIndex = 0;
11788
1.77M
        xmlParseCharData(ctxt, 0);
11789
1.77M
    }
11790
2.32M
    break;
11791
4.17M
      }
11792
2.32M
            case XML_PARSER_END_TAG:
11793
560k
    if (avail < 2)
11794
0
        goto done;
11795
560k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
16.0k
        goto done;
11797
544k
    if (ctxt->sax2) {
11798
335k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
335k
        nameNsPop(ctxt);
11800
335k
    }
11801
209k
#ifdef LIBXML_SAX1_ENABLED
11802
209k
      else
11803
209k
        xmlParseEndTag1(ctxt, 0);
11804
544k
#endif /* LIBXML_SAX1_ENABLED */
11805
544k
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
544k
    } else if (ctxt->nameNr == 0) {
11808
9.82k
        ctxt->instate = XML_PARSER_EPILOG;
11809
534k
    } else {
11810
534k
        ctxt->instate = XML_PARSER_CONTENT;
11811
534k
    }
11812
544k
    break;
11813
61.1k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
61.1k
    const xmlChar *term;
11819
11820
61.1k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
961
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
961
                                           "]]>");
11827
60.1k
                } else {
11828
60.1k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
60.1k
                }
11830
11831
61.1k
    if (term == NULL) {
11832
38.8k
        int tmp, size;
11833
11834
38.8k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
513
                        size = ctxt->input->end - ctxt->input->cur;
11837
38.2k
                    } else {
11838
38.2k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
15.1k
                            goto done;
11840
23.1k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
23.1k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
23.1k
                    }
11844
23.6k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
23.6k
                    if (tmp <= 0) {
11846
15.6k
                        tmp = -tmp;
11847
15.6k
                        ctxt->input->cur += tmp;
11848
15.6k
                        goto encoding_error;
11849
15.6k
                    }
11850
8.03k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
8.03k
                        if (ctxt->sax->cdataBlock != NULL)
11852
5.03k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
5.03k
                                                  ctxt->input->cur, tmp);
11854
3.00k
                        else if (ctxt->sax->characters != NULL)
11855
3.00k
                            ctxt->sax->characters(ctxt->userData,
11856
3.00k
                                                  ctxt->input->cur, tmp);
11857
8.03k
                    }
11858
8.03k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
8.03k
                    SKIPL(tmp);
11861
22.3k
    } else {
11862
22.3k
                    int base = term - CUR_PTR;
11863
22.3k
        int tmp;
11864
11865
22.3k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
22.3k
        if ((tmp < 0) || (tmp != base)) {
11867
19.3k
      tmp = -tmp;
11868
19.3k
      ctxt->input->cur += tmp;
11869
19.3k
      goto encoding_error;
11870
19.3k
        }
11871
2.98k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
2.98k
            (ctxt->sax->cdataBlock != NULL) &&
11873
2.98k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on enpty CDATA
11877
       * sections
11878
       */
11879
138
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
138
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
138
                     "<![CDATA[", 9)))
11882
138
           ctxt->sax->cdataBlock(ctxt->userData,
11883
138
                                 BAD_CAST "", 0);
11884
2.84k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
2.84k
      (!ctxt->disableSAX)) {
11886
2.75k
      if (ctxt->sax->cdataBlock != NULL)
11887
1.99k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
1.99k
              ctxt->input->cur, base);
11889
760
      else if (ctxt->sax->characters != NULL)
11890
760
          ctxt->sax->characters(ctxt->userData,
11891
760
              ctxt->input->cur, base);
11892
2.75k
        }
11893
2.98k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
2.98k
        SKIPL(base + 3);
11896
2.98k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
2.98k
    }
11902
11.0k
    break;
11903
61.1k
      }
11904
96.9k
            case XML_PARSER_MISC:
11905
128k
            case XML_PARSER_PROLOG:
11906
143k
            case XML_PARSER_EPILOG:
11907
143k
    SKIP_BLANKS;
11908
143k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
143k
    else
11912
143k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
143k
                (ctxt->input->cur - ctxt->input->base);
11914
143k
    if (avail < 2)
11915
10.2k
        goto done;
11916
132k
    cur = ctxt->input->cur[0];
11917
132k
    next = ctxt->input->cur[1];
11918
132k
          if ((cur == '<') && (next == '?')) {
11919
14.1k
        if ((!terminate) &&
11920
14.1k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
6.90k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
7.24k
        xmlParsePI(ctxt);
11927
7.24k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
118k
    } else if ((cur == '<') && (next == '!') &&
11930
118k
        (ctxt->input->cur[2] == '-') &&
11931
118k
        (ctxt->input->cur[3] == '-')) {
11932
21.5k
        if ((!terminate) &&
11933
21.5k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
15.2k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
6.26k
        xmlParseComment(ctxt);
11940
6.26k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
97.0k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
97.0k
                    (cur == '<') && (next == '!') &&
11944
97.0k
        (ctxt->input->cur[2] == 'D') &&
11945
97.0k
        (ctxt->input->cur[3] == 'O') &&
11946
97.0k
        (ctxt->input->cur[4] == 'C') &&
11947
97.0k
        (ctxt->input->cur[5] == 'T') &&
11948
97.0k
        (ctxt->input->cur[6] == 'Y') &&
11949
97.0k
        (ctxt->input->cur[7] == 'P') &&
11950
97.0k
        (ctxt->input->cur[8] == 'E')) {
11951
47.6k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
15.9k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
31.6k
        ctxt->inSubset = 1;
11958
31.6k
        xmlParseDocTypeDecl(ctxt);
11959
31.6k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
31.6k
        if (RAW == '[') {
11962
24.2k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
24.2k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
7.38k
      ctxt->inSubset = 2;
11972
7.38k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
7.38k
          (ctxt->sax->externalSubset != NULL))
11974
6.95k
          ctxt->sax->externalSubset(ctxt->userData,
11975
6.95k
            ctxt->intSubName, ctxt->extSubSystem,
11976
6.95k
            ctxt->extSubURI);
11977
7.38k
      ctxt->inSubset = 0;
11978
7.38k
      xmlCleanSpecialAttr(ctxt);
11979
7.38k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
7.38k
        }
11985
49.4k
    } else if ((cur == '<') && (next == '!') &&
11986
49.4k
               (avail <
11987
1.93k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
1.25k
        goto done;
11989
48.1k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
2.09k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
2.09k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
2.09k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
2.09k
      ctxt->sax->endDocument(ctxt->userData);
11998
2.09k
        goto done;
11999
46.0k
                } else {
12000
46.0k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
46.0k
    }
12006
91.2k
    break;
12007
91.2k
            case XML_PARSER_DTD: {
12008
89.6k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
65.5k
                    goto done;
12010
24.1k
    xmlParseInternalSubset(ctxt);
12011
24.1k
    if (ctxt->instate == XML_PARSER_EOF)
12012
7.02k
        goto done;
12013
17.0k
    ctxt->inSubset = 2;
12014
17.0k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
17.0k
        (ctxt->sax->externalSubset != NULL))
12016
16.6k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
16.6k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
17.0k
    ctxt->inSubset = 0;
12019
17.0k
    xmlCleanSpecialAttr(ctxt);
12020
17.0k
    if (ctxt->instate == XML_PARSER_EOF)
12021
586
        goto done;
12022
16.4k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
16.4k
                break;
12028
17.0k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
6.61M
  }
12102
6.61M
    }
12103
712k
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
712k
    return(ret);
12108
34.9k
encoding_error:
12109
34.9k
    {
12110
34.9k
        char buffer[150];
12111
12112
34.9k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
34.9k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
34.9k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
34.9k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
34.9k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
34.9k
         BAD_CAST buffer, NULL);
12118
34.9k
    }
12119
34.9k
    return(0);
12120
763k
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
1.00M
              int terminate) {
12136
1.00M
    int end_in_lf = 0;
12137
1.00M
    int remain = 0;
12138
12139
1.00M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
1.00M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
241k
        return(ctxt->errNo);
12143
760k
    if (ctxt->instate == XML_PARSER_EOF)
12144
37
        return(-1);
12145
760k
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
760k
    ctxt->progressive = 1;
12149
760k
    if (ctxt->instate == XML_PARSER_START)
12150
101k
        xmlDetectSAX2(ctxt);
12151
760k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
760k
        (chunk[size - 1] == '\r')) {
12153
4.95k
  end_in_lf = 1;
12154
4.95k
  size--;
12155
4.95k
    }
12156
12157
763k
xmldecl_done:
12158
12159
763k
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
763k
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
718k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
718k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
718k
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
718k
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
718k
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
4.88k
            unsigned int len = 45;
12173
12174
4.88k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
4.88k
                               BAD_CAST "UTF-16")) ||
12176
4.88k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
811
                               BAD_CAST "UTF16")))
12178
4.07k
                len = 90;
12179
811
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
811
                                    BAD_CAST "UCS-4")) ||
12181
811
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
775
                                    BAD_CAST "UCS4")))
12183
36
                len = 180;
12184
12185
4.88k
            if (ctxt->input->buf->rawconsumed < len)
12186
482
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
4.88k
            if ((unsigned int) size > len) {
12194
3.24k
                remain = size - len;
12195
3.24k
                size = len;
12196
3.24k
            } else {
12197
1.63k
                remain = 0;
12198
1.63k
            }
12199
4.88k
        }
12200
718k
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
718k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
718k
  if (res < 0) {
12203
122
      ctxt->errNo = XML_PARSER_EOF;
12204
122
      xmlHaltParser(ctxt);
12205
122
      return (XML_PARSER_EOF);
12206
122
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
718k
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
45.4k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
45.4k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
45.4k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
45.4k
        (in->raw != NULL)) {
12216
2.70k
    int nbchars;
12217
2.70k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
2.70k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
2.70k
    nbchars = xmlCharEncInput(in, terminate);
12221
2.70k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
2.70k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
77
        xmlGenericError(xmlGenericErrorContext,
12225
77
            "xmlParseChunk: encoder error\n");
12226
77
                    xmlHaltParser(ctxt);
12227
77
        return(XML_ERR_INVALID_ENCODING);
12228
77
    }
12229
2.70k
      }
12230
45.4k
  }
12231
45.4k
    }
12232
12233
763k
    if (remain != 0) {
12234
3.18k
        xmlParseTryOrFinish(ctxt, 0);
12235
760k
    } else {
12236
760k
        xmlParseTryOrFinish(ctxt, terminate);
12237
760k
    }
12238
763k
    if (ctxt->instate == XML_PARSER_EOF)
12239
19.0k
        return(ctxt->errNo);
12240
12241
744k
    if ((ctxt->input != NULL) &&
12242
744k
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
744k
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
744k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
744k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
15.7k
        return(ctxt->errNo);
12250
12251
728k
    if (remain != 0) {
12252
3.07k
        chunk += size;
12253
3.07k
        size = remain;
12254
3.07k
        remain = 0;
12255
3.07k
        goto xmldecl_done;
12256
3.07k
    }
12257
725k
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
725k
        (ctxt->input->buf != NULL)) {
12259
4.88k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
4.88k
           ctxt->input);
12261
4.88k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
4.88k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
4.88k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
4.88k
            base, current);
12267
4.88k
    }
12268
725k
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
19.7k
  int cur_avail = 0;
12273
12274
19.7k
  if (ctxt->input != NULL) {
12275
19.7k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
19.7k
      else
12279
19.7k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
19.7k
                    (ctxt->input->cur - ctxt->input->base);
12281
19.7k
  }
12282
12283
19.7k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
19.7k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
10.9k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
10.9k
  }
12287
19.7k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
54
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
54
  }
12290
19.7k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
19.7k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
19.7k
    ctxt->sax->endDocument(ctxt->userData);
12293
19.7k
  }
12294
19.7k
  ctxt->instate = XML_PARSER_EOF;
12295
19.7k
    }
12296
725k
    if (ctxt->wellFormed == 0)
12297
208k
  return((xmlParserErrors) ctxt->errNo);
12298
517k
    else
12299
517k
        return(0);
12300
725k
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
61.2k
                        const char *chunk, int size, const char *filename) {
12330
61.2k
    xmlParserCtxtPtr ctxt;
12331
61.2k
    xmlParserInputPtr inputStream;
12332
61.2k
    xmlParserInputBufferPtr buf;
12333
61.2k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
61.2k
    if ((chunk != NULL) && (size >= 4))
12339
30.4k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
61.2k
    buf = xmlAllocParserInputBuffer(enc);
12342
61.2k
    if (buf == NULL) return(NULL);
12343
12344
61.2k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
61.2k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
61.2k
    ctxt->dictNames = 1;
12351
61.2k
    if (filename == NULL) {
12352
30.6k
  ctxt->directory = NULL;
12353
30.6k
    } else {
12354
30.6k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
30.6k
    }
12356
12357
61.2k
    inputStream = xmlNewInputStream(ctxt);
12358
61.2k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
61.2k
    if (filename == NULL)
12365
30.6k
  inputStream->filename = NULL;
12366
30.6k
    else {
12367
30.6k
  inputStream->filename = (char *)
12368
30.6k
      xmlCanonicPath((const xmlChar *) filename);
12369
30.6k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
30.6k
    }
12376
61.2k
    inputStream->buf = buf;
12377
61.2k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
61.2k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
61.2k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
61.2k
    if ((size != 0) && (chunk != NULL) &&
12388
61.2k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
30.4k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
30.4k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
30.4k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
30.4k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
30.4k
    }
12399
12400
61.2k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
16.6k
        xmlSwitchEncoding(ctxt, enc);
12402
16.6k
    }
12403
12404
61.2k
    return(ctxt);
12405
61.2k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
56.9k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
56.9k
    if (ctxt == NULL)
12418
0
        return;
12419
56.9k
    ctxt->instate = XML_PARSER_EOF;
12420
56.9k
    ctxt->disableSAX = 1;
12421
59.3k
    while (ctxt->inputNr > 1)
12422
2.35k
        xmlFreeInputStream(inputPop(ctxt));
12423
56.9k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
56.9k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
56.9k
        if (ctxt->input->buf != NULL) {
12433
48.2k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
48.2k
            ctxt->input->buf = NULL;
12435
48.2k
        }
12436
56.9k
  ctxt->input->cur = BAD_CAST"";
12437
56.9k
        ctxt->input->length = 0;
12438
56.9k
  ctxt->input->base = ctxt->input->cur;
12439
56.9k
        ctxt->input->end = ctxt->input->cur;
12440
56.9k
    }
12441
56.9k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
30.6k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
30.6k
    if (ctxt == NULL)
12452
0
        return;
12453
30.6k
    xmlHaltParser(ctxt);
12454
30.6k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
30.6k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
106k
          const xmlChar *ID, xmlNodePtr *list) {
12832
106k
    xmlParserCtxtPtr ctxt;
12833
106k
    xmlDocPtr newDoc;
12834
106k
    xmlNodePtr newRoot;
12835
106k
    xmlParserErrors ret = XML_ERR_OK;
12836
106k
    xmlChar start[4];
12837
106k
    xmlCharEncoding enc;
12838
12839
106k
    if (((depth > 40) &&
12840
106k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
106k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
106k
    if (list != NULL)
12848
8.02k
        *list = NULL;
12849
106k
    if ((URL == NULL) && (ID == NULL))
12850
42
  return(XML_ERR_INTERNAL_ERROR);
12851
106k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
106k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
106k
                                             oldctxt);
12856
106k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
11.6k
    if (oldctxt != NULL) {
12858
11.6k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
11.6k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
11.6k
    }
12861
11.6k
    xmlDetectSAX2(ctxt);
12862
12863
11.6k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
11.6k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
11.6k
    newDoc->properties = XML_DOC_INTERNAL;
12869
11.6k
    if (doc) {
12870
11.6k
        newDoc->intSubset = doc->intSubset;
12871
11.6k
        newDoc->extSubset = doc->extSubset;
12872
11.6k
        if (doc->dict) {
12873
7.62k
            newDoc->dict = doc->dict;
12874
7.62k
            xmlDictReference(newDoc->dict);
12875
7.62k
        }
12876
11.6k
        if (doc->URL != NULL) {
12877
7.63k
            newDoc->URL = xmlStrdup(doc->URL);
12878
7.63k
        }
12879
11.6k
    }
12880
11.6k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
11.6k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
11.6k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
11.6k
    nodePush(ctxt, newDoc->children);
12891
11.6k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
11.6k
    } else {
12894
11.6k
        ctxt->myDoc = doc;
12895
11.6k
        newRoot->doc = doc;
12896
11.6k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
11.6k
    GROW;
12904
11.6k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
11.5k
  start[0] = RAW;
12906
11.5k
  start[1] = NXT(1);
12907
11.5k
  start[2] = NXT(2);
12908
11.5k
  start[3] = NXT(3);
12909
11.5k
  enc = xmlDetectCharEncoding(start, 4);
12910
11.5k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
289
      xmlSwitchEncoding(ctxt, enc);
12912
289
  }
12913
11.5k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
11.6k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
186
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
186
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
186
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
18
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
18
                           "Version mismatch between document and entity\n");
12927
18
        }
12928
186
    }
12929
12930
11.6k
    ctxt->instate = XML_PARSER_CONTENT;
12931
11.6k
    ctxt->depth = depth;
12932
11.6k
    if (oldctxt != NULL) {
12933
11.6k
  ctxt->_private = oldctxt->_private;
12934
11.6k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
11.6k
  ctxt->validate = oldctxt->validate;
12936
11.6k
  ctxt->valid = oldctxt->valid;
12937
11.6k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
11.6k
        if (oldctxt->validate) {
12939
5.51k
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
5.51k
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
5.51k
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
5.51k
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
5.51k
        }
12944
11.6k
  ctxt->external = oldctxt->external;
12945
11.6k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
11.6k
        ctxt->dict = oldctxt->dict;
12947
11.6k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
11.6k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
11.6k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
11.6k
        ctxt->dictNames = oldctxt->dictNames;
12951
11.6k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
11.6k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
11.6k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
11.6k
  ctxt->record_info = oldctxt->record_info;
12955
11.6k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
11.6k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
11.6k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
11.6k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
11.6k
    xmlParseContent(ctxt);
12970
12971
11.6k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
494
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
11.1k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
11.6k
    if (ctxt->node != newDoc->children) {
12977
1.20k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
1.20k
    }
12979
12980
11.6k
    if (!ctxt->wellFormed) {
12981
3.36k
  ret = (xmlParserErrors)ctxt->errNo;
12982
3.36k
        if (oldctxt != NULL) {
12983
3.36k
            oldctxt->errNo = ctxt->errNo;
12984
3.36k
            oldctxt->wellFormed = 0;
12985
3.36k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
3.36k
        }
12987
8.32k
    } else {
12988
8.32k
  if (list != NULL) {
12989
931
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
931
      cur = newDoc->children->children;
12996
931
      *list = cur;
12997
13.2k
      while (cur != NULL) {
12998
12.3k
    cur->parent = NULL;
12999
12.3k
    cur = cur->next;
13000
12.3k
      }
13001
931
            newDoc->children->children = NULL;
13002
931
  }
13003
8.32k
  ret = XML_ERR_OK;
13004
8.32k
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
11.6k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
11.6k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
11.6k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
11.6k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
11.6k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
11.6k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
11.6k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
11.6k
    }
13020
13021
11.6k
    if (oldctxt != NULL) {
13022
11.6k
        ctxt->dict = NULL;
13023
11.6k
        ctxt->attsDefault = NULL;
13024
11.6k
        ctxt->attsSpecial = NULL;
13025
11.6k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
11.6k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
11.6k
        oldctxt->validate = ctxt->validate;
13028
11.6k
        oldctxt->valid = ctxt->valid;
13029
11.6k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
11.6k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
11.6k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
11.6k
    }
13033
11.6k
    ctxt->node_seq.maximum = 0;
13034
11.6k
    ctxt->node_seq.length = 0;
13035
11.6k
    ctxt->node_seq.buffer = NULL;
13036
11.6k
    xmlFreeParserCtxt(ctxt);
13037
11.6k
    newDoc->intSubset = NULL;
13038
11.6k
    newDoc->extSubset = NULL;
13039
11.6k
    xmlFreeDoc(newDoc);
13040
13041
11.6k
    return(ret);
13042
11.6k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
10.9k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
10.9k
    xmlParserCtxtPtr ctxt;
13125
10.9k
    xmlDocPtr newDoc = NULL;
13126
10.9k
    xmlNodePtr newRoot;
13127
10.9k
    xmlSAXHandlerPtr oldsax = NULL;
13128
10.9k
    xmlNodePtr content = NULL;
13129
10.9k
    xmlNodePtr last = NULL;
13130
10.9k
    int size;
13131
10.9k
    xmlParserErrors ret = XML_ERR_OK;
13132
10.9k
#ifdef SAX2
13133
10.9k
    int i;
13134
10.9k
#endif
13135
13136
10.9k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
10.9k
        (oldctxt->depth >  100)) {
13138
30
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
30
                       "Maximum entity nesting depth exceeded");
13140
30
  return(XML_ERR_ENTITY_LOOP);
13141
30
    }
13142
13143
13144
10.9k
    if (lst != NULL)
13145
10.9k
        *lst = NULL;
13146
10.9k
    if (string == NULL)
13147
18
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
10.9k
    size = xmlStrlen(string);
13150
13151
10.9k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
10.9k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
10.8k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
10.8k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
10.8k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
10.8k
    else
13158
10.8k
  ctxt->userData = ctxt;
13159
10.8k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
10.8k
    ctxt->dict = oldctxt->dict;
13161
10.8k
    ctxt->input_id = oldctxt->input_id;
13162
10.8k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
10.8k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
10.8k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
10.8k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
10.8k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
0
    }
13171
10.8k
#endif
13172
13173
10.8k
    oldsax = ctxt->sax;
13174
10.8k
    ctxt->sax = oldctxt->sax;
13175
10.8k
    xmlDetectSAX2(ctxt);
13176
10.8k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
10.8k
    ctxt->options = oldctxt->options;
13178
13179
10.8k
    ctxt->_private = oldctxt->_private;
13180
10.8k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
10.8k
    } else {
13193
10.8k
  ctxt->myDoc = oldctxt->myDoc;
13194
10.8k
        content = ctxt->myDoc->children;
13195
10.8k
  last = ctxt->myDoc->last;
13196
10.8k
    }
13197
10.8k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
10.8k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
10.8k
    ctxt->myDoc->children = NULL;
13208
10.8k
    ctxt->myDoc->last = NULL;
13209
10.8k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
10.8k
    nodePush(ctxt, ctxt->myDoc->children);
13211
10.8k
    ctxt->instate = XML_PARSER_CONTENT;
13212
10.8k
    ctxt->depth = oldctxt->depth;
13213
13214
10.8k
    ctxt->validate = 0;
13215
10.8k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
10.8k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
8.87k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
8.87k
    }
13222
10.8k
    ctxt->dictNames = oldctxt->dictNames;
13223
10.8k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
10.8k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
10.8k
    xmlParseContent(ctxt);
13227
10.8k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
30
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
10.8k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
10.8k
    if (ctxt->node != ctxt->myDoc->children) {
13233
168
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
168
    }
13235
13236
10.8k
    if (!ctxt->wellFormed) {
13237
1.97k
  ret = (xmlParserErrors)ctxt->errNo;
13238
1.97k
        oldctxt->errNo = ctxt->errNo;
13239
1.97k
        oldctxt->wellFormed = 0;
13240
1.97k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
8.92k
    } else {
13242
8.92k
        ret = XML_ERR_OK;
13243
8.92k
    }
13244
13245
10.8k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
8.92k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
8.92k
  cur = ctxt->myDoc->children->children;
13253
8.92k
  *lst = cur;
13254
26.2k
  while (cur != NULL) {
13255
17.3k
#ifdef LIBXML_VALID_ENABLED
13256
17.3k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
17.3k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
17.3k
    (cur->type == XML_ELEMENT_NODE)) {
13259
2.15k
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
2.15k
      oldctxt->myDoc, cur);
13261
2.15k
      }
13262
17.3k
#endif /* LIBXML_VALID_ENABLED */
13263
17.3k
      cur->parent = NULL;
13264
17.3k
      cur = cur->next;
13265
17.3k
  }
13266
8.92k
  ctxt->myDoc->children->children = NULL;
13267
8.92k
    }
13268
10.8k
    if (ctxt->myDoc != NULL) {
13269
10.8k
  xmlFreeNode(ctxt->myDoc->children);
13270
10.8k
        ctxt->myDoc->children = content;
13271
10.8k
        ctxt->myDoc->last = last;
13272
10.8k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
10.8k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
10.8k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
10.8k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
10.8k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
10.8k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
10.8k
    }
13285
13286
10.8k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
10.8k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
10.8k
    ctxt->sax = oldsax;
13289
10.8k
    ctxt->dict = NULL;
13290
10.8k
    ctxt->attsDefault = NULL;
13291
10.8k
    ctxt->attsSpecial = NULL;
13292
10.8k
    xmlFreeParserCtxt(ctxt);
13293
10.8k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
10.8k
    return(ret);
13298
10.8k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
106k
        xmlParserCtxtPtr pctx) {
13783
106k
    xmlParserCtxtPtr ctxt;
13784
106k
    xmlParserInputPtr inputStream;
13785
106k
    char *directory = NULL;
13786
106k
    xmlChar *uri;
13787
13788
106k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
106k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
106k
    if (pctx != NULL) {
13794
106k
        ctxt->options = pctx->options;
13795
106k
        ctxt->_private = pctx->_private;
13796
106k
  ctxt->input_id = pctx->input_id;
13797
106k
    }
13798
13799
    /* Don't read from stdin. */
13800
106k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
106k
    uri = xmlBuildURI(URL, base);
13804
13805
106k
    if (uri == NULL) {
13806
323
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
323
  if (inputStream == NULL) {
13808
323
      xmlFreeParserCtxt(ctxt);
13809
323
      return(NULL);
13810
323
  }
13811
13812
0
  inputPush(ctxt, inputStream);
13813
13814
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
0
      directory = xmlParserGetDirectory((char *)URL);
13816
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
0
      ctxt->directory = directory;
13818
106k
    } else {
13819
106k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
106k
  if (inputStream == NULL) {
13821
94.5k
      xmlFree(uri);
13822
94.5k
      xmlFreeParserCtxt(ctxt);
13823
94.5k
      return(NULL);
13824
94.5k
  }
13825
13826
11.6k
  inputPush(ctxt, inputStream);
13827
13828
11.6k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
11.6k
      directory = xmlParserGetDirectory((char *)uri);
13830
11.6k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
11.6k
      ctxt->directory = directory;
13832
11.6k
  xmlFree(uri);
13833
11.6k
    }
13834
11.6k
    return(ctxt);
13835
106k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
41.5k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
41.5k
    xmlParserCtxtPtr ctxt;
14178
41.5k
    xmlParserInputPtr input;
14179
41.5k
    xmlParserInputBufferPtr buf;
14180
14181
41.5k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
41.5k
    if (size <= 0)
14184
22
  return(NULL);
14185
14186
41.5k
    ctxt = xmlNewParserCtxt();
14187
41.5k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
41.5k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
41.5k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
41.5k
    input = xmlNewInputStream(ctxt);
14197
41.5k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
41.5k
    input->filename = NULL;
14204
41.5k
    input->buf = buf;
14205
41.5k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
41.5k
    inputPush(ctxt, input);
14208
41.5k
    return(ctxt);
14209
41.5k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/*
 * Non-zero once xmlInitParser() has completed the library set-up;
 * reset to 0 by xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
38.7M
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
38.7M
    if (xmlParserInitialized != 0)
14525
38.7M
  return;
14526
14527
50
#ifdef LIBXML_THREAD_ENABLED
14528
50
    __xmlGlobalInitMutexLock();
14529
50
    if (xmlParserInitialized == 0) {
14530
50
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
50
  xmlInitThreadsInternal();
14537
50
  xmlInitGlobalsInternal();
14538
50
  xmlInitMemoryInternal();
14539
50
        __xmlInitializeDict();
14540
50
  xmlInitEncodingInternal();
14541
50
  xmlRegisterDefaultInputCallbacks();
14542
50
#ifdef LIBXML_OUTPUT_ENABLED
14543
50
  xmlRegisterDefaultOutputCallbacks();
14544
50
#endif /* LIBXML_OUTPUT_ENABLED */
14545
50
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
50
  xmlInitXPathInternal();
14547
50
#endif
14548
50
  xmlParserInitialized = 1;
14549
50
#ifdef LIBXML_THREAD_ENABLED
14550
50
    }
14551
50
    __xmlGlobalInitMutexUnlock();
14552
50
#endif
14553
50
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * Function marked with ATTRIBUTE_DESTRUCTOR that runs the library
 * cleanup, but only while the default deallocator is still installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and a NULL "dict" means the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as exactly one statement: it can be used in an unbraced if/else body
 * and must be followed by a semicolon at the call site.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
91.8k
{
14843
91.8k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
91.8k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
91.8k
    if (options & XML_PARSE_RECOVER) {
14851
45.4k
        ctxt->recovery = 1;
14852
45.4k
        options -= XML_PARSE_RECOVER;
14853
45.4k
  ctxt->options |= XML_PARSE_RECOVER;
14854
45.4k
    } else
14855
46.3k
        ctxt->recovery = 0;
14856
91.8k
    if (options & XML_PARSE_DTDLOAD) {
14857
61.6k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
61.6k
        options -= XML_PARSE_DTDLOAD;
14859
61.6k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
61.6k
    } else
14861
30.2k
        ctxt->loadsubset = 0;
14862
91.8k
    if (options & XML_PARSE_DTDATTR) {
14863
33.7k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
33.7k
        options -= XML_PARSE_DTDATTR;
14865
33.7k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
33.7k
    }
14867
91.8k
    if (options & XML_PARSE_NOENT) {
14868
55.0k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
55.0k
        options -= XML_PARSE_NOENT;
14871
55.0k
  ctxt->options |= XML_PARSE_NOENT;
14872
55.0k
    } else
14873
36.8k
        ctxt->replaceEntities = 0;
14874
91.8k
    if (options & XML_PARSE_PEDANTIC) {
14875
10.2k
        ctxt->pedantic = 1;
14876
10.2k
        options -= XML_PARSE_PEDANTIC;
14877
10.2k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
10.2k
    } else
14879
81.6k
        ctxt->pedantic = 0;
14880
91.8k
    if (options & XML_PARSE_NOBLANKS) {
14881
30.1k
        ctxt->keepBlanks = 0;
14882
30.1k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
30.1k
        options -= XML_PARSE_NOBLANKS;
14884
30.1k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
30.1k
    } else
14886
61.6k
        ctxt->keepBlanks = 1;
14887
91.8k
    if (options & XML_PARSE_DTDVALID) {
14888
33.0k
        ctxt->validate = 1;
14889
33.0k
        if (options & XML_PARSE_NOWARNING)
14890
14.9k
            ctxt->vctxt.warning = NULL;
14891
33.0k
        if (options & XML_PARSE_NOERROR)
14892
25.4k
            ctxt->vctxt.error = NULL;
14893
33.0k
        options -= XML_PARSE_DTDVALID;
14894
33.0k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
33.0k
    } else
14896
58.8k
        ctxt->validate = 0;
14897
91.8k
    if (options & XML_PARSE_NOWARNING) {
14898
34.5k
        ctxt->sax->warning = NULL;
14899
34.5k
        options -= XML_PARSE_NOWARNING;
14900
34.5k
    }
14901
91.8k
    if (options & XML_PARSE_NOERROR) {
14902
48.8k
        ctxt->sax->error = NULL;
14903
48.8k
        ctxt->sax->fatalError = NULL;
14904
48.8k
        options -= XML_PARSE_NOERROR;
14905
48.8k
    }
14906
91.8k
#ifdef LIBXML_SAX1_ENABLED
14907
91.8k
    if (options & XML_PARSE_SAX1) {
14908
32.3k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
32.3k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
32.3k
        ctxt->sax->startElementNs = NULL;
14911
32.3k
        ctxt->sax->endElementNs = NULL;
14912
32.3k
        ctxt->sax->initialized = 1;
14913
32.3k
        options -= XML_PARSE_SAX1;
14914
32.3k
  ctxt->options |= XML_PARSE_SAX1;
14915
32.3k
    }
14916
91.8k
#endif /* LIBXML_SAX1_ENABLED */
14917
91.8k
    if (options & XML_PARSE_NODICT) {
14918
25.8k
        ctxt->dictNames = 0;
14919
25.8k
        options -= XML_PARSE_NODICT;
14920
25.8k
  ctxt->options |= XML_PARSE_NODICT;
14921
66.0k
    } else {
14922
66.0k
        ctxt->dictNames = 1;
14923
66.0k
    }
14924
91.8k
    if (options & XML_PARSE_NOCDATA) {
14925
31.7k
        ctxt->sax->cdataBlock = NULL;
14926
31.7k
        options -= XML_PARSE_NOCDATA;
14927
31.7k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
31.7k
    }
14929
91.8k
    if (options & XML_PARSE_NSCLEAN) {
14930
48.8k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
48.8k
        options -= XML_PARSE_NSCLEAN;
14932
48.8k
    }
14933
91.8k
    if (options & XML_PARSE_NONET) {
14934
30.3k
  ctxt->options |= XML_PARSE_NONET;
14935
30.3k
        options -= XML_PARSE_NONET;
14936
30.3k
    }
14937
91.8k
    if (options & XML_PARSE_COMPACT) {
14938
50.5k
  ctxt->options |= XML_PARSE_COMPACT;
14939
50.5k
        options -= XML_PARSE_COMPACT;
14940
50.5k
    }
14941
91.8k
    if (options & XML_PARSE_OLD10) {
14942
29.3k
  ctxt->options |= XML_PARSE_OLD10;
14943
29.3k
        options -= XML_PARSE_OLD10;
14944
29.3k
    }
14945
91.8k
    if (options & XML_PARSE_NOBASEFIX) {
14946
32.2k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
32.2k
        options -= XML_PARSE_NOBASEFIX;
14948
32.2k
    }
14949
91.8k
    if (options & XML_PARSE_HUGE) {
14950
30.2k
  ctxt->options |= XML_PARSE_HUGE;
14951
30.2k
        options -= XML_PARSE_HUGE;
14952
30.2k
        if (ctxt->dict != NULL)
14953
30.2k
            xmlDictSetLimit(ctxt->dict, 0);
14954
30.2k
    }
14955
91.8k
    if (options & XML_PARSE_OLDSAX) {
14956
27.5k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
27.5k
        options -= XML_PARSE_OLDSAX;
14958
27.5k
    }
14959
91.8k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
48.4k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
48.4k
        options -= XML_PARSE_IGNORE_ENC;
14962
48.4k
    }
14963
91.8k
    if (options & XML_PARSE_BIG_LINES) {
14964
36.2k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
36.2k
        options -= XML_PARSE_BIG_LINES;
14966
36.2k
    }
14967
91.8k
    ctxt->linenumbers = 1;
14968
91.8k
    return (options);
14969
91.8k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
61.2k
{
14984
61.2k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
61.2k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
30.6k
{
15003
30.6k
    xmlDocPtr ret;
15004
15005
30.6k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
30.6k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
30.6k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
30.6k
        (ctxt->input->filename == NULL))
15015
30.6k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
30.6k
    xmlParseDocument(ctxt);
15017
30.6k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
18.6k
        ret = ctxt->myDoc;
15019
11.9k
    else {
15020
11.9k
        ret = NULL;
15021
11.9k
  if (ctxt->myDoc != NULL) {
15022
10.9k
      xmlFreeDoc(ctxt->myDoc);
15023
10.9k
  }
15024
11.9k
    }
15025
30.6k
    ctxt->myDoc = NULL;
15026
30.6k
    if (!reuse) {
15027
30.6k
  xmlFreeParserCtxt(ctxt);
15028
30.6k
    }
15029
15030
30.6k
    return (ret);
15031
30.6k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
30.6k
{
15096
30.6k
    xmlParserCtxtPtr ctxt;
15097
15098
30.6k
    xmlInitParser();
15099
30.6k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
30.6k
    if (ctxt == NULL)
15101
7
        return (NULL);
15102
30.6k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
30.6k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387