Coverage Report

Created: 2023-09-25 06:03

/src/libxml2-2.11.5/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record describing one start tag.
 * NOTE(review): the code using this struct is outside this view; the
 * per-field notes below are inferred from the field names only and
 * should be confirmed against the users of the struct.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix;  /* presumably the tag's namespace prefix - confirm */
93
    const xmlChar *URI;  /* presumably the tag's namespace URI - confirm */
94
    int line;  /* presumably the input line of the tag - confirm */
95
    int nsNr;  /* presumably a namespace count for the tag - confirm */
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
#define XML_PARSER_BIG_ENTITY 1000
116
#define XML_PARSER_LOT_ENTITY 5000
117
118
/*
119
 * Constants for protection against abusive entity expansion
120
 * ("billion laughs").
121
 */
122
123
/*
124
 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
125
 * of serialized output after entity expansion.
126
 */
127
0
#define XML_PARSER_NON_LINEAR 5
128
129
/*
130
 * A certain amount is always allowed.
131
 */
132
0
#define XML_PARSER_ALLOWED_EXPANSION 1000000
133
134
/*
135
 * Fixed cost for each entity reference. This crudely models processing time
136
 * as well to protect, for example, against exponential expansion of empty
137
 * or very short entities.
138
 */
139
0
#define XML_ENT_FIXED_COST 20
140
141
/**
142
 * xmlParserMaxDepth:
143
 *
144
 * Arbitrary depth limit for the XML documents that the parser is allowed
145
 * to process. This is not a limitation of the parser but a safety
146
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
147
 * parser option. The default value is 256.
148
 */
149
unsigned int xmlParserMaxDepth = 256;
150
151
152
153
#define SAX2 1
154
5.29M
#define XML_PARSER_BIG_BUFFER_SIZE 300
155
5.15M
#define XML_PARSER_BUFFER_SIZE 100
156
11.7k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
157
158
/**
159
 * XML_PARSER_CHUNK_SIZE
160
 *
161
 * When calling GROW that's the minimal amount of data
162
 * the parser expected to have received. It is not a hard
163
 * limit but an optimization when reading strings like Names
164
 * It is not strictly needed as long as inputs available characters
165
 * are followed by 0, which should be provided by the I/O level
166
 */
167
#define XML_PARSER_CHUNK_SIZE 100
168
169
/*
170
 * List of "xml"-prefixed PI targets allowed by W3C specs; NULL-terminated.
171
 */
172
173
static const char* const xmlW3CPIs[] = {
174
    "xml-stylesheet",
175
    "xml-model",
176
    NULL
177
};
178
179
180
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
182
                                              const xmlChar **str);
183
184
static xmlParserErrors
185
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
186
                xmlSAXHandlerPtr sax,
187
          void *user_data, int depth, const xmlChar *URL,
188
          const xmlChar *ID, xmlNodePtr *list);
189
190
static int
191
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
192
                          const char *encoding);
193
#ifdef LIBXML_LEGACY_ENABLED
194
static void
195
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
196
                      xmlNodePtr lastNode);
197
#endif /* LIBXML_LEGACY_ENABLED */
198
199
static xmlParserErrors
200
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
201
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
202
203
static int
204
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
205
206
/************************************************************************
207
 *                  *
208
 *    Some factorized error routines        *
209
 *                  *
210
 ************************************************************************/
211
212
/**
213
 * xmlErrAttributeDup:
214
 * @ctxt:  an XML parser context
215
 * @prefix:  the attribute prefix
216
 * @localname:  the attribute localname
217
 *
218
 * Handle a redefinition of attribute error
219
 */
220
static void
221
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
222
                   const xmlChar * localname)
223
83.8k
{
224
83.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
225
83.8k
        (ctxt->instate == XML_PARSER_EOF))
226
0
  return;
227
83.8k
    if (ctxt != NULL)
228
83.8k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
229
230
83.8k
    if (prefix == NULL)
231
63.2k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
232
63.2k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
233
63.2k
                        (const char *) localname, NULL, NULL, 0, 0,
234
63.2k
                        "Attribute %s redefined\n", localname);
235
20.6k
    else
236
20.6k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
237
20.6k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
238
20.6k
                        (const char *) prefix, (const char *) localname,
239
20.6k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
240
20.6k
                        localname);
241
83.8k
    if (ctxt != NULL) {
242
83.8k
  ctxt->wellFormed = 0;
243
83.8k
  if (ctxt->recovery == 0)
244
83.8k
      ctxt->disableSAX = 1;
245
83.8k
    }
246
83.8k
}
247
248
/**
249
 * xmlFatalErr:
250
 * @ctxt:  an XML parser context
251
 * @error:  the error number
252
 * @extra:  extra information string
253
 *
254
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
255
 */
256
static void
257
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
258
141k
{
259
141k
    const char *errmsg;
260
261
141k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
262
141k
        (ctxt->instate == XML_PARSER_EOF))
263
598
  return;
264
140k
    switch (error) {
265
5.36k
        case XML_ERR_INVALID_HEX_CHARREF:
266
5.36k
            errmsg = "CharRef: invalid hexadecimal value";
267
5.36k
            break;
268
38.9k
        case XML_ERR_INVALID_DEC_CHARREF:
269
38.9k
            errmsg = "CharRef: invalid decimal value";
270
38.9k
            break;
271
0
        case XML_ERR_INVALID_CHARREF:
272
0
            errmsg = "CharRef: invalid value";
273
0
            break;
274
1.83k
        case XML_ERR_INTERNAL_ERROR:
275
1.83k
            errmsg = "internal error";
276
1.83k
            break;
277
0
        case XML_ERR_PEREF_AT_EOF:
278
0
            errmsg = "PEReference at end of document";
279
0
            break;
280
0
        case XML_ERR_PEREF_IN_PROLOG:
281
0
            errmsg = "PEReference in prolog";
282
0
            break;
283
0
        case XML_ERR_PEREF_IN_EPILOG:
284
0
            errmsg = "PEReference in epilog";
285
0
            break;
286
0
        case XML_ERR_PEREF_NO_NAME:
287
0
            errmsg = "PEReference: no name";
288
0
            break;
289
1.54k
        case XML_ERR_PEREF_SEMICOL_MISSING:
290
1.54k
            errmsg = "PEReference: expecting ';'";
291
1.54k
            break;
292
0
        case XML_ERR_ENTITY_LOOP:
293
0
            errmsg = "Detected an entity reference loop";
294
0
            break;
295
0
        case XML_ERR_ENTITY_NOT_STARTED:
296
0
            errmsg = "EntityValue: \" or ' expected";
297
0
            break;
298
223
        case XML_ERR_ENTITY_PE_INTERNAL:
299
223
            errmsg = "PEReferences forbidden in internal subset";
300
223
            break;
301
109
        case XML_ERR_ENTITY_NOT_FINISHED:
302
109
            errmsg = "EntityValue: \" or ' expected";
303
109
            break;
304
218
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
305
218
            errmsg = "AttValue: \" or ' expected";
306
218
            break;
307
647
        case XML_ERR_LT_IN_ATTRIBUTE:
308
647
            errmsg = "Unescaped '<' not allowed in attributes values";
309
647
            break;
310
276
        case XML_ERR_LITERAL_NOT_STARTED:
311
276
            errmsg = "SystemLiteral \" or ' expected";
312
276
            break;
313
395
        case XML_ERR_LITERAL_NOT_FINISHED:
314
395
            errmsg = "Unfinished System or Public ID \" or ' expected";
315
395
            break;
316
195
        case XML_ERR_MISPLACED_CDATA_END:
317
195
            errmsg = "Sequence ']]>' not allowed in content";
318
195
            break;
319
129
        case XML_ERR_URI_REQUIRED:
320
129
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
321
129
            break;
322
147
        case XML_ERR_PUBID_REQUIRED:
323
147
            errmsg = "PUBLIC, the Public Identifier is missing";
324
147
            break;
325
2.10k
        case XML_ERR_HYPHEN_IN_COMMENT:
326
2.10k
            errmsg = "Comment must not contain '--' (double-hyphen)";
327
2.10k
            break;
328
217
        case XML_ERR_PI_NOT_STARTED:
329
217
            errmsg = "xmlParsePI : no target name";
330
217
            break;
331
353
        case XML_ERR_RESERVED_XML_NAME:
332
353
            errmsg = "Invalid PI name";
333
353
            break;
334
347
        case XML_ERR_NOTATION_NOT_STARTED:
335
347
            errmsg = "NOTATION: Name expected here";
336
347
            break;
337
1.21k
        case XML_ERR_NOTATION_NOT_FINISHED:
338
1.21k
            errmsg = "'>' required to close NOTATION declaration";
339
1.21k
            break;
340
682
        case XML_ERR_VALUE_REQUIRED:
341
682
            errmsg = "Entity value required";
342
682
            break;
343
1.06k
        case XML_ERR_URI_FRAGMENT:
344
1.06k
            errmsg = "Fragment not allowed";
345
1.06k
            break;
346
445
        case XML_ERR_ATTLIST_NOT_STARTED:
347
445
            errmsg = "'(' required to start ATTLIST enumeration";
348
445
            break;
349
43
        case XML_ERR_NMTOKEN_REQUIRED:
350
43
            errmsg = "NmToken expected in ATTLIST enumeration";
351
43
            break;
352
607
        case XML_ERR_ATTLIST_NOT_FINISHED:
353
607
            errmsg = "')' required to finish ATTLIST enumeration";
354
607
            break;
355
550
        case XML_ERR_MIXED_NOT_STARTED:
356
550
            errmsg = "MixedContentDecl : '|' or ')*' expected";
357
550
            break;
358
0
        case XML_ERR_PCDATA_REQUIRED:
359
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
360
0
            break;
361
577
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
362
577
            errmsg = "ContentDecl : Name or '(' expected";
363
577
            break;
364
1.45k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
365
1.45k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
366
1.45k
            break;
367
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
368
0
            errmsg =
369
0
                "PEReference: forbidden within markup decl in internal subset";
370
0
            break;
371
1.91k
        case XML_ERR_GT_REQUIRED:
372
1.91k
            errmsg = "expected '>'";
373
1.91k
            break;
374
0
        case XML_ERR_CONDSEC_INVALID:
375
0
            errmsg = "XML conditional section '[' expected";
376
0
            break;
377
0
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
378
0
            errmsg = "Content error in the external subset";
379
0
            break;
380
0
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
381
0
            errmsg =
382
0
                "conditional section INCLUDE or IGNORE keyword expected";
383
0
            break;
384
0
        case XML_ERR_CONDSEC_NOT_FINISHED:
385
0
            errmsg = "XML conditional section not closed";
386
0
            break;
387
0
        case XML_ERR_XMLDECL_NOT_STARTED:
388
0
            errmsg = "Text declaration '<?xml' required";
389
0
            break;
390
385
        case XML_ERR_XMLDECL_NOT_FINISHED:
391
385
            errmsg = "parsing XML declaration: '?>' expected";
392
385
            break;
393
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
394
0
            errmsg = "external parsed entities cannot be standalone";
395
0
            break;
396
76.7k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
397
76.7k
            errmsg = "EntityRef: expecting ';'";
398
76.7k
            break;
399
136
        case XML_ERR_DOCTYPE_NOT_FINISHED:
400
136
            errmsg = "DOCTYPE improperly terminated";
401
136
            break;
402
0
        case XML_ERR_LTSLASH_REQUIRED:
403
0
            errmsg = "EndTag: '</' not found";
404
0
            break;
405
40
        case XML_ERR_EQUAL_REQUIRED:
406
40
            errmsg = "expected '='";
407
40
            break;
408
138
        case XML_ERR_STRING_NOT_CLOSED:
409
138
            errmsg = "String not closed expecting \" or '";
410
138
            break;
411
33
        case XML_ERR_STRING_NOT_STARTED:
412
33
            errmsg = "String not started expecting ' or \"";
413
33
            break;
414
4
        case XML_ERR_ENCODING_NAME:
415
4
            errmsg = "Invalid XML encoding name";
416
4
            break;
417
14
        case XML_ERR_STANDALONE_VALUE:
418
14
            errmsg = "standalone accepts only 'yes' or 'no'";
419
14
            break;
420
169
        case XML_ERR_DOCUMENT_EMPTY:
421
169
            errmsg = "Document is empty";
422
169
            break;
423
822
        case XML_ERR_DOCUMENT_END:
424
822
            errmsg = "Extra content at the end of the document";
425
822
            break;
426
0
        case XML_ERR_NOT_WELL_BALANCED:
427
0
            errmsg = "chunk is not well balanced";
428
0
            break;
429
0
        case XML_ERR_EXTRA_CONTENT:
430
0
            errmsg = "extra content at the end of well balanced chunk";
431
0
            break;
432
532
        case XML_ERR_VERSION_MISSING:
433
532
            errmsg = "Malformed declaration expecting version";
434
532
            break;
435
7
        case XML_ERR_NAME_TOO_LONG:
436
7
            errmsg = "Name too long";
437
7
            break;
438
#if 0
439
        case:
440
            errmsg = "";
441
            break;
442
#endif
443
0
        default:
444
0
            errmsg = "Unregistered error message";
445
140k
    }
446
140k
    if (ctxt != NULL)
447
140k
  ctxt->errNo = error;
448
140k
    if (info == NULL) {
449
138k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
450
138k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
451
138k
                        errmsg);
452
138k
    } else {
453
1.84k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
454
1.84k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
455
1.84k
                        errmsg, info);
456
1.84k
    }
457
140k
    if (ctxt != NULL) {
458
140k
  ctxt->wellFormed = 0;
459
140k
  if (ctxt->recovery == 0)
460
140k
      ctxt->disableSAX = 1;
461
140k
    }
462
140k
}
463
464
/**
465
 * xmlFatalErrMsg:
466
 * @ctxt:  an XML parser context
467
 * @error:  the error number
468
 * @msg:  the error message
469
 *
470
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
471
 */
472
static void LIBXML_ATTR_FORMAT(3,0)
473
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
474
               const char *msg)
475
465k
{
476
465k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
477
465k
        (ctxt->instate == XML_PARSER_EOF))
478
0
  return;
479
465k
    if (ctxt != NULL)
480
465k
  ctxt->errNo = error;
481
465k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
482
465k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
483
465k
    if (ctxt != NULL) {
484
465k
  ctxt->wellFormed = 0;
485
465k
  if (ctxt->recovery == 0)
486
465k
      ctxt->disableSAX = 1;
487
465k
    }
488
465k
}
489
490
/**
491
 * xmlWarningMsg:
492
 * @ctxt:  an XML parser context
493
 * @error:  the error number
494
 * @msg:  the error message
495
 * @str1:  extra data
496
 * @str2:  extra data
497
 *
498
 * Handle a warning.
499
 */
500
static void LIBXML_ATTR_FORMAT(3,0)
501
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
502
              const char *msg, const xmlChar *str1, const xmlChar *str2)
503
2.47k
{
504
2.47k
    xmlStructuredErrorFunc schannel = NULL;
505
506
2.47k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
507
2.47k
        (ctxt->instate == XML_PARSER_EOF))
508
0
  return;
509
2.47k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
510
2.47k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
511
2.47k
        schannel = ctxt->sax->serror;
512
2.47k
    if (ctxt != NULL) {
513
2.47k
        __xmlRaiseError(schannel,
514
2.47k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
515
2.47k
                    ctxt->userData,
516
2.47k
                    ctxt, NULL, XML_FROM_PARSER, error,
517
2.47k
                    XML_ERR_WARNING, NULL, 0,
518
2.47k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
519
2.47k
        msg, (const char *) str1, (const char *) str2);
520
2.47k
    } else {
521
0
        __xmlRaiseError(schannel, NULL, NULL,
522
0
                    ctxt, NULL, XML_FROM_PARSER, error,
523
0
                    XML_ERR_WARNING, NULL, 0,
524
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
525
0
        msg, (const char *) str1, (const char *) str2);
526
0
    }
527
2.47k
}
528
529
/**
530
 * xmlValidityError:
531
 * @ctxt:  an XML parser context
532
 * @error:  the error number
533
 * @msg:  the error message
534
 * @str1:  extra data
535
 *
536
 * Handle a validity error.
537
 */
538
static void LIBXML_ATTR_FORMAT(3,0)
539
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
540
              const char *msg, const xmlChar *str1, const xmlChar *str2)
541
8.64k
{
542
8.64k
    xmlStructuredErrorFunc schannel = NULL;
543
544
8.64k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545
8.64k
        (ctxt->instate == XML_PARSER_EOF))
546
0
  return;
547
8.64k
    if (ctxt != NULL) {
548
8.64k
  ctxt->errNo = error;
549
8.64k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
550
8.64k
      schannel = ctxt->sax->serror;
551
8.64k
    }
552
8.64k
    if (ctxt != NULL) {
553
8.64k
        __xmlRaiseError(schannel,
554
8.64k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
555
8.64k
                    ctxt, NULL, XML_FROM_DTD, error,
556
8.64k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
557
8.64k
        (const char *) str2, NULL, 0, 0,
558
8.64k
        msg, (const char *) str1, (const char *) str2);
559
8.64k
  ctxt->valid = 0;
560
8.64k
    } else {
561
0
        __xmlRaiseError(schannel, NULL, NULL,
562
0
                    ctxt, NULL, XML_FROM_DTD, error,
563
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
564
0
        (const char *) str2, NULL, 0, 0,
565
0
        msg, (const char *) str1, (const char *) str2);
566
0
    }
567
8.64k
}
568
569
/**
570
 * xmlFatalErrMsgInt:
571
 * @ctxt:  an XML parser context
572
 * @error:  the error number
573
 * @msg:  the error message
574
 * @val:  an integer value
575
 *
576
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577
 */
578
static void LIBXML_ATTR_FORMAT(3,0)
579
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
580
                  const char *msg, int val)
581
51.3k
{
582
51.3k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583
51.3k
        (ctxt->instate == XML_PARSER_EOF))
584
0
  return;
585
51.3k
    if (ctxt != NULL)
586
51.3k
  ctxt->errNo = error;
587
51.3k
    __xmlRaiseError(NULL, NULL, NULL,
588
51.3k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589
51.3k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
590
51.3k
    if (ctxt != NULL) {
591
51.3k
  ctxt->wellFormed = 0;
592
51.3k
  if (ctxt->recovery == 0)
593
51.3k
      ctxt->disableSAX = 1;
594
51.3k
    }
595
51.3k
}
596
597
/**
598
 * xmlFatalErrMsgStrIntStr:
599
 * @ctxt:  an XML parser context
600
 * @error:  the error number
601
 * @msg:  the error message
602
 * @str1:  an string info
603
 * @val:  an integer value
604
 * @str2:  an string info
605
 *
606
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607
 */
608
static void LIBXML_ATTR_FORMAT(3,0)
609
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610
                  const char *msg, const xmlChar *str1, int val,
611
      const xmlChar *str2)
612
164
{
613
164
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614
164
        (ctxt->instate == XML_PARSER_EOF))
615
0
  return;
616
164
    if (ctxt != NULL)
617
164
  ctxt->errNo = error;
618
164
    __xmlRaiseError(NULL, NULL, NULL,
619
164
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620
164
                    NULL, 0, (const char *) str1, (const char *) str2,
621
164
        NULL, val, 0, msg, str1, val, str2);
622
164
    if (ctxt != NULL) {
623
164
  ctxt->wellFormed = 0;
624
164
  if (ctxt->recovery == 0)
625
164
      ctxt->disableSAX = 1;
626
164
    }
627
164
}
628
629
/**
630
 * xmlFatalErrMsgStr:
631
 * @ctxt:  an XML parser context
632
 * @error:  the error number
633
 * @msg:  the error message
634
 * @val:  a string value
635
 *
636
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637
 */
638
static void LIBXML_ATTR_FORMAT(3,0)
639
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
640
                  const char *msg, const xmlChar * val)
641
38.4k
{
642
38.4k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643
38.4k
        (ctxt->instate == XML_PARSER_EOF))
644
0
  return;
645
38.4k
    if (ctxt != NULL)
646
38.4k
  ctxt->errNo = error;
647
38.4k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
648
38.4k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
649
38.4k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650
38.4k
                    val);
651
38.4k
    if (ctxt != NULL) {
652
38.4k
  ctxt->wellFormed = 0;
653
38.4k
  if (ctxt->recovery == 0)
654
38.4k
      ctxt->disableSAX = 1;
655
38.4k
    }
656
38.4k
}
657
658
/**
659
 * xmlErrMsgStr:
660
 * @ctxt:  an XML parser context
661
 * @error:  the error number
662
 * @msg:  the error message
663
 * @val:  a string value
664
 *
665
 * Handle a non fatal parser error
666
 */
667
static void LIBXML_ATTR_FORMAT(3,0)
668
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669
                  const char *msg, const xmlChar * val)
670
3.04k
{
671
3.04k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672
3.04k
        (ctxt->instate == XML_PARSER_EOF))
673
0
  return;
674
3.04k
    if (ctxt != NULL)
675
3.04k
  ctxt->errNo = error;
676
3.04k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
677
3.04k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
678
3.04k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679
3.04k
                    val);
680
3.04k
}
681
682
/**
683
 * xmlNsErr:
684
 * @ctxt:  an XML parser context
685
 * @error:  the error number
686
 * @msg:  the message
687
 * @info1:  extra information string
688
 * @info2:  extra information string
689
 *
690
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691
 */
692
static void LIBXML_ATTR_FORMAT(3,0)
693
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694
         const char *msg,
695
         const xmlChar * info1, const xmlChar * info2,
696
         const xmlChar * info3)
697
52.1k
{
698
52.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699
52.1k
        (ctxt->instate == XML_PARSER_EOF))
700
0
  return;
701
52.1k
    if (ctxt != NULL)
702
52.1k
  ctxt->errNo = error;
703
52.1k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
704
52.1k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
705
52.1k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
706
52.1k
                    info1, info2, info3);
707
52.1k
    if (ctxt != NULL)
708
52.1k
  ctxt->nsWellFormed = 0;
709
52.1k
}
710
711
/**
712
 * xmlNsWarn
713
 * @ctxt:  an XML parser context
714
 * @error:  the error number
715
 * @msg:  the message
716
 * @info1:  extra information string
717
 * @info2:  extra information string
718
 *
719
 * Handle a namespace warning error
720
 */
721
static void LIBXML_ATTR_FORMAT(3,0)
722
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723
         const char *msg,
724
         const xmlChar * info1, const xmlChar * info2,
725
         const xmlChar * info3)
726
2.97k
{
727
2.97k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728
2.97k
        (ctxt->instate == XML_PARSER_EOF))
729
0
  return;
730
2.97k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731
2.97k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
732
2.97k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
733
2.97k
                    info1, info2, info3);
734
2.97k
}
735
736
/*
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst without wrapping around: when the sum would exceed
 * ULONG_MAX the accumulator is clamped to ULONG_MAX instead.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that still fits on top of the current value. */
    const unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
743
744
/*
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst without wrapping around: when the sum would exceed
 * ULONG_MAX the accumulator is clamped to ULONG_MAX instead.
 *
 * @val is taken as a size_t so that the comparison is done before any
 * narrowing. Where size_t is wider than unsigned long, a value above
 * ULONG_MAX saturates instead of being silently truncated at the call.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;  /* fits: val <= ULONG_MAX - *dst */
}
751
752
/**
753
 * xmlParserEntityCheck:
754
 * @ctxt:  parser context
755
 * @extra:  sum of unexpanded entity sizes
756
 *
757
 * Check for non-linear entity expansion behaviour.
758
 *
759
 * In some cases like xmlStringDecodeEntities, this function is called
760
 * for each, possibly nested entity and its unexpanded content length.
761
 *
762
 * In other cases like xmlParseReference, it's only called for each
763
 * top-level entity with its unexpanded content length plus the sum of
764
 * the unexpanded content lengths (plus fixed cost) of all nested
765
 * entities.
766
 *
767
 * Summing the unexpanded lengths also adds the length of the reference.
768
 * This is by design. Taking the length of the entity name into account
769
 * discourages attacks that try to waste CPU time with abusively long
770
 * entity names. See test/recurse/lol6.xml for example. Each call also
771
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
772
 * short entities.
773
 *
774
 * Returns 1 on error, 0 on success.
775
 */
776
static int
777
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
778
0
{
779
0
    unsigned long consumed;
780
0
    xmlParserInputPtr input = ctxt->input;
781
0
    xmlEntityPtr entity = input->entity;
782
783
    /*
784
     * Compute total consumed bytes so far, including input streams of
785
     * external entities.
786
     */
787
0
    consumed = input->parentConsumed;
788
0
    if ((entity == NULL) ||
789
0
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
790
0
         ((entity->flags & XML_ENT_PARSED) == 0))) {
791
0
        xmlSaturatedAdd(&consumed, input->consumed);
792
0
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
793
0
    }
794
0
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
795
796
    /*
797
     * Add extra cost and some fixed cost.
798
     */
799
0
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
800
0
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
801
802
    /*
803
     * It's important to always use saturation arithmetic when tracking
804
     * entity sizes to make the size checks reliable. If "sizeentcopy"
805
     * overflows, we have to abort.
806
     */
807
0
    if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
808
0
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
809
0
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
810
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
811
0
                       "Maximum entity amplification factor exceeded");
812
0
        xmlHaltParser(ctxt);
813
0
        return(1);
814
0
    }
815
816
0
    return(0);
817
0
}
818
819
/************************************************************************
820
 *                  *
821
 *    Library wide options          *
822
 *                  *
823
 ************************************************************************/
824
825
/**
826
  * xmlHasFeature:
827
  * @feature: the feature to be examined
828
  *
829
  * Examines if the library has been compiled with a given feature.
830
  *
831
  * Returns a non-zero value if the feature exists, otherwise zero.
832
  * Returns zero (0) if the feature does not exist or an unknown
833
  * feature is requested, non-zero otherwise.
834
  */
835
int
836
xmlHasFeature(xmlFeature feature)
837
0
{
838
0
    switch (feature) {
839
0
  case XML_WITH_THREAD:
840
0
#ifdef LIBXML_THREAD_ENABLED
841
0
      return(1);
842
#else
843
      return(0);
844
#endif
845
0
        case XML_WITH_TREE:
846
0
#ifdef LIBXML_TREE_ENABLED
847
0
            return(1);
848
#else
849
            return(0);
850
#endif
851
0
        case XML_WITH_OUTPUT:
852
0
#ifdef LIBXML_OUTPUT_ENABLED
853
0
            return(1);
854
#else
855
            return(0);
856
#endif
857
0
        case XML_WITH_PUSH:
858
0
#ifdef LIBXML_PUSH_ENABLED
859
0
            return(1);
860
#else
861
            return(0);
862
#endif
863
0
        case XML_WITH_READER:
864
0
#ifdef LIBXML_READER_ENABLED
865
0
            return(1);
866
#else
867
            return(0);
868
#endif
869
0
        case XML_WITH_PATTERN:
870
0
#ifdef LIBXML_PATTERN_ENABLED
871
0
            return(1);
872
#else
873
            return(0);
874
#endif
875
0
        case XML_WITH_WRITER:
876
0
#ifdef LIBXML_WRITER_ENABLED
877
0
            return(1);
878
#else
879
            return(0);
880
#endif
881
0
        case XML_WITH_SAX1:
882
0
#ifdef LIBXML_SAX1_ENABLED
883
0
            return(1);
884
#else
885
            return(0);
886
#endif
887
0
        case XML_WITH_FTP:
888
#ifdef LIBXML_FTP_ENABLED
889
            return(1);
890
#else
891
0
            return(0);
892
0
#endif
893
0
        case XML_WITH_HTTP:
894
0
#ifdef LIBXML_HTTP_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_VALID:
900
0
#ifdef LIBXML_VALID_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_HTML:
906
0
#ifdef LIBXML_HTML_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_LEGACY:
912
#ifdef LIBXML_LEGACY_ENABLED
913
            return(1);
914
#else
915
0
            return(0);
916
0
#endif
917
0
        case XML_WITH_C14N:
918
0
#ifdef LIBXML_C14N_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_CATALOG:
924
0
#ifdef LIBXML_CATALOG_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_XPATH:
930
0
#ifdef LIBXML_XPATH_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_XPTR:
936
0
#ifdef LIBXML_XPTR_ENABLED
937
0
            return(1);
938
#else
939
            return(0);
940
#endif
941
0
        case XML_WITH_XINCLUDE:
942
0
#ifdef LIBXML_XINCLUDE_ENABLED
943
0
            return(1);
944
#else
945
            return(0);
946
#endif
947
0
        case XML_WITH_ICONV:
948
0
#ifdef LIBXML_ICONV_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_ISO8859X:
954
0
#ifdef LIBXML_ISO8859X_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_UNICODE:
960
0
#ifdef LIBXML_UNICODE_ENABLED
961
0
            return(1);
962
#else
963
            return(0);
964
#endif
965
0
        case XML_WITH_REGEXP:
966
0
#ifdef LIBXML_REGEXP_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_AUTOMATA:
972
0
#ifdef LIBXML_AUTOMATA_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_EXPR:
978
#ifdef LIBXML_EXPR_ENABLED
979
            return(1);
980
#else
981
0
            return(0);
982
0
#endif
983
0
        case XML_WITH_SCHEMAS:
984
0
#ifdef LIBXML_SCHEMAS_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_SCHEMATRON:
990
0
#ifdef LIBXML_SCHEMATRON_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_MODULES:
996
#ifdef LIBXML_MODULES_ENABLED
997
            return(1);
998
#else
999
0
            return(0);
1000
0
#endif
1001
0
        case XML_WITH_DEBUG:
1002
0
#ifdef LIBXML_DEBUG_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_DEBUG_MEM:
1008
#ifdef DEBUG_MEMORY_LOCATION
1009
            return(1);
1010
#else
1011
0
            return(0);
1012
0
#endif
1013
0
        case XML_WITH_DEBUG_RUN:
1014
0
            return(0);
1015
0
        case XML_WITH_ZLIB:
1016
#ifdef LIBXML_ZLIB_ENABLED
1017
            return(1);
1018
#else
1019
0
            return(0);
1020
0
#endif
1021
0
        case XML_WITH_LZMA:
1022
#ifdef LIBXML_LZMA_ENABLED
1023
            return(1);
1024
#else
1025
0
            return(0);
1026
0
#endif
1027
0
        case XML_WITH_ICU:
1028
#ifdef LIBXML_ICU_ENABLED
1029
            return(1);
1030
#else
1031
0
            return(0);
1032
0
#endif
1033
0
        default:
1034
0
      break;
1035
0
     }
1036
0
     return(0);
1037
0
}
1038
1039
/************************************************************************
1040
 *                  *
1041
 *    SAX2 defaulted attributes handling      *
1042
 *                  *
1043
 ************************************************************************/
1044
1045
/**
1046
 * xmlDetectSAX2:
1047
 * @ctxt:  an XML parser context
1048
 *
1049
 * Do the SAX2 detection and specific initialization
1050
 */
1051
static void
1052
12.9k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1053
12.9k
    xmlSAXHandlerPtr sax;
1054
1055
    /* Avoid unused variable warning if features are disabled. */
1056
12.9k
    (void) sax;
1057
1058
12.9k
    if (ctxt == NULL) return;
1059
12.9k
    sax = ctxt->sax;
1060
12.9k
#ifdef LIBXML_SAX1_ENABLED
1061
12.9k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1062
12.9k
        ((sax->startElementNs != NULL) ||
1063
12.9k
         (sax->endElementNs != NULL) ||
1064
12.9k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1065
12.9k
        ctxt->sax2 = 1;
1066
#else
1067
    ctxt->sax2 = 1;
1068
#endif /* LIBXML_SAX1_ENABLED */
1069
1070
12.9k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1071
12.9k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1072
12.9k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1073
12.9k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1074
12.9k
    (ctxt->str_xml_ns == NULL)) {
1075
0
        xmlErrMemory(ctxt, NULL);
1076
0
    }
1077
12.9k
}
1078
1079
typedef struct _xmlDefAttrs xmlDefAttrs;
1080
typedef xmlDefAttrs *xmlDefAttrsPtr;
1081
struct _xmlDefAttrs {
1082
    int nbAttrs;  /* number of defaulted attributes on that element */
1083
    int maxAttrs;       /* the size of the array */
1084
#if __STDC_VERSION__ >= 199901L
1085
    /* Using a C99 flexible array member avoids UBSan errors. */
1086
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1087
#else
1088
    const xmlChar *values[5];
1089
#endif
1090
};
1091
1092
/**
1093
 * xmlAttrNormalizeSpace:
1094
 * @src: the source string
1095
 * @dst: the target string
1096
 *
1097
 * Normalize the space in non CDATA attribute values:
1098
 * If the attribute type is not CDATA, then the XML processor MUST further
1099
 * process the normalized attribute value by discarding any leading and
1100
 * trailing space (#x20) characters, and by replacing sequences of space
1101
 * (#x20) characters by a single space (#x20) character.
1102
 * Note that the size of dst need to be at least src, and if one doesn't need
1103
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1104
 * passing src as dst is just fine.
1105
 *
1106
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1107
 *         is needed.
1108
 */
1109
static xmlChar *
1110
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1111
13.4k
{
1112
13.4k
    if ((src == NULL) || (dst == NULL))
1113
0
        return(NULL);
1114
1115
16.1k
    while (*src == 0x20) src++;
1116
101k
    while (*src != 0) {
1117
88.4k
  if (*src == 0x20) {
1118
16.2k
      while (*src == 0x20) src++;
1119
3.60k
      if (*src != 0)
1120
2.48k
    *dst++ = 0x20;
1121
84.8k
  } else {
1122
84.8k
      *dst++ = *src++;
1123
84.8k
  }
1124
88.4k
    }
1125
13.4k
    *dst = 0;
1126
13.4k
    if (dst == src)
1127
11.1k
       return(NULL);
1128
2.31k
    return(dst);
1129
13.4k
}
1130
1131
/**
1132
 * xmlAttrNormalizeSpace2:
1133
 * @src: the source string
1134
 *
1135
 * Normalize the space in non CDATA attribute values, a slightly more complex
1136
 * front end to avoid allocation problems when running on attribute values
1137
 * coming from the input.
1138
 *
1139
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1140
 *         is needed.
1141
 */
1142
static const xmlChar *
1143
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1144
2.09k
{
1145
2.09k
    int i;
1146
2.09k
    int remove_head = 0;
1147
2.09k
    int need_realloc = 0;
1148
2.09k
    const xmlChar *cur;
1149
1150
2.09k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1151
0
        return(NULL);
1152
2.09k
    i = *len;
1153
2.09k
    if (i <= 0)
1154
72
        return(NULL);
1155
1156
2.02k
    cur = src;
1157
2.17k
    while (*cur == 0x20) {
1158
150
        cur++;
1159
150
  remove_head++;
1160
150
    }
1161
173k
    while (*cur != 0) {
1162
171k
  if (*cur == 0x20) {
1163
1.58k
      cur++;
1164
1.58k
      if ((*cur == 0x20) || (*cur == 0)) {
1165
176
          need_realloc = 1;
1166
176
    break;
1167
176
      }
1168
1.58k
  } else
1169
169k
      cur++;
1170
171k
    }
1171
2.02k
    if (need_realloc) {
1172
176
        xmlChar *ret;
1173
1174
176
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1175
176
  if (ret == NULL) {
1176
0
      xmlErrMemory(ctxt, NULL);
1177
0
      return(NULL);
1178
0
  }
1179
176
  xmlAttrNormalizeSpace(ret, ret);
1180
176
  *len = strlen((const char *)ret);
1181
176
        return(ret);
1182
1.84k
    } else if (remove_head) {
1183
81
        *len -= remove_head;
1184
81
        memmove(src, src + remove_head, 1 + *len);
1185
81
  return(src);
1186
81
    }
1187
1.76k
    return(NULL);
1188
2.02k
}
1189
1190
/**
1191
 * xmlAddDefAttrs:
1192
 * @ctxt:  an XML parser context
1193
 * @fullname:  the element fullname
1194
 * @fullattr:  the attribute fullname
1195
 * @value:  the attribute value
1196
 *
1197
 * Add a defaulted attribute for an element
1198
 */
1199
static void
1200
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1201
               const xmlChar *fullname,
1202
               const xmlChar *fullattr,
1203
13.3k
               const xmlChar *value) {
1204
13.3k
    xmlDefAttrsPtr defaults;
1205
13.3k
    int len;
1206
13.3k
    const xmlChar *name;
1207
13.3k
    const xmlChar *prefix;
1208
1209
    /*
1210
     * Allows to detect attribute redefinitions
1211
     */
1212
13.3k
    if (ctxt->attsSpecial != NULL) {
1213
12.8k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1214
2.72k
      return;
1215
12.8k
    }
1216
1217
10.6k
    if (ctxt->attsDefault == NULL) {
1218
532
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1219
532
  if (ctxt->attsDefault == NULL)
1220
0
      goto mem_error;
1221
532
    }
1222
1223
    /*
1224
     * split the element name into prefix:localname , the string found
1225
     * are within the DTD and then not associated to namespace names.
1226
     */
1227
10.6k
    name = xmlSplitQName3(fullname, &len);
1228
10.6k
    if (name == NULL) {
1229
9.08k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1230
9.08k
  prefix = NULL;
1231
9.08k
    } else {
1232
1.53k
        name = xmlDictLookup(ctxt->dict, name, -1);
1233
1.53k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1234
1.53k
    }
1235
1236
    /*
1237
     * make sure there is some storage
1238
     */
1239
10.6k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1240
10.6k
    if (defaults == NULL) {
1241
2.01k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1242
2.01k
                     (4 * 5) * sizeof(const xmlChar *));
1243
2.01k
  if (defaults == NULL)
1244
0
      goto mem_error;
1245
2.01k
  defaults->nbAttrs = 0;
1246
2.01k
  defaults->maxAttrs = 4;
1247
2.01k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1248
2.01k
                          defaults, NULL) < 0) {
1249
0
      xmlFree(defaults);
1250
0
      goto mem_error;
1251
0
  }
1252
8.60k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1253
1.37k
        xmlDefAttrsPtr temp;
1254
1255
1.37k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1256
1.37k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1257
1.37k
  if (temp == NULL)
1258
0
      goto mem_error;
1259
1.37k
  defaults = temp;
1260
1.37k
  defaults->maxAttrs *= 2;
1261
1.37k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1262
1.37k
                          defaults, NULL) < 0) {
1263
0
      xmlFree(defaults);
1264
0
      goto mem_error;
1265
0
  }
1266
1.37k
    }
1267
1268
    /*
1269
     * Split the element name into prefix:localname , the string found
1270
     * are within the DTD and hen not associated to namespace names.
1271
     */
1272
10.6k
    name = xmlSplitQName3(fullattr, &len);
1273
10.6k
    if (name == NULL) {
1274
7.35k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1275
7.35k
  prefix = NULL;
1276
7.35k
    } else {
1277
3.26k
        name = xmlDictLookup(ctxt->dict, name, -1);
1278
3.26k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1279
3.26k
    }
1280
1281
10.6k
    defaults->values[5 * defaults->nbAttrs] = name;
1282
10.6k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1283
    /* intern the string and precompute the end */
1284
10.6k
    len = xmlStrlen(value);
1285
10.6k
    value = xmlDictLookup(ctxt->dict, value, len);
1286
10.6k
    if (value == NULL)
1287
0
        goto mem_error;
1288
10.6k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1289
10.6k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1290
10.6k
    if (ctxt->external)
1291
0
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1292
10.6k
    else
1293
10.6k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1294
10.6k
    defaults->nbAttrs++;
1295
1296
10.6k
    return;
1297
1298
0
mem_error:
1299
0
    xmlErrMemory(ctxt, NULL);
1300
0
    return;
1301
10.6k
}
1302
1303
/**
1304
 * xmlAddSpecialAttr:
1305
 * @ctxt:  an XML parser context
1306
 * @fullname:  the element fullname
1307
 * @fullattr:  the attribute fullname
1308
 * @type:  the attribute type
1309
 *
1310
 * Register this attribute type
1311
 */
1312
static void
1313
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1314
      const xmlChar *fullname,
1315
      const xmlChar *fullattr,
1316
      int type)
1317
14.8k
{
1318
14.8k
    if (ctxt->attsSpecial == NULL) {
1319
674
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1320
674
  if (ctxt->attsSpecial == NULL)
1321
0
      goto mem_error;
1322
674
    }
1323
1324
14.8k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1325
3.21k
        return;
1326
1327
11.6k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1328
11.6k
                     (void *) (ptrdiff_t) type);
1329
11.6k
    return;
1330
1331
0
mem_error:
1332
0
    xmlErrMemory(ctxt, NULL);
1333
0
    return;
1334
14.8k
}
1335
1336
/**
1337
 * xmlCleanSpecialAttrCallback:
1338
 *
1339
 * Removes CDATA attributes from the special attribute table
1340
 */
1341
static void
1342
xmlCleanSpecialAttrCallback(void *payload, void *data,
1343
                            const xmlChar *fullname, const xmlChar *fullattr,
1344
2.51k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1345
2.51k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1346
1347
2.51k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1348
129
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1349
129
    }
1350
2.51k
}
1351
1352
/**
1353
 * xmlCleanSpecialAttr:
1354
 * @ctxt:  an XML parser context
1355
 *
1356
 * Trim the list of attributes defined to remove all those of type
1357
 * CDATA as they are not special. This call should be done when finishing
1358
 * to parse the DTD and before starting to parse the document root.
1359
 */
1360
static void
1361
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1362
580
{
1363
580
    if (ctxt->attsSpecial == NULL)
1364
117
        return;
1365
1366
463
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1367
1368
463
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1369
10
        xmlHashFree(ctxt->attsSpecial, NULL);
1370
10
        ctxt->attsSpecial = NULL;
1371
10
    }
1372
463
    return;
1373
580
}
1374
1375
/**
1376
 * xmlCheckLanguageID:
1377
 * @lang:  pointer to the string value
1378
 *
1379
 * DEPRECATED: Internal function, do not use.
1380
 *
1381
 * Checks that the value conforms to the LanguageID production:
1382
 *
1383
 * NOTE: this is somewhat deprecated, those productions were removed from
1384
 *       the XML Second edition.
1385
 *
1386
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1387
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1388
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1389
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1390
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1391
 * [38] Subcode ::= ([a-z] | [A-Z])+
1392
 *
1393
 * The current REC reference the successors of RFC 1766, currently 5646
1394
 *
1395
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1396
 * langtag       = language
1397
 *                 ["-" script]
1398
 *                 ["-" region]
1399
 *                 *("-" variant)
1400
 *                 *("-" extension)
1401
 *                 ["-" privateuse]
1402
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1403
 *                 ["-" extlang]       ; sometimes followed by
1404
 *                                     ; extended language subtags
1405
 *               / 4ALPHA              ; or reserved for future use
1406
 *               / 5*8ALPHA            ; or registered language subtag
1407
 *
1408
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1409
 *                 *2("-" 3ALPHA)      ; permanently reserved
1410
 *
1411
 * script        = 4ALPHA              ; ISO 15924 code
1412
 *
1413
 * region        = 2ALPHA              ; ISO 3166-1 code
1414
 *               / 3DIGIT              ; UN M.49 code
1415
 *
1416
 * variant       = 5*8alphanum         ; registered variants
1417
 *               / (DIGIT 3alphanum)
1418
 *
1419
 * extension     = singleton 1*("-" (2*8alphanum))
1420
 *
1421
 *                                     ; Single alphanumerics
1422
 *                                     ; "x" reserved for private use
1423
 * singleton     = DIGIT               ; 0 - 9
1424
 *               / %x41-57             ; A - W
1425
 *               / %x59-5A             ; Y - Z
1426
 *               / %x61-77             ; a - w
1427
 *               / %x79-7A             ; y - z
1428
 *
1429
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1430
 * The parser below doesn't try to cope with extension or privateuse
1431
 * that could be added but that's not interoperable anyway
1432
 *
1433
 * Returns 1 if correct 0 otherwise
1434
 **/
1435
int
1436
xmlCheckLanguageID(const xmlChar * lang)
1437
0
{
1438
0
    const xmlChar *cur = lang, *nxt;
1439
1440
0
    if (cur == NULL)
1441
0
        return (0);
1442
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1443
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1444
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1445
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1446
        /*
1447
         * Still allow IANA code and user code which were coming
1448
         * from the previous version of the XML-1.0 specification
1449
         * it's deprecated but we should not fail
1450
         */
1451
0
        cur += 2;
1452
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1453
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1454
0
            cur++;
1455
0
        return(cur[0] == 0);
1456
0
    }
1457
0
    nxt = cur;
1458
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1459
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1460
0
           nxt++;
1461
0
    if (nxt - cur >= 4) {
1462
        /*
1463
         * Reserved
1464
         */
1465
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1466
0
            return(0);
1467
0
        return(1);
1468
0
    }
1469
0
    if (nxt - cur < 2)
1470
0
        return(0);
1471
    /* we got an ISO 639 code */
1472
0
    if (nxt[0] == 0)
1473
0
        return(1);
1474
0
    if (nxt[0] != '-')
1475
0
        return(0);
1476
1477
0
    nxt++;
1478
0
    cur = nxt;
1479
    /* now we can have extlang or script or region or variant */
1480
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1481
0
        goto region_m49;
1482
1483
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1484
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1485
0
           nxt++;
1486
0
    if (nxt - cur == 4)
1487
0
        goto script;
1488
0
    if (nxt - cur == 2)
1489
0
        goto region;
1490
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1491
0
        goto variant;
1492
0
    if (nxt - cur != 3)
1493
0
        return(0);
1494
    /* we parsed an extlang */
1495
0
    if (nxt[0] == 0)
1496
0
        return(1);
1497
0
    if (nxt[0] != '-')
1498
0
        return(0);
1499
1500
0
    nxt++;
1501
0
    cur = nxt;
1502
    /* now we can have script or region or variant */
1503
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1504
0
        goto region_m49;
1505
1506
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1507
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1508
0
           nxt++;
1509
0
    if (nxt - cur == 2)
1510
0
        goto region;
1511
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1512
0
        goto variant;
1513
0
    if (nxt - cur != 4)
1514
0
        return(0);
1515
    /* we parsed a script */
1516
0
script:
1517
0
    if (nxt[0] == 0)
1518
0
        return(1);
1519
0
    if (nxt[0] != '-')
1520
0
        return(0);
1521
1522
0
    nxt++;
1523
0
    cur = nxt;
1524
    /* now we can have region or variant */
1525
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526
0
        goto region_m49;
1527
1528
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530
0
           nxt++;
1531
1532
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1533
0
        goto variant;
1534
0
    if (nxt - cur != 2)
1535
0
        return(0);
1536
    /* we parsed a region */
1537
0
region:
1538
0
    if (nxt[0] == 0)
1539
0
        return(1);
1540
0
    if (nxt[0] != '-')
1541
0
        return(0);
1542
1543
0
    nxt++;
1544
0
    cur = nxt;
1545
    /* now we can just have a variant */
1546
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1547
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1548
0
           nxt++;
1549
1550
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1551
0
        return(0);
1552
1553
    /* we parsed a variant */
1554
0
variant:
1555
0
    if (nxt[0] == 0)
1556
0
        return(1);
1557
0
    if (nxt[0] != '-')
1558
0
        return(0);
1559
    /* extensions and private use subtags not checked */
1560
0
    return (1);
1561
1562
0
region_m49:
1563
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1564
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1565
0
        nxt += 3;
1566
0
        goto region;
1567
0
    }
1568
0
    return(0);
1569
0
}
1570
1571
/************************************************************************
1572
 *                  *
1573
 *    Parser stacks related functions and macros    *
1574
 *                  *
1575
 ************************************************************************/
1576
1577
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1578
                                            const xmlChar ** str);
1579
1580
#ifdef SAX2
1581
/**
1582
 * nsPush:
1583
 * @ctxt:  an XML parser context
1584
 * @prefix:  the namespace prefix or NULL
1585
 * @URL:  the namespace name
1586
 *
1587
 * Pushes a new parser namespace on top of the ns stack
1588
 *
1589
 * Returns -1 in case of error, -2 if the namespace should be discarded
1590
 *     and the index in the stack otherwise.
1591
 */
1592
static int
1593
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1594
11.7k
{
1595
11.7k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1596
11.7k
        int i;
1597
112k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1598
105k
      if (ctxt->nsTab[i] == prefix) {
1599
    /* in scope */
1600
4.45k
          if (ctxt->nsTab[i + 1] == URL)
1601
2.66k
        return(-2);
1602
    /* out of scope keep it */
1603
1.78k
    break;
1604
4.45k
      }
1605
105k
  }
1606
11.7k
    }
1607
9.06k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1608
2.53k
  ctxt->nsMax = 10;
1609
2.53k
  ctxt->nsNr = 0;
1610
2.53k
  ctxt->nsTab = (const xmlChar **)
1611
2.53k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1612
2.53k
  if (ctxt->nsTab == NULL) {
1613
0
      xmlErrMemory(ctxt, NULL);
1614
0
      ctxt->nsMax = 0;
1615
0
            return (-1);
1616
0
  }
1617
6.52k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1618
163
        const xmlChar ** tmp;
1619
163
        ctxt->nsMax *= 2;
1620
163
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1621
163
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1622
163
        if (tmp == NULL) {
1623
0
            xmlErrMemory(ctxt, NULL);
1624
0
      ctxt->nsMax /= 2;
1625
0
            return (-1);
1626
0
        }
1627
163
  ctxt->nsTab = tmp;
1628
163
    }
1629
9.06k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1630
9.06k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1631
9.06k
    return (ctxt->nsNr);
1632
9.06k
}
1633
/**
1634
 * nsPop:
1635
 * @ctxt: an XML parser context
1636
 * @nr:  the number to pop
1637
 *
1638
 * Pops the top @nr parser prefix/namespace from the ns stack
1639
 *
1640
 * Returns the number of namespaces removed
1641
 */
1642
static int
1643
nsPop(xmlParserCtxtPtr ctxt, int nr)
1644
1.27k
{
1645
1.27k
    int i;
1646
1647
1.27k
    if (ctxt->nsTab == NULL) return(0);
1648
1.27k
    if (ctxt->nsNr < nr) {
1649
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1650
0
        nr = ctxt->nsNr;
1651
0
    }
1652
1.27k
    if (ctxt->nsNr <= 0)
1653
0
        return (0);
1654
1655
9.23k
    for (i = 0;i < nr;i++) {
1656
7.96k
         ctxt->nsNr--;
1657
7.96k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1658
7.96k
    }
1659
1.27k
    return(nr);
1660
1.27k
}
1661
#endif
1662
1663
static int
1664
2.35k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1665
2.35k
    const xmlChar **atts;
1666
2.35k
    int *attallocs;
1667
2.35k
    int maxatts;
1668
1669
2.35k
    if (nr + 5 > ctxt->maxatts) {
1670
2.35k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1671
2.35k
  atts = (const xmlChar **) xmlMalloc(
1672
2.35k
             maxatts * sizeof(const xmlChar *));
1673
2.35k
  if (atts == NULL) goto mem_error;
1674
2.35k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1675
2.35k
                               (maxatts / 5) * sizeof(int));
1676
2.35k
  if (attallocs == NULL) {
1677
0
            xmlFree(atts);
1678
0
            goto mem_error;
1679
0
        }
1680
2.35k
        if (ctxt->maxatts > 0)
1681
252
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1682
2.35k
        xmlFree(ctxt->atts);
1683
2.35k
  ctxt->atts = atts;
1684
2.35k
  ctxt->attallocs = attallocs;
1685
2.35k
  ctxt->maxatts = maxatts;
1686
2.35k
    }
1687
2.35k
    return(ctxt->maxatts);
1688
0
mem_error:
1689
0
    xmlErrMemory(ctxt, NULL);
1690
0
    return(-1);
1691
2.35k
}
1692
1693
/**
1694
 * inputPush:
1695
 * @ctxt:  an XML parser context
1696
 * @value:  the parser input
1697
 *
1698
 * Pushes a new parser input on top of the input stack
1699
 *
1700
 * Returns -1 in case of error, the index in the stack otherwise
1701
 */
1702
int
1703
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1704
9.88k
{
1705
9.88k
    if ((ctxt == NULL) || (value == NULL))
1706
0
        return(-1);
1707
9.88k
    if (ctxt->inputNr >= ctxt->inputMax) {
1708
0
        size_t newSize = ctxt->inputMax * 2;
1709
0
        xmlParserInputPtr *tmp;
1710
1711
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712
0
                                               newSize * sizeof(*tmp));
1713
0
        if (tmp == NULL) {
1714
0
            xmlErrMemory(ctxt, NULL);
1715
0
            return (-1);
1716
0
        }
1717
0
        ctxt->inputTab = tmp;
1718
0
        ctxt->inputMax = newSize;
1719
0
    }
1720
9.88k
    ctxt->inputTab[ctxt->inputNr] = value;
1721
9.88k
    ctxt->input = value;
1722
9.88k
    return (ctxt->inputNr++);
1723
9.88k
}
1724
/**
1725
 * inputPop:
1726
 * @ctxt: an XML parser context
1727
 *
1728
 * Pops the top parser input from the input stack
1729
 *
1730
 * Returns the input just removed
1731
 */
1732
xmlParserInputPtr
1733
inputPop(xmlParserCtxtPtr ctxt)
1734
29.6k
{
1735
29.6k
    xmlParserInputPtr ret;
1736
1737
29.6k
    if (ctxt == NULL)
1738
0
        return(NULL);
1739
29.6k
    if (ctxt->inputNr <= 0)
1740
19.7k
        return (NULL);
1741
9.88k
    ctxt->inputNr--;
1742
9.88k
    if (ctxt->inputNr > 0)
1743
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1744
9.88k
    else
1745
9.88k
        ctxt->input = NULL;
1746
9.88k
    ret = ctxt->inputTab[ctxt->inputNr];
1747
9.88k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1748
9.88k
    return (ret);
1749
29.6k
}
1750
/**
1751
 * nodePush:
1752
 * @ctxt:  an XML parser context
1753
 * @value:  the element node
1754
 *
1755
 * DEPRECATED: Internal function, do not use.
1756
 *
1757
 * Pushes a new element node on top of the node stack
1758
 *
1759
 * Returns -1 in case of error, the index in the stack otherwise
1760
 */
1761
int
1762
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1763
0
{
1764
0
    if (ctxt == NULL) return(0);
1765
0
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1766
0
        xmlNodePtr *tmp;
1767
1768
0
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769
0
                                      ctxt->nodeMax * 2 *
1770
0
                                      sizeof(ctxt->nodeTab[0]));
1771
0
        if (tmp == NULL) {
1772
0
            xmlErrMemory(ctxt, NULL);
1773
0
            return (-1);
1774
0
        }
1775
0
        ctxt->nodeTab = tmp;
1776
0
  ctxt->nodeMax *= 2;
1777
0
    }
1778
0
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1780
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1781
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1782
0
        xmlParserMaxDepth);
1783
0
  xmlHaltParser(ctxt);
1784
0
  return(-1);
1785
0
    }
1786
0
    ctxt->nodeTab[ctxt->nodeNr] = value;
1787
0
    ctxt->node = value;
1788
0
    return (ctxt->nodeNr++);
1789
0
}
1790
1791
/**
1792
 * nodePop:
1793
 * @ctxt: an XML parser context
1794
 *
1795
 * DEPRECATED: Internal function, do not use.
1796
 *
1797
 * Pops the top element node from the node stack
1798
 *
1799
 * Returns the node just removed
1800
 */
1801
xmlNodePtr
1802
nodePop(xmlParserCtxtPtr ctxt)
1803
3.71k
{
1804
3.71k
    xmlNodePtr ret;
1805
1806
3.71k
    if (ctxt == NULL) return(NULL);
1807
3.71k
    if (ctxt->nodeNr <= 0)
1808
3.71k
        return (NULL);
1809
0
    ctxt->nodeNr--;
1810
0
    if (ctxt->nodeNr > 0)
1811
0
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1812
0
    else
1813
0
        ctxt->node = NULL;
1814
0
    ret = ctxt->nodeTab[ctxt->nodeNr];
1815
0
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1816
0
    return (ret);
1817
3.71k
}
1818
1819
/**
1820
 * nameNsPush:
1821
 * @ctxt:  an XML parser context
1822
 * @value:  the element name
1823
 * @prefix:  the element prefix
1824
 * @URI:  the element namespace name
1825
 * @line:  the current line number for error messages
1826
 * @nsNr:  the number of namespaces pushed on the namespace table
1827
 *
1828
 * Pushes a new element name/prefix/URL on top of the name stack
1829
 *
1830
 * Returns -1 in case of error, the index in the stack otherwise
1831
 */
1832
static int
1833
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1834
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1835
681k
{
1836
681k
    xmlStartTag *tag;
1837
1838
681k
    if (ctxt->nameNr >= ctxt->nameMax) {
1839
873
        const xmlChar * *tmp;
1840
873
        xmlStartTag *tmp2;
1841
873
        ctxt->nameMax *= 2;
1842
873
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843
873
                                    ctxt->nameMax *
1844
873
                                    sizeof(ctxt->nameTab[0]));
1845
873
        if (tmp == NULL) {
1846
0
      ctxt->nameMax /= 2;
1847
0
      goto mem_error;
1848
0
        }
1849
873
  ctxt->nameTab = tmp;
1850
873
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1851
873
                                    ctxt->nameMax *
1852
873
                                    sizeof(ctxt->pushTab[0]));
1853
873
        if (tmp2 == NULL) {
1854
0
      ctxt->nameMax /= 2;
1855
0
      goto mem_error;
1856
0
        }
1857
873
  ctxt->pushTab = tmp2;
1858
680k
    } else if (ctxt->pushTab == NULL) {
1859
5.68k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1860
5.68k
                                            sizeof(ctxt->pushTab[0]));
1861
5.68k
        if (ctxt->pushTab == NULL)
1862
0
            goto mem_error;
1863
5.68k
    }
1864
681k
    ctxt->nameTab[ctxt->nameNr] = value;
1865
681k
    ctxt->name = value;
1866
681k
    tag = &ctxt->pushTab[ctxt->nameNr];
1867
681k
    tag->prefix = prefix;
1868
681k
    tag->URI = URI;
1869
681k
    tag->line = line;
1870
681k
    tag->nsNr = nsNr;
1871
681k
    return (ctxt->nameNr++);
1872
0
mem_error:
1873
0
    xmlErrMemory(ctxt, NULL);
1874
0
    return (-1);
1875
681k
}
1876
#ifdef LIBXML_PUSH_ENABLED
1877
/**
1878
 * nameNsPop:
1879
 * @ctxt: an XML parser context
1880
 *
1881
 * Pops the top element/prefix/URI name from the name stack
1882
 *
1883
 * Returns the name just removed
1884
 */
1885
static const xmlChar *
1886
nameNsPop(xmlParserCtxtPtr ctxt)
1887
28.1k
{
1888
28.1k
    const xmlChar *ret;
1889
1890
28.1k
    if (ctxt->nameNr <= 0)
1891
0
        return (NULL);
1892
28.1k
    ctxt->nameNr--;
1893
28.1k
    if (ctxt->nameNr > 0)
1894
27.8k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1895
332
    else
1896
332
        ctxt->name = NULL;
1897
28.1k
    ret = ctxt->nameTab[ctxt->nameNr];
1898
28.1k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1899
28.1k
    return (ret);
1900
28.1k
}
1901
#endif /* LIBXML_PUSH_ENABLED */
1902
1903
/**
1904
 * namePush:
1905
 * @ctxt:  an XML parser context
1906
 * @value:  the element name
1907
 *
1908
 * DEPRECATED: Internal function, do not use.
1909
 *
1910
 * Pushes a new element name on top of the name stack
1911
 *
1912
 * Returns -1 in case of error, the index in the stack otherwise
1913
 */
1914
int
1915
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1916
0
{
1917
0
    if (ctxt == NULL) return (-1);
1918
1919
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1920
0
        const xmlChar * *tmp;
1921
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1922
0
                                    ctxt->nameMax * 2 *
1923
0
                                    sizeof(ctxt->nameTab[0]));
1924
0
        if (tmp == NULL) {
1925
0
      goto mem_error;
1926
0
        }
1927
0
  ctxt->nameTab = tmp;
1928
0
        ctxt->nameMax *= 2;
1929
0
    }
1930
0
    ctxt->nameTab[ctxt->nameNr] = value;
1931
0
    ctxt->name = value;
1932
0
    return (ctxt->nameNr++);
1933
0
mem_error:
1934
0
    xmlErrMemory(ctxt, NULL);
1935
0
    return (-1);
1936
0
}
1937
1938
/**
1939
 * namePop:
1940
 * @ctxt: an XML parser context
1941
 *
1942
 * DEPRECATED: Internal function, do not use.
1943
 *
1944
 * Pops the top element name from the name stack
1945
 *
1946
 * Returns the name just removed
1947
 */
1948
const xmlChar *
1949
namePop(xmlParserCtxtPtr ctxt)
1950
0
{
1951
0
    const xmlChar *ret;
1952
1953
0
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1954
0
        return (NULL);
1955
0
    ctxt->nameNr--;
1956
0
    if (ctxt->nameNr > 0)
1957
0
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1958
0
    else
1959
0
        ctxt->name = NULL;
1960
0
    ret = ctxt->nameTab[ctxt->nameNr];
1961
0
    ctxt->nameTab[ctxt->nameNr] = NULL;
1962
0
    return (ret);
1963
0
}
1964
1965
701k
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on the space stack, doubling spaceTab when it is full,
 * and makes ctxt->space point at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 *         otherwise
 */
static int
spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* undo the speculative doubling; spaceTab is untouched */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr++;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    return(idx);
}
1983
1984
51.8k
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value of the space stack. The vacated slot is set to -1
 * and ctxt->space is pointed at the new top entry, or at slot 0 when
 * the stack became empty.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int
spacePop(xmlParserCtxtPtr ctxt) {
    int idx;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    idx = --ctxt->spaceNr;
    popped = ctxt->spaceTab[idx];
    ctxt->spaceTab[idx] = -1;
    ctxt->space = &ctxt->spaceTab[(idx > 0) ? (idx - 1) : 0];
    return(popped);
}
1996
1997
/*
1998
 * Macros for accessing the content. Those should be used only by the parser,
1999
 * and not exported.
2000
 *
2001
 * Dirty macros, i.e. one often needs to make assumptions about the context to
2002
 * use them
2003
 *
2004
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2005
 *           To be used with extreme caution since operations consuming
2006
 *           characters may move the input buffer to a different location !
2007
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
2008
 *           This should be used internally by the parser
2009
 *           only to compare to ASCII values otherwise it would break when
2010
 *           running with UTF-8 encoding.
2011
 *   RAW     same as CUR but in the input buffer, bypass any token
2012
 *           extraction that may have been done
2013
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2014
 *           to compare on ASCII based substring.
2015
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2016
 *           strings without newlines within the parser.
2017
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2018
 *           defined char within the parser.
2019
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2020
 *
2021
 *   NEXT    Skip to the next character, this does the proper decoding
2022
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2023
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2024
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2025
 *           to the number of xmlChars used for the encoding [0-5].
2026
 *   CUR_SCHAR  same but operate on a string instead of the context
2027
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2028
 *            the index
2029
 *   GROW, SHRINK  handling of input buffers
2030
 */
2031
2032
6.17M
/*
 * Raw access to the current input. All of these expect a variable named
 * `ctxt` to be in scope at the expansion site.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1..cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char. Arguments are parenthesized so
 * that expression arguments (e.g. a conditional or pointer arithmetic)
 * are cast and compared as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance cur and col by val, then call xmlParserGrow() if
 * the new current byte is 0. No line accounting is done.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SKIPL(val): advance byte by byte over val bytes, updating line/col on
 * each '\n', then call xmlParserGrow() if the new current byte is 0.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW expand to a bare `if` statement that already ends in
 * ';'. Their shape is kept as-is for the existing call sites; do not
 * use them as the unbraced body of an if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1 expands to a brace block (not do/while), so `NEXT1;` leaves an
 * extra empty statement; shape kept for the existing call sites.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

/*
 * NEXTL(l): account for one character (line/col), then advance cur by
 * l bytes.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i: a single byte store when l == 1, xmlCopyCharMultiByte()
 * otherwise. Expands to an unbraced if/else without trailing ';';
 * shape kept for the existing call sites.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2107
2108
/**
2109
 * xmlSkipBlankChars:
2110
 * @ctxt:  the XML parser context
2111
 *
2112
 * DEPRECATED: Internal function, do not use.
2113
 *
2114
 * skip all blanks character found at that point in the input streams.
2115
 * It pops up finished entities in the process if allowable at that point.
2116
 *
2117
 * Returns the number of space chars skipped
2118
 */
2119
2120
int
2121
1.81M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2122
1.81M
    int res = 0;
2123
2124
    /*
2125
     * It's Okay to use CUR/NEXT here since all the blanks are on
2126
     * the ASCII range.
2127
     */
2128
1.81M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2129
1.81M
        (ctxt->instate == XML_PARSER_START)) {
2130
1.38M
  const xmlChar *cur;
2131
  /*
2132
   * if we are in the document content, go really fast
2133
   */
2134
1.38M
  cur = ctxt->input->cur;
2135
1.38M
  while (IS_BLANK_CH(*cur)) {
2136
676k
      if (*cur == '\n') {
2137
262k
    ctxt->input->line++; ctxt->input->col = 1;
2138
413k
      } else {
2139
413k
    ctxt->input->col++;
2140
413k
      }
2141
676k
      cur++;
2142
676k
      if (res < INT_MAX)
2143
676k
    res++;
2144
676k
      if (*cur == 0) {
2145
463
    ctxt->input->cur = cur;
2146
463
    xmlParserGrow(ctxt);
2147
463
    cur = ctxt->input->cur;
2148
463
      }
2149
676k
  }
2150
1.38M
  ctxt->input->cur = cur;
2151
1.38M
    } else {
2152
431k
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2153
2154
731k
  while (ctxt->instate != XML_PARSER_EOF) {
2155
731k
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2156
300k
    NEXT;
2157
431k
      } else if (CUR == '%') {
2158
                /*
2159
                 * Need to handle support of entities branching here
2160
                 */
2161
17.9k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2162
17.9k
                    break;
2163
0
          xmlParsePEReference(ctxt);
2164
413k
            } else if (CUR == 0) {
2165
886
                unsigned long consumed;
2166
886
                xmlEntityPtr ent;
2167
2168
886
                if (ctxt->inputNr <= 1)
2169
886
                    break;
2170
2171
0
                consumed = ctxt->input->consumed;
2172
0
                xmlSaturatedAddSizeT(&consumed,
2173
0
                                     ctxt->input->cur - ctxt->input->base);
2174
2175
                /*
2176
                 * Add to sizeentities when parsing an external entity
2177
                 * for the first time.
2178
                 */
2179
0
                ent = ctxt->input->entity;
2180
0
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2181
0
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2182
0
                    ent->flags |= XML_ENT_PARSED;
2183
2184
0
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2185
0
                }
2186
2187
0
                xmlParserEntityCheck(ctxt, consumed);
2188
2189
0
                xmlPopInput(ctxt);
2190
412k
            } else {
2191
412k
                break;
2192
412k
            }
2193
2194
            /*
2195
             * Also increase the counter when entering or exiting a PERef.
2196
             * The spec says: "When a parameter-entity reference is recognized
2197
             * in the DTD and included, its replacement text MUST be enlarged
2198
             * by the attachment of one leading and one following space (#x20)
2199
             * character."
2200
             */
2201
300k
      if (res < INT_MAX)
2202
300k
    res++;
2203
300k
        }
2204
431k
    }
2205
1.81M
    return(res);
2206
1.81M
}
2207
2208
/************************************************************************
2209
 *                  *
2210
 *    Commodity functions to handle entities      *
2211
 *                  *
2212
 ************************************************************************/
2213
2214
/**
2215
 * xmlPopInput:
2216
 * @ctxt:  an XML parser context
2217
 *
2218
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2219
 *          pop it and return the next char.
2220
 *
2221
 * Returns the current xmlChar in the parser context
2222
 */
2223
xmlChar
2224
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2225
0
    xmlParserInputPtr input;
2226
2227
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2228
0
    if (xmlParserDebugEntities)
2229
0
  xmlGenericError(xmlGenericErrorContext,
2230
0
    "Popping input %d\n", ctxt->inputNr);
2231
0
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2232
0
        (ctxt->instate != XML_PARSER_EOF))
2233
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2234
0
                    "Unfinished entity outside the DTD");
2235
0
    input = inputPop(ctxt);
2236
0
    if (input->entity != NULL)
2237
0
        input->entity->flags &= ~XML_ENT_EXPANDING;
2238
0
    xmlFreeInputStream(input);
2239
0
    if (*ctxt->input->cur == 0)
2240
0
        xmlParserGrow(ctxt);
2241
0
    return(CUR);
2242
0
}
2243
2244
/**
2245
 * xmlPushInput:
2246
 * @ctxt:  an XML parser context
2247
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2248
 *
2249
 * xmlPushInput: switch to a new input stream which is stacked on top
2250
 *               of the previous one(s).
2251
 * Returns -1 in case of error or the index in the input stack
2252
 */
2253
int
2254
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2255
0
    int ret;
2256
0
    if (input == NULL) return(-1);
2257
2258
0
    if (xmlParserDebugEntities) {
2259
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2260
0
      xmlGenericError(xmlGenericErrorContext,
2261
0
        "%s(%d): ", ctxt->input->filename,
2262
0
        ctxt->input->line);
2263
0
  xmlGenericError(xmlGenericErrorContext,
2264
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2265
0
    }
2266
0
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2267
0
        (ctxt->inputNr > 100)) {
2268
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2269
0
        while (ctxt->inputNr > 1)
2270
0
            xmlFreeInputStream(inputPop(ctxt));
2271
0
  return(-1);
2272
0
    }
2273
0
    ret = inputPush(ctxt, input);
2274
0
    if (ctxt->instate == XML_PARSER_EOF)
2275
0
        return(-1);
2276
0
    GROW;
2277
0
    return(ret);
2278
0
}
2279
2280
/**
2281
 * xmlParseCharRef:
2282
 * @ctxt:  an XML parser context
2283
 *
2284
 * DEPRECATED: Internal function, don't use.
2285
 *
2286
 * Parse a numeric character reference. Always consumes '&'.
2287
 *
2288
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2289
 *                  '&#x' [0-9a-fA-F]+ ';'
2290
 *
2291
 * [ WFC: Legal Character ]
2292
 * Characters referred to using character references must match the
2293
 * production for Char.
2294
 *
2295
 * Returns the value parsed (as an int), 0 in case of error
2296
 */
2297
int
2298
59.8k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2299
59.8k
    int val = 0;
2300
59.8k
    int count = 0;
2301
2302
    /*
2303
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2304
     */
2305
59.8k
    if ((RAW == '&') && (NXT(1) == '#') &&
2306
59.8k
        (NXT(2) == 'x')) {
2307
6.23k
  SKIP(3);
2308
6.23k
  GROW;
2309
45.5k
  while (RAW != ';') { /* loop blocked by count */
2310
43.4k
      if (count++ > 20) {
2311
1.05k
    count = 0;
2312
1.05k
    GROW;
2313
1.05k
                if (ctxt->instate == XML_PARSER_EOF)
2314
0
                    return(0);
2315
1.05k
      }
2316
43.4k
      if ((RAW >= '0') && (RAW <= '9'))
2317
28.8k
          val = val * 16 + (CUR - '0');
2318
14.6k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2319
5.84k
          val = val * 16 + (CUR - 'a') + 10;
2320
8.76k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2321
4.61k
          val = val * 16 + (CUR - 'A') + 10;
2322
4.14k
      else {
2323
4.14k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2324
4.14k
    val = 0;
2325
4.14k
    break;
2326
4.14k
      }
2327
39.2k
      if (val > 0x110000)
2328
9.36k
          val = 0x110000;
2329
2330
39.2k
      NEXT;
2331
39.2k
      count++;
2332
39.2k
  }
2333
6.23k
  if (RAW == ';') {
2334
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2335
2.09k
      ctxt->input->col++;
2336
2.09k
      ctxt->input->cur++;
2337
2.09k
  }
2338
53.6k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2339
53.6k
  SKIP(2);
2340
53.6k
  GROW;
2341
207k
  while (RAW != ';') { /* loop blocked by count */
2342
188k
      if (count++ > 20) {
2343
313
    count = 0;
2344
313
    GROW;
2345
313
                if (ctxt->instate == XML_PARSER_EOF)
2346
0
                    return(0);
2347
313
      }
2348
188k
      if ((RAW >= '0') && (RAW <= '9'))
2349
153k
          val = val * 10 + (CUR - '0');
2350
34.2k
      else {
2351
34.2k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2352
34.2k
    val = 0;
2353
34.2k
    break;
2354
34.2k
      }
2355
153k
      if (val > 0x110000)
2356
6.38k
          val = 0x110000;
2357
2358
153k
      NEXT;
2359
153k
      count++;
2360
153k
  }
2361
53.6k
  if (RAW == ';') {
2362
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2363
19.3k
      ctxt->input->col++;
2364
19.3k
      ctxt->input->cur++;
2365
19.3k
  }
2366
53.6k
    } else {
2367
0
        if (RAW == '&')
2368
0
            SKIP(1);
2369
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2370
0
    }
2371
2372
    /*
2373
     * [ WFC: Legal Character ]
2374
     * Characters referred to using character references must match the
2375
     * production for Char.
2376
     */
2377
59.8k
    if (val >= 0x110000) {
2378
1.15k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2379
1.15k
                "xmlParseCharRef: character reference out of bounds\n",
2380
1.15k
          val);
2381
58.7k
    } else if (IS_CHAR(val)) {
2382
18.1k
        return(val);
2383
40.5k
    } else {
2384
40.5k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2385
40.5k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2386
40.5k
                    val);
2387
40.5k
    }
2388
41.6k
    return(0);
2389
59.8k
}
2390
2391
/**
2392
 * xmlParseStringCharRef:
2393
 * @ctxt:  an XML parser context
2394
 * @str:  a pointer to an index in the string
2395
 *
2396
 * parse Reference declarations, variant parsing from a string rather
2397
 * than an an input flow.
2398
 *
2399
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2400
 *                  '&#x' [0-9a-fA-F]+ ';'
2401
 *
2402
 * [ WFC: Legal Character ]
2403
 * Characters referred to using character references must match the
2404
 * production for Char.
2405
 *
2406
 * Returns the value parsed (as an int), 0 in case of error, str will be
2407
 *         updated to the current value of the index
2408
 */
2409
static int
2410
24.0k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2411
24.0k
    const xmlChar *ptr;
2412
24.0k
    xmlChar cur;
2413
24.0k
    int val = 0;
2414
2415
24.0k
    if ((str == NULL) || (*str == NULL)) return(0);
2416
24.0k
    ptr = *str;
2417
24.0k
    cur = *ptr;
2418
24.0k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2419
9.34k
  ptr += 3;
2420
9.34k
  cur = *ptr;
2421
54.0k
  while (cur != ';') { /* Non input consuming loop */
2422
45.9k
      if ((cur >= '0') && (cur <= '9'))
2423
21.1k
          val = val * 16 + (cur - '0');
2424
24.8k
      else if ((cur >= 'a') && (cur <= 'f'))
2425
13.6k
          val = val * 16 + (cur - 'a') + 10;
2426
11.1k
      else if ((cur >= 'A') && (cur <= 'F'))
2427
9.93k
          val = val * 16 + (cur - 'A') + 10;
2428
1.22k
      else {
2429
1.22k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2430
1.22k
    val = 0;
2431
1.22k
    break;
2432
1.22k
      }
2433
44.7k
      if (val > 0x110000)
2434
2.13k
          val = 0x110000;
2435
2436
44.7k
      ptr++;
2437
44.7k
      cur = *ptr;
2438
44.7k
  }
2439
9.34k
  if (cur == ';')
2440
8.12k
      ptr++;
2441
14.6k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2442
14.6k
  ptr += 2;
2443
14.6k
  cur = *ptr;
2444
51.8k
  while (cur != ';') { /* Non input consuming loops */
2445
41.7k
      if ((cur >= '0') && (cur <= '9'))
2446
37.1k
          val = val * 10 + (cur - '0');
2447
4.65k
      else {
2448
4.65k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2449
4.65k
    val = 0;
2450
4.65k
    break;
2451
4.65k
      }
2452
37.1k
      if (val > 0x110000)
2453
1.56k
          val = 0x110000;
2454
2455
37.1k
      ptr++;
2456
37.1k
      cur = *ptr;
2457
37.1k
  }
2458
14.6k
  if (cur == ';')
2459
10.0k
      ptr++;
2460
14.6k
    } else {
2461
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2462
0
  return(0);
2463
0
    }
2464
24.0k
    *str = ptr;
2465
2466
    /*
2467
     * [ WFC: Legal Character ]
2468
     * Characters referred to using character references must match the
2469
     * production for Char.
2470
     */
2471
24.0k
    if (val >= 0x110000) {
2472
66
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2473
66
                "xmlParseStringCharRef: character reference out of bounds\n",
2474
66
                val);
2475
23.9k
    } else if (IS_CHAR(val)) {
2476
16.8k
        return(val);
2477
16.8k
    } else {
2478
7.13k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2479
7.13k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2480
7.13k
        val);
2481
7.13k
    }
2482
7.20k
    return(0);
2483
24.0k
}
2484
2485
/**
2486
 * xmlParserHandlePEReference:
2487
 * @ctxt:  the parser context
2488
 *
2489
 * DEPRECATED: Internal function, do not use.
2490
 *
2491
 * [69] PEReference ::= '%' Name ';'
2492
 *
2493
 * [ WFC: No Recursion ]
2494
 * A parsed entity must not contain a recursive
2495
 * reference to itself, either directly or indirectly.
2496
 *
2497
 * [ WFC: Entity Declared ]
2498
 * In a document without any DTD, a document with only an internal DTD
2499
 * subset which contains no parameter entity references, or a document
2500
 * with "standalone='yes'", ...  ... The declaration of a parameter
2501
 * entity must precede any reference to it...
2502
 *
2503
 * [ VC: Entity Declared ]
2504
 * In a document with an external subset or external parameter entities
2505
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2506
 * must precede any reference to it...
2507
 *
2508
 * [ WFC: In DTD ]
2509
 * Parameter-entity references may only appear in the DTD.
2510
 * NOTE: misleading but this is handled.
2511
 *
2512
 * A PEReference may have been detected in the current input stream
2513
 * the handling is done accordingly to
2514
 *      http://www.w3.org/TR/REC-xml#entproc
2515
 * i.e.
2516
 *   - Included in literal in entity values
2517
 *   - Included as Parameter Entity reference within DTDs
2518
 */
2519
void
2520
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2521
0
    switch(ctxt->instate) {
2522
0
  case XML_PARSER_CDATA_SECTION:
2523
0
      return;
2524
0
        case XML_PARSER_COMMENT:
2525
0
      return;
2526
0
  case XML_PARSER_START_TAG:
2527
0
      return;
2528
0
  case XML_PARSER_END_TAG:
2529
0
      return;
2530
0
        case XML_PARSER_EOF:
2531
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2532
0
      return;
2533
0
        case XML_PARSER_PROLOG:
2534
0
  case XML_PARSER_START:
2535
0
  case XML_PARSER_MISC:
2536
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2537
0
      return;
2538
0
  case XML_PARSER_ENTITY_DECL:
2539
0
        case XML_PARSER_CONTENT:
2540
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2541
0
        case XML_PARSER_PI:
2542
0
  case XML_PARSER_SYSTEM_LITERAL:
2543
0
  case XML_PARSER_PUBLIC_LITERAL:
2544
      /* we just ignore it there */
2545
0
      return;
2546
0
        case XML_PARSER_EPILOG:
2547
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2548
0
      return;
2549
0
  case XML_PARSER_ENTITY_VALUE:
2550
      /*
2551
       * NOTE: in the case of entity values, we don't do the
2552
       *       substitution here since we need the literal
2553
       *       entity value to be able to save the internal
2554
       *       subset of the document.
2555
       *       This will be handled by xmlStringDecodeEntities
2556
       */
2557
0
      return;
2558
0
        case XML_PARSER_DTD:
2559
      /*
2560
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2561
       * In the internal DTD subset, parameter-entity references
2562
       * can occur only where markup declarations can occur, not
2563
       * within markup declarations.
2564
       * In that case this is handled in xmlParseMarkupDecl
2565
       */
2566
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2567
0
    return;
2568
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2569
0
    return;
2570
0
            break;
2571
0
        case XML_PARSER_IGNORE:
2572
0
            return;
2573
0
    }
2574
2575
0
    xmlParsePEReference(ctxt);
2576
0
}
2577
2578
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the new size wraps around or xmlRealloc() fails, in
 * which case buffer and buffer##_size are left unchanged.
 * n is parenthesized so that an expression argument is added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2592
2593
/**
2594
 * xmlStringDecodeEntitiesInt:
2595
 * @ctxt:  the parser context
2596
 * @str:  the input string
2597
 * @len: the string length
2598
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2599
 * @end:  an end marker xmlChar, 0 if none
2600
 * @end2:  an end marker xmlChar, 0 if none
2601
 * @end3:  an end marker xmlChar, 0 if none
2602
 * @check:  whether to perform entity checks
2603
 */
2604
static xmlChar *
2605
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2606
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2607
11.6k
                           int check) {
2608
11.6k
    xmlChar *buffer = NULL;
2609
11.6k
    size_t buffer_size = 0;
2610
11.6k
    size_t nbchars = 0;
2611
2612
11.6k
    xmlChar *current = NULL;
2613
11.6k
    xmlChar *rep = NULL;
2614
11.6k
    const xmlChar *last;
2615
11.6k
    xmlEntityPtr ent;
2616
11.6k
    int c,l;
2617
2618
11.6k
    if (str == NULL)
2619
0
        return(NULL);
2620
11.6k
    last = str + len;
2621
2622
11.6k
    if (((ctxt->depth > 40) &&
2623
11.6k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2624
11.6k
  (ctxt->depth > 100)) {
2625
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2626
0
                       "Maximum entity nesting depth exceeded");
2627
0
  return(NULL);
2628
0
    }
2629
2630
    /*
2631
     * allocate a translation buffer.
2632
     */
2633
11.6k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2634
11.6k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2635
11.6k
    if (buffer == NULL) goto mem_error;
2636
2637
    /*
2638
     * OK loop until we reach one of the ending char or a size limit.
2639
     * we are operating on already parsed values.
2640
     */
2641
11.6k
    if (str < last)
2642
11.4k
  c = CUR_SCHAR(str, l);
2643
178
    else
2644
178
        c = 0;
2645
5.03M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2646
5.03M
           (c != end2) && (c != end3) &&
2647
5.03M
           (ctxt->instate != XML_PARSER_EOF)) {
2648
2649
5.03M
  if (c == 0) break;
2650
5.03M
        if ((c == '&') && (str[1] == '#')) {
2651
24.0k
      int val = xmlParseStringCharRef(ctxt, &str);
2652
24.0k
      if (val == 0)
2653
7.20k
                goto int_error;
2654
16.8k
      COPY_BUF(0,buffer,nbchars,val);
2655
16.8k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2656
404
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2657
404
      }
2658
5.01M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2659
0
      if (xmlParserDebugEntities)
2660
0
    xmlGenericError(xmlGenericErrorContext,
2661
0
      "String decoding Entity Reference: %.30s\n",
2662
0
      str);
2663
0
      ent = xmlParseStringEntityRef(ctxt, &str);
2664
0
      if ((ent != NULL) &&
2665
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2666
0
    if (ent->content != NULL) {
2667
0
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2668
0
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2669
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2670
0
        }
2671
0
    } else {
2672
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2673
0
          "predefined entity has no content\n");
2674
0
                    goto int_error;
2675
0
    }
2676
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
2677
0
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2678
0
                    goto int_error;
2679
2680
0
                if (ent->flags & XML_ENT_EXPANDING) {
2681
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2682
0
                    xmlHaltParser(ctxt);
2683
0
                    ent->content[0] = 0;
2684
0
                    goto int_error;
2685
0
                }
2686
2687
0
                ent->flags |= XML_ENT_EXPANDING;
2688
0
    ctxt->depth++;
2689
0
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2690
0
                        ent->length, what, 0, 0, 0, check);
2691
0
    ctxt->depth--;
2692
0
                ent->flags &= ~XML_ENT_EXPANDING;
2693
2694
0
    if (rep == NULL) {
2695
0
                    ent->content[0] = 0;
2696
0
                    goto int_error;
2697
0
                }
2698
2699
0
                current = rep;
2700
0
                while (*current != 0) { /* non input consuming loop */
2701
0
                    buffer[nbchars++] = *current++;
2702
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2703
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2704
0
                    }
2705
0
                }
2706
0
                xmlFree(rep);
2707
0
                rep = NULL;
2708
0
      } else if (ent != NULL) {
2709
0
    int i = xmlStrlen(ent->name);
2710
0
    const xmlChar *cur = ent->name;
2711
2712
0
    buffer[nbchars++] = '&';
2713
0
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2714
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2715
0
    }
2716
0
    for (;i > 0;i--)
2717
0
        buffer[nbchars++] = *cur++;
2718
0
    buffer[nbchars++] = ';';
2719
0
      }
2720
5.01M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2721
0
      if (xmlParserDebugEntities)
2722
0
    xmlGenericError(xmlGenericErrorContext,
2723
0
      "String decoding PE Reference: %.30s\n", str);
2724
0
      ent = xmlParseStringPEReference(ctxt, &str);
2725
0
      if (ent != NULL) {
2726
0
                if (ent->content == NULL) {
2727
        /*
2728
         * Note: external parsed entities will not be loaded,
2729
         * it is not required for a non-validating parser to
2730
         * complete external PEReferences coming from the
2731
         * internal subset
2732
         */
2733
0
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2734
0
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2735
0
      (ctxt->validate != 0)) {
2736
0
      xmlLoadEntityContent(ctxt, ent);
2737
0
        } else {
2738
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2739
0
      "not validating will not read content for PE entity %s\n",
2740
0
                          ent->name, NULL);
2741
0
        }
2742
0
    }
2743
2744
0
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2745
0
                    goto int_error;
2746
2747
0
                if (ent->flags & XML_ENT_EXPANDING) {
2748
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2749
0
                    xmlHaltParser(ctxt);
2750
0
                    if (ent->content != NULL)
2751
0
                        ent->content[0] = 0;
2752
0
                    goto int_error;
2753
0
                }
2754
2755
0
                ent->flags |= XML_ENT_EXPANDING;
2756
0
    ctxt->depth++;
2757
0
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2758
0
                        ent->length, what, 0, 0, 0, check);
2759
0
    ctxt->depth--;
2760
0
                ent->flags &= ~XML_ENT_EXPANDING;
2761
2762
0
    if (rep == NULL) {
2763
0
                    if (ent->content != NULL)
2764
0
                        ent->content[0] = 0;
2765
0
                    goto int_error;
2766
0
                }
2767
0
                current = rep;
2768
0
                while (*current != 0) { /* non input consuming loop */
2769
0
                    buffer[nbchars++] = *current++;
2770
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2771
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2772
0
                    }
2773
0
                }
2774
0
                xmlFree(rep);
2775
0
                rep = NULL;
2776
0
      }
2777
5.01M
  } else {
2778
5.01M
      COPY_BUF(l,buffer,nbchars,c);
2779
5.01M
      str += l;
2780
5.01M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2781
1.36k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2782
1.36k
      }
2783
5.01M
  }
2784
5.02M
  if (str < last)
2785
5.02M
      c = CUR_SCHAR(str, l);
2786
4.26k
  else
2787
4.26k
      c = 0;
2788
5.02M
    }
2789
4.44k
    buffer[nbchars] = 0;
2790
4.44k
    return(buffer);
2791
2792
0
mem_error:
2793
0
    xmlErrMemory(ctxt, NULL);
2794
7.20k
int_error:
2795
7.20k
    if (rep != NULL)
2796
0
        xmlFree(rep);
2797
7.20k
    if (buffer != NULL)
2798
7.20k
        xmlFree(buffer);
2799
7.20k
    return(NULL);
2800
0
}
2801
2802
/**
2803
 * xmlStringLenDecodeEntities:
2804
 * @ctxt:  the parser context
2805
 * @str:  the input string
2806
 * @len: the string length
2807
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2808
 * @end:  an end marker xmlChar, 0 if none
2809
 * @end2:  an end marker xmlChar, 0 if none
2810
 * @end3:  an end marker xmlChar, 0 if none
2811
 *
2812
 * DEPRECATED: Internal function, don't use.
2813
 *
2814
 * Takes a entity string content and process to do the adequate substitutions.
2815
 *
2816
 * [67] Reference ::= EntityRef | CharRef
2817
 *
2818
 * [69] PEReference ::= '%' Name ';'
2819
 *
2820
 * Returns A newly allocated string with the substitution done. The caller
2821
 *      must deallocate it !
2822
 */
2823
xmlChar *
2824
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2825
                           int what, xmlChar end, xmlChar  end2,
2826
0
                           xmlChar end3) {
2827
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2828
0
        return(NULL);
2829
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2830
0
                                      end, end2, end3, 0));
2831
0
}
2832
2833
/**
2834
 * xmlStringDecodeEntities:
2835
 * @ctxt:  the parser context
2836
 * @str:  the input string
2837
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2838
 * @end:  an end marker xmlChar, 0 if none
2839
 * @end2:  an end marker xmlChar, 0 if none
2840
 * @end3:  an end marker xmlChar, 0 if none
2841
 *
2842
 * DEPRECATED: Internal function, don't use.
2843
 *
2844
 * Takes a entity string content and process to do the adequate substitutions.
2845
 *
2846
 * [67] Reference ::= EntityRef | CharRef
2847
 *
2848
 * [69] PEReference ::= '%' Name ';'
2849
 *
2850
 * Returns A newly allocated string with the substitution done. The caller
2851
 *      must deallocate it !
2852
 */
2853
xmlChar *
2854
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2855
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2856
0
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2857
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2858
0
                                      end, end2, end3, 0));
2859
0
}
2860
2861
/************************************************************************
2862
 *                  *
2863
 *    Commodity functions, cleanup needed ?     *
2864
 *                  *
2865
 ************************************************************************/
2866
2867
/**
2868
 * areBlanks:
2869
 * @ctxt:  an XML parser context
2870
 * @str:  a xmlChar *
2871
 * @len:  the size of @str
2872
 * @blank_chars: we know the chars are blanks
2873
 *
2874
 * Is this a sequence of blank chars that one can ignore ?
2875
 *
2876
 * Returns 1 if ignorable 0 otherwise.
2877
 */
2878
2879
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2880
175k
                     int blank_chars) {
2881
175k
    int i, ret;
2882
175k
    xmlNodePtr lastChild;
2883
2884
    /*
2885
     * Don't spend time trying to differentiate them, the same callback is
2886
     * used !
2887
     */
2888
175k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2889
0
  return(0);
2890
2891
    /*
2892
     * Check for xml:space value.
2893
     */
2894
175k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2895
175k
        (*(ctxt->space) == -2))
2896
144k
  return(0);
2897
2898
    /*
2899
     * Check that the string is made of blanks
2900
     */
2901
30.4k
    if (blank_chars == 0) {
2902
39.5k
  for (i = 0;i < len;i++)
2903
36.7k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2904
16.3k
    }
2905
2906
    /*
2907
     * Look if the element is mixed content in the DTD if available
2908
     */
2909
16.8k
    if (ctxt->node == NULL) return(0);
2910
0
    if (ctxt->myDoc != NULL) {
2911
0
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2912
0
        if (ret == 0) return(1);
2913
0
        if (ret == 1) return(0);
2914
0
    }
2915
2916
    /*
2917
     * Otherwise, heuristic :-\
2918
     */
2919
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2920
0
    if ((ctxt->node->children == NULL) &&
2921
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2922
2923
0
    lastChild = xmlGetLastChild(ctxt->node);
2924
0
    if (lastChild == NULL) {
2925
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2926
0
            (ctxt->node->content != NULL)) return(0);
2927
0
    } else if (xmlNodeIsText(lastChild))
2928
0
        return(0);
2929
0
    else if ((ctxt->node->children != NULL) &&
2930
0
             (xmlNodeIsText(ctxt->node->children)))
2931
0
        return(0);
2932
0
    return(1);
2933
0
}
2934
2935
/************************************************************************
2936
 *                  *
2937
 *    Extra stuff for namespace support     *
2938
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2939
 *                  *
2940
 ************************************************************************/
2941
2942
/**
2943
 * xmlSplitQName:
2944
 * @ctxt:  an XML parser context
2945
 * @name:  an XML parser context
2946
 * @prefix:  a xmlChar **
2947
 *
2948
 * parse an UTF8 encoded XML qualified name string
2949
 *
2950
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2951
 *
2952
 * [NS 6] Prefix ::= NCName
2953
 *
2954
 * [NS 7] LocalPart ::= NCName
2955
 *
2956
 * Returns the local part, and prefix is updated
2957
 *   to get the Prefix if any.
2958
 */
2959
2960
xmlChar *
2961
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2962
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2963
0
    xmlChar *buffer = NULL;
2964
0
    int len = 0;
2965
0
    int max = XML_MAX_NAMELEN;
2966
0
    xmlChar *ret = NULL;
2967
0
    const xmlChar *cur = name;
2968
0
    int c;
2969
2970
0
    if (prefix == NULL) return(NULL);
2971
0
    *prefix = NULL;
2972
2973
0
    if (cur == NULL) return(NULL);
2974
2975
#ifndef XML_XML_NAMESPACE
2976
    /* xml: prefix is not really a namespace */
2977
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2978
        (cur[2] == 'l') && (cur[3] == ':'))
2979
  return(xmlStrdup(name));
2980
#endif
2981
2982
    /* nasty but well=formed */
2983
0
    if (cur[0] == ':')
2984
0
  return(xmlStrdup(name));
2985
2986
0
    c = *cur++;
2987
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2988
0
  buf[len++] = c;
2989
0
  c = *cur++;
2990
0
    }
2991
0
    if (len >= max) {
2992
  /*
2993
   * Okay someone managed to make a huge name, so he's ready to pay
2994
   * for the processing speed.
2995
   */
2996
0
  max = len * 2;
2997
2998
0
  buffer = (xmlChar *) xmlMallocAtomic(max);
2999
0
  if (buffer == NULL) {
3000
0
      xmlErrMemory(ctxt, NULL);
3001
0
      return(NULL);
3002
0
  }
3003
0
  memcpy(buffer, buf, len);
3004
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3005
0
      if (len + 10 > max) {
3006
0
          xmlChar *tmp;
3007
3008
0
    max *= 2;
3009
0
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3010
0
    if (tmp == NULL) {
3011
0
        xmlFree(buffer);
3012
0
        xmlErrMemory(ctxt, NULL);
3013
0
        return(NULL);
3014
0
    }
3015
0
    buffer = tmp;
3016
0
      }
3017
0
      buffer[len++] = c;
3018
0
      c = *cur++;
3019
0
  }
3020
0
  buffer[len] = 0;
3021
0
    }
3022
3023
0
    if ((c == ':') && (*cur == 0)) {
3024
0
        if (buffer != NULL)
3025
0
      xmlFree(buffer);
3026
0
  *prefix = NULL;
3027
0
  return(xmlStrdup(name));
3028
0
    }
3029
3030
0
    if (buffer == NULL)
3031
0
  ret = xmlStrndup(buf, len);
3032
0
    else {
3033
0
  ret = buffer;
3034
0
  buffer = NULL;
3035
0
  max = XML_MAX_NAMELEN;
3036
0
    }
3037
3038
3039
0
    if (c == ':') {
3040
0
  c = *cur;
3041
0
        *prefix = ret;
3042
0
  if (c == 0) {
3043
0
      return(xmlStrndup(BAD_CAST "", 0));
3044
0
  }
3045
0
  len = 0;
3046
3047
  /*
3048
   * Check that the first character is proper to start
3049
   * a new name
3050
   */
3051
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3052
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3053
0
        (c == '_') || (c == ':'))) {
3054
0
      int l;
3055
0
      int first = CUR_SCHAR(cur, l);
3056
3057
0
      if (!IS_LETTER(first) && (first != '_')) {
3058
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3059
0
          "Name %s is not XML Namespace compliant\n",
3060
0
          name);
3061
0
      }
3062
0
  }
3063
0
  cur++;
3064
3065
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3066
0
      buf[len++] = c;
3067
0
      c = *cur++;
3068
0
  }
3069
0
  if (len >= max) {
3070
      /*
3071
       * Okay someone managed to make a huge name, so he's ready to pay
3072
       * for the processing speed.
3073
       */
3074
0
      max = len * 2;
3075
3076
0
      buffer = (xmlChar *) xmlMallocAtomic(max);
3077
0
      if (buffer == NULL) {
3078
0
          xmlErrMemory(ctxt, NULL);
3079
0
    return(NULL);
3080
0
      }
3081
0
      memcpy(buffer, buf, len);
3082
0
      while (c != 0) { /* tested bigname2.xml */
3083
0
    if (len + 10 > max) {
3084
0
        xmlChar *tmp;
3085
3086
0
        max *= 2;
3087
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3088
0
        if (tmp == NULL) {
3089
0
      xmlErrMemory(ctxt, NULL);
3090
0
      xmlFree(buffer);
3091
0
      return(NULL);
3092
0
        }
3093
0
        buffer = tmp;
3094
0
    }
3095
0
    buffer[len++] = c;
3096
0
    c = *cur++;
3097
0
      }
3098
0
      buffer[len] = 0;
3099
0
  }
3100
3101
0
  if (buffer == NULL)
3102
0
      ret = xmlStrndup(buf, len);
3103
0
  else {
3104
0
      ret = buffer;
3105
0
  }
3106
0
    }
3107
3108
0
    return(ret);
3109
0
}
3110
3111
/************************************************************************
3112
 *                  *
3113
 *      The parser itself       *
3114
 *  Relates to http://www.w3.org/TR/REC-xml       *
3115
 *                  *
3116
 ************************************************************************/
3117
3118
/************************************************************************
3119
 *                  *
3120
 *  Routines to parse Name, NCName and NmToken      *
3121
 *                  *
3122
 ************************************************************************/
3123
#ifdef DEBUG
/*
 * Counters only compiled in DEBUG builds; the name parsing routines of
 * this file increment the one matching their own name when entered.
 */
/* Name */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
/* NCName */
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
/* NmToken and string names */
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
3131
3132
/*
3133
 * The two following functions are related to the change of accepted
3134
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3135
 * They correspond to the modified production [4] and the new production [4a]
3136
 * changes in that revision. Also note that the macros used for the
3137
 * productions Letter, Digit, CombiningChar and Extender are not needed
3138
 * anymore.
3139
 * We still keep compatibility to pre-revision5 parsing semantic if the
3140
 * new XML_PARSE_OLD10 option is given to the parser.
3141
 */
3142
static int
3143
25.0k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3144
25.0k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3145
        /*
3146
   * Use the new checks of production [4] [4a] amd [5] of the
3147
   * Update 5 of XML-1.0
3148
   */
3149
25.0k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3150
25.0k
      (((c >= 'a') && (c <= 'z')) ||
3151
24.4k
       ((c >= 'A') && (c <= 'Z')) ||
3152
24.4k
       (c == '_') || (c == ':') ||
3153
24.4k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3154
24.4k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3155
24.4k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3156
24.4k
       ((c >= 0x370) && (c <= 0x37D)) ||
3157
24.4k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3158
24.4k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3159
24.4k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3160
24.4k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3161
24.4k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3162
24.4k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3163
24.4k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3164
24.4k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3165
22.3k
      return(1);
3166
25.0k
    } else {
3167
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3168
0
      return(1);
3169
0
    }
3170
2.69k
    return(0);
3171
25.0k
}
3172
3173
static int
3174
51.9M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3175
51.9M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3176
        /*
3177
   * Use the new checks of production [4] [4a] amd [5] of the
3178
   * Update 5 of XML-1.0
3179
   */
3180
51.9M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3181
51.9M
      (((c >= 'a') && (c <= 'z')) ||
3182
51.9M
       ((c >= 'A') && (c <= 'Z')) ||
3183
51.9M
       ((c >= '0') && (c <= '9')) || /* !start */
3184
51.9M
       (c == '_') || (c == ':') ||
3185
51.9M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3186
51.9M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3187
51.9M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3188
51.9M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3189
51.9M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3190
51.9M
       ((c >= 0x370) && (c <= 0x37D)) ||
3191
51.9M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3192
51.9M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3193
51.9M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3194
51.9M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3195
51.9M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3196
51.9M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3197
51.9M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3198
51.9M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3199
51.9M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3200
51.9M
       return(1);
3201
51.9M
    } else {
3202
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3203
0
            (c == '.') || (c == '-') ||
3204
0
      (c == '_') || (c == ':') ||
3205
0
      (IS_COMBINING(c)) ||
3206
0
      (IS_EXTENDER(c)))
3207
0
      return(1);
3208
0
    }
3209
25.8k
    return(0);
3210
51.9M
}
3211
3212
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3213
                                          int *len, int *alloc, int normalize);
3214
3215
static const xmlChar *
3216
491k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3217
491k
    int len = 0, l;
3218
491k
    int c;
3219
491k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3220
491k
                    XML_MAX_TEXT_LENGTH :
3221
491k
                    XML_MAX_NAME_LENGTH;
3222
3223
#ifdef DEBUG
3224
    nbParseNameComplex++;
3225
#endif
3226
3227
    /*
3228
     * Handler for more complex cases
3229
     */
3230
491k
    c = CUR_CHAR(l);
3231
491k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3232
        /*
3233
   * Use the new checks of production [4] [4a] amd [5] of the
3234
   * Update 5 of XML-1.0
3235
   */
3236
491k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3237
491k
      (!(((c >= 'a') && (c <= 'z')) ||
3238
487k
         ((c >= 'A') && (c <= 'Z')) ||
3239
487k
         (c == '_') || (c == ':') ||
3240
487k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3241
487k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3242
487k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3243
487k
         ((c >= 0x370) && (c <= 0x37D)) ||
3244
487k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3245
487k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3246
487k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3247
487k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3248
487k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3249
487k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3250
487k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3251
487k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3252
421k
      return(NULL);
3253
421k
  }
3254
70.2k
  len += l;
3255
70.2k
  NEXTL(l);
3256
70.2k
  c = CUR_CHAR(l);
3257
30.7M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3258
30.7M
         (((c >= 'a') && (c <= 'z')) ||
3259
30.7M
          ((c >= 'A') && (c <= 'Z')) ||
3260
30.7M
          ((c >= '0') && (c <= '9')) || /* !start */
3261
30.7M
          (c == '_') || (c == ':') ||
3262
30.7M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3263
30.7M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3264
30.7M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3265
30.7M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3266
30.7M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3267
30.7M
          ((c >= 0x370) && (c <= 0x37D)) ||
3268
30.7M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3269
30.7M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3270
30.7M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3271
30.7M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3272
30.7M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3273
30.7M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3274
30.7M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3275
30.7M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3276
30.7M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3277
30.7M
    )) {
3278
30.7M
            if (len <= INT_MAX - l)
3279
30.7M
          len += l;
3280
30.7M
      NEXTL(l);
3281
30.7M
      c = CUR_CHAR(l);
3282
30.7M
  }
3283
70.2k
    } else {
3284
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3285
0
      (!IS_LETTER(c) && (c != '_') &&
3286
0
       (c != ':'))) {
3287
0
      return(NULL);
3288
0
  }
3289
0
  len += l;
3290
0
  NEXTL(l);
3291
0
  c = CUR_CHAR(l);
3292
3293
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3294
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3295
0
    (c == '.') || (c == '-') ||
3296
0
    (c == '_') || (c == ':') ||
3297
0
    (IS_COMBINING(c)) ||
3298
0
    (IS_EXTENDER(c)))) {
3299
0
            if (len <= INT_MAX - l)
3300
0
          len += l;
3301
0
      NEXTL(l);
3302
0
      c = CUR_CHAR(l);
3303
0
  }
3304
0
    }
3305
70.2k
    if (ctxt->instate == XML_PARSER_EOF)
3306
0
        return(NULL);
3307
70.2k
    if (len > maxLength) {
3308
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3309
0
        return(NULL);
3310
0
    }
3311
70.2k
    if (ctxt->input->cur - ctxt->input->base < len) {
3312
        /*
3313
         * There were a couple of bugs where PERefs lead to to a change
3314
         * of the buffer. Check the buffer size to avoid passing an invalid
3315
         * pointer to xmlDictLookup.
3316
         */
3317
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3318
0
                    "unexpected change of input buffer");
3319
0
        return (NULL);
3320
0
    }
3321
70.2k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3322
16
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3323
70.2k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3324
70.2k
}
3325
3326
/**
3327
 * xmlParseName:
3328
 * @ctxt:  an XML parser context
3329
 *
3330
 * DEPRECATED: Internal function, don't use.
3331
 *
3332
 * parse an XML name.
3333
 *
3334
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3335
 *                  CombiningChar | Extender
3336
 *
3337
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3338
 *
3339
 * [6] Names ::= Name (#x20 Name)*
3340
 *
3341
 * Returns the Name parsed or NULL
3342
 */
3343
3344
const xmlChar *
3345
682k
xmlParseName(xmlParserCtxtPtr ctxt) {
3346
682k
    const xmlChar *in;
3347
682k
    const xmlChar *ret;
3348
682k
    size_t count = 0;
3349
682k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3350
682k
                       XML_MAX_TEXT_LENGTH :
3351
682k
                       XML_MAX_NAME_LENGTH;
3352
3353
682k
    GROW;
3354
682k
    if (ctxt->instate == XML_PARSER_EOF)
3355
0
        return(NULL);
3356
3357
#ifdef DEBUG
3358
    nbParseName++;
3359
#endif
3360
3361
    /*
3362
     * Accelerator for simple ASCII names
3363
     */
3364
682k
    in = ctxt->input->cur;
3365
682k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3366
682k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3367
682k
  (*in == '_') || (*in == ':')) {
3368
214k
  in++;
3369
1.04M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3370
1.04M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3371
1.04M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3372
1.04M
         (*in == '_') || (*in == '-') ||
3373
1.04M
         (*in == ':') || (*in == '.'))
3374
834k
      in++;
3375
214k
  if ((*in > 0) && (*in < 0x80)) {
3376
190k
      count = in - ctxt->input->cur;
3377
190k
            if (count > maxLength) {
3378
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3379
0
                return(NULL);
3380
0
            }
3381
190k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3382
190k
      ctxt->input->cur = in;
3383
190k
      ctxt->input->col += count;
3384
190k
      if (ret == NULL)
3385
0
          xmlErrMemory(ctxt, NULL);
3386
190k
      return(ret);
3387
190k
  }
3388
214k
    }
3389
    /* accelerator for special cases */
3390
491k
    return(xmlParseNameComplex(ctxt));
3391
682k
}
3392
3393
static const xmlChar *
3394
17.8k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3395
17.8k
    int len = 0, l;
3396
17.8k
    int c;
3397
17.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3398
17.8k
                    XML_MAX_TEXT_LENGTH :
3399
17.8k
                    XML_MAX_NAME_LENGTH;
3400
17.8k
    size_t startPosition = 0;
3401
3402
#ifdef DEBUG
3403
    nbParseNCNameComplex++;
3404
#endif
3405
3406
    /*
3407
     * Handler for more complex cases
3408
     */
3409
17.8k
    startPosition = CUR_PTR - BASE_PTR;
3410
17.8k
    c = CUR_CHAR(l);
3411
17.8k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3412
17.8k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3413
5.38k
  return(NULL);
3414
5.38k
    }
3415
3416
50.8M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3417
50.8M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3418
50.8M
        if (len <= INT_MAX - l)
3419
50.8M
      len += l;
3420
50.8M
  NEXTL(l);
3421
50.8M
  c = CUR_CHAR(l);
3422
50.8M
    }
3423
12.5k
    if (ctxt->instate == XML_PARSER_EOF)
3424
0
        return(NULL);
3425
12.5k
    if (len > maxLength) {
3426
7
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3427
7
        return(NULL);
3428
7
    }
3429
12.5k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3430
12.5k
}
3431
3432
/**
3433
 * xmlParseNCName:
3434
 * @ctxt:  an XML parser context
3435
 * @len:  length of the string parsed
3436
 *
3437
 * parse an XML name.
3438
 *
3439
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3440
 *                      CombiningChar | Extender
3441
 *
3442
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3443
 *
3444
 * Returns the Name parsed or NULL
3445
 */
3446
3447
static const xmlChar *
3448
933k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3449
933k
    const xmlChar *in, *e;
3450
933k
    const xmlChar *ret;
3451
933k
    size_t count = 0;
3452
933k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3453
933k
                       XML_MAX_TEXT_LENGTH :
3454
933k
                       XML_MAX_NAME_LENGTH;
3455
3456
#ifdef DEBUG
3457
    nbParseNCName++;
3458
#endif
3459
3460
    /*
3461
     * Accelerator for simple ASCII names
3462
     */
3463
933k
    in = ctxt->input->cur;
3464
933k
    e = ctxt->input->end;
3465
933k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3466
933k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3467
933k
   (*in == '_')) && (in < e)) {
3468
922k
  in++;
3469
1.99M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3470
1.99M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3471
1.99M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3472
1.99M
          (*in == '_') || (*in == '-') ||
3473
1.99M
          (*in == '.')) && (in < e))
3474
1.06M
      in++;
3475
922k
  if (in >= e)
3476
185
      goto complex;
3477
922k
  if ((*in > 0) && (*in < 0x80)) {
3478
915k
      count = in - ctxt->input->cur;
3479
915k
            if (count > maxLength) {
3480
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481
0
                return(NULL);
3482
0
            }
3483
915k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3484
915k
      ctxt->input->cur = in;
3485
915k
      ctxt->input->col += count;
3486
915k
      if (ret == NULL) {
3487
0
          xmlErrMemory(ctxt, NULL);
3488
0
      }
3489
915k
      return(ret);
3490
915k
  }
3491
922k
    }
3492
17.8k
complex:
3493
17.8k
    return(xmlParseNCNameComplex(ctxt));
3494
933k
}
3495
3496
/**
3497
 * xmlParseNameAndCompare:
3498
 * @ctxt:  an XML parser context
3499
 *
3500
 * parse an XML name and compares for match
3501
 * (specialized for endtag parsing)
3502
 *
3503
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3504
 * and the name for mismatch
3505
 */
3506
3507
static const xmlChar *
3508
27.6k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3509
27.6k
    register const xmlChar *cmp = other;
3510
27.6k
    register const xmlChar *in;
3511
27.6k
    const xmlChar *ret;
3512
3513
27.6k
    GROW;
3514
27.6k
    if (ctxt->instate == XML_PARSER_EOF)
3515
0
        return(NULL);
3516
3517
27.6k
    in = ctxt->input->cur;
3518
144k
    while (*in != 0 && *in == *cmp) {
3519
116k
  ++in;
3520
116k
  ++cmp;
3521
116k
    }
3522
27.6k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3523
  /* success */
3524
27.4k
  ctxt->input->col += in - ctxt->input->cur;
3525
27.4k
  ctxt->input->cur = in;
3526
27.4k
  return (const xmlChar*) 1;
3527
27.4k
    }
3528
    /* failure (or end of input buffer), check with full function */
3529
151
    ret = xmlParseName (ctxt);
3530
    /* strings coming from the dictionary direct compare possible */
3531
151
    if (ret == other) {
3532
13
  return (const xmlChar*) 1;
3533
13
    }
3534
138
    return ret;
3535
151
}
3536
3537
/**
3538
 * xmlParseStringName:
3539
 * @ctxt:  an XML parser context
3540
 * @str:  a pointer to the string pointer (IN/OUT)
3541
 *
3542
 * parse an XML name.
3543
 *
3544
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3545
 *                  CombiningChar | Extender
3546
 *
3547
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3548
 *
3549
 * [6] Names ::= Name (#x20 Name)*
3550
 *
3551
 * Returns the Name parsed or NULL. The @str pointer
3552
 * is updated to the current location in the string.
3553
 */
3554
3555
static xmlChar *
3556
7.92k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3557
7.92k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3558
7.92k
    const xmlChar *cur = *str;
3559
7.92k
    int len = 0, l;
3560
7.92k
    int c;
3561
7.92k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3562
7.92k
                    XML_MAX_TEXT_LENGTH :
3563
7.92k
                    XML_MAX_NAME_LENGTH;
3564
3565
#ifdef DEBUG
3566
    nbParseStringName++;
3567
#endif
3568
3569
7.92k
    c = CUR_SCHAR(cur, l);
3570
7.92k
    if (!xmlIsNameStartChar(ctxt, c)) {
3571
1.37k
  return(NULL);
3572
1.37k
    }
3573
3574
6.55k
    COPY_BUF(l,buf,len,c);
3575
6.55k
    cur += l;
3576
6.55k
    c = CUR_SCHAR(cur, l);
3577
34.1k
    while (xmlIsNameChar(ctxt, c)) {
3578
27.7k
  COPY_BUF(l,buf,len,c);
3579
27.7k
  cur += l;
3580
27.7k
  c = CUR_SCHAR(cur, l);
3581
27.7k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3582
      /*
3583
       * Okay someone managed to make a huge name, so he's ready to pay
3584
       * for the processing speed.
3585
       */
3586
140
      xmlChar *buffer;
3587
140
      int max = len * 2;
3588
3589
140
      buffer = (xmlChar *) xmlMallocAtomic(max);
3590
140
      if (buffer == NULL) {
3591
0
          xmlErrMemory(ctxt, NULL);
3592
0
    return(NULL);
3593
0
      }
3594
140
      memcpy(buffer, buf, len);
3595
9.83k
      while (xmlIsNameChar(ctxt, c)) {
3596
9.69k
    if (len + 10 > max) {
3597
44
        xmlChar *tmp;
3598
3599
44
        max *= 2;
3600
44
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3601
44
        if (tmp == NULL) {
3602
0
      xmlErrMemory(ctxt, NULL);
3603
0
      xmlFree(buffer);
3604
0
      return(NULL);
3605
0
        }
3606
44
        buffer = tmp;
3607
44
    }
3608
9.69k
    COPY_BUF(l,buffer,len,c);
3609
9.69k
    cur += l;
3610
9.69k
    c = CUR_SCHAR(cur, l);
3611
9.69k
                if (len > maxLength) {
3612
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3613
0
                    xmlFree(buffer);
3614
0
                    return(NULL);
3615
0
                }
3616
9.69k
      }
3617
140
      buffer[len] = 0;
3618
140
      *str = cur;
3619
140
      return(buffer);
3620
140
  }
3621
27.7k
    }
3622
6.41k
    if (len > maxLength) {
3623
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3624
0
        return(NULL);
3625
0
    }
3626
6.41k
    *str = cur;
3627
6.41k
    return(xmlStrndup(buf, len));
3628
6.41k
}
3629
3630
/**
3631
 * xmlParseNmtoken:
3632
 * @ctxt:  an XML parser context
3633
 *
3634
 * DEPRECATED: Internal function, don't use.
3635
 *
3636
 * parse an XML Nmtoken.
3637
 *
3638
 * [7] Nmtoken ::= (NameChar)+
3639
 *
3640
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3641
 *
3642
 * Returns the Nmtoken parsed or NULL
3643
 */
3644
3645
xmlChar *
3646
14.3k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3647
14.3k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3648
14.3k
    int len = 0, l;
3649
14.3k
    int c;
3650
14.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3651
14.3k
                    XML_MAX_TEXT_LENGTH :
3652
14.3k
                    XML_MAX_NAME_LENGTH;
3653
3654
#ifdef DEBUG
3655
    nbParseNmToken++;
3656
#endif
3657
3658
14.3k
    c = CUR_CHAR(l);
3659
3660
141k
    while (xmlIsNameChar(ctxt, c)) {
3661
127k
  COPY_BUF(l,buf,len,c);
3662
127k
  NEXTL(l);
3663
127k
  c = CUR_CHAR(l);
3664
127k
  if (len >= XML_MAX_NAMELEN) {
3665
      /*
3666
       * Okay someone managed to make a huge token, so he's ready to pay
3667
       * for the processing speed.
3668
       */
3669
744
      xmlChar *buffer;
3670
744
      int max = len * 2;
3671
3672
744
      buffer = (xmlChar *) xmlMallocAtomic(max);
3673
744
      if (buffer == NULL) {
3674
0
          xmlErrMemory(ctxt, NULL);
3675
0
    return(NULL);
3676
0
      }
3677
744
      memcpy(buffer, buf, len);
3678
931k
      while (xmlIsNameChar(ctxt, c)) {
3679
930k
    if (len + 10 > max) {
3680
1.60k
        xmlChar *tmp;
3681
3682
1.60k
        max *= 2;
3683
1.60k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3684
1.60k
        if (tmp == NULL) {
3685
0
      xmlErrMemory(ctxt, NULL);
3686
0
      xmlFree(buffer);
3687
0
      return(NULL);
3688
0
        }
3689
1.60k
        buffer = tmp;
3690
1.60k
    }
3691
930k
    COPY_BUF(l,buffer,len,c);
3692
930k
                if (len > maxLength) {
3693
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3694
0
                    xmlFree(buffer);
3695
0
                    return(NULL);
3696
0
                }
3697
930k
    NEXTL(l);
3698
930k
    c = CUR_CHAR(l);
3699
930k
      }
3700
744
      buffer[len] = 0;
3701
744
            if (ctxt->instate == XML_PARSER_EOF) {
3702
0
                xmlFree(buffer);
3703
0
                return(NULL);
3704
0
            }
3705
744
      return(buffer);
3706
744
  }
3707
127k
    }
3708
13.6k
    if (ctxt->instate == XML_PARSER_EOF)
3709
0
        return(NULL);
3710
13.6k
    if (len == 0)
3711
806
        return(NULL);
3712
12.8k
    if (len > maxLength) {
3713
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3714
0
        return(NULL);
3715
0
    }
3716
12.8k
    return(xmlStrndup(buf, len));
3717
12.8k
}
3718
3719
/**
3720
 * xmlParseEntityValue:
3721
 * @ctxt:  an XML parser context
3722
 * @orig:  if non-NULL store a copy of the original entity value
3723
 *
3724
 * DEPRECATED: Internal function, don't use.
3725
 *
3726
 * parse a value for ENTITY declarations
3727
 *
3728
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3729
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3730
 *
3731
 * Returns the EntityValue parsed with reference substituted or NULL
3732
 */
3733
3734
xmlChar *
3735
14.2k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3736
14.2k
    xmlChar *buf = NULL;
3737
14.2k
    int len = 0;
3738
14.2k
    int size = XML_PARSER_BUFFER_SIZE;
3739
14.2k
    int c, l;
3740
14.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3741
14.2k
                    XML_MAX_HUGE_LENGTH :
3742
14.2k
                    XML_MAX_TEXT_LENGTH;
3743
14.2k
    xmlChar stop;
3744
14.2k
    xmlChar *ret = NULL;
3745
14.2k
    const xmlChar *cur = NULL;
3746
14.2k
    xmlParserInputPtr input;
3747
3748
14.2k
    if (RAW == '"') stop = '"';
3749
11.2k
    else if (RAW == '\'') stop = '\'';
3750
0
    else {
3751
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3752
0
  return(NULL);
3753
0
    }
3754
14.2k
    buf = (xmlChar *) xmlMallocAtomic(size);
3755
14.2k
    if (buf == NULL) {
3756
0
  xmlErrMemory(ctxt, NULL);
3757
0
  return(NULL);
3758
0
    }
3759
3760
    /*
3761
     * The content of the entity definition is copied in a buffer.
3762
     */
3763
3764
14.2k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3765
14.2k
    input = ctxt->input;
3766
14.2k
    GROW;
3767
14.2k
    if (ctxt->instate == XML_PARSER_EOF)
3768
0
        goto error;
3769
14.2k
    NEXT;
3770
14.2k
    c = CUR_CHAR(l);
3771
    /*
3772
     * NOTE: 4.4.5 Included in Literal
3773
     * When a parameter entity reference appears in a literal entity
3774
     * value, ... a single or double quote character in the replacement
3775
     * text is always treated as a normal data character and will not
3776
     * terminate the literal.
3777
     * In practice it means we stop the loop only when back at parsing
3778
     * the initial entity and the quote is found
3779
     */
3780
5.91M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3781
5.91M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3782
5.90M
  if (len + 5 >= size) {
3783
5.07k
      xmlChar *tmp;
3784
3785
5.07k
      size *= 2;
3786
5.07k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3787
5.07k
      if (tmp == NULL) {
3788
0
    xmlErrMemory(ctxt, NULL);
3789
0
                goto error;
3790
0
      }
3791
5.07k
      buf = tmp;
3792
5.07k
  }
3793
5.90M
  COPY_BUF(l,buf,len,c);
3794
5.90M
  NEXTL(l);
3795
3796
5.90M
  GROW;
3797
5.90M
  c = CUR_CHAR(l);
3798
5.90M
  if (c == 0) {
3799
88
      GROW;
3800
88
      c = CUR_CHAR(l);
3801
88
  }
3802
3803
5.90M
        if (len > maxLength) {
3804
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3805
0
                           "entity value too long\n");
3806
0
            goto error;
3807
0
        }
3808
5.90M
    }
3809
14.2k
    buf[len] = 0;
3810
14.2k
    if (ctxt->instate == XML_PARSER_EOF)
3811
0
        goto error;
3812
14.2k
    if (c != stop) {
3813
109
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3814
109
        goto error;
3815
109
    }
3816
14.0k
    NEXT;
3817
3818
    /*
3819
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3820
     * reference constructs. Note Charref will be handled in
3821
     * xmlStringDecodeEntities()
3822
     */
3823
14.0k
    cur = buf;
3824
5.83M
    while (*cur != 0) { /* non input consuming */
3825
5.81M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3826
7.92k
      xmlChar *name;
3827
7.92k
      xmlChar tmp = *cur;
3828
7.92k
            int nameOk = 0;
3829
3830
7.92k
      cur++;
3831
7.92k
      name = xmlParseStringName(ctxt, &cur);
3832
7.92k
            if (name != NULL) {
3833
6.55k
                nameOk = 1;
3834
6.55k
                xmlFree(name);
3835
6.55k
            }
3836
7.92k
            if ((nameOk == 0) || (*cur != ';')) {
3837
2.22k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3838
2.22k
      "EntityValue: '%c' forbidden except for entities references\n",
3839
2.22k
                            tmp);
3840
2.22k
                goto error;
3841
2.22k
      }
3842
5.69k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3843
5.69k
    (ctxt->inputNr == 1)) {
3844
223
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3845
223
                goto error;
3846
223
      }
3847
5.47k
      if (*cur == 0)
3848
0
          break;
3849
5.47k
  }
3850
5.81M
  cur++;
3851
5.81M
    }
3852
3853
    /*
3854
     * Then PEReference entities are substituted.
3855
     *
3856
     * NOTE: 4.4.7 Bypassed
3857
     * When a general entity reference appears in the EntityValue in
3858
     * an entity declaration, it is bypassed and left as is.
3859
     * so XML_SUBSTITUTE_REF is not set here.
3860
     */
3861
11.6k
    ++ctxt->depth;
3862
11.6k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3863
11.6k
                                     0, 0, 0, /* check */ 1);
3864
11.6k
    --ctxt->depth;
3865
3866
11.6k
    if (orig != NULL) {
3867
11.6k
        *orig = buf;
3868
11.6k
        buf = NULL;
3869
11.6k
    }
3870
3871
14.2k
error:
3872
14.2k
    if (buf != NULL)
3873
2.56k
        xmlFree(buf);
3874
14.2k
    return(ret);
3875
11.6k
}
3876
3877
/**
3878
 * xmlParseAttValueComplex:
3879
 * @ctxt:  an XML parser context
3880
 * @len:   the resulting attribute len
3881
 * @normalize:  whether to apply the inner normalization
3882
 *
3883
 * parse a value for an attribute, this is the fallback function
3884
 * of xmlParseAttValue() when the attribute parsing requires handling
3885
 * of non-ASCII characters, or normalization compaction.
3886
 *
3887
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3888
 */
3889
static xmlChar *
3890
30.7k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3891
30.7k
    xmlChar limit = 0;
3892
30.7k
    xmlChar *buf = NULL;
3893
30.7k
    xmlChar *rep = NULL;
3894
30.7k
    size_t len = 0;
3895
30.7k
    size_t buf_size = 0;
3896
30.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3897
30.7k
                       XML_MAX_HUGE_LENGTH :
3898
30.7k
                       XML_MAX_TEXT_LENGTH;
3899
30.7k
    int c, l, in_space = 0;
3900
30.7k
    xmlChar *current = NULL;
3901
30.7k
    xmlEntityPtr ent;
3902
3903
30.7k
    if (NXT(0) == '"') {
3904
27.0k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3905
27.0k
  limit = '"';
3906
27.0k
        NEXT;
3907
27.0k
    } else if (NXT(0) == '\'') {
3908
3.65k
  limit = '\'';
3909
3.65k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3910
3.65k
        NEXT;
3911
3.65k
    } else {
3912
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3913
0
  return(NULL);
3914
0
    }
3915
3916
    /*
3917
     * allocate a translation buffer.
3918
     */
3919
30.7k
    buf_size = XML_PARSER_BUFFER_SIZE;
3920
30.7k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3921
30.7k
    if (buf == NULL) goto mem_error;
3922
3923
    /*
3924
     * OK loop until we reach one of the ending char or a size limit.
3925
     */
3926
30.7k
    c = CUR_CHAR(l);
3927
19.0M
    while (((NXT(0) != limit) && /* checked */
3928
19.0M
            (IS_CHAR(c)) && (c != '<')) &&
3929
19.0M
            (ctxt->instate != XML_PARSER_EOF)) {
3930
19.0M
  if (c == '&') {
3931
581k
      in_space = 0;
3932
581k
      if (NXT(1) == '#') {
3933
59.1k
    int val = xmlParseCharRef(ctxt);
3934
3935
59.1k
    if (val == '&') {
3936
854
        if (ctxt->replaceEntities) {
3937
0
      if (len + 10 > buf_size) {
3938
0
          growBuffer(buf, 10);
3939
0
      }
3940
0
      buf[len++] = '&';
3941
854
        } else {
3942
      /*
3943
       * The reparsing will be done in xmlStringGetNodeList()
3944
       * called by the attribute() function in SAX.c
3945
       */
3946
854
      if (len + 10 > buf_size) {
3947
2
          growBuffer(buf, 10);
3948
2
      }
3949
854
      buf[len++] = '&';
3950
854
      buf[len++] = '#';
3951
854
      buf[len++] = '3';
3952
854
      buf[len++] = '8';
3953
854
      buf[len++] = ';';
3954
854
        }
3955
58.2k
    } else if (val != 0) {
3956
16.6k
        if (len + 10 > buf_size) {
3957
416
      growBuffer(buf, 10);
3958
416
        }
3959
16.6k
        len += xmlCopyChar(0, &buf[len], val);
3960
16.6k
    }
3961
522k
      } else {
3962
522k
    ent = xmlParseEntityRef(ctxt);
3963
522k
    if ((ent != NULL) &&
3964
522k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3965
10.6k
        if (len + 10 > buf_size) {
3966
140
      growBuffer(buf, 10);
3967
140
        }
3968
10.6k
        if ((ctxt->replaceEntities == 0) &&
3969
10.6k
            (ent->content[0] == '&')) {
3970
923
      buf[len++] = '&';
3971
923
      buf[len++] = '#';
3972
923
      buf[len++] = '3';
3973
923
      buf[len++] = '8';
3974
923
      buf[len++] = ';';
3975
9.67k
        } else {
3976
9.67k
      buf[len++] = ent->content[0];
3977
9.67k
        }
3978
512k
    } else if ((ent != NULL) &&
3979
512k
               (ctxt->replaceEntities != 0)) {
3980
0
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3981
0
                        if (xmlParserEntityCheck(ctxt, ent->length))
3982
0
                            goto error;
3983
3984
0
      ++ctxt->depth;
3985
0
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
3986
0
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
3987
0
                                /* check */ 1);
3988
0
      --ctxt->depth;
3989
0
      if (rep != NULL) {
3990
0
          current = rep;
3991
0
          while (*current != 0) { /* non input consuming */
3992
0
                                if ((*current == 0xD) || (*current == 0xA) ||
3993
0
                                    (*current == 0x9)) {
3994
0
                                    buf[len++] = 0x20;
3995
0
                                    current++;
3996
0
                                } else
3997
0
                                    buf[len++] = *current++;
3998
0
        if (len + 10 > buf_size) {
3999
0
            growBuffer(buf, 10);
4000
0
        }
4001
0
          }
4002
0
          xmlFree(rep);
4003
0
          rep = NULL;
4004
0
      }
4005
0
        } else {
4006
0
      if (len + 10 > buf_size) {
4007
0
          growBuffer(buf, 10);
4008
0
      }
4009
0
      if (ent->content != NULL)
4010
0
          buf[len++] = ent->content[0];
4011
0
        }
4012
512k
    } else if (ent != NULL) {
4013
27.3k
        int i = xmlStrlen(ent->name);
4014
27.3k
        const xmlChar *cur = ent->name;
4015
4016
        /*
4017
                     * We also check for recursion and amplification
4018
                     * when entities are not substituted. They're
4019
                     * often expanded later.
4020
         */
4021
27.3k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4022
27.3k
      (ent->content != NULL)) {
4023
0
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4024
0
                            unsigned long oldCopy = ctxt->sizeentcopy;
4025
4026
0
                            ctxt->sizeentcopy = ent->length;
4027
4028
0
                            ++ctxt->depth;
4029
0
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4030
0
                                    ent->content, ent->length,
4031
0
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4032
0
                                    /* check */ 1);
4033
0
                            --ctxt->depth;
4034
4035
                            /*
4036
                             * If we're parsing DTD content, the entity
4037
                             * might reference other entities which
4038
                             * weren't defined yet, so the check isn't
4039
                             * reliable.
4040
                             */
4041
0
                            if (ctxt->inSubset == 0) {
4042
0
                                ent->flags |= XML_ENT_CHECKED;
4043
0
                                ent->expandedSize = ctxt->sizeentcopy;
4044
0
                            }
4045
4046
0
                            if (rep != NULL) {
4047
0
                                xmlFree(rep);
4048
0
                                rep = NULL;
4049
0
                            } else {
4050
0
                                ent->content[0] = 0;
4051
0
                            }
4052
4053
0
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4054
0
                                goto error;
4055
0
                        } else {
4056
0
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4057
0
                                goto error;
4058
0
                        }
4059
0
        }
4060
4061
        /*
4062
         * Just output the reference
4063
         */
4064
27.3k
        buf[len++] = '&';
4065
27.5k
        while (len + i + 10 > buf_size) {
4066
376
      growBuffer(buf, i + 10);
4067
376
        }
4068
27.3k
        for (;i > 0;i--)
4069
0
      buf[len++] = *cur++;
4070
27.3k
        buf[len++] = ';';
4071
27.3k
    }
4072
522k
      }
4073
18.4M
  } else {
4074
18.4M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4075
360k
          if ((len != 0) || (!normalize)) {
4076
352k
        if ((!normalize) || (!in_space)) {
4077
345k
      COPY_BUF(l,buf,len,0x20);
4078
346k
      while (len + 10 > buf_size) {
4079
418
          growBuffer(buf, 10);
4080
418
      }
4081
345k
        }
4082
352k
        in_space = 1;
4083
352k
    }
4084
18.0M
      } else {
4085
18.0M
          in_space = 0;
4086
18.0M
    COPY_BUF(l,buf,len,c);
4087
18.0M
    if (len + 10 > buf_size) {
4088
14.8k
        growBuffer(buf, 10);
4089
14.8k
    }
4090
18.0M
      }
4091
18.4M
      NEXTL(l);
4092
18.4M
  }
4093
19.0M
  GROW;
4094
19.0M
  c = CUR_CHAR(l);
4095
19.0M
        if (len > maxLength) {
4096
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4097
0
                           "AttValue length too long\n");
4098
0
            goto mem_error;
4099
0
        }
4100
19.0M
    }
4101
30.7k
    if (ctxt->instate == XML_PARSER_EOF)
4102
0
        goto error;
4103
4104
30.7k
    if ((in_space) && (normalize)) {
4105
955
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4106
386
    }
4107
30.7k
    buf[len] = 0;
4108
30.7k
    if (RAW == '<') {
4109
647
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4110
30.0k
    } else if (RAW != limit) {
4111
2.00k
  if ((c != 0) && (!IS_CHAR(c))) {
4112
146
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4113
146
         "invalid character in attribute value\n");
4114
1.85k
  } else {
4115
1.85k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4116
1.85k
         "AttValue: ' expected\n");
4117
1.85k
        }
4118
2.00k
    } else
4119
28.0k
  NEXT;
4120
4121
30.7k
    if (attlen != NULL) *attlen = len;
4122
30.7k
    return(buf);
4123
4124
0
mem_error:
4125
0
    xmlErrMemory(ctxt, NULL);
4126
0
error:
4127
0
    if (buf != NULL)
4128
0
        xmlFree(buf);
4129
0
    if (rep != NULL)
4130
0
        xmlFree(rep);
4131
0
    return(NULL);
4132
0
}
4133
4134
/**
4135
 * xmlParseAttValue:
4136
 * @ctxt:  an XML parser context
4137
 *
4138
 * DEPRECATED: Internal function, don't use.
4139
 *
4140
 * parse a value for an attribute
4141
 * Note: the parser won't do substitution of entities here, this
4142
 * will be handled later in xmlStringGetNodeList
4143
 *
4144
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4145
 *                   "'" ([^<&'] | Reference)* "'"
4146
 *
4147
 * 3.3.3 Attribute-Value Normalization:
4148
 * Before the value of an attribute is passed to the application or
4149
 * checked for validity, the XML processor must normalize it as follows:
4150
 * - a character reference is processed by appending the referenced
4151
 *   character to the attribute value
4152
 * - an entity reference is processed by recursively processing the
4153
 *   replacement text of the entity
4154
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4155
 *   appending #x20 to the normalized value, except that only a single
4156
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4157
 *   parsed entity or the literal entity value of an internal parsed entity
4158
 * - other characters are processed by appending them to the normalized value
4159
 * If the declared value is not CDATA, then the XML processor must further
4160
 * process the normalized attribute value by discarding any leading and
4161
 * trailing space (#x20) characters, and by replacing sequences of space
4162
 * (#x20) characters by a single space (#x20) character.
4163
 * All attributes for which no declaration has been read should be treated
4164
 * by a non-validating parser as if declared CDATA.
4165
 *
4166
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4167
 */
4168
4169
4170
xmlChar *
4171
14.0k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4172
14.0k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4173
14.0k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4174
14.0k
}
4175
4176
/**
4177
 * xmlParseSystemLiteral:
4178
 * @ctxt:  an XML parser context
4179
 *
4180
 * DEPRECATED: Internal function, don't use.
4181
 *
4182
 * parse an XML Literal
4183
 *
4184
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4185
 *
4186
 * Returns the SystemLiteral parsed or NULL
4187
 */
4188
4189
xmlChar *
4190
6.37k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4191
6.37k
    xmlChar *buf = NULL;
4192
6.37k
    int len = 0;
4193
6.37k
    int size = XML_PARSER_BUFFER_SIZE;
4194
6.37k
    int cur, l;
4195
6.37k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4196
6.37k
                    XML_MAX_TEXT_LENGTH :
4197
6.37k
                    XML_MAX_NAME_LENGTH;
4198
6.37k
    xmlChar stop;
4199
6.37k
    int state = ctxt->instate;
4200
4201
6.37k
    if (RAW == '"') {
4202
189
        NEXT;
4203
189
  stop = '"';
4204
6.18k
    } else if (RAW == '\'') {
4205
6.05k
        NEXT;
4206
6.05k
  stop = '\'';
4207
6.05k
    } else {
4208
129
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4209
129
  return(NULL);
4210
129
    }
4211
4212
6.24k
    buf = (xmlChar *) xmlMallocAtomic(size);
4213
6.24k
    if (buf == NULL) {
4214
0
        xmlErrMemory(ctxt, NULL);
4215
0
  return(NULL);
4216
0
    }
4217
6.24k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4218
6.24k
    cur = CUR_CHAR(l);
4219
16.0M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4220
16.0M
  if (len + 5 >= size) {
4221
4.16k
      xmlChar *tmp;
4222
4223
4.16k
      size *= 2;
4224
4.16k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4225
4.16k
      if (tmp == NULL) {
4226
0
          xmlFree(buf);
4227
0
    xmlErrMemory(ctxt, NULL);
4228
0
    ctxt->instate = (xmlParserInputState) state;
4229
0
    return(NULL);
4230
0
      }
4231
4.16k
      buf = tmp;
4232
4.16k
  }
4233
16.0M
  COPY_BUF(l,buf,len,cur);
4234
16.0M
        if (len > maxLength) {
4235
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4236
0
            xmlFree(buf);
4237
0
            ctxt->instate = (xmlParserInputState) state;
4238
0
            return(NULL);
4239
0
        }
4240
16.0M
  NEXTL(l);
4241
16.0M
  cur = CUR_CHAR(l);
4242
16.0M
    }
4243
6.24k
    buf[len] = 0;
4244
6.24k
    if (ctxt->instate == XML_PARSER_EOF) {
4245
0
        xmlFree(buf);
4246
0
        return(NULL);
4247
0
    }
4248
6.24k
    ctxt->instate = (xmlParserInputState) state;
4249
6.24k
    if (!IS_CHAR(cur)) {
4250
159
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4251
6.08k
    } else {
4252
6.08k
  NEXT;
4253
6.08k
    }
4254
6.24k
    return(buf);
4255
6.24k
}
4256
4257
/**
4258
 * xmlParsePubidLiteral:
4259
 * @ctxt:  an XML parser context
4260
 *
4261
 * DEPRECATED: Internal function, don't use.
4262
 *
4263
 * parse an XML public literal
4264
 *
4265
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4266
 *
4267
 * Returns the PubidLiteral parsed or NULL.
4268
 */
4269
4270
xmlChar *
4271
981
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4272
981
    xmlChar *buf = NULL;
4273
981
    int len = 0;
4274
981
    int size = XML_PARSER_BUFFER_SIZE;
4275
981
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4276
981
                    XML_MAX_TEXT_LENGTH :
4277
981
                    XML_MAX_NAME_LENGTH;
4278
981
    xmlChar cur;
4279
981
    xmlChar stop;
4280
981
    xmlParserInputState oldstate = ctxt->instate;
4281
4282
981
    if (RAW == '"') {
4283
246
        NEXT;
4284
246
  stop = '"';
4285
735
    } else if (RAW == '\'') {
4286
588
        NEXT;
4287
588
  stop = '\'';
4288
588
    } else {
4289
147
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4290
147
  return(NULL);
4291
147
    }
4292
834
    buf = (xmlChar *) xmlMallocAtomic(size);
4293
834
    if (buf == NULL) {
4294
0
  xmlErrMemory(ctxt, NULL);
4295
0
  return(NULL);
4296
0
    }
4297
834
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4298
834
    cur = CUR;
4299
33.2k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4300
32.4k
  if (len + 1 >= size) {
4301
190
      xmlChar *tmp;
4302
4303
190
      size *= 2;
4304
190
      tmp = (xmlChar *) xmlRealloc(buf, size);
4305
190
      if (tmp == NULL) {
4306
0
    xmlErrMemory(ctxt, NULL);
4307
0
    xmlFree(buf);
4308
0
    return(NULL);
4309
0
      }
4310
190
      buf = tmp;
4311
190
  }
4312
32.4k
  buf[len++] = cur;
4313
32.4k
        if (len > maxLength) {
4314
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4315
0
            xmlFree(buf);
4316
0
            return(NULL);
4317
0
        }
4318
32.4k
  NEXT;
4319
32.4k
  cur = CUR;
4320
32.4k
    }
4321
834
    buf[len] = 0;
4322
834
    if (ctxt->instate == XML_PARSER_EOF) {
4323
0
        xmlFree(buf);
4324
0
        return(NULL);
4325
0
    }
4326
834
    if (cur != stop) {
4327
236
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4328
598
    } else {
4329
598
  NEXTL(1);
4330
598
    }
4331
834
    ctxt->instate = oldstate;
4332
834
    return(buf);
4333
834
}
4334
4335
/* Forward declaration so the function can be referenced before it is defined. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4336
4337
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. An entry holds the byte itself for 0x09 and
 * for 0x20..0x7F, except '&' (0x26), '<' (0x3C) and ']' (0x5D); every
 * other entry is zero: the remaining control bytes including CR and LF,
 * and all non-ASCII bytes 0x80..0xFF. The upper half of the table is
 * left to implicit zero-initialization.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only 0x9 is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' is zero */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' is zero */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' is zero */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF (non-ascii): implicitly zero */
};
4374
4375
/**
4376
 * xmlParseCharDataInternal:
4377
 * @ctxt:  an XML parser context
4378
 * @partial:  buffer may contain partial UTF-8 sequences
4379
 *
4380
 * Parse character data. Always makes progress if the first char isn't
4381
 * '<' or '&'.
 *
 * This is the fast path: it scans ASCII bytes in place and passes spans
 * straight from the input buffer to the SAX characters() or
 * ignorableWhitespace() callback. It returns when it reaches '<' or '&'.
 * When the next byte is outside the accelerated set (not 0x20..0x7F, TAB
 * or LF), it restores the line/column saved at the last delivery and hands
 * over to xmlParseCharDataComplex(), forwarding @partial unchanged.
4382
 *
4383
 * The right angle bracket (>) may be represented using the string "&gt;",
4384
 * and must, for compatibility, be escaped using "&gt;" or a character
4385
 * reference when it appears in the string "]]>" in content, when that
4386
 * string is not marking the end of a CDATA section.
4387
 *
4388
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4389
 */
4390
static void
4391
200k
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4392
200k
    const xmlChar *in;
4393
200k
    int nbchar = 0;
4394
200k
    /* line/col snapshot, refreshed after each SAX delivery and restored
     * before falling back to xmlParseCharDataComplex() */
    int line = ctxt->input->line;
4395
200k
    int col = ctxt->input->col;
4396
200k
    int ccol;
4397
4398
200k
    GROW;
4399
    /*
4400
     * Accelerated common case where the input doesn't need to be
4401
     * modified before passing it to the handler.
4402
     */
4403
200k
    in = ctxt->input->cur;
4404
217k
    do {
4405
323k
get_more_space:
4406
478k
        /* skip leading spaces and line feeds, keeping line/col current */
        while (*in == 0x20) { in++; ctxt->input->col++; }
4407
323k
        if (*in == 0xA) {
4408
109k
            do {
4409
109k
                ctxt->input->line++; ctxt->input->col = 1;
4410
109k
                in++;
4411
109k
            } while (*in == 0xA);
4412
105k
            goto get_more_space;
4413
105k
        }
4414
217k
        /* only spaces/line feeds were skipped before the next '<':
         * report that span (if any) and return */
        if (*in == '<') {
4415
75.0k
            nbchar = in - ctxt->input->cur;
4416
75.0k
            if (nbchar > 0) {
4417
75.0k
                const xmlChar *tmp = ctxt->input->cur;
4418
75.0k
                /* advance the input position before calling into SAX */
                ctxt->input->cur = in;
4419
4420
75.0k
                if ((ctxt->sax != NULL) &&
4421
75.0k
                    (ctxt->sax->ignorableWhitespace !=
4422
75.0k
                     ctxt->sax->characters)) {
4423
75.0k
                    /* areBlanks() picks which of the two callbacks gets the span */
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4424
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4425
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4426
0
                                                   tmp, nbchar);
4427
75.0k
                    } else {
4428
75.0k
                        if (ctxt->sax->characters != NULL)
4429
75.0k
                            ctxt->sax->characters(ctxt->userData,
4430
75.0k
                                                  tmp, nbchar);
4431
75.0k
                        if (*ctxt->space == -1)
4432
14.0k
                            *ctxt->space = -2;
4433
75.0k
                    }
4434
75.0k
                } else if ((ctxt->sax != NULL) &&
4435
0
                           (ctxt->sax->characters != NULL)) {
4436
0
                    ctxt->sax->characters(ctxt->userData,
4437
0
                                          tmp, nbchar);
4438
0
                }
4439
75.0k
            }
4440
75.0k
            return;
4441
75.0k
        }
4442
4443
208k
get_more:
4444
208k
        /* step over every byte that test_char_data marks as non-zero;
         * the column is counted locally and written back once */
        ccol = ctxt->input->col;
4445
2.45M
        while (test_char_data[*in]) {
4446
2.24M
            in++;
4447
2.24M
            ccol++;
4448
2.24M
        }
4449
208k
        ctxt->input->col = ccol;
4450
208k
        if (*in == 0xA) {
4451
63.3k
            do {
4452
63.3k
                ctxt->input->line++; ctxt->input->col = 1;
4453
63.3k
                in++;
4454
63.3k
            } while (*in == 0xA);
4455
54.4k
            goto get_more;
4456
54.4k
        }
4457
153k
        /* "]]>" is an error in content; a ']' not followed by "]>" is
         * ordinary data and scanning resumes after it */
        if (*in == ']') {
4458
10.6k
            if ((in[1] == ']') && (in[2] == '>')) {
4459
4
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4460
4
                if (ctxt->instate != XML_PARSER_EOF)
4461
4
                    ctxt->input->cur = in + 1;
4462
4
                return;
4463
4
            }
4464
10.6k
            in++;
4465
10.6k
            ctxt->input->col++;
4466
10.6k
            goto get_more;
4467
10.6k
        }
4468
142k
        /* deliver the span scanned so far, from ctxt->input->cur up to in */
        nbchar = in - ctxt->input->cur;
4469
142k
        if (nbchar > 0) {
4470
139k
            /* areBlanks() is consulted only when the span starts with a
             * blank character and the two callbacks differ */
            if ((ctxt->sax != NULL) &&
4471
139k
                (ctxt->sax->ignorableWhitespace !=
4472
139k
                 ctxt->sax->characters) &&
4473
139k
                (IS_BLANK_CH(*ctxt->input->cur))) {
4474
48.5k
                const xmlChar *tmp = ctxt->input->cur;
4475
48.5k
                ctxt->input->cur = in;
4476
4477
48.5k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4478
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4479
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4480
0
                                                       tmp, nbchar);
4481
48.5k
                } else {
4482
48.5k
                    if (ctxt->sax->characters != NULL)
4483
48.5k
                        ctxt->sax->characters(ctxt->userData,
4484
48.5k
                                              tmp, nbchar);
4485
48.5k
                    if (*ctxt->space == -1)
4486
9.94k
                        *ctxt->space = -2;
4487
48.5k
                }
4488
48.5k
                line = ctxt->input->line;
4489
48.5k
                col = ctxt->input->col;
4490
90.8k
            } else if (ctxt->sax != NULL) {
4491
90.8k
                /* in this branch ctxt->input->cur still points at the start
                 * of the span during the callback; it is advanced below */
                if (ctxt->sax->characters != NULL)
4492
90.8k
                    ctxt->sax->characters(ctxt->userData,
4493
90.8k
                                          ctxt->input->cur, nbchar);
4494
90.8k
                line = ctxt->input->line;
4495
90.8k
                col = ctxt->input->col;
4496
90.8k
            }
4497
139k
        }
4498
142k
        ctxt->input->cur = in;
4499
142k
        /* CR LF: leave cur on the LF so the next reported span starts at
         * the LF and omits the CR; a lone CR is stepped back onto */
        if (*in == 0xD) {
4500
20.2k
            in++;
4501
20.2k
            if (*in == 0xA) {
4502
17.7k
                ctxt->input->cur = in;
4503
17.7k
                in++;
4504
17.7k
                ctxt->input->line++; ctxt->input->col = 1;
4505
17.7k
                continue; /* while */
4506
17.7k
            }
4507
2.52k
            in--;
4508
2.52k
        }
4509
125k
        if (*in == '<') {
4510
109k
            return;
4511
109k
        }
4512
15.9k
        if (*in == '&') {
4513
2.59k
            return;
4514
2.59k
        }
4515
13.3k
        SHRINK;
4516
13.3k
        GROW;
4517
13.3k
        if (ctxt->instate == XML_PARSER_EOF)
4518
0
            return;
4519
13.3k
        /* re-read the position; presumably SHRINK/GROW may move the
         * buffer -- TODO confirm against the macro definitions */
        in = ctxt->input->cur;
4520
31.0k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4521
31.0k
             (*in == 0x09) || (*in == 0x0a));
4522
13.3k
    /* the next byte is outside the fast-path set: rewind line/col to the
     * last delivery point and let the general routine take over */
    ctxt->input->line = line;
4523
13.3k
    ctxt->input->col = col;
4524
13.3k
    xmlParseCharDataComplex(ctxt, partial);
4525
13.3k
}
4526
4527
/**
4528
 * xmlParseCharDataComplex:
4529
 * @ctxt:  an XML parser context
4530
 * @partial:  if non-zero, an incomplete UTF-8 sequence is tolerated
 *            instead of being reported as an error
4531
 *
4532
 * Always makes progress if the first char isn't '<' or '&'.
4533
 *
4534
 * Parse a CharData section. This is the fallback function
4535
 * of xmlParseCharDataInternal() when the parsing requires handling
4536
 * of non-ASCII characters. Characters are copied into a local buffer
 * that is flushed to the SAX callbacks each time it fills up and once
 * more when the loop ends.
4537
 */
4538
static void
4539
13.3k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4540
13.3k
    /* the 5 spare bytes presumably leave room for the last multi-byte
     * character plus the terminating 0 -- TODO confirm against COPY_BUF */
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4541
13.3k
    int nbchar = 0;
4542
13.3k
    int cur, l;
4543
4544
13.3k
    cur = CUR_CHAR(l);
4545
5.07M
    while ((cur != '<') && /* checked */
4546
5.07M
           (cur != '&') &&
4547
5.07M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4548
5.06M
  /* "]]>" is reported as an error, but the loop keeps going and the
   * ']' is still copied into the buffer */
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4549
191
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
191
  }
4551
5.06M
  COPY_BUF(l,buf,nbchar,cur);
4552
  /* move the current position before a possible call to ctxt->sax->characters */
4553
5.06M
  NEXTL(l);
4554
5.06M
  /* buffer is full: flush it to SAX and start a new segment */
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4555
39.4k
      buf[nbchar] = 0;
4556
4557
      /*
4558
       * OK the segment is to be consumed as chars.
4559
       */
4560
39.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4561
39.3k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4562
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4563
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4564
0
                                     buf, nbchar);
4565
39.3k
    } else {
4566
39.3k
        if (ctxt->sax->characters != NULL)
4567
39.3k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4568
39.3k
        if ((ctxt->sax->characters !=
4569
39.3k
             ctxt->sax->ignorableWhitespace) &&
4570
39.3k
      (*ctxt->space == -1))
4571
2.19k
      *ctxt->space = -2;
4572
39.3k
    }
4573
39.3k
      }
4574
39.4k
      nbchar = 0;
4575
            /* something really bad happened in the SAX callback */
4576
39.4k
            if (ctxt->instate != XML_PARSER_CONTENT)
4577
0
                return;
4578
39.4k
            SHRINK;
4579
39.4k
  }
4580
5.06M
  cur = CUR_CHAR(l);
4581
5.06M
    }
4582
13.3k
    if (ctxt->instate == XML_PARSER_EOF)
4583
0
        return;
4584
13.3k
    /* flush whatever is left in the buffer after the loop */
    if (nbchar != 0) {
4585
12.2k
        buf[nbchar] = 0;
4586
  /*
4587
   * OK the segment is to be consumed as chars.
4588
   */
4589
12.2k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4590
12.0k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4591
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4592
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4593
12.0k
      } else {
4594
12.0k
    if (ctxt->sax->characters != NULL)
4595
12.0k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4596
12.0k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4597
12.0k
        (*ctxt->space == -1))
4598
4.23k
        *ctxt->space = -2;
4599
12.0k
      }
4600
12.0k
  }
4601
12.2k
    }
4602
    /*
4603
     * cur == 0 can mean
4604
     *
4605
     * - XML_PARSER_EOF or memory error. This is checked above.
4606
     * - An actual 0 character.
4607
     * - End of buffer.
4608
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4609
     */
4610
13.3k
    if (ctxt->input->cur < ctxt->input->end) {
4611
12.5k
        /* cur == 0 while the raw byte is non-zero: incomplete UTF-8
         * sequence, reported and skipped only when partial is 0 */
        if ((cur == 0) && (CUR != 0)) {
4612
22
            if (partial == 0) {
4613
15
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4614
15
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4615
15
                NEXTL(1);
4616
15
            }
4617
12.5k
        } else if ((cur != '<') && (cur != '&')) {
4618
            /* Generate the error and skip the offending character */
4619
104
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4620
104
                              "PCDATA invalid Char value %d\n", cur);
4621
104
            NEXTL(l);
4622
104
        }
4623
12.5k
    }
4624
13.3k
}
4625
4626
/**
4627
 * xmlParseCharData:
4628
 * @ctxt:  an XML parser context
4629
 * @cdata:  unused
4630
 *
4631
 * DEPRECATED: Internal function, don't use.
 *
 * Thin wrapper that calls xmlParseCharDataInternal() with partial set
 * to 0; the @cdata argument is ignored.
4632
 */
4633
void
4634
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4635
0
    xmlParseCharDataInternal(ctxt, 0);
4636
0
}
4637
4638
/**
4639
 * xmlParseExternalID:
4640
 * @ctxt:  an XML parser context
4641
 * @publicID:  a xmlChar** receiving PubidLiteral; it is reset to NULL on
 *             entry and only set when a 'PUBLIC' identifier is parsed
4642
 * @strict: indicate whether we should restrict parsing to only
4643
 *          production [75], see NOTE below
4644
 *
4645
 * DEPRECATED: Internal function, don't use.
4646
 *
4647
 * Parse an External ID or a Public ID
4648
 *
4649
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4650
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4651
 *
4652
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4653
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4654
 *
4655
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4656
 *
4657
 * Returns the SystemLiteral, or NULL if none was parsed. In the 'PUBLIC'
4658
 *                case publicID receives the PubidLiteral; if strict is off
4659
 *                it is possible to return NULL and have publicID set.
4660
 */
4661
4662
xmlChar *
4663
10.3k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4664
10.3k
    xmlChar *URI = NULL;
4665
4666
10.3k
    *publicID = NULL;
4667
10.3k
    /* 'SYSTEM' S SystemLiteral */
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4668
6.28k
        SKIP(6);
4669
6.28k
  /* a missing blank is reported, but parsing of the literal still proceeds */
  if (SKIP_BLANKS == 0) {
4670
341
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4671
341
                     "Space required after 'SYSTEM'\n");
4672
341
  }
4673
6.28k
  URI = xmlParseSystemLiteral(ctxt);
4674
6.28k
  if (URI == NULL) {
4675
88
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4676
88
        }
4677
6.28k
    /* 'PUBLIC' S PubidLiteral [S SystemLiteral] */
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4678
981
        SKIP(6);
4679
981
  if (SKIP_BLANKS == 0) {
4680
689
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4681
689
        "Space required after 'PUBLIC'\n");
4682
689
  }
4683
981
  *publicID = xmlParsePubidLiteral(ctxt);
4684
981
  if (*publicID == NULL) {
4685
147
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4686
147
  }
4687
981
  if (strict) {
4688
      /*
4689
       * We don't handle [83] so "S SystemLiteral" is required.
4690
       */
4691
87
      if (SKIP_BLANKS == 0) {
4692
43
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693
43
      "Space required after the Public Identifier\n");
4694
43
      }
4695
894
  } else {
4696
      /*
4697
       * We handle [83] so we return immediately, if
4698
       * "S SystemLiteral" is not detected. We skip blanks if no
4699
             * system literal was found, but this is harmless since we must
4700
             * be at the end of a NotationDecl.
4701
       */
4702
894
      if (SKIP_BLANKS == 0) return(NULL);
4703
216
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4704
216
  }
4705
92
  URI = xmlParseSystemLiteral(ctxt);
4706
92
  if (URI == NULL) {
4707
41
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4708
41
        }
4709
92
    }
4710
9.42k
    /* neither keyword matched: URI is still NULL and *publicID is NULL */
    return(URI);
4711
10.3k
}
4712
4713
/**
4714
 * xmlParseCommentComplex:
4715
 * @ctxt:  an XML parser context
4716
 * @buf:  the already parsed part of the buffer, or NULL, in which case a
 *        new buffer is allocated here and @len/@size are reset
4717
 * @len:  number of bytes in the buffer
4718
 * @size:  allocated size of the buffer
4719
 *
4720
 * Skip an XML (SGML) comment <!-- .... -->
4721
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4722
 *  must not occur within comments. "
4723
 * This is the slow routine in case the accelerator for ascii didn't work
 *
 * The buffer is released with xmlFree() on every return path that is
 * reached with a non-NULL buffer, so the caller must not use @buf after
 * this call.
4724
 *
4725
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4726
 */
4727
static void
4728
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4729
5.91k
                       size_t len, size_t size) {
4730
5.91k
    /* q, r and cur form a three-character sliding window (oldest first);
     * ql, rl and l are the lengths used to advance over / copy each one */
    int q, ql;
4731
5.91k
    int r, rl;
4732
5.91k
    int cur, l;
4733
5.91k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4734
5.91k
                       XML_MAX_HUGE_LENGTH :
4735
5.91k
                       XML_MAX_TEXT_LENGTH;
4736
5.91k
    int inputid;
4737
4738
5.91k
    /* remembered to detect a comment that ends in a different input */
    inputid = ctxt->input->id;
4739
4740
5.91k
    if (buf == NULL) {
4741
5.91k
        len = 0;
4742
5.91k
  size = XML_PARSER_BUFFER_SIZE;
4743
5.91k
  buf = (xmlChar *) xmlMallocAtomic(size);
4744
5.91k
  if (buf == NULL) {
4745
0
      xmlErrMemory(ctxt, NULL);
4746
0
      return;
4747
0
  }
4748
5.91k
    }
4749
5.91k
    /* prime the window: read q, then r, then cur */
    q = CUR_CHAR(ql);
4750
5.91k
    if (q == 0)
4751
75
        goto not_terminated;
4752
5.84k
    if (!IS_CHAR(q)) {
4753
27
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4754
27
                          "xmlParseComment: invalid xmlChar value %d\n",
4755
27
                    q);
4756
27
  xmlFree (buf);
4757
27
  return;
4758
27
    }
4759
5.81k
    NEXTL(ql);
4760
5.81k
    r = CUR_CHAR(rl);
4761
5.81k
    if (r == 0)
4762
12
        goto not_terminated;
4763
5.80k
    if (!IS_CHAR(r)) {
4764
14
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4765
14
                          "xmlParseComment: invalid xmlChar value %d\n",
4766
14
                    r);
4767
14
  xmlFree (buf);
4768
14
  return;
4769
14
    }
4770
5.78k
    NEXTL(rl);
4771
5.78k
    cur = CUR_CHAR(l);
4772
5.78k
    if (cur == 0)
4773
25
        goto not_terminated;
4774
2.59M
    /* loop until the window holds "-->" or cur is not a valid Char */
    while (IS_CHAR(cur) && /* checked */
4775
2.59M
           ((cur != '>') ||
4776
2.59M
      (r != '-') || (q != '-'))) {
4777
2.59M
  /* "--" not followed by '>' is reported, and parsing continues */
  if ((r == '-') && (q == '-')) {
4778
2.10k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4779
2.10k
  }
4780
2.59M
  /* double the buffer when fewer than 5 spare bytes remain */
  if (len + 5 >= size) {
4781
1.73k
      xmlChar *new_buf;
4782
1.73k
            size_t new_size;
4783
4784
1.73k
      new_size = size * 2;
4785
1.73k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4786
1.73k
      if (new_buf == NULL) {
4787
0
    xmlFree (buf);
4788
0
    xmlErrMemory(ctxt, NULL);
4789
0
    return;
4790
0
      }
4791
1.73k
      buf = new_buf;
4792
1.73k
            size = new_size;
4793
1.73k
  }
4794
2.59M
  /* only the oldest character q is stored, so the "--" of the closing
   * "-->" never reaches the buffer */
  COPY_BUF(ql,buf,len,q);
4795
2.59M
        if (len > maxLength) {
4796
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4797
0
                         "Comment too big found", NULL);
4798
0
            xmlFree (buf);
4799
0
            return;
4800
0
        }
4801
4802
2.59M
  /* slide the window forward by one character */
  q = r;
4803
2.59M
  ql = rl;
4804
2.59M
  r = cur;
4805
2.59M
  rl = l;
4806
4807
2.59M
  NEXTL(l);
4808
2.59M
  cur = CUR_CHAR(l);
4809
4810
2.59M
    }
4811
5.76k
    buf[len] = 0;
4812
5.76k
    if (ctxt->instate == XML_PARSER_EOF) {
4813
0
        xmlFree(buf);
4814
0
        return;
4815
0
    }
4816
5.76k
    if (cur == 0) {
4817
145
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4818
145
                       "Comment not terminated \n<!--%.50s\n", buf);
4819
5.61k
    } else if (!IS_CHAR(cur)) {
4820
43
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4821
43
                          "xmlParseComment: invalid xmlChar value %d\n",
4822
43
                    cur);
4823
5.57k
    } else {
4824
5.57k
  /* cur is the closing '>': check the entity boundary, consume it and
   * hand the comment text to SAX */
  if (inputid != ctxt->input->id) {
4825
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4826
0
               "Comment doesn't start and stop in the same"
4827
0
                           " entity\n");
4828
0
  }
4829
5.57k
        NEXT;
4830
5.57k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4831
5.57k
      (!ctxt->disableSAX))
4832
0
      ctxt->sax->comment(ctxt->userData, buf);
4833
5.57k
    }
4834
5.76k
    xmlFree(buf);
4835
5.76k
    return;
4836
112
not_terminated:
4837
112
    /* input ran out while priming the window */
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4838
112
       "Comment not terminated\n", NULL);
4839
112
    xmlFree(buf);
4840
112
    return;
4841
5.76k
}
4842
4843
/**
4844
 * xmlParseComment:
4845
 * @ctxt:  an XML parser context
4846
 *
4847
 * DEPRECATED: Internal function, don't use.
4848
 *
4849
 * Parse an XML (SGML) comment. Always consumes '<!'.
 *
 * ASCII content is scanned in place and, when a SAX comment handler is
 * installed, accumulated into a heap buffer. If the scan stops on a byte
 * outside the accelerated set, the remainder is parsed by
 * xmlParseCommentComplex(), which takes over (and frees) the buffer.
 * ctxt->instate is set to XML_PARSER_COMMENT while parsing.
4850
 *
4851
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4852
 *  must not occur within comments. "
4853
 *
4854
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4855
 */
4856
void
4857
17.5k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4858
17.5k
    xmlChar *buf = NULL;
4859
17.5k
    size_t size = XML_PARSER_BUFFER_SIZE;
4860
17.5k
    size_t len = 0;
4861
17.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4862
17.5k
                       XML_MAX_HUGE_LENGTH :
4863
17.5k
                       XML_MAX_TEXT_LENGTH;
4864
17.5k
    xmlParserInputState state;
4865
17.5k
    const xmlChar *in;
4866
17.5k
    size_t nbchar = 0;
4867
17.5k
    int ccol;
4868
17.5k
    int inputid;
4869
4870
    /*
4871
     * Check that there is a comment right here.
4872
     */
4873
17.5k
    if ((RAW != '<') || (NXT(1) != '!'))
4874
0
        return;
4875
17.5k
    SKIP(2);
4876
17.5k
    /* "<!" has been consumed at this point even if "--" does not follow */
    if ((RAW != '-') || (NXT(1) != '-'))
4877
6
        return;
4878
17.4k
    state = ctxt->instate;
4879
17.4k
    ctxt->instate = XML_PARSER_COMMENT;
4880
17.4k
    inputid = ctxt->input->id;
4881
17.4k
    SKIP(2);
4882
17.4k
    GROW;
4883
4884
    /*
4885
     * Accelerated common case where the input doesn't need to be
4886
     * modified before passing it to the handler.
4887
     */
4888
17.4k
    in = ctxt->input->cur;
4889
17.4k
    do {
4890
17.4k
  if (*in == 0xA) {
4891
1.18k
      do {
4892
1.18k
    ctxt->input->line++; ctxt->input->col = 1;
4893
1.18k
    in++;
4894
1.18k
      } while (*in == 0xA);
4895
326
  }
4896
40.9k
get_more:
4897
40.9k
        ccol = ctxt->input->col;
4898
1.15M
  /* step over TAB and every byte in 0x20..0x7F except '-' */
  while (((*in > '-') && (*in <= 0x7F)) ||
4899
1.15M
         ((*in >= 0x20) && (*in < '-')) ||
4900
1.15M
         (*in == 0x09)) {
4901
1.11M
        in++;
4902
1.11M
        ccol++;
4903
1.11M
  }
4904
40.9k
  ctxt->input->col = ccol;
4905
40.9k
  if (*in == 0xA) {
4906
22.5k
      do {
4907
22.5k
    ctxt->input->line++; ctxt->input->col = 1;
4908
22.5k
    in++;
4909
22.5k
      } while (*in == 0xA);
4910
17.7k
      goto get_more;
4911
17.7k
  }
4912
23.1k
  nbchar = in - ctxt->input->cur;
4913
  /*
4914
   * save current set of data
4915
   */
4916
23.1k
  if (nbchar > 0) {
4917
21.9k
      /* the text is only accumulated when a comment handler exists;
       * otherwise buf stays NULL and len stays 0 */
      if ((ctxt->sax != NULL) &&
4918
21.9k
    (ctxt->sax->comment != NULL)) {
4919
0
    if (buf == NULL) {
4920
0
        /* span directly followed by "--": allocate exactly nbchar + 1,
         * presumably because the comment is expected to end here */
        if ((*in == '-') && (in[1] == '-'))
4921
0
            size = nbchar + 1;
4922
0
        else
4923
0
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4924
0
        buf = (xmlChar *) xmlMallocAtomic(size);
4925
0
        if (buf == NULL) {
4926
0
            xmlErrMemory(ctxt, NULL);
4927
0
      ctxt->instate = state;
4928
0
      return;
4929
0
        }
4930
0
        len = 0;
4931
0
    } else if (len + nbchar + 1 >= size) {
4932
0
        xmlChar *new_buf;
4933
0
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4934
0
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4935
0
        if (new_buf == NULL) {
4936
0
            xmlFree (buf);
4937
0
      xmlErrMemory(ctxt, NULL);
4938
0
      ctxt->instate = state;
4939
0
      return;
4940
0
        }
4941
0
        buf = new_buf;
4942
0
    }
4943
0
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4944
0
    len += nbchar;
4945
0
    buf[len] = 0;
4946
0
      }
4947
21.9k
  }
4948
23.1k
        /* NOTE(review): unlike the allocation-failure returns above, this
         * return leaves ctxt->instate at XML_PARSER_COMMENT instead of
         * restoring the saved state -- confirm whether that is intended */
        if (len > maxLength) {
4949
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4950
0
                         "Comment too big found", NULL);
4951
0
            xmlFree (buf);
4952
0
            return;
4953
0
        }
4954
23.1k
  ctxt->input->cur = in;
4955
23.1k
  /* NOTE(review): the get_more loop above already consumes every 0xA
   * before reaching this point, so this branch looks unreachable --
   * confirm before removing */
  if (*in == 0xA) {
4956
0
      in++;
4957
0
      ctxt->input->line++; ctxt->input->col = 1;
4958
0
  }
4959
23.1k
  /* CR LF: leave cur on the LF so the CR is left out of the next copied
   * span; a lone CR is stepped back onto */
  if (*in == 0xD) {
4960
3.76k
      in++;
4961
3.76k
      if (*in == 0xA) {
4962
226
    ctxt->input->cur = in;
4963
226
    in++;
4964
226
    ctxt->input->line++; ctxt->input->col = 1;
4965
226
    goto get_more;
4966
226
      }
4967
3.54k
      in--;
4968
3.54k
  }
4969
22.9k
  SHRINK;
4970
22.9k
  GROW;
4971
22.9k
        if (ctxt->instate == XML_PARSER_EOF) {
4972
0
            xmlFree(buf);
4973
0
            return;
4974
0
        }
4975
22.9k
  in = ctxt->input->cur;
4976
22.9k
  if (*in == '-') {
4977
17.0k
      if (in[1] == '-') {
4978
12.1k
          /* "-->" ends the comment: report it and restore the state */
          if (in[2] == '>') {
4979
11.5k
        if (ctxt->input->id != inputid) {
4980
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4981
0
                     "comment doesn't start and stop in the"
4982
0
                                       " same entity\n");
4983
0
        }
4984
11.5k
        SKIP(3);
4985
11.5k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4986
11.5k
            (!ctxt->disableSAX)) {
4987
0
      /* an empty comment is reported as "" rather than NULL */
      if (buf != NULL)
4988
0
          ctxt->sax->comment(ctxt->userData, buf);
4989
0
      else
4990
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4991
0
        }
4992
11.5k
        if (buf != NULL)
4993
0
            xmlFree(buf);
4994
11.5k
        if (ctxt->instate != XML_PARSER_EOF)
4995
11.5k
      ctxt->instate = state;
4996
11.5k
        return;
4997
11.5k
    }
4998
525
    /* "--" not followed by '>': report it, then skip both hyphens
     * (one here, one below) and keep scanning */
    if (buf != NULL) {
4999
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5000
0
                          "Double hyphen within comment: "
5001
0
                                      "<!--%.50s\n",
5002
0
              buf);
5003
0
    } else
5004
525
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5005
525
                          "Double hyphen within comment\n", NULL);
5006
525
                if (ctxt->instate == XML_PARSER_EOF) {
5007
0
                    xmlFree(buf);
5008
0
                    return;
5009
0
                }
5010
525
    in++;
5011
525
    ctxt->input->col++;
5012
525
      }
5013
5.44k
      in++;
5014
5.44k
      ctxt->input->col++;
5015
5.44k
      goto get_more;
5016
17.0k
  }
5017
22.9k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5018
5.91k
    /* the next byte is outside the fast-path set: the slow routine
     * continues from here and takes over buf (it frees it) */
    xmlParseCommentComplex(ctxt, buf, len, size);
5019
5.91k
    ctxt->instate = state;
5020
5.91k
    return;
5021
17.4k
}
5022
5023
5024
/**
5025
 * xmlParsePITarget:
5026
 * @ctxt:  an XML parser context
5027
 *
5028
 * DEPRECATED: Internal function, don't use.
5029
 *
5030
 * Parse the name of a PI.
 *
 * Names starting with "xml" in any letter case get extra checks: the
 * three-letter forms raise XML_ERR_RESERVED_XML_NAME as a fatal error,
 * longer names are compared against the xmlW3CPIs list and produce a
 * warning when they are not in it. The name is returned in every case;
 * the checks only report diagnostics.
5031
 *
5032
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5033
 *
5034
 * Returns the PITarget name or NULL
5035
 */
5036
5037
const xmlChar *
5038
48.5k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5039
48.5k
    const xmlChar *name;
5040
5041
48.5k
    name = xmlParseName(ctxt);
5042
48.5k
    /* case-insensitive "xml" prefix */
    if ((name != NULL) &&
5043
48.5k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5044
48.5k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5045
48.5k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5046
1.76k
  int i;
5047
1.76k
  /* exactly "xml" in lower case: a misplaced XML declaration */
  if ((name[0] == 'x') && (name[1] == 'm') &&
5048
1.76k
      (name[2] == 'l') && (name[3] == 0)) {
5049
183
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5050
183
     "XML declaration allowed only at the start of the document\n");
5051
183
      return(name);
5052
1.57k
  /* any other three-letter case variant of "xml" */
  } else if (name[3] == 0) {
5053
353
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5054
353
      return(name);
5055
353
  }
5056
3.60k
  /* xmlW3CPIs is walked up to its NULL terminator; a match returns
   * the name without any diagnostic */
  for (i = 0;;i++) {
5057
3.60k
      if (xmlW3CPIs[i] == NULL) break;
5058
2.45k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5059
75
          return(name);
5060
2.45k
  }
5061
1.15k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5062
1.15k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5063
1.15k
          NULL, NULL);
5064
1.15k
    }
5065
47.8k
    /* the early returns above skip this colon check */
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5066
1.73k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5067
1.73k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5068
1.73k
    }
5069
47.8k
    return(name);
5070
48.5k
}
5071
5072
#ifdef LIBXML_CATALOG_ENABLED
5073
/**
5074
 * xmlParseCatalogPI:
5075
 * @ctxt:  an XML parser context
5076
 * @catalog:  the PI value string
5077
 *
5078
 * Parse an XML Catalog Processing Instruction.
5079
 *
5080
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5081
 *
5082
 * Occurs only if allowed by the user and if happening in the Misc
5083
 * part of the document before any doctype information
5084
 * This will add the given catalog to the parsing context in order
5085
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form: catalog = "URL" (single or double
 * quotes), with optional blanks around each part and nothing but blanks
 * after the closing quote. Most syntax errors produce an
 * XML_WAR_CATALOG_PI warning and leave ctxt->catalogs unchanged.
5086
 */
5087
5088
static void
5089
1.09k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5090
1.09k
    xmlChar *URL = NULL;
5091
1.09k
    const xmlChar *tmp, *base;
5092
1.09k
    xmlChar marker;
5093
5094
1.09k
    tmp = catalog;
5095
1.09k
    while (IS_BLANK_CH(*tmp)) tmp++;
5096
1.09k
    /* the pseudo-attribute name must be "catalog" */
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5097
399
  goto error;
5098
691
    tmp += 7;
5099
5.38k
    while (IS_BLANK_CH(*tmp)) tmp++;
5100
691
    /* NOTE(review): a missing '=' returns silently, while the other
     * syntax errors in this function go through the warning at the
     * error label -- confirm whether that is intended */
    if (*tmp != '=') {
5101
188
  return;
5102
188
    }
5103
503
    tmp++;
5104
3.45k
    while (IS_BLANK_CH(*tmp)) tmp++;
5105
503
    /* the opening quote decides which closing quote is expected */
    marker = *tmp;
5106
503
    if ((marker != '\'') && (marker != '"'))
5107
295
  goto error;
5108
208
    tmp++;
5109
208
    base = tmp;
5110
3.30k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5111
208
    if (*tmp == 0)
5112
104
  goto error;
5113
104
    URL = xmlStrndup(base, tmp - base);
5114
104
    tmp++;
5115
252
    while (IS_BLANK_CH(*tmp)) tmp++;
5116
104
    /* anything other than blanks after the closing quote is an error */
    if (*tmp != 0)
5117
23
  goto error;
5118
5119
81
    /* URL is NULL here only if xmlStrndup() returned NULL */
    if (URL != NULL) {
5120
81
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5121
81
  xmlFree(URL);
5122
81
    }
5123
81
    return;
5124
5125
821
error:
5126
821
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5127
821
            "Catalog PI syntax error: %s\n",
5128
821
      catalog, NULL);
5129
821
    /* URL is set only when the error came after the quoted value */
    if (URL != NULL)
5130
23
  xmlFree(URL);
5131
821
}
5132
#endif
5133
5134
/**
5135
 * xmlParsePI:
5136
 * @ctxt:  an XML parser context
5137
 *
5138
 * DEPRECATED: Internal function, don't use.
5139
 *
5140
 * parse an XML Processing Instruction.
5141
 *
5142
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5143
 *
5144
 * The processing is transferred to SAX once parsed.
5145
 */
5146
5147
void
5148
48.5k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5149
48.5k
    xmlChar *buf = NULL;
5150
48.5k
    size_t len = 0;
5151
48.5k
    size_t size = XML_PARSER_BUFFER_SIZE;
5152
48.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5153
48.5k
                       XML_MAX_HUGE_LENGTH :
5154
48.5k
                       XML_MAX_TEXT_LENGTH;
5155
48.5k
    int cur, l;
5156
48.5k
    const xmlChar *target;
5157
48.5k
    xmlParserInputState state;
5158
5159
48.5k
    if ((RAW == '<') && (NXT(1) == '?')) {
5160
48.5k
  int inputid = ctxt->input->id;
5161
48.5k
  state = ctxt->instate;
5162
48.5k
        ctxt->instate = XML_PARSER_PI;
5163
  /*
5164
   * this is a Processing Instruction.
5165
   */
5166
48.5k
  SKIP(2);
5167
5168
  /*
5169
   * Parse the target name and check for special support like
5170
   * namespace.
5171
   */
5172
48.5k
        target = xmlParsePITarget(ctxt);
5173
48.5k
  if (target != NULL) {
5174
48.2k
      if ((RAW == '?') && (NXT(1) == '>')) {
5175
19.6k
    if (inputid != ctxt->input->id) {
5176
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5177
0
                             "PI declaration doesn't start and stop in"
5178
0
                                   " the same entity\n");
5179
0
    }
5180
19.6k
    SKIP(2);
5181
5182
    /*
5183
     * SAX: PI detected.
5184
     */
5185
19.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5186
19.6k
        (ctxt->sax->processingInstruction != NULL))
5187
0
        ctxt->sax->processingInstruction(ctxt->userData,
5188
0
                                         target, NULL);
5189
19.6k
    if (ctxt->instate != XML_PARSER_EOF)
5190
19.6k
        ctxt->instate = state;
5191
19.6k
    return;
5192
19.6k
      }
5193
28.6k
      buf = (xmlChar *) xmlMallocAtomic(size);
5194
28.6k
      if (buf == NULL) {
5195
0
    xmlErrMemory(ctxt, NULL);
5196
0
    ctxt->instate = state;
5197
0
    return;
5198
0
      }
5199
28.6k
      if (SKIP_BLANKS == 0) {
5200
4.52k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5201
4.52k
        "ParsePI: PI %s space expected\n", target);
5202
4.52k
      }
5203
28.6k
      cur = CUR_CHAR(l);
5204
5.45M
      while (IS_CHAR(cur) && /* checked */
5205
5.45M
       ((cur != '?') || (NXT(1) != '>'))) {
5206
5.42M
    if (len + 5 >= size) {
5207
3.46k
        xmlChar *tmp;
5208
3.46k
                    size_t new_size = size * 2;
5209
3.46k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5210
3.46k
        if (tmp == NULL) {
5211
0
      xmlErrMemory(ctxt, NULL);
5212
0
      xmlFree(buf);
5213
0
      ctxt->instate = state;
5214
0
      return;
5215
0
        }
5216
3.46k
        buf = tmp;
5217
3.46k
                    size = new_size;
5218
3.46k
    }
5219
5.42M
    COPY_BUF(l,buf,len,cur);
5220
5.42M
                if (len > maxLength) {
5221
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5222
0
                                      "PI %s too big found", target);
5223
0
                    xmlFree(buf);
5224
0
                    ctxt->instate = state;
5225
0
                    return;
5226
0
                }
5227
5.42M
    NEXTL(l);
5228
5.42M
    cur = CUR_CHAR(l);
5229
5.42M
      }
5230
28.6k
      buf[len] = 0;
5231
28.6k
            if (ctxt->instate == XML_PARSER_EOF) {
5232
0
                xmlFree(buf);
5233
0
                return;
5234
0
            }
5235
28.6k
      if (cur != '?') {
5236
718
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5237
718
          "ParsePI: PI %s never end ...\n", target);
5238
27.9k
      } else {
5239
27.9k
    if (inputid != ctxt->input->id) {
5240
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5241
0
                             "PI declaration doesn't start and stop in"
5242
0
                                   " the same entity\n");
5243
0
    }
5244
27.9k
    SKIP(2);
5245
5246
27.9k
#ifdef LIBXML_CATALOG_ENABLED
5247
27.9k
    if (((state == XML_PARSER_MISC) ||
5248
27.9k
               (state == XML_PARSER_START)) &&
5249
27.9k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5250
1.09k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5251
1.09k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5252
1.09k
      (allow == XML_CATA_ALLOW_ALL))
5253
1.09k
      xmlParseCatalogPI(ctxt, buf);
5254
1.09k
    }
5255
27.9k
#endif
5256
5257
5258
    /*
5259
     * SAX: PI detected.
5260
     */
5261
27.9k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5262
27.9k
        (ctxt->sax->processingInstruction != NULL))
5263
0
        ctxt->sax->processingInstruction(ctxt->userData,
5264
0
                                         target, buf);
5265
27.9k
      }
5266
28.6k
      xmlFree(buf);
5267
28.6k
  } else {
5268
217
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5269
217
  }
5270
28.8k
  if (ctxt->instate != XML_PARSER_EOF)
5271
28.8k
      ctxt->instate = state;
5272
28.8k
    }
5273
48.5k
}
5274
5275
/**
5276
 * xmlParseNotationDecl:
5277
 * @ctxt:  an XML parser context
5278
 *
5279
 * DEPRECATED: Internal function, don't use.
5280
 *
5281
 * Parse a notation declaration. Always consumes '<!'.
5282
 *
5283
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5284
 *
5285
 * Hence there is actually 3 choices:
5286
 *     'PUBLIC' S PubidLiteral
5287
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5288
 * and 'SYSTEM' S SystemLiteral
5289
 *
5290
 * See the NOTE on xmlParseExternalID().
5291
 */
5292
5293
void
5294
1.35k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5295
1.35k
    const xmlChar *name;
5296
1.35k
    xmlChar *Pubid;
5297
1.35k
    xmlChar *Systemid;
5298
5299
1.35k
    if ((CUR != '<') || (NXT(1) != '!'))
5300
0
        return;
5301
1.35k
    SKIP(2);
5302
5303
1.35k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5304
1.33k
  int inputid = ctxt->input->id;
5305
1.33k
  SKIP(8);
5306
1.33k
  if (SKIP_BLANKS == 0) {
5307
113
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5308
113
         "Space required after '<!NOTATION'\n");
5309
113
      return;
5310
113
  }
5311
5312
1.22k
        name = xmlParseName(ctxt);
5313
1.22k
  if (name == NULL) {
5314
164
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5315
164
      return;
5316
164
  }
5317
1.05k
  if (xmlStrchr(name, ':') != NULL) {
5318
25
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5319
25
         "colons are forbidden from notation names '%s'\n",
5320
25
         name, NULL, NULL);
5321
25
  }
5322
1.05k
  if (SKIP_BLANKS == 0) {
5323
62
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5324
62
         "Space required after the NOTATION name'\n");
5325
62
      return;
5326
62
  }
5327
5328
  /*
5329
   * Parse the IDs.
5330
   */
5331
996
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5332
996
  SKIP_BLANKS;
5333
5334
996
  if (RAW == '>') {
5335
77
      if (inputid != ctxt->input->id) {
5336
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5337
0
                         "Notation declaration doesn't start and stop"
5338
0
                               " in the same entity\n");
5339
0
      }
5340
77
      NEXT;
5341
77
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5342
77
    (ctxt->sax->notationDecl != NULL))
5343
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5344
919
  } else {
5345
919
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5346
919
  }
5347
996
  if (Systemid != NULL) xmlFree(Systemid);
5348
996
  if (Pubid != NULL) xmlFree(Pubid);
5349
996
    }
5350
1.35k
}
5351
5352
/**
5353
 * xmlParseEntityDecl:
5354
 * @ctxt:  an XML parser context
5355
 *
5356
 * DEPRECATED: Internal function, don't use.
5357
 *
5358
 * Parse an entity declaration. Always consumes '<!'.
5359
 *
5360
 * [70] EntityDecl ::= GEDecl | PEDecl
5361
 *
5362
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5363
 *
5364
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5365
 *
5366
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5367
 *
5368
 * [74] PEDef ::= EntityValue | ExternalID
5369
 *
5370
 * [76] NDataDecl ::= S 'NDATA' S Name
5371
 *
5372
 * [ VC: Notation Declared ]
5373
 * The Name must match the declared name of a notation.
5374
 */
5375
5376
void
5377
21.5k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5378
21.5k
    const xmlChar *name = NULL;
5379
21.5k
    xmlChar *value = NULL;
5380
21.5k
    xmlChar *URI = NULL, *literal = NULL;
5381
21.5k
    const xmlChar *ndata = NULL;
5382
21.5k
    int isParameter = 0;
5383
21.5k
    xmlChar *orig = NULL;
5384
5385
21.5k
    if ((CUR != '<') || (NXT(1) != '!'))
5386
0
        return;
5387
21.5k
    SKIP(2);
5388
5389
    /* GROW; done in the caller */
5390
21.5k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5391
21.4k
  int inputid = ctxt->input->id;
5392
21.4k
  SKIP(6);
5393
21.4k
  if (SKIP_BLANKS == 0) {
5394
13.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5395
13.4k
         "Space required after '<!ENTITY'\n");
5396
13.4k
  }
5397
5398
21.4k
  if (RAW == '%') {
5399
3.65k
      NEXT;
5400
3.65k
      if (SKIP_BLANKS == 0) {
5401
3.30k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5402
3.30k
             "Space required after '%%'\n");
5403
3.30k
      }
5404
3.65k
      isParameter = 1;
5405
3.65k
  }
5406
5407
21.4k
        name = xmlParseName(ctxt);
5408
21.4k
  if (name == NULL) {
5409
453
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5410
453
                     "xmlParseEntityDecl: no name\n");
5411
453
            return;
5412
453
  }
5413
21.0k
  if (xmlStrchr(name, ':') != NULL) {
5414
525
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5415
525
         "colons are forbidden from entities names '%s'\n",
5416
525
         name, NULL, NULL);
5417
525
  }
5418
21.0k
  if (SKIP_BLANKS == 0) {
5419
13.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5420
13.2k
         "Space required after the entity name\n");
5421
13.2k
  }
5422
5423
21.0k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5424
  /*
5425
   * handle the various case of definitions...
5426
   */
5427
21.0k
  if (isParameter) {
5428
3.45k
      if ((RAW == '"') || (RAW == '\'')) {
5429
2.49k
          value = xmlParseEntityValue(ctxt, &orig);
5430
2.49k
    if (value) {
5431
1.39k
        if ((ctxt->sax != NULL) &&
5432
1.39k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5433
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5434
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5435
0
            NULL, NULL, value);
5436
1.39k
    }
5437
2.49k
      } else {
5438
966
          URI = xmlParseExternalID(ctxt, &literal, 1);
5439
966
    if ((URI == NULL) && (literal == NULL)) {
5440
61
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5441
61
    }
5442
966
    if (URI) {
5443
901
        xmlURIPtr uri;
5444
5445
901
        uri = xmlParseURI((const char *) URI);
5446
901
        if (uri == NULL) {
5447
595
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5448
595
             "Invalid URI: %s\n", URI);
5449
      /*
5450
       * This really ought to be a well formedness error
5451
       * but the XML Core WG decided otherwise c.f. issue
5452
       * E26 of the XML erratas.
5453
       */
5454
595
        } else {
5455
306
      if (uri->fragment != NULL) {
5456
          /*
5457
           * Okay this is foolish to block those but not
5458
           * invalid URIs.
5459
           */
5460
215
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5461
215
      } else {
5462
91
          if ((ctxt->sax != NULL) &&
5463
91
        (!ctxt->disableSAX) &&
5464
91
        (ctxt->sax->entityDecl != NULL))
5465
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5466
0
              XML_EXTERNAL_PARAMETER_ENTITY,
5467
0
              literal, URI, NULL);
5468
91
      }
5469
306
      xmlFreeURI(uri);
5470
306
        }
5471
901
    }
5472
966
      }
5473
17.5k
  } else {
5474
17.5k
      if ((RAW == '"') || (RAW == '\'')) {
5475
11.7k
          value = xmlParseEntityValue(ctxt, &orig);
5476
11.7k
    if ((ctxt->sax != NULL) &&
5477
11.7k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5478
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5479
0
        XML_INTERNAL_GENERAL_ENTITY,
5480
0
        NULL, NULL, value);
5481
    /*
5482
     * For expat compatibility in SAX mode.
5483
     */
5484
11.7k
    if ((ctxt->myDoc == NULL) ||
5485
11.7k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5486
11.7k
        if (ctxt->myDoc == NULL) {
5487
517
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5488
517
      if (ctxt->myDoc == NULL) {
5489
0
          xmlErrMemory(ctxt, "New Doc failed");
5490
0
          goto done;
5491
0
      }
5492
517
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5493
517
        }
5494
11.7k
        if (ctxt->myDoc->intSubset == NULL)
5495
517
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5496
517
              BAD_CAST "fake", NULL, NULL);
5497
5498
11.7k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5499
11.7k
                    NULL, NULL, value);
5500
11.7k
    }
5501
11.7k
      } else {
5502
5.86k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5503
5.86k
    if ((URI == NULL) && (literal == NULL)) {
5504
621
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5505
621
    }
5506
5.86k
    if (URI) {
5507
5.22k
        xmlURIPtr uri;
5508
5509
5.22k
        uri = xmlParseURI((const char *)URI);
5510
5.22k
        if (uri == NULL) {
5511
2.44k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5512
2.44k
             "Invalid URI: %s\n", URI);
5513
      /*
5514
       * This really ought to be a well formedness error
5515
       * but the XML Core WG decided otherwise c.f. issue
5516
       * E26 of the XML erratas.
5517
       */
5518
2.78k
        } else {
5519
2.78k
      if (uri->fragment != NULL) {
5520
          /*
5521
           * Okay this is foolish to block those but not
5522
           * invalid URIs.
5523
           */
5524
854
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5525
854
      }
5526
2.78k
      xmlFreeURI(uri);
5527
2.78k
        }
5528
5.22k
    }
5529
5.86k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5530
366
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5531
366
           "Space required before 'NDATA'\n");
5532
366
    }
5533
5.86k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5534
287
        SKIP(5);
5535
287
        if (SKIP_BLANKS == 0) {
5536
133
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5537
133
               "Space required after 'NDATA'\n");
5538
133
        }
5539
287
        ndata = xmlParseName(ctxt);
5540
287
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5541
287
            (ctxt->sax->unparsedEntityDecl != NULL))
5542
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5543
0
            literal, URI, ndata);
5544
5.58k
    } else {
5545
5.58k
        if ((ctxt->sax != NULL) &&
5546
5.58k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5547
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5548
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5549
0
            literal, URI, NULL);
5550
        /*
5551
         * For expat compatibility in SAX mode.
5552
         * assuming the entity replacement was asked for
5553
         */
5554
5.58k
        if ((ctxt->replaceEntities != 0) &&
5555
5.58k
      ((ctxt->myDoc == NULL) ||
5556
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5557
0
      if (ctxt->myDoc == NULL) {
5558
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5559
0
          if (ctxt->myDoc == NULL) {
5560
0
              xmlErrMemory(ctxt, "New Doc failed");
5561
0
        goto done;
5562
0
          }
5563
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5564
0
      }
5565
5566
0
      if (ctxt->myDoc->intSubset == NULL)
5567
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5568
0
            BAD_CAST "fake", NULL, NULL);
5569
0
      xmlSAX2EntityDecl(ctxt, name,
5570
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5571
0
                  literal, URI, NULL);
5572
0
        }
5573
5.58k
    }
5574
5.86k
      }
5575
17.5k
  }
5576
21.0k
  if (ctxt->instate == XML_PARSER_EOF)
5577
0
      goto done;
5578
21.0k
  SKIP_BLANKS;
5579
21.0k
  if (RAW != '>') {
5580
598
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5581
598
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5582
598
      xmlHaltParser(ctxt);
5583
20.4k
  } else {
5584
20.4k
      if (inputid != ctxt->input->id) {
5585
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5586
0
                         "Entity declaration doesn't start and stop in"
5587
0
                               " the same entity\n");
5588
0
      }
5589
20.4k
      NEXT;
5590
20.4k
  }
5591
21.0k
  if (orig != NULL) {
5592
      /*
5593
       * Ugly mechanism to save the raw entity value.
5594
       */
5595
11.6k
      xmlEntityPtr cur = NULL;
5596
5597
11.6k
      if (isParameter) {
5598
2.05k
          if ((ctxt->sax != NULL) &&
5599
2.05k
        (ctxt->sax->getParameterEntity != NULL))
5600
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5601
9.58k
      } else {
5602
9.58k
          if ((ctxt->sax != NULL) &&
5603
9.58k
        (ctxt->sax->getEntity != NULL))
5604
9.58k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5605
9.58k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5606
0
        cur = xmlSAX2GetEntity(ctxt, name);
5607
0
    }
5608
9.58k
      }
5609
11.6k
            if ((cur != NULL) && (cur->orig == NULL)) {
5610
1
    cur->orig = orig;
5611
1
                orig = NULL;
5612
1
      }
5613
11.6k
  }
5614
5615
21.0k
done:
5616
21.0k
  if (value != NULL) xmlFree(value);
5617
21.0k
  if (URI != NULL) xmlFree(URI);
5618
21.0k
  if (literal != NULL) xmlFree(literal);
5619
21.0k
        if (orig != NULL) xmlFree(orig);
5620
21.0k
    }
5621
21.5k
}
5622
5623
/**
5624
 * xmlParseDefaultDecl:
5625
 * @ctxt:  an XML parser context
5626
 * @value:  Receive a possible fixed default value for the attribute
5627
 *
5628
 * DEPRECATED: Internal function, don't use.
5629
 *
5630
 * Parse an attribute default declaration
5631
 *
5632
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5633
 *
5634
 * [ VC: Required Attribute ]
5635
 * if the default declaration is the keyword #REQUIRED, then the
5636
 * attribute must be specified for all elements of the type in the
5637
 * attribute-list declaration.
5638
 *
5639
 * [ VC: Attribute Default Legal ]
5640
 * The declared default value must meet the lexical constraints of
5641
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5642
 *
5643
 * [ VC: Fixed Attribute Default ]
5644
 * if an attribute has a default value declared with the #FIXED
5645
 * keyword, instances of that attribute must match the default value.
5646
 *
5647
 * [ WFC: No < in Attribute Values ]
5648
 * handled in xmlParseAttValue()
5649
 *
5650
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5651
 *          or XML_ATTRIBUTE_FIXED.
5652
 */
5653
5654
int
5655
15.5k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5656
15.5k
    int val;
5657
15.5k
    xmlChar *ret;
5658
5659
15.5k
    *value = NULL;
5660
15.5k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5661
498
  SKIP(9);
5662
498
  return(XML_ATTRIBUTE_REQUIRED);
5663
498
    }
5664
15.0k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5665
1.00k
  SKIP(8);
5666
1.00k
  return(XML_ATTRIBUTE_IMPLIED);
5667
1.00k
    }
5668
14.0k
    val = XML_ATTRIBUTE_NONE;
5669
14.0k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5670
68
  SKIP(6);
5671
68
  val = XML_ATTRIBUTE_FIXED;
5672
68
  if (SKIP_BLANKS == 0) {
5673
27
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5674
27
         "Space required after '#FIXED'\n");
5675
27
  }
5676
68
    }
5677
14.0k
    ret = xmlParseAttValue(ctxt);
5678
14.0k
    ctxt->instate = XML_PARSER_DTD;
5679
14.0k
    if (ret == NULL) {
5680
171
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5681
171
           "Attribute default value declaration error\n");
5682
171
    } else
5683
13.8k
        *value = ret;
5684
14.0k
    return(val);
5685
15.0k
}
5686
5687
/**
5688
 * xmlParseNotationType:
5689
 * @ctxt:  an XML parser context
5690
 *
5691
 * DEPRECATED: Internal function, don't use.
5692
 *
5693
 * parse an Notation attribute type.
5694
 *
5695
 * Note: the leading 'NOTATION' S part has already being parsed...
5696
 *
5697
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5698
 *
5699
 * [ VC: Notation Attributes ]
5700
 * Values of this type must match one of the notation names included
5701
 * in the declaration; all notation names in the declaration must be declared.
5702
 *
5703
 * Returns: the notation attribute tree built while parsing
5704
 */
5705
5706
xmlEnumerationPtr
5707
756
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5708
756
    const xmlChar *name;
5709
756
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5710
5711
756
    if (RAW != '(') {
5712
183
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5713
183
  return(NULL);
5714
183
    }
5715
7.39k
    do {
5716
7.39k
        NEXT;
5717
7.39k
  SKIP_BLANKS;
5718
7.39k
        name = xmlParseName(ctxt);
5719
7.39k
  if (name == NULL) {
5720
202
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5721
202
         "Name expected in NOTATION declaration\n");
5722
202
            xmlFreeEnumeration(ret);
5723
202
      return(NULL);
5724
202
  }
5725
7.18k
  tmp = ret;
5726
51.8k
  while (tmp != NULL) {
5727
48.0k
      if (xmlStrEqual(name, tmp->name)) {
5728
3.47k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5729
3.47k
    "standalone: attribute notation value token %s duplicated\n",
5730
3.47k
         name, NULL);
5731
3.47k
    if (!xmlDictOwns(ctxt->dict, name))
5732
0
        xmlFree((xmlChar *) name);
5733
3.47k
    break;
5734
3.47k
      }
5735
44.6k
      tmp = tmp->next;
5736
44.6k
  }
5737
7.18k
  if (tmp == NULL) {
5738
3.71k
      cur = xmlCreateEnumeration(name);
5739
3.71k
      if (cur == NULL) {
5740
0
                xmlFreeEnumeration(ret);
5741
0
                return(NULL);
5742
0
            }
5743
3.71k
      if (last == NULL) ret = last = cur;
5744
3.15k
      else {
5745
3.15k
    last->next = cur;
5746
3.15k
    last = cur;
5747
3.15k
      }
5748
3.71k
  }
5749
7.18k
  SKIP_BLANKS;
5750
7.18k
    } while (RAW == '|');
5751
371
    if (RAW != ')') {
5752
292
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5753
292
        xmlFreeEnumeration(ret);
5754
292
  return(NULL);
5755
292
    }
5756
79
    NEXT;
5757
79
    return(ret);
5758
371
}
5759
5760
/**
5761
 * xmlParseEnumerationType:
5762
 * @ctxt:  an XML parser context
5763
 *
5764
 * DEPRECATED: Internal function, don't use.
5765
 *
5766
 * parse an Enumeration attribute type.
5767
 *
5768
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5769
 *
5770
 * [ VC: Enumeration ]
5771
 * Values of this type must match one of the Nmtoken tokens in
5772
 * the declaration
5773
 *
5774
 * Returns: the enumeration attribute tree built while parsing
5775
 */
5776
5777
xmlEnumerationPtr
5778
2.90k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5779
2.90k
    xmlChar *name;
5780
2.90k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5781
5782
2.90k
    if (RAW != '(') {
5783
445
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5784
445
  return(NULL);
5785
445
    }
5786
12.6k
    do {
5787
12.6k
        NEXT;
5788
12.6k
  SKIP_BLANKS;
5789
12.6k
        name = xmlParseNmtoken(ctxt);
5790
12.6k
  if (name == NULL) {
5791
43
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5792
43
      return(ret);
5793
43
  }
5794
12.6k
  tmp = ret;
5795
67.1k
  while (tmp != NULL) {
5796
59.7k
      if (xmlStrEqual(name, tmp->name)) {
5797
5.17k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5798
5.17k
    "standalone: attribute enumeration value token %s duplicated\n",
5799
5.17k
         name, NULL);
5800
5.17k
    if (!xmlDictOwns(ctxt->dict, name))
5801
5.17k
        xmlFree(name);
5802
5.17k
    break;
5803
5.17k
      }
5804
54.5k
      tmp = tmp->next;
5805
54.5k
  }
5806
12.6k
  if (tmp == NULL) {
5807
7.47k
      cur = xmlCreateEnumeration(name);
5808
7.47k
      if (!xmlDictOwns(ctxt->dict, name))
5809
7.47k
    xmlFree(name);
5810
7.47k
      if (cur == NULL) {
5811
0
                xmlFreeEnumeration(ret);
5812
0
                return(NULL);
5813
0
            }
5814
7.47k
      if (last == NULL) ret = last = cur;
5815
5.02k
      else {
5816
5.02k
    last->next = cur;
5817
5.02k
    last = cur;
5818
5.02k
      }
5819
7.47k
  }
5820
12.6k
  SKIP_BLANKS;
5821
12.6k
    } while (RAW == '|');
5822
2.42k
    if (RAW != ')') {
5823
607
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5824
607
  return(ret);
5825
607
    }
5826
1.81k
    NEXT;
5827
1.81k
    return(ret);
5828
2.42k
}
5829
5830
/**
5831
 * xmlParseEnumeratedType:
5832
 * @ctxt:  an XML parser context
5833
 * @tree:  the enumeration tree built while parsing
5834
 *
5835
 * DEPRECATED: Internal function, don't use.
5836
 *
5837
 * parse an Enumerated attribute type.
5838
 *
5839
 * [57] EnumeratedType ::= NotationType | Enumeration
5840
 *
5841
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5842
 *
5843
 *
5844
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5845
 */
5846
5847
int
5848
3.66k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5849
3.66k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5850
761
  SKIP(8);
5851
761
  if (SKIP_BLANKS == 0) {
5852
5
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5853
5
         "Space required after 'NOTATION'\n");
5854
5
      return(0);
5855
5
  }
5856
756
  *tree = xmlParseNotationType(ctxt);
5857
756
  if (*tree == NULL) return(0);
5858
79
  return(XML_ATTRIBUTE_NOTATION);
5859
756
    }
5860
2.90k
    *tree = xmlParseEnumerationType(ctxt);
5861
2.90k
    if (*tree == NULL) return(0);
5862
2.44k
    return(XML_ATTRIBUTE_ENUMERATION);
5863
2.90k
}
5864
5865
/**
5866
 * xmlParseAttributeType:
5867
 * @ctxt:  an XML parser context
5868
 * @tree:  the enumeration tree built while parsing
5869
 *
5870
 * DEPRECATED: Internal function, don't use.
5871
 *
5872
 * parse the Attribute list def for an element
5873
 *
5874
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5875
 *
5876
 * [55] StringType ::= 'CDATA'
5877
 *
5878
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5879
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5880
 *
5881
 * Validity constraints for attribute values syntax are checked in
5882
 * xmlValidateAttributeValue()
5883
 *
5884
 * [ VC: ID ]
5885
 * Values of type ID must match the Name production. A name must not
5886
 * appear more than once in an XML document as a value of this type;
5887
 * i.e., ID values must uniquely identify the elements which bear them.
5888
 *
5889
 * [ VC: One ID per Element Type ]
5890
 * No element type may have more than one ID attribute specified.
5891
 *
5892
 * [ VC: ID Attribute Default ]
5893
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5894
 *
5895
 * [ VC: IDREF ]
5896
 * Values of type IDREF must match the Name production, and values
5897
 * of type IDREFS must match Names; each IDREF Name must match the value
5898
 * of an ID attribute on some element in the XML document; i.e. IDREF
5899
 * values must match the value of some ID attribute.
5900
 *
5901
 * [ VC: Entity Name ]
5902
 * Values of type ENTITY must match the Name production, values
5903
 * of type ENTITIES must match Names; each Entity Name must match the
5904
 * name of an unparsed entity declared in the DTD.
5905
 *
5906
 * [ VC: Name Token ]
5907
 * Values of type NMTOKEN must match the Nmtoken production; values
5908
 * of type NMTOKENS must match Nmtokens.
5909
 *
5910
 * Returns the attribute type
5911
 */
5912
int
5913
17.4k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5914
17.4k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5915
612
  SKIP(5);
5916
612
  return(XML_ATTRIBUTE_CDATA);
5917
16.8k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5918
8.49k
  SKIP(6);
5919
8.49k
  return(XML_ATTRIBUTE_IDREFS);
5920
8.49k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5921
154
  SKIP(5);
5922
154
  return(XML_ATTRIBUTE_IDREF);
5923
8.18k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5924
3.16k
        SKIP(2);
5925
3.16k
  return(XML_ATTRIBUTE_ID);
5926
5.02k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5927
19
  SKIP(6);
5928
19
  return(XML_ATTRIBUTE_ENTITY);
5929
5.00k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5930
1.19k
  SKIP(8);
5931
1.19k
  return(XML_ATTRIBUTE_ENTITIES);
5932
3.80k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5933
77
  SKIP(8);
5934
77
  return(XML_ATTRIBUTE_NMTOKENS);
5935
3.72k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5936
60
  SKIP(7);
5937
60
  return(XML_ATTRIBUTE_NMTOKEN);
5938
60
     }
5939
3.66k
     return(xmlParseEnumeratedType(ctxt, tree));
5940
17.4k
}
5941
5942
/**
5943
 * xmlParseAttributeListDecl:
5944
 * @ctxt:  an XML parser context
5945
 *
5946
 * DEPRECATED: Internal function, don't use.
5947
 *
5948
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5949
 *
5950
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5951
 *
5952
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5953
 *
5954
 */
5955
void
5956
6.06k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5957
6.06k
    const xmlChar *elemName;
5958
6.06k
    const xmlChar *attrName;
5959
6.06k
    xmlEnumerationPtr tree;
5960
5961
6.06k
    if ((CUR != '<') || (NXT(1) != '!'))
5962
0
        return;
5963
6.06k
    SKIP(2);
5964
5965
6.06k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5966
6.02k
  int inputid = ctxt->input->id;
5967
5968
6.02k
  SKIP(7);
5969
6.02k
  if (SKIP_BLANKS == 0) {
5970
2.39k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5971
2.39k
                     "Space required after '<!ATTLIST'\n");
5972
2.39k
  }
5973
6.02k
        elemName = xmlParseName(ctxt);
5974
6.02k
  if (elemName == NULL) {
5975
226
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5976
226
         "ATTLIST: no name for Element\n");
5977
226
      return;
5978
226
  }
5979
5.80k
  SKIP_BLANKS;
5980
5.80k
  GROW;
5981
20.6k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5982
18.1k
      int type;
5983
18.1k
      int def;
5984
18.1k
      xmlChar *defaultValue = NULL;
5985
5986
18.1k
      GROW;
5987
18.1k
            tree = NULL;
5988
18.1k
      attrName = xmlParseName(ctxt);
5989
18.1k
      if (attrName == NULL) {
5990
362
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5991
362
             "ATTLIST: no name for Attribute\n");
5992
362
    break;
5993
362
      }
5994
17.7k
      GROW;
5995
17.7k
      if (SKIP_BLANKS == 0) {
5996
298
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5997
298
            "Space required after the attribute name\n");
5998
298
    break;
5999
298
      }
6000
6001
17.4k
      type = xmlParseAttributeType(ctxt, &tree);
6002
17.4k
      if (type <= 0) {
6003
1.14k
          break;
6004
1.14k
      }
6005
6006
16.3k
      GROW;
6007
16.3k
      if (SKIP_BLANKS == 0) {
6008
726
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6009
726
             "Space required after the attribute type\n");
6010
726
          if (tree != NULL)
6011
638
        xmlFreeEnumeration(tree);
6012
726
    break;
6013
726
      }
6014
6015
15.5k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6016
15.5k
      if (def <= 0) {
6017
0
                if (defaultValue != NULL)
6018
0
        xmlFree(defaultValue);
6019
0
          if (tree != NULL)
6020
0
        xmlFreeEnumeration(tree);
6021
0
          break;
6022
0
      }
6023
15.5k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6024
13.2k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6025
6026
15.5k
      GROW;
6027
15.5k
            if (RAW != '>') {
6028
13.6k
    if (SKIP_BLANKS == 0) {
6029
718
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6030
718
      "Space required after the attribute default value\n");
6031
718
        if (defaultValue != NULL)
6032
558
      xmlFree(defaultValue);
6033
718
        if (tree != NULL)
6034
470
      xmlFreeEnumeration(tree);
6035
718
        break;
6036
718
    }
6037
13.6k
      }
6038
14.8k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6039
14.8k
    (ctxt->sax->attributeDecl != NULL))
6040
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6041
0
                          type, def, defaultValue, tree);
6042
14.8k
      else if (tree != NULL)
6043
1.41k
    xmlFreeEnumeration(tree);
6044
6045
14.8k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6046
14.8k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6047
14.8k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6048
13.3k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6049
13.3k
      }
6050
14.8k
      if (ctxt->sax2) {
6051
14.8k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6052
14.8k
      }
6053
14.8k
      if (defaultValue != NULL)
6054
13.3k
          xmlFree(defaultValue);
6055
14.8k
      GROW;
6056
14.8k
  }
6057
5.80k
  if (RAW == '>') {
6058
2.62k
      if (inputid != ctxt->input->id) {
6059
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6060
0
                               "Attribute list declaration doesn't start and"
6061
0
                               " stop in the same entity\n");
6062
0
      }
6063
2.62k
      NEXT;
6064
2.62k
  }
6065
5.80k
    }
6066
6.06k
}
6067
6068
/**
6069
 * xmlParseElementMixedContentDecl:
6070
 * @ctxt:  an XML parser context
6071
 * @inputchk:  the input used for the current entity, needed for boundary checks
6072
 *
6073
 * DEPRECATED: Internal function, don't use.
6074
 *
6075
 * parse the declaration for a Mixed Element content
6076
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6077
 *
6078
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6079
 *                '(' S? '#PCDATA' S? ')'
6080
 *
6081
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6082
 *
6083
 * [ VC: No Duplicate Types ]
6084
 * The same name must not appear more than once in a single
6085
 * mixed-content declaration.
6086
 *
6087
 * returns: the list of the xmlElementContentPtr describing the element choices
6088
 */
6089
xmlElementContentPtr
6090
7.15k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6091
7.15k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6092
7.15k
    const xmlChar *elem = NULL;
6093
6094
7.15k
    GROW;
6095
7.15k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6096
7.15k
  SKIP(7);
6097
7.15k
  SKIP_BLANKS;
6098
7.15k
  if (RAW == ')') {
6099
4.33k
      if (ctxt->input->id != inputchk) {
6100
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6101
0
                               "Element content declaration doesn't start and"
6102
0
                               " stop in the same entity\n");
6103
0
      }
6104
4.33k
      NEXT;
6105
4.33k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6106
4.33k
      if (ret == NULL)
6107
0
          return(NULL);
6108
4.33k
      if (RAW == '*') {
6109
529
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6110
529
    NEXT;
6111
529
      }
6112
4.33k
      return(ret);
6113
4.33k
  }
6114
2.81k
  if ((RAW == '(') || (RAW == '|')) {
6115
2.51k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6116
2.51k
      if (ret == NULL) return(NULL);
6117
2.51k
  }
6118
5.63k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6119
2.83k
      NEXT;
6120
2.83k
      if (elem == NULL) {
6121
2.51k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6122
2.51k
    if (ret == NULL) {
6123
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6124
0
                    return(NULL);
6125
0
                }
6126
2.51k
    ret->c1 = cur;
6127
2.51k
    if (cur != NULL)
6128
2.51k
        cur->parent = ret;
6129
2.51k
    cur = ret;
6130
2.51k
      } else {
6131
316
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6132
316
    if (n == NULL) {
6133
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6134
0
                    return(NULL);
6135
0
                }
6136
316
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6137
316
    if (n->c1 != NULL)
6138
316
        n->c1->parent = n;
6139
316
          cur->c2 = n;
6140
316
    if (n != NULL)
6141
316
        n->parent = cur;
6142
316
    cur = n;
6143
316
      }
6144
2.83k
      SKIP_BLANKS;
6145
2.83k
      elem = xmlParseName(ctxt);
6146
2.83k
      if (elem == NULL) {
6147
12
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6148
12
      "xmlParseElementMixedContentDecl : Name expected\n");
6149
12
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6150
12
    return(NULL);
6151
12
      }
6152
2.82k
      SKIP_BLANKS;
6153
2.82k
      GROW;
6154
2.82k
  }
6155
2.80k
  if ((RAW == ')') && (NXT(1) == '*')) {
6156
2.25k
      if (elem != NULL) {
6157
2.25k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6158
2.25k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6159
2.25k
    if (cur->c2 != NULL)
6160
2.25k
        cur->c2->parent = cur;
6161
2.25k
            }
6162
2.25k
            if (ret != NULL)
6163
2.25k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6164
2.25k
      if (ctxt->input->id != inputchk) {
6165
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6166
0
                               "Element content declaration doesn't start and"
6167
0
                               " stop in the same entity\n");
6168
0
      }
6169
2.25k
      SKIP(2);
6170
2.25k
  } else {
6171
550
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6172
550
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6173
550
      return(NULL);
6174
550
  }
6175
6176
2.80k
    } else {
6177
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6178
0
    }
6179
2.25k
    return(ret);
6180
7.15k
}
6181
6182
/**
6183
 * xmlParseElementChildrenContentDeclPriv:
6184
 * @ctxt:  an XML parser context
6185
 * @inputchk:  the input used for the current entity, needed for boundary checks
6186
 * @depth: the level of recursion
6187
 *
6188
 * parse the declaration for a Mixed Element content
6189
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6190
 *
6191
 *
6192
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6193
 *
6194
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6195
 *
6196
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6197
 *
6198
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6199
 *
6200
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6201
 * TODO Parameter-entity replacement text must be properly nested
6202
 *  with parenthesized groups. That is to say, if either of the
6203
 *  opening or closing parentheses in a choice, seq, or Mixed
6204
 *  construct is contained in the replacement text for a parameter
6205
 *  entity, both must be contained in the same replacement text. For
6206
 *  interoperability, if a parameter-entity reference appears in a
6207
 *  choice, seq, or Mixed construct, its replacement text should not
6208
 *  be empty, and neither the first nor last non-blank character of
6209
 *  the replacement text should be a connector (| or ,).
6210
 *
6211
 * Returns the tree of xmlElementContentPtr describing the element
6212
 *          hierarchy.
6213
 */
6214
static xmlElementContentPtr
6215
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6216
53.0k
                                       int depth) {
6217
53.0k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6218
53.0k
    const xmlChar *elem;
6219
53.0k
    xmlChar type = 0;
6220
6221
53.0k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6222
53.0k
        (depth >  2048)) {
6223
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6224
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6225
0
                          depth);
6226
0
  return(NULL);
6227
0
    }
6228
53.0k
    SKIP_BLANKS;
6229
53.0k
    GROW;
6230
53.0k
    if (RAW == '(') {
6231
44.2k
  int inputid = ctxt->input->id;
6232
6233
        /* Recurse on first child */
6234
44.2k
  NEXT;
6235
44.2k
  SKIP_BLANKS;
6236
44.2k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6237
44.2k
                                                           depth + 1);
6238
44.2k
        if (cur == NULL)
6239
34.1k
            return(NULL);
6240
10.0k
  SKIP_BLANKS;
6241
10.0k
  GROW;
6242
10.0k
    } else {
6243
8.78k
  elem = xmlParseName(ctxt);
6244
8.78k
  if (elem == NULL) {
6245
485
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6246
485
      return(NULL);
6247
485
  }
6248
8.29k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6249
8.29k
  if (cur == NULL) {
6250
0
      xmlErrMemory(ctxt, NULL);
6251
0
      return(NULL);
6252
0
  }
6253
8.29k
  GROW;
6254
8.29k
  if (RAW == '?') {
6255
777
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6256
777
      NEXT;
6257
7.52k
  } else if (RAW == '*') {
6258
971
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6259
971
      NEXT;
6260
6.54k
  } else if (RAW == '+') {
6261
480
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6262
480
      NEXT;
6263
6.06k
  } else {
6264
6.06k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6265
6.06k
  }
6266
8.29k
  GROW;
6267
8.29k
    }
6268
18.3k
    SKIP_BLANKS;
6269
25.8k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6270
        /*
6271
   * Each loop we parse one separator and one element.
6272
   */
6273
12.2k
        if (RAW == ',') {
6274
1.74k
      if (type == 0) type = CUR;
6275
6276
      /*
6277
       * Detect "Name | Name , Name" error
6278
       */
6279
443
      else if (type != CUR) {
6280
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6281
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6282
1
                      type);
6283
1
    if ((last != NULL) && (last != ret))
6284
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6285
1
    if (ret != NULL)
6286
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6287
1
    return(NULL);
6288
1
      }
6289
1.74k
      NEXT;
6290
6291
1.74k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6292
1.74k
      if (op == NULL) {
6293
0
    if ((last != NULL) && (last != ret))
6294
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6295
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6296
0
    return(NULL);
6297
0
      }
6298
1.74k
      if (last == NULL) {
6299
1.30k
    op->c1 = ret;
6300
1.30k
    if (ret != NULL)
6301
1.30k
        ret->parent = op;
6302
1.30k
    ret = cur = op;
6303
1.30k
      } else {
6304
442
          cur->c2 = op;
6305
442
    if (op != NULL)
6306
442
        op->parent = cur;
6307
442
    op->c1 = last;
6308
442
    if (last != NULL)
6309
442
        last->parent = op;
6310
442
    cur =op;
6311
442
    last = NULL;
6312
442
      }
6313
10.4k
  } else if (RAW == '|') {
6314
9.03k
      if (type == 0) type = CUR;
6315
6316
      /*
6317
       * Detect "Name , Name | Name" error
6318
       */
6319
2.94k
      else if (type != CUR) {
6320
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6321
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6322
1
          type);
6323
1
    if ((last != NULL) && (last != ret))
6324
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6325
1
    if (ret != NULL)
6326
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6327
1
    return(NULL);
6328
1
      }
6329
9.02k
      NEXT;
6330
6331
9.02k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6332
9.02k
      if (op == NULL) {
6333
0
    if ((last != NULL) && (last != ret))
6334
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6335
0
    if (ret != NULL)
6336
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6337
0
    return(NULL);
6338
0
      }
6339
9.02k
      if (last == NULL) {
6340
6.08k
    op->c1 = ret;
6341
6.08k
    if (ret != NULL)
6342
6.08k
        ret->parent = op;
6343
6.08k
    ret = cur = op;
6344
6.08k
      } else {
6345
2.94k
          cur->c2 = op;
6346
2.94k
    if (op != NULL)
6347
2.94k
        op->parent = cur;
6348
2.94k
    op->c1 = last;
6349
2.94k
    if (last != NULL)
6350
2.94k
        last->parent = op;
6351
2.94k
    cur =op;
6352
2.94k
    last = NULL;
6353
2.94k
      }
6354
9.02k
  } else {
6355
1.45k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6356
1.45k
      if ((last != NULL) && (last != ret))
6357
899
          xmlFreeDocElementContent(ctxt->myDoc, last);
6358
1.45k
      if (ret != NULL)
6359
1.45k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6360
1.45k
      return(NULL);
6361
1.45k
  }
6362
10.7k
  GROW;
6363
10.7k
  SKIP_BLANKS;
6364
10.7k
  GROW;
6365
10.7k
  if (RAW == '(') {
6366
6.63k
      int inputid = ctxt->input->id;
6367
      /* Recurse on second child */
6368
6.63k
      NEXT;
6369
6.63k
      SKIP_BLANKS;
6370
6.63k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6371
6.63k
                                                          depth + 1);
6372
6.63k
            if (last == NULL) {
6373
3.19k
    if (ret != NULL)
6374
3.19k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6375
3.19k
    return(NULL);
6376
3.19k
            }
6377
3.44k
      SKIP_BLANKS;
6378
4.13k
  } else {
6379
4.13k
      elem = xmlParseName(ctxt);
6380
4.13k
      if (elem == NULL) {
6381
92
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6382
92
    if (ret != NULL)
6383
92
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6384
92
    return(NULL);
6385
92
      }
6386
4.04k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6387
4.04k
      if (last == NULL) {
6388
0
    if (ret != NULL)
6389
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6390
0
    return(NULL);
6391
0
      }
6392
4.04k
      if (RAW == '?') {
6393
693
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6394
693
    NEXT;
6395
3.35k
      } else if (RAW == '*') {
6396
494
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6397
494
    NEXT;
6398
2.85k
      } else if (RAW == '+') {
6399
29
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6400
29
    NEXT;
6401
2.82k
      } else {
6402
2.82k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6403
2.82k
      }
6404
4.04k
  }
6405
7.48k
  SKIP_BLANKS;
6406
7.48k
  GROW;
6407
7.48k
    }
6408
13.6k
    if ((cur != NULL) && (last != NULL)) {
6409
3.20k
        cur->c2 = last;
6410
3.20k
  if (last != NULL)
6411
3.20k
      last->parent = cur;
6412
3.20k
    }
6413
13.6k
    if (ctxt->input->id != inputchk) {
6414
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6415
0
                       "Element content declaration doesn't start and stop in"
6416
0
                       " the same entity\n");
6417
0
    }
6418
13.6k
    NEXT;
6419
13.6k
    if (RAW == '?') {
6420
2.16k
  if (ret != NULL) {
6421
2.16k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6422
2.16k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6423
415
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6424
1.75k
      else
6425
1.75k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6426
2.16k
  }
6427
2.16k
  NEXT;
6428
11.4k
    } else if (RAW == '*') {
6429
1.70k
  if (ret != NULL) {
6430
1.70k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6431
1.70k
      cur = ret;
6432
      /*
6433
       * Some normalization:
6434
       * (a | b* | c?)* == (a | b | c)*
6435
       */
6436
4.52k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6437
2.82k
    if ((cur->c1 != NULL) &&
6438
2.82k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6439
2.82k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6440
742
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6441
2.82k
    if ((cur->c2 != NULL) &&
6442
2.82k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6443
2.82k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6444
753
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6445
2.82k
    cur = cur->c2;
6446
2.82k
      }
6447
1.70k
  }
6448
1.70k
  NEXT;
6449
9.76k
    } else if (RAW == '+') {
6450
2.84k
  if (ret != NULL) {
6451
2.84k
      int found = 0;
6452
6453
2.84k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6454
2.84k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6455
1.48k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6456
1.36k
      else
6457
1.36k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6458
      /*
6459
       * Some normalization:
6460
       * (a | b*)+ == (a | b)*
6461
       * (a | b?)+ == (a | b)*
6462
       */
6463
6.10k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6464
3.26k
    if ((cur->c1 != NULL) &&
6465
3.26k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6466
3.26k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6467
813
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6468
813
        found = 1;
6469
813
    }
6470
3.26k
    if ((cur->c2 != NULL) &&
6471
3.26k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6472
3.26k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6473
1.79k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6474
1.79k
        found = 1;
6475
1.79k
    }
6476
3.26k
    cur = cur->c2;
6477
3.26k
      }
6478
2.84k
      if (found)
6479
1.21k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6480
2.84k
  }
6481
2.84k
  NEXT;
6482
2.84k
    }
6483
13.6k
    return(ret);
6484
18.3k
}
6485
6486
/**
6487
 * xmlParseElementChildrenContentDecl:
6488
 * @ctxt:  an XML parser context
6489
 * @inputchk:  the input used for the current entity, needed for boundary checks
6490
 *
6491
 * DEPRECATED: Internal function, don't use.
6492
 *
6493
 * parse the declaration for a Mixed Element content
6494
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6495
 *
6496
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6497
 *
6498
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6499
 *
6500
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6501
 *
6502
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6503
 *
6504
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6505
 * TODO Parameter-entity replacement text must be properly nested
6506
 *  with parenthesized groups. That is to say, if either of the
6507
 *  opening or closing parentheses in a choice, seq, or Mixed
6508
 *  construct is contained in the replacement text for a parameter
6509
 *  entity, both must be contained in the same replacement text. For
6510
 *  interoperability, if a parameter-entity reference appears in a
6511
 *  choice, seq, or Mixed construct, its replacement text should not
6512
 *  be empty, and neither the first nor last non-blank character of
6513
 *  the replacement text should be a connector (| or ,).
6514
 *
6515
 * Returns the tree of xmlElementContentPtr describing the element
6516
 *          hierarchy.
6517
 */
6518
xmlElementContentPtr
6519
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6520
    /* stub left for API/ABI compat */
6521
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6522
0
}
6523
6524
/**
6525
 * xmlParseElementContentDecl:
6526
 * @ctxt:  an XML parser context
6527
 * @name:  the name of the element being defined.
6528
 * @result:  the Element Content pointer will be stored here if any
6529
 *
6530
 * DEPRECATED: Internal function, don't use.
6531
 *
6532
 * parse the declaration for an Element content either Mixed or Children,
6533
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6534
 *
6535
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6536
 *
6537
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6538
 */
6539
6540
int
6541
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6542
9.29k
                           xmlElementContentPtr *result) {
6543
6544
9.29k
    xmlElementContentPtr tree = NULL;
6545
9.29k
    int inputid = ctxt->input->id;
6546
9.29k
    int res;
6547
6548
9.29k
    *result = NULL;
6549
6550
9.29k
    if (RAW != '(') {
6551
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6552
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6553
0
  return(-1);
6554
0
    }
6555
9.29k
    NEXT;
6556
9.29k
    GROW;
6557
9.29k
    if (ctxt->instate == XML_PARSER_EOF)
6558
0
        return(-1);
6559
9.29k
    SKIP_BLANKS;
6560
9.29k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6561
7.15k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6562
7.15k
  res = XML_ELEMENT_TYPE_MIXED;
6563
7.15k
    } else {
6564
2.14k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6565
2.14k
  res = XML_ELEMENT_TYPE_ELEMENT;
6566
2.14k
    }
6567
9.29k
    SKIP_BLANKS;
6568
9.29k
    *result = tree;
6569
9.29k
    return(res);
6570
9.29k
}
6571
6572
/**
6573
 * xmlParseElementDecl:
6574
 * @ctxt:  an XML parser context
6575
 *
6576
 * DEPRECATED: Internal function, don't use.
6577
 *
6578
 * Parse an element declaration. Always consumes '<!'.
6579
 *
6580
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6581
 *
6582
 * [ VC: Unique Element Type Declaration ]
6583
 * No element type may be declared more than once
6584
 *
6585
 * Returns the type of the element, or -1 in case of error
6586
 */
6587
int
6588
10.4k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6589
10.4k
    const xmlChar *name;
6590
10.4k
    int ret = -1;
6591
10.4k
    xmlElementContentPtr content  = NULL;
6592
6593
10.4k
    if ((CUR != '<') || (NXT(1) != '!'))
6594
0
        return(ret);
6595
10.4k
    SKIP(2);
6596
6597
    /* GROW; done in the caller */
6598
10.4k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6599
10.4k
  int inputid = ctxt->input->id;
6600
6601
10.4k
  SKIP(7);
6602
10.4k
  if (SKIP_BLANKS == 0) {
6603
108
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6604
108
               "Space required after 'ELEMENT'\n");
6605
108
      return(-1);
6606
108
  }
6607
10.3k
        name = xmlParseName(ctxt);
6608
10.3k
  if (name == NULL) {
6609
175
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6610
175
         "xmlParseElementDecl: no name for Element\n");
6611
175
      return(-1);
6612
175
  }
6613
10.1k
  if (SKIP_BLANKS == 0) {
6614
1.99k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6615
1.99k
         "Space required after the element name\n");
6616
1.99k
  }
6617
10.1k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6618
12
      SKIP(5);
6619
      /*
6620
       * Element must always be empty.
6621
       */
6622
12
      ret = XML_ELEMENT_TYPE_EMPTY;
6623
10.1k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6624
10.1k
             (NXT(2) == 'Y')) {
6625
6
      SKIP(3);
6626
      /*
6627
       * Element is a generic container.
6628
       */
6629
6
      ret = XML_ELEMENT_TYPE_ANY;
6630
10.1k
  } else if (RAW == '(') {
6631
9.29k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6632
9.29k
  } else {
6633
      /*
6634
       * [ WFC: PEs in Internal Subset ] error handling.
6635
       */
6636
862
      if ((RAW == '%') && (ctxt->external == 0) &&
6637
862
          (ctxt->inputNr == 1)) {
6638
314
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6639
314
    "PEReference: forbidden within markup decl in internal subset\n");
6640
548
      } else {
6641
548
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6642
548
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6643
548
            }
6644
862
      return(-1);
6645
862
  }
6646
6647
9.31k
  SKIP_BLANKS;
6648
6649
9.31k
  if (RAW != '>') {
6650
1.79k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6651
1.79k
      if (content != NULL) {
6652
209
    xmlFreeDocElementContent(ctxt->myDoc, content);
6653
209
      }
6654
7.52k
  } else {
6655
7.52k
      if (inputid != ctxt->input->id) {
6656
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6657
0
                               "Element declaration doesn't start and stop in"
6658
0
                               " the same entity\n");
6659
0
      }
6660
6661
7.52k
      NEXT;
6662
7.52k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6663
7.52k
    (ctxt->sax->elementDecl != NULL)) {
6664
0
    if (content != NULL)
6665
0
        content->parent = NULL;
6666
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6667
0
                           content);
6668
0
    if ((content != NULL) && (content->parent == NULL)) {
6669
        /*
6670
         * this is a trick: if xmlAddElementDecl is called,
6671
         * instead of copying the full tree it is plugged directly
6672
         * if called from the parser. Avoid duplicating the
6673
         * interfaces or change the API/ABI
6674
         */
6675
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6676
0
    }
6677
7.52k
      } else if (content != NULL) {
6678
6.49k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6679
6.49k
      }
6680
7.52k
  }
6681
9.31k
    }
6682
9.33k
    return(ret);
6683
10.4k
}
6684
6685
/**
6686
 * xmlParseConditionalSections
6687
 * @ctxt:  an XML parser context
6688
 *
6689
 * Parse a conditional section. Always consumes '<!['.
6690
 *
6691
 * [61] conditionalSect ::= includeSect | ignoreSect
6692
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6693
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6694
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6695
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6696
 */
6697
6698
static void
6699
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6700
0
    int *inputIds = NULL;
6701
0
    size_t inputIdsSize = 0;
6702
0
    size_t depth = 0;
6703
6704
0
    while (ctxt->instate != XML_PARSER_EOF) {
6705
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6706
0
            int id = ctxt->input->id;
6707
6708
0
            SKIP(3);
6709
0
            SKIP_BLANKS;
6710
6711
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6712
0
                SKIP(7);
6713
0
                SKIP_BLANKS;
6714
0
                if (RAW != '[') {
6715
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6716
0
                    xmlHaltParser(ctxt);
6717
0
                    goto error;
6718
0
                }
6719
0
                if (ctxt->input->id != id) {
6720
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6721
0
                                   "All markup of the conditional section is"
6722
0
                                   " not in the same entity\n");
6723
0
                }
6724
0
                NEXT;
6725
6726
0
                if (inputIdsSize <= depth) {
6727
0
                    int *tmp;
6728
6729
0
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6730
0
                    tmp = (int *) xmlRealloc(inputIds,
6731
0
                            inputIdsSize * sizeof(int));
6732
0
                    if (tmp == NULL) {
6733
0
                        xmlErrMemory(ctxt, NULL);
6734
0
                        goto error;
6735
0
                    }
6736
0
                    inputIds = tmp;
6737
0
                }
6738
0
                inputIds[depth] = id;
6739
0
                depth++;
6740
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6741
0
                size_t ignoreDepth = 0;
6742
6743
0
                SKIP(6);
6744
0
                SKIP_BLANKS;
6745
0
                if (RAW != '[') {
6746
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6747
0
                    xmlHaltParser(ctxt);
6748
0
                    goto error;
6749
0
                }
6750
0
                if (ctxt->input->id != id) {
6751
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6752
0
                                   "All markup of the conditional section is"
6753
0
                                   " not in the same entity\n");
6754
0
                }
6755
0
                NEXT;
6756
6757
0
                while (RAW != 0) {
6758
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759
0
                        SKIP(3);
6760
0
                        ignoreDepth++;
6761
                        /* Check for integer overflow */
6762
0
                        if (ignoreDepth == 0) {
6763
0
                            xmlErrMemory(ctxt, NULL);
6764
0
                            goto error;
6765
0
                        }
6766
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6767
0
                               (NXT(2) == '>')) {
6768
0
                        if (ignoreDepth == 0)
6769
0
                            break;
6770
0
                        SKIP(3);
6771
0
                        ignoreDepth--;
6772
0
                    } else {
6773
0
                        NEXT;
6774
0
                    }
6775
0
                }
6776
6777
0
    if (RAW == 0) {
6778
0
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6779
0
                    goto error;
6780
0
    }
6781
0
                if (ctxt->input->id != id) {
6782
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6783
0
                                   "All markup of the conditional section is"
6784
0
                                   " not in the same entity\n");
6785
0
                }
6786
0
                SKIP(3);
6787
0
            } else {
6788
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6789
0
                xmlHaltParser(ctxt);
6790
0
                goto error;
6791
0
            }
6792
0
        } else if ((depth > 0) &&
6793
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6794
0
            depth--;
6795
0
            if (ctxt->input->id != inputIds[depth]) {
6796
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6797
0
                               "All markup of the conditional section is not"
6798
0
                               " in the same entity\n");
6799
0
            }
6800
0
            SKIP(3);
6801
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6802
0
            xmlParseMarkupDecl(ctxt);
6803
0
        } else {
6804
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6805
0
            xmlHaltParser(ctxt);
6806
0
            goto error;
6807
0
        }
6808
6809
0
        if (depth == 0)
6810
0
            break;
6811
6812
0
        SKIP_BLANKS;
6813
0
        SHRINK;
6814
0
        GROW;
6815
0
    }
6816
6817
0
error:
6818
0
    xmlFree(inputIds);
6819
0
}
6820
6821
/**
6822
 * xmlParseMarkupDecl:
6823
 * @ctxt:  an XML parser context
6824
 *
6825
 * DEPRECATED: Internal function, don't use.
6826
 *
6827
 * Parse markup declarations. Always consumes '<!' or '<?'.
6828
 *
6829
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6830
 *                     NotationDecl | PI | Comment
6831
 *
6832
 * [ VC: Proper Declaration/PE Nesting ]
6833
 * Parameter-entity replacement text must be properly nested with
6834
 * markup declarations. That is to say, if either the first character
6835
 * or the last character of a markup declaration (markupdecl above) is
6836
 * contained in the replacement text for a parameter-entity reference,
6837
 * both must be contained in the same replacement text.
6838
 *
6839
 * [ WFC: PEs in Internal Subset ]
6840
 * In the internal DTD subset, parameter-entity references can occur
6841
 * only where markup declarations can occur, not within markup declarations.
6842
 * (This does not apply to references that occur in external parameter
6843
 * entities or to the external subset.)
6844
 */
6845
void
6846
49.1k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6847
49.1k
    GROW;
6848
49.1k
    if (CUR == '<') {
6849
49.1k
        if (NXT(1) == '!') {
6850
41.8k
      switch (NXT(2)) {
6851
32.0k
          case 'E':
6852
32.0k
        if (NXT(3) == 'L')
6853
10.4k
      xmlParseElementDecl(ctxt);
6854
21.5k
        else if (NXT(3) == 'N')
6855
21.5k
      xmlParseEntityDecl(ctxt);
6856
15
                    else
6857
15
                        SKIP(2);
6858
32.0k
        break;
6859
6.06k
          case 'A':
6860
6.06k
        xmlParseAttributeListDecl(ctxt);
6861
6.06k
        break;
6862
1.35k
          case 'N':
6863
1.35k
        xmlParseNotationDecl(ctxt);
6864
1.35k
        break;
6865
1.58k
          case '-':
6866
1.58k
        xmlParseComment(ctxt);
6867
1.58k
        break;
6868
849
    default:
6869
        /* there is an error but it will be detected later */
6870
849
                    SKIP(2);
6871
849
        break;
6872
41.8k
      }
6873
41.8k
  } else if (NXT(1) == '?') {
6874
7.24k
      xmlParsePI(ctxt);
6875
7.24k
  }
6876
49.1k
    }
6877
6878
    /*
6879
     * detect requirement to exit there and act accordingly
6880
     * and avoid having instate overridden later on
6881
     */
6882
49.1k
    if (ctxt->instate == XML_PARSER_EOF)
6883
598
        return;
6884
6885
48.5k
    ctxt->instate = XML_PARSER_DTD;
6886
48.5k
}
6887
6888
/**
6889
 * xmlParseTextDecl:
6890
 * @ctxt:  an XML parser context
6891
 *
6892
 * DEPRECATED: Internal function, don't use.
6893
 *
6894
 * parse an XML declaration header for external entities
6895
 *
6896
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6897
 */
6898
6899
void
6900
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6901
0
    xmlChar *version;
6902
0
    const xmlChar *encoding;
6903
0
    int oldstate;
6904
6905
    /*
6906
     * We know that '<?xml' is here.
6907
     */
6908
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6909
0
  SKIP(5);
6910
0
    } else {
6911
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6912
0
  return;
6913
0
    }
6914
6915
    /* Avoid expansion of parameter entities when skipping blanks. */
6916
0
    oldstate = ctxt->instate;
6917
0
    ctxt->instate = XML_PARSER_START;
6918
6919
0
    if (SKIP_BLANKS == 0) {
6920
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6921
0
           "Space needed after '<?xml'\n");
6922
0
    }
6923
6924
    /*
6925
     * We may have the VersionInfo here.
6926
     */
6927
0
    version = xmlParseVersionInfo(ctxt);
6928
0
    if (version == NULL)
6929
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6930
0
    else {
6931
0
  if (SKIP_BLANKS == 0) {
6932
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6933
0
               "Space needed here\n");
6934
0
  }
6935
0
    }
6936
0
    ctxt->input->version = version;
6937
6938
    /*
6939
     * We must have the encoding declaration
6940
     */
6941
0
    encoding = xmlParseEncodingDecl(ctxt);
6942
0
    if (ctxt->instate == XML_PARSER_EOF)
6943
0
        return;
6944
0
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6945
  /*
6946
   * The XML REC instructs us to stop parsing right here
6947
   */
6948
0
        ctxt->instate = oldstate;
6949
0
        return;
6950
0
    }
6951
0
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6952
0
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6953
0
           "Missing encoding in text declaration\n");
6954
0
    }
6955
6956
0
    SKIP_BLANKS;
6957
0
    if ((RAW == '?') && (NXT(1) == '>')) {
6958
0
        SKIP(2);
6959
0
    } else if (RAW == '>') {
6960
        /* Deprecated old WD ... */
6961
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6962
0
  NEXT;
6963
0
    } else {
6964
0
        int c;
6965
6966
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6967
0
        while ((c = CUR) != 0) {
6968
0
            NEXT;
6969
0
            if (c == '>')
6970
0
                break;
6971
0
        }
6972
0
    }
6973
6974
0
    ctxt->instate = oldstate;
6975
0
}
6976
6977
/**
6978
 * xmlParseExternalSubset:
6979
 * @ctxt:  an XML parser context
6980
 * @ExternalID: the external identifier
6981
 * @SystemID: the system identifier (or URL)
6982
 *
6983
 * parse Markup declarations from an external subset
6984
 *
6985
 * [30] extSubset ::= textDecl? extSubsetDecl
6986
 *
6987
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6988
 */
6989
void
6990
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6991
0
                       const xmlChar *SystemID) {
6992
0
    xmlDetectSAX2(ctxt);
6993
0
    GROW;
6994
6995
0
    if ((ctxt->encoding == NULL) &&
6996
0
        (ctxt->input->end - ctxt->input->cur >= 4)) {
6997
0
        xmlChar start[4];
6998
0
  xmlCharEncoding enc;
6999
7000
0
  start[0] = RAW;
7001
0
  start[1] = NXT(1);
7002
0
  start[2] = NXT(2);
7003
0
  start[3] = NXT(3);
7004
0
  enc = xmlDetectCharEncoding(start, 4);
7005
0
  if (enc != XML_CHAR_ENCODING_NONE)
7006
0
      xmlSwitchEncoding(ctxt, enc);
7007
0
    }
7008
7009
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7010
0
  xmlParseTextDecl(ctxt);
7011
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7012
      /*
7013
       * The XML REC instructs us to stop parsing right here
7014
       */
7015
0
      xmlHaltParser(ctxt);
7016
0
      return;
7017
0
  }
7018
0
    }
7019
0
    if (ctxt->myDoc == NULL) {
7020
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7021
0
  if (ctxt->myDoc == NULL) {
7022
0
      xmlErrMemory(ctxt, "New Doc failed");
7023
0
      return;
7024
0
  }
7025
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7026
0
    }
7027
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7028
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7029
7030
0
    ctxt->instate = XML_PARSER_DTD;
7031
0
    ctxt->external = 1;
7032
0
    SKIP_BLANKS;
7033
0
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7034
0
  GROW;
7035
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7036
0
            xmlParseConditionalSections(ctxt);
7037
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7038
0
            xmlParseMarkupDecl(ctxt);
7039
0
        } else {
7040
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7041
0
            xmlHaltParser(ctxt);
7042
0
            return;
7043
0
        }
7044
0
        SKIP_BLANKS;
7045
0
        SHRINK;
7046
0
    }
7047
7048
0
    if (RAW != 0) {
7049
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7050
0
    }
7051
7052
0
}
7053
7054
/**
7055
 * xmlParseReference:
7056
 * @ctxt:  an XML parser context
7057
 *
7058
 * DEPRECATED: Internal function, don't use.
7059
 *
7060
 * parse and handle entity references in content, depending on the SAX
7061
 * interface, this may end-up in a call to character() if this is a
7062
 * CharRef, a predefined entity, if there is no reference() callback.
7063
 * or if the parser was asked to switch to that mode.
7064
 *
7065
 * Always consumes '&'.
7066
 *
7067
 * [67] Reference ::= EntityRef | CharRef
7068
 */
7069
void
7070
7.48k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7071
7.48k
    xmlEntityPtr ent;
7072
7.48k
    xmlChar *val;
7073
7.48k
    int was_checked;
7074
7.48k
    xmlNodePtr list = NULL;
7075
7.48k
    xmlParserErrors ret = XML_ERR_OK;
7076
7077
7078
7.48k
    if (RAW != '&')
7079
0
        return;
7080
7081
    /*
7082
     * Simple case of a CharRef
7083
     */
7084
7.48k
    if (NXT(1) == '#') {
7085
749
  int i = 0;
7086
749
  xmlChar out[16];
7087
749
  int hex = NXT(2);
7088
749
  int value = xmlParseCharRef(ctxt);
7089
7090
749
  if (value == 0)
7091
42
      return;
7092
707
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7093
      /*
7094
       * So we are using non-UTF-8 buffers
7095
       * Check that the char fit on 8bits, if not
7096
       * generate a CharRef.
7097
       */
7098
0
      if (value <= 0xFF) {
7099
0
    out[0] = value;
7100
0
    out[1] = 0;
7101
0
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7102
0
        (!ctxt->disableSAX))
7103
0
        ctxt->sax->characters(ctxt->userData, out, 1);
7104
0
      } else {
7105
0
    if ((hex == 'x') || (hex == 'X'))
7106
0
        snprintf((char *)out, sizeof(out), "#x%X", value);
7107
0
    else
7108
0
        snprintf((char *)out, sizeof(out), "#%d", value);
7109
0
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7110
0
        (!ctxt->disableSAX))
7111
0
        ctxt->sax->reference(ctxt->userData, out);
7112
0
      }
7113
707
  } else {
7114
      /*
7115
       * Just encode the value in UTF-8
7116
       */
7117
707
      COPY_BUF(0 ,out, i, value);
7118
707
      out[i] = 0;
7119
707
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7120
707
    (!ctxt->disableSAX))
7121
707
    ctxt->sax->characters(ctxt->userData, out, i);
7122
707
  }
7123
707
  return;
7124
749
    }
7125
7126
    /*
7127
     * We are seeing an entity reference
7128
     */
7129
6.73k
    ent = xmlParseEntityRef(ctxt);
7130
6.73k
    if (ent == NULL) return;
7131
6.67k
    if (!ctxt->wellFormed)
7132
1
  return;
7133
6.67k
    was_checked = ent->flags & XML_ENT_PARSED;
7134
7135
    /* special case of predefined entities */
7136
6.67k
    if ((ent->name == NULL) ||
7137
6.67k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7138
6.67k
  val = ent->content;
7139
6.67k
  if (val == NULL) return;
7140
  /*
7141
   * inline the entity.
7142
   */
7143
2.76k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7144
2.76k
      (!ctxt->disableSAX))
7145
2.76k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7146
2.76k
  return;
7147
6.67k
    }
7148
7149
    /*
7150
     * The first reference to the entity trigger a parsing phase
7151
     * where the ent->children is filled with the result from
7152
     * the parsing.
7153
     * Note: external parsed entities will not be loaded, it is not
7154
     * required for a non-validating parser, unless the parsing option
7155
     * of validating, or substituting entities were given. Doing so is
7156
     * far more secure as the parser will only process data coming from
7157
     * the document entity by default.
7158
     */
7159
0
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7160
0
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7161
0
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7162
0
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7163
7164
  /*
7165
   * This is a bit hackish but this seems the best
7166
   * way to make sure both SAX and DOM entity support
7167
   * behaves okay.
7168
   */
7169
0
  void *user_data;
7170
0
  if (ctxt->userData == ctxt)
7171
0
      user_data = NULL;
7172
0
  else
7173
0
      user_data = ctxt->userData;
7174
7175
        /* Avoid overflow as much as possible */
7176
0
        ctxt->sizeentcopy = 0;
7177
7178
0
        if (ent->flags & XML_ENT_EXPANDING) {
7179
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7180
0
            xmlHaltParser(ctxt);
7181
0
            return;
7182
0
        }
7183
7184
0
        ent->flags |= XML_ENT_EXPANDING;
7185
7186
  /*
7187
   * Check that this entity is well formed
7188
   * 4.3.2: An internal general parsed entity is well-formed
7189
   * if its replacement text matches the production labeled
7190
   * content.
7191
   */
7192
0
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7193
0
      ctxt->depth++;
7194
0
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7195
0
                                                user_data, &list);
7196
0
      ctxt->depth--;
7197
7198
0
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7199
0
      ctxt->depth++;
7200
0
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7201
0
                                     user_data, ctxt->depth, ent->URI,
7202
0
             ent->ExternalID, &list);
7203
0
      ctxt->depth--;
7204
0
  } else {
7205
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7206
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7207
0
       "invalid entity type found\n", NULL);
7208
0
  }
7209
7210
0
        ent->flags &= ~XML_ENT_EXPANDING;
7211
0
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7212
0
        ent->expandedSize = ctxt->sizeentcopy;
7213
0
  if (ret == XML_ERR_ENTITY_LOOP) {
7214
0
            xmlHaltParser(ctxt);
7215
0
      xmlFreeNodeList(list);
7216
0
      return;
7217
0
  }
7218
0
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7219
0
      xmlFreeNodeList(list);
7220
0
      return;
7221
0
  }
7222
7223
0
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7224
0
            ent->children = list;
7225
            /*
7226
             * Prune it directly in the generated document
7227
             * except for single text nodes.
7228
             */
7229
0
            if ((ctxt->replaceEntities == 0) ||
7230
0
                (ctxt->parseMode == XML_PARSE_READER) ||
7231
0
                ((list->type == XML_TEXT_NODE) &&
7232
0
                 (list->next == NULL))) {
7233
0
                ent->owner = 1;
7234
0
                while (list != NULL) {
7235
0
                    list->parent = (xmlNodePtr) ent;
7236
0
                    if (list->doc != ent->doc)
7237
0
                        xmlSetTreeDoc(list, ent->doc);
7238
0
                    if (list->next == NULL)
7239
0
                        ent->last = list;
7240
0
                    list = list->next;
7241
0
                }
7242
0
                list = NULL;
7243
0
            } else {
7244
0
                ent->owner = 0;
7245
0
                while (list != NULL) {
7246
0
                    list->parent = (xmlNodePtr) ctxt->node;
7247
0
                    list->doc = ctxt->myDoc;
7248
0
                    if (list->next == NULL)
7249
0
                        ent->last = list;
7250
0
                    list = list->next;
7251
0
                }
7252
0
                list = ent->children;
7253
#ifdef LIBXML_LEGACY_ENABLED
7254
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7255
                    xmlAddEntityReference(ent, list, NULL);
7256
#endif /* LIBXML_LEGACY_ENABLED */
7257
0
            }
7258
0
  } else if ((ret != XML_ERR_OK) &&
7259
0
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7260
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7261
0
         "Entity '%s' failed to parse\n", ent->name);
7262
0
            if (ent->content != NULL)
7263
0
                ent->content[0] = 0;
7264
0
  } else if (list != NULL) {
7265
0
      xmlFreeNodeList(list);
7266
0
      list = NULL;
7267
0
  }
7268
7269
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7270
0
        was_checked = 0;
7271
0
    }
7272
7273
    /*
7274
     * Now that the entity content has been gathered
7275
     * provide it to the application, this can take different forms based
7276
     * on the parsing modes.
7277
     */
7278
0
    if (ent->children == NULL) {
7279
  /*
7280
   * Probably running in SAX mode and the callbacks don't
7281
   * build the entity content. So unless we already went
7282
   * though parsing for first checking go though the entity
7283
   * content to generate callbacks associated to the entity
7284
   */
7285
0
  if (was_checked != 0) {
7286
0
      void *user_data;
7287
      /*
7288
       * This is a bit hackish but this seems the best
7289
       * way to make sure both SAX and DOM entity support
7290
       * behaves okay.
7291
       */
7292
0
      if (ctxt->userData == ctxt)
7293
0
    user_data = NULL;
7294
0
      else
7295
0
    user_data = ctxt->userData;
7296
7297
0
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7298
0
    ctxt->depth++;
7299
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7300
0
           ent->content, user_data, NULL);
7301
0
    ctxt->depth--;
7302
0
      } else if (ent->etype ==
7303
0
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7304
0
          unsigned long oldsizeentities = ctxt->sizeentities;
7305
7306
0
    ctxt->depth++;
7307
0
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7308
0
         ctxt->sax, user_data, ctxt->depth,
7309
0
         ent->URI, ent->ExternalID, NULL);
7310
0
    ctxt->depth--;
7311
7312
                /* Undo the change to sizeentities */
7313
0
                ctxt->sizeentities = oldsizeentities;
7314
0
      } else {
7315
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7316
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7317
0
           "invalid entity type found\n", NULL);
7318
0
      }
7319
0
      if (ret == XML_ERR_ENTITY_LOOP) {
7320
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7321
0
    return;
7322
0
      }
7323
0
            if (xmlParserEntityCheck(ctxt, 0))
7324
0
                return;
7325
0
  }
7326
0
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7327
0
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7328
      /*
7329
       * Entity reference callback comes second, it's somewhat
7330
       * superfluous but a compatibility to historical behaviour
7331
       */
7332
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7333
0
  }
7334
0
  return;
7335
0
    }
7336
7337
    /*
7338
     * We also check for amplification if entities aren't substituted.
7339
     * They might be expanded later.
7340
     */
7341
0
    if ((was_checked != 0) &&
7342
0
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7343
0
        return;
7344
7345
    /*
7346
     * If we didn't get any children for the entity being built
7347
     */
7348
0
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7349
0
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7350
  /*
7351
   * Create a node.
7352
   */
7353
0
  ctxt->sax->reference(ctxt->userData, ent->name);
7354
0
  return;
7355
0
    }
7356
7357
0
    if (ctxt->replaceEntities)  {
7358
  /*
7359
   * There is a problem on the handling of _private for entities
7360
   * (bug 155816): Should we copy the content of the field from
7361
   * the entity (possibly overwriting some value set by the user
7362
   * when a copy is created), should we leave it alone, or should
7363
   * we try to take care of different situations?  The problem
7364
   * is exacerbated by the usage of this field by the xmlReader.
7365
   * To fix this bug, we look at _private on the created node
7366
   * and, if it's NULL, we copy in whatever was in the entity.
7367
   * If it's not NULL we leave it alone.  This is somewhat of a
7368
   * hack - maybe we should have further tests to determine
7369
   * what to do.
7370
   */
7371
0
  if (ctxt->node != NULL) {
7372
      /*
7373
       * Seems we are generating the DOM content, do
7374
       * a simple tree copy for all references except the first
7375
       * In the first occurrence list contains the replacement.
7376
       */
7377
0
      if (((list == NULL) && (ent->owner == 0)) ||
7378
0
    (ctxt->parseMode == XML_PARSE_READER)) {
7379
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7380
7381
    /*
7382
     * when operating on a reader, the entities definitions
7383
     * are always owning the entities subtree.
7384
    if (ctxt->parseMode == XML_PARSE_READER)
7385
        ent->owner = 1;
7386
     */
7387
7388
0
    cur = ent->children;
7389
0
    while (cur != NULL) {
7390
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7391
0
        if (nw != NULL) {
7392
0
      if (nw->_private == NULL)
7393
0
          nw->_private = cur->_private;
7394
0
      if (firstChild == NULL){
7395
0
          firstChild = nw;
7396
0
      }
7397
0
      nw = xmlAddChild(ctxt->node, nw);
7398
0
        }
7399
0
        if (cur == ent->last) {
7400
      /*
7401
       * needed to detect some strange empty
7402
       * node cases in the reader tests
7403
       */
7404
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7405
0
          (nw != NULL) &&
7406
0
          (nw->type == XML_ELEMENT_NODE) &&
7407
0
          (nw->children == NULL))
7408
0
          nw->extra = 1;
7409
7410
0
      break;
7411
0
        }
7412
0
        cur = cur->next;
7413
0
    }
7414
#ifdef LIBXML_LEGACY_ENABLED
7415
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7416
      xmlAddEntityReference(ent, firstChild, nw);
7417
#endif /* LIBXML_LEGACY_ENABLED */
7418
0
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7419
0
    xmlNodePtr nw = NULL, cur, next, last,
7420
0
         firstChild = NULL;
7421
7422
    /*
7423
     * Copy the entity child list and make it the new
7424
     * entity child list. The goal is to make sure any
7425
     * ID or REF referenced will be the one from the
7426
     * document content and not the entity copy.
7427
     */
7428
0
    cur = ent->children;
7429
0
    ent->children = NULL;
7430
0
    last = ent->last;
7431
0
    ent->last = NULL;
7432
0
    while (cur != NULL) {
7433
0
        next = cur->next;
7434
0
        cur->next = NULL;
7435
0
        cur->parent = NULL;
7436
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7437
0
        if (nw != NULL) {
7438
0
      if (nw->_private == NULL)
7439
0
          nw->_private = cur->_private;
7440
0
      if (firstChild == NULL){
7441
0
          firstChild = cur;
7442
0
      }
7443
0
      xmlAddChild((xmlNodePtr) ent, nw);
7444
0
        }
7445
0
        xmlAddChild(ctxt->node, cur);
7446
0
        if (cur == last)
7447
0
      break;
7448
0
        cur = next;
7449
0
    }
7450
0
    if (ent->owner == 0)
7451
0
        ent->owner = 1;
7452
#ifdef LIBXML_LEGACY_ENABLED
7453
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7454
      xmlAddEntityReference(ent, firstChild, nw);
7455
#endif /* LIBXML_LEGACY_ENABLED */
7456
0
      } else {
7457
0
    const xmlChar *nbktext;
7458
7459
    /*
7460
     * the name change is to avoid coalescing of the
7461
     * node with a possible previous text one which
7462
     * would make ent->children a dangling pointer
7463
     */
7464
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7465
0
          -1);
7466
0
    if (ent->children->type == XML_TEXT_NODE)
7467
0
        ent->children->name = nbktext;
7468
0
    if ((ent->last != ent->children) &&
7469
0
        (ent->last->type == XML_TEXT_NODE))
7470
0
        ent->last->name = nbktext;
7471
0
    xmlAddChildList(ctxt->node, ent->children);
7472
0
      }
7473
7474
      /*
7475
       * This is to avoid a nasty side effect, see
7476
       * characters() in SAX.c
7477
       */
7478
0
      ctxt->nodemem = 0;
7479
0
      ctxt->nodelen = 0;
7480
0
      return;
7481
0
  }
7482
0
    }
7483
0
}
7484
7485
/**
7486
 * xmlParseEntityRef:
7487
 * @ctxt:  an XML parser context
7488
 *
7489
 * DEPRECATED: Internal function, don't use.
7490
 *
7491
 * Parse an entity reference. Always consumes '&'.
7492
 *
7493
 * [68] EntityRef ::= '&' Name ';'
7494
 *
7495
 * [ WFC: Entity Declared ]
7496
 * In a document without any DTD, a document with only an internal DTD
7497
 * subset which contains no parameter entity references, or a document
7498
 * with "standalone='yes'", the Name given in the entity reference
7499
 * must match that in an entity declaration, except that well-formed
7500
 * documents need not declare any of the following entities: amp, lt,
7501
 * gt, apos, quot.  The declaration of a parameter entity must precede
7502
 * any reference to it.  Similarly, the declaration of a general entity
7503
 * must precede any reference to it which appears in a default value in an
7504
 * attribute-list declaration. Note that if entities are declared in the
7505
 * external subset or in external parameter entities, a non-validating
7506
 * processor is not obligated to read and process their declarations;
7507
 * for such documents, the rule that an entity must be declared is a
7508
 * well-formedness constraint only if standalone='yes'.
7509
 *
7510
 * [ WFC: Parsed Entity ]
7511
 * An entity reference must not contain the name of an unparsed entity
7512
 *
7513
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7514
 */
7515
xmlEntityPtr
7516
529k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7517
529k
    const xmlChar *name;
7518
529k
    xmlEntityPtr ent = NULL;
7519
7520
529k
    GROW;
7521
529k
    if (ctxt->instate == XML_PARSER_EOF)
7522
0
        return(NULL);
7523
7524
529k
    if (RAW != '&')
7525
0
        return(NULL);
7526
529k
    NEXT;
7527
529k
    name = xmlParseName(ctxt);
7528
529k
    if (name == NULL) {
7529
408k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7530
408k
           "xmlParseEntityRef: no name\n");
7531
408k
        return(NULL);
7532
408k
    }
7533
121k
    if (RAW != ';') {
7534
76.7k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7535
76.7k
  return(NULL);
7536
76.7k
    }
7537
44.6k
    NEXT;
7538
7539
    /*
7540
     * Predefined entities override any extra definition
7541
     */
7542
44.6k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7543
44.6k
        ent = xmlGetPredefinedEntity(name);
7544
44.6k
        if (ent != NULL)
7545
13.3k
            return(ent);
7546
44.6k
    }
7547
7548
    /*
7549
     * Ask first SAX for entity resolution, otherwise try the
7550
     * entities which may have stored in the parser context.
7551
     */
7552
31.3k
    if (ctxt->sax != NULL) {
7553
31.3k
  if (ctxt->sax->getEntity != NULL)
7554
31.3k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7555
31.3k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7556
31.3k
      (ctxt->options & XML_PARSE_OLDSAX))
7557
0
      ent = xmlGetPredefinedEntity(name);
7558
31.3k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7559
31.3k
      (ctxt->userData==ctxt)) {
7560
0
      ent = xmlSAX2GetEntity(ctxt, name);
7561
0
  }
7562
31.3k
    }
7563
31.3k
    if (ctxt->instate == XML_PARSER_EOF)
7564
0
  return(NULL);
7565
    /*
7566
     * [ WFC: Entity Declared ]
7567
     * In a document without any DTD, a document with only an
7568
     * internal DTD subset which contains no parameter entity
7569
     * references, or a document with "standalone='yes'", the
7570
     * Name given in the entity reference must match that in an
7571
     * entity declaration, except that well-formed documents
7572
     * need not declare any of the following entities: amp, lt,
7573
     * gt, apos, quot.
7574
     * The declaration of a parameter entity must precede any
7575
     * reference to it.
7576
     * Similarly, the declaration of a general entity must
7577
     * precede any reference to it which appears in a default
7578
     * value in an attribute-list declaration. Note that if
7579
     * entities are declared in the external subset or in
7580
     * external parameter entities, a non-validating processor
7581
     * is not obligated to read and process their declarations;
7582
     * for such documents, the rule that an entity must be
7583
     * declared is a well-formedness constraint only if
7584
     * standalone='yes'.
7585
     */
7586
31.3k
    if (ent == NULL) {
7587
0
  if ((ctxt->standalone == 1) ||
7588
0
      ((ctxt->hasExternalSubset == 0) &&
7589
0
       (ctxt->hasPErefs == 0))) {
7590
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7591
0
         "Entity '%s' not defined\n", name);
7592
0
  } else {
7593
0
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7594
0
         "Entity '%s' not defined\n", name);
7595
0
      if ((ctxt->inSubset == 0) &&
7596
0
    (ctxt->sax != NULL) &&
7597
0
    (ctxt->sax->reference != NULL)) {
7598
0
    ctxt->sax->reference(ctxt->userData, name);
7599
0
      }
7600
0
  }
7601
0
  ctxt->valid = 0;
7602
0
    }
7603
7604
    /*
7605
     * [ WFC: Parsed Entity ]
7606
     * An entity reference must not contain the name of an
7607
     * unparsed entity
7608
     */
7609
31.3k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7610
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7611
0
     "Entity reference to unparsed entity %s\n", name);
7612
0
    }
7613
7614
    /*
7615
     * [ WFC: No External Entity References ]
7616
     * Attribute values cannot contain direct or indirect
7617
     * entity references to external entities.
7618
     */
7619
31.3k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7620
31.3k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7621
27.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7622
27.3k
       "Attribute references external entity '%s'\n", name);
7623
27.3k
    }
7624
    /*
7625
     * [ WFC: No < in Attribute Values ]
7626
     * The replacement text of any entity referred to directly or
7627
     * indirectly in an attribute value (other than "&lt;") must
7628
     * not contain a <.
7629
     */
7630
3.91k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7631
3.91k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7632
0
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7633
0
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7634
0
                ent->flags |= XML_ENT_CONTAINS_LT;
7635
0
            ent->flags |= XML_ENT_CHECKED_LT;
7636
0
        }
7637
0
        if (ent->flags & XML_ENT_CONTAINS_LT)
7638
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7639
0
                    "'<' in entity '%s' is not allowed in attributes "
7640
0
                    "values\n", name);
7641
0
    }
7642
7643
    /*
7644
     * Internal check, no parameter entities here ...
7645
     */
7646
3.91k
    else {
7647
3.91k
  switch (ent->etype) {
7648
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7649
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7650
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7651
0
       "Attempt to reference the parameter entity '%s'\n",
7652
0
            name);
7653
0
      break;
7654
3.91k
      default:
7655
3.91k
      break;
7656
3.91k
  }
7657
3.91k
    }
7658
7659
    /*
7660
     * [ WFC: No Recursion ]
7661
     * A parsed entity must not contain a recursive reference
7662
     * to itself, either directly or indirectly.
7663
     * Done somewhere else
7664
     */
7665
31.3k
    return(ent);
7666
31.3k
}
7667
7668
/**
7669
 * xmlParseStringEntityRef:
7670
 * @ctxt:  an XML parser context
7671
 * @str:  a pointer to an index in the string
7672
 *
7673
 * Parse an entity reference, but this version parses it from
7674
 * a string value.
7675
 *
7676
 * [68] EntityRef ::= '&' Name ';'
7677
 *
7678
 * [ WFC: Entity Declared ]
7679
 * In a document without any DTD, a document with only an internal DTD
7680
 * subset which contains no parameter entity references, or a document
7681
 * with "standalone='yes'", the Name given in the entity reference
7682
 * must match that in an entity declaration, except that well-formed
7683
 * documents need not declare any of the following entities: amp, lt,
7684
 * gt, apos, quot.  The declaration of a parameter entity must precede
7685
 * any reference to it.  Similarly, the declaration of a general entity
7686
 * must precede any reference to it which appears in a default value in an
7687
 * attribute-list declaration. Note that if entities are declared in the
7688
 * external subset or in external parameter entities, a non-validating
7689
 * processor is not obligated to read and process their declarations;
7690
 * for such documents, the rule that an entity must be declared is a
7691
 * well-formedness constraint only if standalone='yes'.
7692
 *
7693
 * [ WFC: Parsed Entity ]
7694
 * An entity reference must not contain the name of an unparsed entity
7695
 *
7696
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7697
 * is updated to the current location in the string.
7698
 */
7699
static xmlEntityPtr
7700
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7701
0
    xmlChar *name;
7702
0
    const xmlChar *ptr;
7703
0
    xmlChar cur;
7704
0
    xmlEntityPtr ent = NULL;
7705
7706
0
    if ((str == NULL) || (*str == NULL))
7707
0
        return(NULL);
7708
0
    ptr = *str;
7709
0
    cur = *ptr;
7710
0
    if (cur != '&')
7711
0
  return(NULL);
7712
7713
0
    ptr++;
7714
0
    name = xmlParseStringName(ctxt, &ptr);
7715
0
    if (name == NULL) {
7716
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7717
0
           "xmlParseStringEntityRef: no name\n");
7718
0
  *str = ptr;
7719
0
  return(NULL);
7720
0
    }
7721
0
    if (*ptr != ';') {
7722
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7723
0
        xmlFree(name);
7724
0
  *str = ptr;
7725
0
  return(NULL);
7726
0
    }
7727
0
    ptr++;
7728
7729
7730
    /*
7731
     * Predefined entities override any extra definition
7732
     */
7733
0
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7734
0
        ent = xmlGetPredefinedEntity(name);
7735
0
        if (ent != NULL) {
7736
0
            xmlFree(name);
7737
0
            *str = ptr;
7738
0
            return(ent);
7739
0
        }
7740
0
    }
7741
7742
    /*
7743
     * Ask first SAX for entity resolution, otherwise try the
7744
     * entities which may have stored in the parser context.
7745
     */
7746
0
    if (ctxt->sax != NULL) {
7747
0
  if (ctxt->sax->getEntity != NULL)
7748
0
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7749
0
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7750
0
      ent = xmlGetPredefinedEntity(name);
7751
0
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7752
0
      ent = xmlSAX2GetEntity(ctxt, name);
7753
0
  }
7754
0
    }
7755
0
    if (ctxt->instate == XML_PARSER_EOF) {
7756
0
  xmlFree(name);
7757
0
  return(NULL);
7758
0
    }
7759
7760
    /*
7761
     * [ WFC: Entity Declared ]
7762
     * In a document without any DTD, a document with only an
7763
     * internal DTD subset which contains no parameter entity
7764
     * references, or a document with "standalone='yes'", the
7765
     * Name given in the entity reference must match that in an
7766
     * entity declaration, except that well-formed documents
7767
     * need not declare any of the following entities: amp, lt,
7768
     * gt, apos, quot.
7769
     * The declaration of a parameter entity must precede any
7770
     * reference to it.
7771
     * Similarly, the declaration of a general entity must
7772
     * precede any reference to it which appears in a default
7773
     * value in an attribute-list declaration. Note that if
7774
     * entities are declared in the external subset or in
7775
     * external parameter entities, a non-validating processor
7776
     * is not obligated to read and process their declarations;
7777
     * for such documents, the rule that an entity must be
7778
     * declared is a well-formedness constraint only if
7779
     * standalone='yes'.
7780
     */
7781
0
    if (ent == NULL) {
7782
0
  if ((ctxt->standalone == 1) ||
7783
0
      ((ctxt->hasExternalSubset == 0) &&
7784
0
       (ctxt->hasPErefs == 0))) {
7785
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7786
0
         "Entity '%s' not defined\n", name);
7787
0
  } else {
7788
0
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7789
0
        "Entity '%s' not defined\n",
7790
0
        name);
7791
0
  }
7792
  /* TODO ? check regressions ctxt->valid = 0; */
7793
0
    }
7794
7795
    /*
7796
     * [ WFC: Parsed Entity ]
7797
     * An entity reference must not contain the name of an
7798
     * unparsed entity
7799
     */
7800
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7801
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7802
0
     "Entity reference to unparsed entity %s\n", name);
7803
0
    }
7804
7805
    /*
7806
     * [ WFC: No External Entity References ]
7807
     * Attribute values cannot contain direct or indirect
7808
     * entity references to external entities.
7809
     */
7810
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7811
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7812
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7813
0
   "Attribute references external entity '%s'\n", name);
7814
0
    }
7815
    /*
7816
     * [ WFC: No < in Attribute Values ]
7817
     * The replacement text of any entity referred to directly or
7818
     * indirectly in an attribute value (other than "&lt;") must
7819
     * not contain a <.
7820
     */
7821
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7822
0
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7823
0
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7824
0
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7825
0
                ent->flags |= XML_ENT_CONTAINS_LT;
7826
0
            ent->flags |= XML_ENT_CHECKED_LT;
7827
0
        }
7828
0
        if (ent->flags & XML_ENT_CONTAINS_LT)
7829
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7830
0
                    "'<' in entity '%s' is not allowed in attributes "
7831
0
                    "values\n", name);
7832
0
    }
7833
7834
    /*
7835
     * Internal check, no parameter entities here ...
7836
     */
7837
0
    else {
7838
0
  switch (ent->etype) {
7839
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7840
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7841
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7842
0
       "Attempt to reference the parameter entity '%s'\n",
7843
0
          name);
7844
0
      break;
7845
0
      default:
7846
0
      break;
7847
0
  }
7848
0
    }
7849
7850
    /*
7851
     * [ WFC: No Recursion ]
7852
     * A parsed entity must not contain a recursive reference
7853
     * to itself, either directly or indirectly.
7854
     * Done somewhere else
7855
     */
7856
7857
0
    xmlFree(name);
7858
0
    *str = ptr;
7859
0
    return(ent);
7860
0
}
7861
7862
/**
7863
 * xmlParsePEReference:
7864
 * @ctxt:  an XML parser context
7865
 *
7866
 * DEPRECATED: Internal function, don't use.
7867
 *
7868
 * Parse a parameter entity reference. Always consumes '%'.
7869
 *
7870
 * The entity content is handled directly by pushing its content as
7871
 * a new input stream.
7872
 *
7873
 * [69] PEReference ::= '%' Name ';'
7874
 *
7875
 * [ WFC: No Recursion ]
7876
 * A parsed entity must not contain a recursive
7877
 * reference to itself, either directly or indirectly.
7878
 *
7879
 * [ WFC: Entity Declared ]
7880
 * In a document without any DTD, a document with only an internal DTD
7881
 * subset which contains no parameter entity references, or a document
7882
 * with "standalone='yes'", ...  ... The declaration of a parameter
7883
 * entity must precede any reference to it...
7884
 *
7885
 * [ VC: Entity Declared ]
7886
 * In a document with an external subset or external parameter entities
7887
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7888
 * must precede any reference to it...
7889
 *
7890
 * [ WFC: In DTD ]
7891
 * Parameter-entity references may only appear in the DTD.
7892
 * NOTE: misleading but this is handled.
7893
 */
7894
void
7895
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7896
12.5k
{
7897
12.5k
    const xmlChar *name;
7898
12.5k
    xmlEntityPtr entity = NULL;
7899
12.5k
    xmlParserInputPtr input;
7900
7901
12.5k
    if (RAW != '%')
7902
0
        return;
7903
12.5k
    NEXT;
7904
12.5k
    name = xmlParseName(ctxt);
7905
12.5k
    if (name == NULL) {
7906
10.6k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7907
10.6k
  return;
7908
10.6k
    }
7909
1.90k
    if (xmlParserDebugEntities)
7910
0
  xmlGenericError(xmlGenericErrorContext,
7911
0
    "PEReference: %s\n", name);
7912
1.90k
    if (RAW != ';') {
7913
1.54k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7914
1.54k
        return;
7915
1.54k
    }
7916
7917
353
    NEXT;
7918
7919
    /*
7920
     * Request the entity from SAX
7921
     */
7922
353
    if ((ctxt->sax != NULL) &&
7923
353
  (ctxt->sax->getParameterEntity != NULL))
7924
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7925
353
    if (ctxt->instate == XML_PARSER_EOF)
7926
0
  return;
7927
353
    if (entity == NULL) {
7928
  /*
7929
   * [ WFC: Entity Declared ]
7930
   * In a document without any DTD, a document with only an
7931
   * internal DTD subset which contains no parameter entity
7932
   * references, or a document with "standalone='yes'", ...
7933
   * ... The declaration of a parameter entity must precede
7934
   * any reference to it...
7935
   */
7936
353
  if ((ctxt->standalone == 1) ||
7937
353
      ((ctxt->hasExternalSubset == 0) &&
7938
338
       (ctxt->hasPErefs == 0))) {
7939
42
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7940
42
            "PEReference: %%%s; not found\n",
7941
42
            name);
7942
311
  } else {
7943
      /*
7944
       * [ VC: Entity Declared ]
7945
       * In a document with an external subset or external
7946
       * parameter entities with "standalone='no'", ...
7947
       * ... The declaration of a parameter entity must
7948
       * precede any reference to it...
7949
       */
7950
311
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7951
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7952
0
                                 "PEReference: %%%s; not found\n",
7953
0
                                 name, NULL);
7954
0
            } else
7955
311
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7956
311
                              "PEReference: %%%s; not found\n",
7957
311
                              name, NULL);
7958
311
            ctxt->valid = 0;
7959
311
  }
7960
353
    } else {
7961
  /*
7962
   * Internal checking in case the entity quest barfed
7963
   */
7964
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7965
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7966
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7967
0
      "Internal: %%%s; is not a parameter entity\n",
7968
0
        name, NULL);
7969
0
  } else {
7970
0
            xmlChar start[4];
7971
0
            xmlCharEncoding enc;
7972
0
            unsigned long parentConsumed;
7973
0
            xmlEntityPtr oldEnt;
7974
7975
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7976
0
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7977
0
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7978
0
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7979
0
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7980
0
    (ctxt->replaceEntities == 0) &&
7981
0
    (ctxt->validate == 0))
7982
0
    return;
7983
7984
0
            if (entity->flags & XML_ENT_EXPANDING) {
7985
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7986
0
                xmlHaltParser(ctxt);
7987
0
                return;
7988
0
            }
7989
7990
            /* Must be computed from old input before pushing new input. */
7991
0
            parentConsumed = ctxt->input->parentConsumed;
7992
0
            oldEnt = ctxt->input->entity;
7993
0
            if ((oldEnt == NULL) ||
7994
0
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7995
0
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
7996
0
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
7997
0
                xmlSaturatedAddSizeT(&parentConsumed,
7998
0
                                     ctxt->input->cur - ctxt->input->base);
7999
0
            }
8000
8001
0
      input = xmlNewEntityInputStream(ctxt, entity);
8002
0
      if (xmlPushInput(ctxt, input) < 0) {
8003
0
                xmlFreeInputStream(input);
8004
0
    return;
8005
0
            }
8006
8007
0
            entity->flags |= XML_ENT_EXPANDING;
8008
8009
0
            input->parentConsumed = parentConsumed;
8010
8011
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8012
                /*
8013
                 * Get the 4 first bytes and decode the charset
8014
                 * if enc != XML_CHAR_ENCODING_NONE
8015
                 * plug some encoding conversion routines.
8016
                 * Note that, since we may have some non-UTF8
8017
                 * encoding (like UTF16, bug 135229), the 'length'
8018
                 * is not known, but we can calculate based upon
8019
                 * the amount of data in the buffer.
8020
                 */
8021
0
                GROW
8022
0
                if (ctxt->instate == XML_PARSER_EOF)
8023
0
                    return;
8024
0
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8025
0
                    start[0] = RAW;
8026
0
                    start[1] = NXT(1);
8027
0
                    start[2] = NXT(2);
8028
0
                    start[3] = NXT(3);
8029
0
                    enc = xmlDetectCharEncoding(start, 4);
8030
0
                    if (enc != XML_CHAR_ENCODING_NONE) {
8031
0
                        xmlSwitchEncoding(ctxt, enc);
8032
0
                    }
8033
0
                }
8034
8035
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8036
0
                    (IS_BLANK_CH(NXT(5)))) {
8037
0
                    xmlParseTextDecl(ctxt);
8038
0
                }
8039
0
            }
8040
0
  }
8041
0
    }
8042
353
    ctxt->hasPErefs = 1;
8043
353
}
8044
8045
/**
8046
 * xmlLoadEntityContent:
8047
 * @ctxt:  an XML parser context
8048
 * @entity: an unloaded system entity
8049
 *
8050
 * Load the original content of the given system entity from the
8051
 * ExternalID/SystemID given. This is to be used for Included in Literal
8052
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8053
 *
8054
 * Returns 0 in case of success and -1 in case of failure
8055
 */
8056
static int
8057
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8058
0
    xmlParserInputPtr input;
8059
0
    xmlBufferPtr buf;
8060
0
    int l, c;
8061
8062
0
    if ((ctxt == NULL) || (entity == NULL) ||
8063
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8064
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8065
0
  (entity->content != NULL)) {
8066
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8067
0
              "xmlLoadEntityContent parameter error");
8068
0
        return(-1);
8069
0
    }
8070
8071
0
    if (xmlParserDebugEntities)
8072
0
  xmlGenericError(xmlGenericErrorContext,
8073
0
    "Reading %s entity content input\n", entity->name);
8074
8075
0
    buf = xmlBufferCreate();
8076
0
    if (buf == NULL) {
8077
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8078
0
              "xmlLoadEntityContent parameter error");
8079
0
        return(-1);
8080
0
    }
8081
0
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8082
8083
0
    input = xmlNewEntityInputStream(ctxt, entity);
8084
0
    if (input == NULL) {
8085
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8086
0
              "xmlLoadEntityContent input error");
8087
0
  xmlBufferFree(buf);
8088
0
        return(-1);
8089
0
    }
8090
8091
    /*
8092
     * Push the entity as the current input, read char by char
8093
     * saving to the buffer until the end of the entity or an error
8094
     */
8095
0
    if (xmlPushInput(ctxt, input) < 0) {
8096
0
        xmlBufferFree(buf);
8097
0
  xmlFreeInputStream(input);
8098
0
  return(-1);
8099
0
    }
8100
8101
0
    GROW;
8102
0
    c = CUR_CHAR(l);
8103
0
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8104
0
           (IS_CHAR(c))) {
8105
0
        xmlBufferAdd(buf, ctxt->input->cur, l);
8106
0
  NEXTL(l);
8107
0
  c = CUR_CHAR(l);
8108
0
    }
8109
0
    if (ctxt->instate == XML_PARSER_EOF) {
8110
0
  xmlBufferFree(buf);
8111
0
  return(-1);
8112
0
    }
8113
8114
0
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8115
0
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8116
0
        xmlPopInput(ctxt);
8117
0
    } else if (!IS_CHAR(c)) {
8118
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8119
0
                          "xmlLoadEntityContent: invalid char value %d\n",
8120
0
                    c);
8121
0
  xmlBufferFree(buf);
8122
0
  return(-1);
8123
0
    }
8124
0
    entity->content = buf->content;
8125
0
    entity->length = buf->use;
8126
0
    buf->content = NULL;
8127
0
    xmlBufferFree(buf);
8128
8129
0
    return(0);
8130
0
}
8131
8132
/**
8133
 * xmlParseStringPEReference:
8134
 * @ctxt:  an XML parser context
8135
 * @str:  a pointer to an index in the string
8136
 *
8137
 * parse PEReference declarations
8138
 *
8139
 * [69] PEReference ::= '%' Name ';'
8140
 *
8141
 * [ WFC: No Recursion ]
8142
 * A parsed entity must not contain a recursive
8143
 * reference to itself, either directly or indirectly.
8144
 *
8145
 * [ WFC: Entity Declared ]
8146
 * In a document without any DTD, a document with only an internal DTD
8147
 * subset which contains no parameter entity references, or a document
8148
 * with "standalone='yes'", ...  ... The declaration of a parameter
8149
 * entity must precede any reference to it...
8150
 *
8151
 * [ VC: Entity Declared ]
8152
 * In a document with an external subset or external parameter entities
8153
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8154
 * must precede any reference to it...
8155
 *
8156
 * [ WFC: In DTD ]
8157
 * Parameter-entity references may only appear in the DTD.
8158
 * NOTE: misleading but this is handled.
8159
 *
8160
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8161
 *         str is updated to the current value of the index
8162
 */
8163
static xmlEntityPtr
8164
0
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8165
0
    const xmlChar *ptr;
8166
0
    xmlChar cur;
8167
0
    xmlChar *name;
8168
0
    xmlEntityPtr entity = NULL;
8169
8170
0
    if ((str == NULL) || (*str == NULL)) return(NULL);
8171
0
    ptr = *str;
8172
0
    cur = *ptr;
8173
0
    if (cur != '%')
8174
0
        return(NULL);
8175
0
    ptr++;
8176
0
    name = xmlParseStringName(ctxt, &ptr);
8177
0
    if (name == NULL) {
8178
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8179
0
           "xmlParseStringPEReference: no name\n");
8180
0
  *str = ptr;
8181
0
  return(NULL);
8182
0
    }
8183
0
    cur = *ptr;
8184
0
    if (cur != ';') {
8185
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8186
0
  xmlFree(name);
8187
0
  *str = ptr;
8188
0
  return(NULL);
8189
0
    }
8190
0
    ptr++;
8191
8192
    /*
8193
     * Request the entity from SAX
8194
     */
8195
0
    if ((ctxt->sax != NULL) &&
8196
0
  (ctxt->sax->getParameterEntity != NULL))
8197
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8198
0
    if (ctxt->instate == XML_PARSER_EOF) {
8199
0
  xmlFree(name);
8200
0
  *str = ptr;
8201
0
  return(NULL);
8202
0
    }
8203
0
    if (entity == NULL) {
8204
  /*
8205
   * [ WFC: Entity Declared ]
8206
   * In a document without any DTD, a document with only an
8207
   * internal DTD subset which contains no parameter entity
8208
   * references, or a document with "standalone='yes'", ...
8209
   * ... The declaration of a parameter entity must precede
8210
   * any reference to it...
8211
   */
8212
0
  if ((ctxt->standalone == 1) ||
8213
0
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8214
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8215
0
     "PEReference: %%%s; not found\n", name);
8216
0
  } else {
8217
      /*
8218
       * [ VC: Entity Declared ]
8219
       * In a document with an external subset or external
8220
       * parameter entities with "standalone='no'", ...
8221
       * ... The declaration of a parameter entity must
8222
       * precede any reference to it...
8223
       */
8224
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8225
0
        "PEReference: %%%s; not found\n",
8226
0
        name, NULL);
8227
0
      ctxt->valid = 0;
8228
0
  }
8229
0
    } else {
8230
  /*
8231
   * Internal checking in case the entity quest barfed
8232
   */
8233
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8234
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8235
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8236
0
        "%%%s; is not a parameter entity\n",
8237
0
        name, NULL);
8238
0
  }
8239
0
    }
8240
0
    ctxt->hasPErefs = 1;
8241
0
    xmlFree(name);
8242
0
    *str = ptr;
8243
0
    return(entity);
8244
0
}
8245
8246
/**
8247
 * xmlParseDocTypeDecl:
8248
 * @ctxt:  an XML parser context
8249
 *
8250
 * DEPRECATED: Internal function, don't use.
8251
 *
8252
 * parse a DOCTYPE declaration
8253
 *
8254
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8255
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8256
 *
8257
 * [ VC: Root Element Type ]
8258
 * The Name in the document type declaration must match the element
8259
 * type of the root element.
8260
 */
8261
8262
void
8263
2.48k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8264
2.48k
    const xmlChar *name = NULL;
8265
2.48k
    xmlChar *ExternalID = NULL;
8266
2.48k
    xmlChar *URI = NULL;
8267
8268
    /*
8269
     * We know that '<!DOCTYPE' has been detected.
8270
     */
8271
2.48k
    SKIP(9);
8272
8273
2.48k
    SKIP_BLANKS;
8274
8275
    /*
8276
     * Parse the DOCTYPE name.
8277
     */
8278
2.48k
    name = xmlParseName(ctxt);
8279
2.48k
    if (name == NULL) {
8280
7
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8281
7
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8282
7
    }
8283
2.48k
    ctxt->intSubName = name;
8284
8285
2.48k
    SKIP_BLANKS;
8286
8287
    /*
8288
     * Check for SystemID and ExternalID
8289
     */
8290
2.48k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8291
8292
2.48k
    if ((URI != NULL) || (ExternalID != NULL)) {
8293
62
        ctxt->hasExternalSubset = 1;
8294
62
    }
8295
2.48k
    ctxt->extSubURI = URI;
8296
2.48k
    ctxt->extSubSystem = ExternalID;
8297
8298
2.48k
    SKIP_BLANKS;
8299
8300
    /*
8301
     * Create and update the internal subset.
8302
     */
8303
2.48k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8304
2.48k
  (!ctxt->disableSAX))
8305
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8306
2.48k
    if (ctxt->instate == XML_PARSER_EOF)
8307
0
  return;
8308
8309
    /*
8310
     * Is there any internal subset declarations ?
8311
     * they are handled separately in xmlParseInternalSubset()
8312
     */
8313
2.48k
    if (RAW == '[')
8314
2.36k
  return;
8315
8316
    /*
8317
     * We should be at the end of the DOCTYPE declaration.
8318
     */
8319
125
    if (RAW != '>') {
8320
91
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8321
91
    }
8322
125
    NEXT;
8323
125
}
8324
8325
/**
8326
 * xmlParseInternalSubset:
8327
 * @ctxt:  an XML parser context
8328
 *
8329
 * parse the internal subset declaration
8330
 *
8331
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8332
 */
8333
8334
static void
8335
2.38k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8336
    /*
8337
     * Is there any DTD definition ?
8338
     */
8339
2.38k
    if (RAW == '[') {
8340
2.38k
        int baseInputNr = ctxt->inputNr;
8341
2.38k
        ctxt->instate = XML_PARSER_DTD;
8342
2.38k
        NEXT;
8343
  /*
8344
   * Parse the succession of Markup declarations and
8345
   * PEReferences.
8346
   * Subsequence (markupdecl | PEReference | S)*
8347
   */
8348
2.38k
  SKIP_BLANKS;
8349
64.0k
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8350
64.0k
               (ctxt->instate != XML_PARSER_EOF)) {
8351
8352
            /*
8353
             * Conditional sections are allowed from external entities included
8354
             * by PE References in the internal subset.
8355
             */
8356
62.9k
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8357
62.9k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8358
0
                xmlParseConditionalSections(ctxt);
8359
62.9k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8360
49.1k
          xmlParseMarkupDecl(ctxt);
8361
49.1k
            } else if (RAW == '%') {
8362
12.5k
          xmlParsePEReference(ctxt);
8363
12.5k
            } else {
8364
1.30k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8365
1.30k
                        "xmlParseInternalSubset: error detected in"
8366
1.30k
                        " Markup declaration\n");
8367
1.30k
                xmlHaltParser(ctxt);
8368
1.30k
                return;
8369
1.30k
            }
8370
61.6k
      SKIP_BLANKS;
8371
61.6k
            SHRINK;
8372
61.6k
            GROW;
8373
61.6k
  }
8374
1.07k
  if (RAW == ']') {
8375
476
      NEXT;
8376
476
      SKIP_BLANKS;
8377
476
  }
8378
1.07k
    }
8379
8380
    /*
8381
     * We should be at the end of the DOCTYPE declaration.
8382
     */
8383
1.07k
    if (RAW != '>') {
8384
643
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8385
643
  return;
8386
643
    }
8387
431
    NEXT;
8388
431
}
8389
8390
#ifdef LIBXML_SAX1_ENABLED
8391
/**
8392
 * xmlParseAttribute:
8393
 * @ctxt:  an XML parser context
8394
 * @value:  a xmlChar ** used to store the value of the attribute
8395
 *
8396
 * DEPRECATED: Internal function, don't use.
8397
 *
8398
 * parse an attribute
8399
 *
8400
 * [41] Attribute ::= Name Eq AttValue
8401
 *
8402
 * [ WFC: No External Entity References ]
8403
 * Attribute values cannot contain direct or indirect entity references
8404
 * to external entities.
8405
 *
8406
 * [ WFC: No < in Attribute Values ]
8407
 * The replacement text of any entity referred to directly or indirectly in
8408
 * an attribute value (other than "&lt;") must not contain a <.
8409
 *
8410
 * [ VC: Attribute Value Type ]
8411
 * The attribute must have been declared; the value must be of the type
8412
 * declared for it.
8413
 *
8414
 * [25] Eq ::= S? '=' S?
8415
 *
8416
 * With namespace:
8417
 *
8418
 * [NS 11] Attribute ::= QName Eq AttValue
8419
 *
8420
 * Also the case QName == xmlns:??? is handled independently as a namespace
8421
 * definition.
8422
 *
8423
 * Returns the attribute name, and the value in *value.
8424
 */
8425
8426
const xmlChar *
8427
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8428
0
    const xmlChar *name;
8429
0
    xmlChar *val;
8430
8431
0
    *value = NULL;
8432
0
    GROW;
8433
0
    name = xmlParseName(ctxt);
8434
0
    if (name == NULL) {
8435
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8436
0
                 "error parsing attribute name\n");
8437
0
        return(NULL);
8438
0
    }
8439
8440
    /*
8441
     * read the value
8442
     */
8443
0
    SKIP_BLANKS;
8444
0
    if (RAW == '=') {
8445
0
        NEXT;
8446
0
  SKIP_BLANKS;
8447
0
  val = xmlParseAttValue(ctxt);
8448
0
  ctxt->instate = XML_PARSER_CONTENT;
8449
0
    } else {
8450
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8451
0
         "Specification mandates value for attribute %s\n", name);
8452
0
  return(name);
8453
0
    }
8454
8455
    /*
8456
     * Check that xml:lang conforms to the specification
8457
     * No more registered as an error, just generate a warning now
8458
     * since this was deprecated in XML second edition
8459
     */
8460
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8461
0
  if (!xmlCheckLanguageID(val)) {
8462
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8463
0
              "Malformed value for xml:lang : %s\n",
8464
0
        val, NULL);
8465
0
  }
8466
0
    }
8467
8468
    /*
8469
     * Check that xml:space conforms to the specification
8470
     */
8471
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8472
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8473
0
      *(ctxt->space) = 0;
8474
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8475
0
      *(ctxt->space) = 1;
8476
0
  else {
8477
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8478
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8479
0
                                 val, NULL);
8480
0
  }
8481
0
    }
8482
8483
0
    *value = val;
8484
0
    return(name);
8485
0
}
8486
8487
/**
8488
 * xmlParseStartTag:
8489
 * @ctxt:  an XML parser context
8490
 *
8491
 * DEPRECATED: Internal function, don't use.
8492
 *
8493
 * Parse a start tag. Always consumes '<'.
8494
 *
8495
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8496
 *
8497
 * [ WFC: Unique Att Spec ]
8498
 * No attribute name may appear more than once in the same start-tag or
8499
 * empty-element tag.
8500
 *
8501
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8502
 *
8503
 * [ WFC: Unique Att Spec ]
8504
 * No attribute name may appear more than once in the same start-tag or
8505
 * empty-element tag.
8506
 *
8507
 * With namespace:
8508
 *
8509
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8510
 *
8511
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8512
 *
8513
 * Returns the element name parsed
8514
 */
8515
8516
const xmlChar *
8517
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8518
0
    const xmlChar *name;
8519
0
    const xmlChar *attname;
8520
0
    xmlChar *attvalue;
8521
0
    const xmlChar **atts = ctxt->atts;
8522
0
    int nbatts = 0;
8523
0
    int maxatts = ctxt->maxatts;
8524
0
    int i;
8525
8526
0
    if (RAW != '<') return(NULL);
8527
0
    NEXT1;
8528
8529
0
    name = xmlParseName(ctxt);
8530
0
    if (name == NULL) {
8531
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8532
0
       "xmlParseStartTag: invalid element name\n");
8533
0
        return(NULL);
8534
0
    }
8535
8536
    /*
8537
     * Now parse the attributes, it ends up with the ending
8538
     *
8539
     * (S Attribute)* S?
8540
     */
8541
0
    SKIP_BLANKS;
8542
0
    GROW;
8543
8544
0
    while (((RAW != '>') &&
8545
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8546
0
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8547
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8548
0
        if (attname == NULL) {
8549
0
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8550
0
         "xmlParseStartTag: problem parsing attributes\n");
8551
0
      break;
8552
0
  }
8553
0
        if (attvalue != NULL) {
8554
      /*
8555
       * [ WFC: Unique Att Spec ]
8556
       * No attribute name may appear more than once in the same
8557
       * start-tag or empty-element tag.
8558
       */
8559
0
      for (i = 0; i < nbatts;i += 2) {
8560
0
          if (xmlStrEqual(atts[i], attname)) {
8561
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8562
0
        xmlFree(attvalue);
8563
0
        goto failed;
8564
0
    }
8565
0
      }
8566
      /*
8567
       * Add the pair to atts
8568
       */
8569
0
      if (atts == NULL) {
8570
0
          maxatts = 22; /* allow for 10 attrs by default */
8571
0
          atts = (const xmlChar **)
8572
0
           xmlMalloc(maxatts * sizeof(xmlChar *));
8573
0
    if (atts == NULL) {
8574
0
        xmlErrMemory(ctxt, NULL);
8575
0
        if (attvalue != NULL)
8576
0
      xmlFree(attvalue);
8577
0
        goto failed;
8578
0
    }
8579
0
    ctxt->atts = atts;
8580
0
    ctxt->maxatts = maxatts;
8581
0
      } else if (nbatts + 4 > maxatts) {
8582
0
          const xmlChar **n;
8583
8584
0
          maxatts *= 2;
8585
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8586
0
               maxatts * sizeof(const xmlChar *));
8587
0
    if (n == NULL) {
8588
0
        xmlErrMemory(ctxt, NULL);
8589
0
        if (attvalue != NULL)
8590
0
      xmlFree(attvalue);
8591
0
        goto failed;
8592
0
    }
8593
0
    atts = n;
8594
0
    ctxt->atts = atts;
8595
0
    ctxt->maxatts = maxatts;
8596
0
      }
8597
0
      atts[nbatts++] = attname;
8598
0
      atts[nbatts++] = attvalue;
8599
0
      atts[nbatts] = NULL;
8600
0
      atts[nbatts + 1] = NULL;
8601
0
  } else {
8602
0
      if (attvalue != NULL)
8603
0
    xmlFree(attvalue);
8604
0
  }
8605
8606
0
failed:
8607
8608
0
  GROW
8609
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8610
0
      break;
8611
0
  if (SKIP_BLANKS == 0) {
8612
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8613
0
         "attributes construct error\n");
8614
0
  }
8615
0
  SHRINK;
8616
0
        GROW;
8617
0
    }
8618
8619
    /*
8620
     * SAX: Start of Element !
8621
     */
8622
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8623
0
  (!ctxt->disableSAX)) {
8624
0
  if (nbatts > 0)
8625
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8626
0
  else
8627
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8628
0
    }
8629
8630
0
    if (atts != NULL) {
8631
        /* Free only the content strings */
8632
0
        for (i = 1;i < nbatts;i+=2)
8633
0
      if (atts[i] != NULL)
8634
0
         xmlFree((xmlChar *) atts[i]);
8635
0
    }
8636
0
    return(name);
8637
0
}
8638
8639
/**
8640
 * xmlParseEndTag1:
8641
 * @ctxt:  an XML parser context
8642
 * @line:  line of the start tag
8643
 * @nsNr:  number of namespaces on the start tag
8644
 *
8645
 * Parse an end tag. Always consumes '</'.
8646
 *
8647
 * [42] ETag ::= '</' Name S? '>'
8648
 *
8649
 * With namespace
8650
 *
8651
 * [NS 9] ETag ::= '</' QName S? '>'
8652
 */
8653
8654
static void
8655
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8656
0
    const xmlChar *name;
8657
8658
0
    GROW;
8659
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8660
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8661
0
           "xmlParseEndTag: '</' not found\n");
8662
0
  return;
8663
0
    }
8664
0
    SKIP(2);
8665
8666
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8667
8668
    /*
8669
     * We should definitely be at the ending "S? '>'" part
8670
     */
8671
0
    GROW;
8672
0
    SKIP_BLANKS;
8673
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8674
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8675
0
    } else
8676
0
  NEXT1;
8677
8678
    /*
8679
     * [ WFC: Element Type Match ]
8680
     * The Name in an element's end-tag must match the element type in the
8681
     * start-tag.
8682
     *
8683
     */
8684
0
    if (name != (xmlChar*)1) {
8685
0
        if (name == NULL) name = BAD_CAST "unparsable";
8686
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8687
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8688
0
                    ctxt->name, line, name);
8689
0
    }
8690
8691
    /*
8692
     * SAX: End of Tag
8693
     */
8694
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8695
0
  (!ctxt->disableSAX))
8696
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8697
8698
0
    namePop(ctxt);
8699
0
    spacePop(ctxt);
8700
0
    return;
8701
0
}
8702
8703
/**
8704
 * xmlParseEndTag:
8705
 * @ctxt:  an XML parser context
8706
 *
8707
 * DEPRECATED: Internal function, don't use.
8708
 *
8709
 * parse an end of tag
8710
 *
8711
 * [42] ETag ::= '</' Name S? '>'
8712
 *
8713
 * With namespace
8714
 *
8715
 * [NS 9] ETag ::= '</' QName S? '>'
8716
 */
8717
8718
void
8719
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8720
0
    xmlParseEndTag1(ctxt, 0);
8721
0
}
8722
#endif /* LIBXML_SAX1_ENABLED */
8723
8724
/************************************************************************
8725
 *                  *
8726
 *          SAX 2 specific operations       *
8727
 *                  *
8728
 ************************************************************************/
8729
8730
/*
8731
 * xmlGetNamespace:
8732
 * @ctxt:  an XML parser context
8733
 * @prefix:  the prefix to lookup
8734
 *
8735
 * Lookup the namespace name for the @prefix (which ca be NULL)
8736
 * The prefix must come from the @ctxt->dict dictionary
8737
 *
8738
 * Returns the namespace name or NULL if not bound
8739
 */
8740
static const xmlChar *
8741
716k
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8742
716k
    int i;
8743
8744
716k
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8745
801k
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8746
136k
        if (ctxt->nsTab[i] == prefix) {
8747
50.3k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8748
658
          return(NULL);
8749
49.7k
      return(ctxt->nsTab[i + 1]);
8750
50.3k
  }
8751
664k
    return(NULL);
8752
715k
}
8753
8754
/**
8755
 * xmlParseQName:
8756
 * @ctxt:  an XML parser context
8757
 * @prefix:  pointer to store the prefix part
8758
 *
8759
 * parse an XML Namespace QName
8760
 *
8761
 * [6]  QName  ::= (Prefix ':')? LocalPart
8762
 * [7]  Prefix  ::= NCName
8763
 * [8]  LocalPart  ::= NCName
8764
 *
8765
 * Returns the Name parsed or NULL
8766
 */
8767
8768
static const xmlChar *
8769
901k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8770
901k
    const xmlChar *l, *p;
8771
8772
901k
    GROW;
8773
901k
    if (ctxt->instate == XML_PARSER_EOF)
8774
0
        return(NULL);
8775
8776
901k
    l = xmlParseNCName(ctxt);
8777
901k
    if (l == NULL) {
8778
3.70k
        if (CUR == ':') {
8779
3.07k
      l = xmlParseName(ctxt);
8780
3.07k
      if (l != NULL) {
8781
3.07k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8782
3.07k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8783
3.07k
    *prefix = NULL;
8784
3.07k
    return(l);
8785
3.07k
      }
8786
3.07k
  }
8787
627
        return(NULL);
8788
3.70k
    }
8789
898k
    if (CUR == ':') {
8790
31.4k
        NEXT;
8791
31.4k
  p = l;
8792
31.4k
  l = xmlParseNCName(ctxt);
8793
31.4k
  if (l == NULL) {
8794
1.69k
      xmlChar *tmp;
8795
8796
1.69k
            if (ctxt->instate == XML_PARSER_EOF)
8797
0
                return(NULL);
8798
1.69k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8799
1.69k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8800
1.69k
      l = xmlParseNmtoken(ctxt);
8801
1.69k
      if (l == NULL) {
8802
763
                if (ctxt->instate == XML_PARSER_EOF)
8803
0
                    return(NULL);
8804
763
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8805
927
            } else {
8806
927
    tmp = xmlBuildQName(l, p, NULL, 0);
8807
927
    xmlFree((char *)l);
8808
927
      }
8809
1.69k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8810
1.69k
      if (tmp != NULL) xmlFree(tmp);
8811
1.69k
      *prefix = NULL;
8812
1.69k
      return(p);
8813
1.69k
  }
8814
29.7k
  if (CUR == ':') {
8815
5.58k
      xmlChar *tmp;
8816
8817
5.58k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8818
5.58k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8819
5.58k
      NEXT;
8820
5.58k
      tmp = (xmlChar *) xmlParseName(ctxt);
8821
5.58k
      if (tmp != NULL) {
8822
5.23k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8823
5.23k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8824
5.23k
    if (tmp != NULL) xmlFree(tmp);
8825
5.23k
    *prefix = p;
8826
5.23k
    return(l);
8827
5.23k
      }
8828
347
            if (ctxt->instate == XML_PARSER_EOF)
8829
0
                return(NULL);
8830
347
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8831
347
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8832
347
      if (tmp != NULL) xmlFree(tmp);
8833
347
      *prefix = p;
8834
347
      return(l);
8835
347
  }
8836
24.1k
  *prefix = p;
8837
24.1k
    } else
8838
866k
        *prefix = NULL;
8839
890k
    return(l);
8840
898k
}
8841
8842
/**
8843
 * xmlParseQNameAndCompare:
8844
 * @ctxt:  an XML parser context
8845
 * @name:  the localname
8846
 * @prefix:  the prefix, if any.
8847
 *
8848
 * parse an XML name and compares for match
8849
 * (specialized for endtag parsing)
8850
 *
8851
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8852
 * and the name for mismatch
8853
 */
8854
8855
static const xmlChar *
8856
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8857
570
                        xmlChar const *prefix) {
8858
570
    const xmlChar *cmp;
8859
570
    const xmlChar *in;
8860
570
    const xmlChar *ret;
8861
570
    const xmlChar *prefix2;
8862
8863
570
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8864
8865
570
    GROW;
8866
570
    in = ctxt->input->cur;
8867
8868
570
    cmp = prefix;
8869
2.22k
    while (*in != 0 && *in == *cmp) {
8870
1.65k
  ++in;
8871
1.65k
  ++cmp;
8872
1.65k
    }
8873
570
    if ((*cmp == 0) && (*in == ':')) {
8874
552
        in++;
8875
552
  cmp = name;
8876
3.81k
  while (*in != 0 && *in == *cmp) {
8877
3.25k
      ++in;
8878
3.25k
      ++cmp;
8879
3.25k
  }
8880
552
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8881
      /* success */
8882
542
            ctxt->input->col += in - ctxt->input->cur;
8883
542
      ctxt->input->cur = in;
8884
542
      return((const xmlChar*) 1);
8885
542
  }
8886
552
    }
8887
    /*
8888
     * all strings coms from the dictionary, equality can be done directly
8889
     */
8890
28
    ret = xmlParseQName (ctxt, &prefix2);
8891
28
    if ((ret == name) && (prefix == prefix2))
8892
2
  return((const xmlChar*) 1);
8893
26
    return ret;
8894
28
}
8895
8896
/**
8897
 * xmlParseAttValueInternal:
8898
 * @ctxt:  an XML parser context
8899
 * @len:  attribute len result
8900
 * @alloc:  whether the attribute was reallocated as a new string
8901
 * @normalize:  if 1 then further non-CDATA normalization must be done
8902
 *
8903
 * parse a value for an attribute.
8904
 * NOTE: if no normalization is needed, the routine will return pointers
8905
 *       directly from the data buffer.
8906
 *
8907
 * 3.3.3 Attribute-Value Normalization:
8908
 * Before the value of an attribute is passed to the application or
8909
 * checked for validity, the XML processor must normalize it as follows:
8910
 * - a character reference is processed by appending the referenced
8911
 *   character to the attribute value
8912
 * - an entity reference is processed by recursively processing the
8913
 *   replacement text of the entity
8914
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8915
 *   appending #x20 to the normalized value, except that only a single
8916
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8917
 *   parsed entity or the literal entity value of an internal parsed entity
8918
 * - other characters are processed by appending them to the normalized value
8919
 * If the declared value is not CDATA, then the XML processor must further
8920
 * process the normalized attribute value by discarding any leading and
8921
 * trailing space (#x20) characters, and by replacing sequences of space
8922
 * (#x20) characters by a single space (#x20) character.
8923
 * All attributes for which no declaration has been read should be treated
8924
 * by a non-validating parser as if declared CDATA.
8925
 *
8926
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8927
 *     caller if it was copied, this can be detected by val[*len] == 0.
8928
 */
8929
8930
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8931
872
    const xmlChar *oldbase = ctxt->input->base;\
8932
872
    GROW;\
8933
872
    if (ctxt->instate == XML_PARSER_EOF)\
8934
872
        return(NULL);\
8935
872
    if (oldbase != ctxt->input->base) {\
8936
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
8937
0
        start = start + delta;\
8938
0
        in = in + delta;\
8939
0
    }\
8940
872
    end = ctxt->input->end;
8941
8942
static xmlChar *
8943
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8944
                         int normalize)
8945
213k
{
8946
213k
    xmlChar limit = 0;
8947
213k
    const xmlChar *in = NULL, *start, *end, *last;
8948
213k
    xmlChar *ret = NULL;
8949
213k
    int line, col;
8950
213k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8951
213k
                    XML_MAX_HUGE_LENGTH :
8952
213k
                    XML_MAX_TEXT_LENGTH;
8953
8954
213k
    GROW;
8955
213k
    in = (xmlChar *) CUR_PTR;
8956
213k
    line = ctxt->input->line;
8957
213k
    col = ctxt->input->col;
8958
213k
    if (*in != '"' && *in != '\'') {
8959
218
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8960
218
        return (NULL);
8961
218
    }
8962
213k
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8963
8964
    /*
8965
     * try to handle in this routine the most common case where no
8966
     * allocation of a new string is required and where content is
8967
     * pure ASCII.
8968
     */
8969
213k
    limit = *in++;
8970
213k
    col++;
8971
213k
    end = ctxt->input->end;
8972
213k
    start = in;
8973
213k
    if (in >= end) {
8974
22
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8975
22
    }
8976
213k
    if (normalize) {
8977
        /*
8978
   * Skip any leading spaces
8979
   */
8980
14.3k
  while ((in < end) && (*in != limit) &&
8981
14.3k
         ((*in == 0x20) || (*in == 0x9) ||
8982
13.8k
          (*in == 0xA) || (*in == 0xD))) {
8983
10.4k
      if (*in == 0xA) {
8984
235
          line++; col = 1;
8985
10.1k
      } else {
8986
10.1k
          col++;
8987
10.1k
      }
8988
10.4k
      in++;
8989
10.4k
      start = in;
8990
10.4k
      if (in >= end) {
8991
6
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8992
6
                if ((in - start) > maxLength) {
8993
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8994
0
                                   "AttValue length too long\n");
8995
0
                    return(NULL);
8996
0
                }
8997
6
      }
8998
10.4k
  }
8999
57.8k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9000
57.8k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9001
54.1k
      col++;
9002
54.1k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9003
53.9k
      if (in >= end) {
9004
19
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9005
19
                if ((in - start) > maxLength) {
9006
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9007
0
                                   "AttValue length too long\n");
9008
0
                    return(NULL);
9009
0
                }
9010
19
      }
9011
53.9k
  }
9012
3.93k
  last = in;
9013
  /*
9014
   * skip the trailing blanks
9015
   */
9016
4.25k
  while ((last[-1] == 0x20) && (last > start)) last--;
9017
5.97k
  while ((in < end) && (*in != limit) &&
9018
5.97k
         ((*in == 0x20) || (*in == 0x9) ||
9019
4.09k
          (*in == 0xA) || (*in == 0xD))) {
9020
2.03k
      if (*in == 0xA) {
9021
778
          line++, col = 1;
9022
1.26k
      } else {
9023
1.26k
          col++;
9024
1.26k
      }
9025
2.03k
      in++;
9026
2.03k
      if (in >= end) {
9027
14
    const xmlChar *oldbase = ctxt->input->base;
9028
14
    GROW;
9029
14
                if (ctxt->instate == XML_PARSER_EOF)
9030
0
                    return(NULL);
9031
14
    if (oldbase != ctxt->input->base) {
9032
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9033
0
        start = start + delta;
9034
0
        in = in + delta;
9035
0
        last = last + delta;
9036
0
    }
9037
14
    end = ctxt->input->end;
9038
14
                if ((in - start) > maxLength) {
9039
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9040
0
                                   "AttValue length too long\n");
9041
0
                    return(NULL);
9042
0
                }
9043
14
      }
9044
2.03k
  }
9045
3.93k
        if ((in - start) > maxLength) {
9046
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9047
0
                           "AttValue length too long\n");
9048
0
            return(NULL);
9049
0
        }
9050
3.93k
  if (*in != limit) goto need_complex;
9051
209k
    } else {
9052
1.93M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9053
1.93M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9054
1.72M
      in++;
9055
1.72M
      col++;
9056
1.72M
      if (in >= end) {
9057
825
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9058
825
                if ((in - start) > maxLength) {
9059
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9060
0
                                   "AttValue length too long\n");
9061
0
                    return(NULL);
9062
0
                }
9063
825
      }
9064
1.72M
  }
9065
209k
  last = in;
9066
209k
        if ((in - start) > maxLength) {
9067
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9068
0
                           "AttValue length too long\n");
9069
0
            return(NULL);
9070
0
        }
9071
209k
  if (*in != limit) goto need_complex;
9072
209k
    }
9073
182k
    in++;
9074
182k
    col++;
9075
182k
    if (len != NULL) {
9076
170k
        if (alloc) *alloc = 0;
9077
170k
        *len = last - start;
9078
170k
        ret = (xmlChar *) start;
9079
170k
    } else {
9080
12.0k
        if (alloc) *alloc = 1;
9081
12.0k
        ret = xmlStrndup(start, last - start);
9082
12.0k
    }
9083
182k
    CUR_PTR = in;
9084
182k
    ctxt->input->line = line;
9085
182k
    ctxt->input->col = col;
9086
182k
    return ret;
9087
30.7k
need_complex:
9088
30.7k
    if (alloc) *alloc = 1;
9089
30.7k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9090
213k
}
9091
9092
/**
9093
 * xmlParseAttribute2:
9094
 * @ctxt:  an XML parser context
9095
 * @pref:  the element prefix
9096
 * @elem:  the element name
9097
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9098
 * @value:  a xmlChar ** used to store the value of the attribute
9099
 * @len:  an int * to save the length of the attribute
9100
 * @alloc:  an int * to indicate if the attribute was allocated
9101
 *
9102
 * parse an attribute in the new SAX2 framework.
9103
 *
9104
 * Returns the attribute name, and the value in *value, .
9105
 */
9106
9107
static const xmlChar *
9108
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9109
                   const xmlChar * pref, const xmlChar * elem,
9110
                   const xmlChar ** prefix, xmlChar ** value,
9111
                   int *len, int *alloc)
9112
200k
{
9113
200k
    const xmlChar *name;
9114
200k
    xmlChar *val, *internal_val = NULL;
9115
200k
    int normalize = 0;
9116
9117
200k
    *value = NULL;
9118
200k
    GROW;
9119
200k
    name = xmlParseQName(ctxt, prefix);
9120
200k
    if (name == NULL) {
9121
460
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9122
460
                       "error parsing attribute name\n");
9123
460
        return (NULL);
9124
460
    }
9125
9126
    /*
9127
     * get the type if needed
9128
     */
9129
200k
    if (ctxt->attsSpecial != NULL) {
9130
8.05k
        int type;
9131
9132
8.05k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9133
8.05k
                                                 pref, elem, *prefix, name);
9134
8.05k
        if (type != 0)
9135
3.94k
            normalize = 1;
9136
8.05k
    }
9137
9138
    /*
9139
     * read the value
9140
     */
9141
200k
    SKIP_BLANKS;
9142
200k
    if (RAW == '=') {
9143
199k
        NEXT;
9144
199k
        SKIP_BLANKS;
9145
199k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9146
199k
        if (val == NULL)
9147
47
            return (NULL);
9148
199k
  if (normalize) {
9149
      /*
9150
       * Sometimes a second normalisation pass for spaces is needed
9151
       * but that only happens if charrefs or entities references
9152
       * have been used in the attribute value, i.e. the attribute
9153
       * value have been extracted in an allocated string already.
9154
       */
9155
3.93k
      if (*alloc) {
9156
2.09k
          const xmlChar *val2;
9157
9158
2.09k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9159
2.09k
    if ((val2 != NULL) && (val2 != val)) {
9160
176
        xmlFree(val);
9161
176
        val = (xmlChar *) val2;
9162
176
    }
9163
2.09k
      }
9164
3.93k
  }
9165
199k
        ctxt->instate = XML_PARSER_CONTENT;
9166
199k
    } else {
9167
359
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9168
359
                          "Specification mandates value for attribute %s\n",
9169
359
                          name);
9170
359
        return (name);
9171
359
    }
9172
9173
199k
    if (*prefix == ctxt->str_xml) {
9174
        /*
9175
         * Check that xml:lang conforms to the specification
9176
         * No more registered as an error, just generate a warning now
9177
         * since this was deprecated in XML second edition
9178
         */
9179
522
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9180
0
            internal_val = xmlStrndup(val, *len);
9181
0
            if (!xmlCheckLanguageID(internal_val)) {
9182
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9183
0
                              "Malformed value for xml:lang : %s\n",
9184
0
                              internal_val, NULL);
9185
0
            }
9186
0
        }
9187
9188
        /*
9189
         * Check that xml:space conforms to the specification
9190
         */
9191
522
        if (xmlStrEqual(name, BAD_CAST "space")) {
9192
100
            internal_val = xmlStrndup(val, *len);
9193
100
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9194
5
                *(ctxt->space) = 0;
9195
95
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9196
22
                *(ctxt->space) = 1;
9197
73
            else {
9198
73
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9199
73
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9200
73
                              internal_val, NULL);
9201
73
            }
9202
100
        }
9203
522
        if (internal_val) {
9204
100
            xmlFree(internal_val);
9205
100
        }
9206
522
    }
9207
9208
199k
    *value = val;
9209
199k
    return (name);
9210
200k
}
9211
/**
9212
 * xmlParseStartTag2:
9213
 * @ctxt:  an XML parser context
9214
 *
9215
 * Parse a start tag. Always consumes '<'.
9216
 *
9217
 * This routine is called when running SAX2 parsing
9218
 *
9219
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9220
 *
9221
 * [ WFC: Unique Att Spec ]
9222
 * No attribute name may appear more than once in the same start-tag or
9223
 * empty-element tag.
9224
 *
9225
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9226
 *
9227
 * [ WFC: Unique Att Spec ]
9228
 * No attribute name may appear more than once in the same start-tag or
9229
 * empty-element tag.
9230
 *
9231
 * With namespace:
9232
 *
9233
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9234
 *
9235
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9236
 *
9237
 * Returns the element name parsed
9238
 */
9239
9240
static const xmlChar *
9241
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9242
701k
                  const xmlChar **URI, int *tlen) {
9243
701k
    const xmlChar *localname;
9244
701k
    const xmlChar *prefix;
9245
701k
    const xmlChar *attname;
9246
701k
    const xmlChar *aprefix;
9247
701k
    const xmlChar *nsname;
9248
701k
    xmlChar *attvalue;
9249
701k
    const xmlChar **atts = ctxt->atts;
9250
701k
    int maxatts = ctxt->maxatts;
9251
701k
    int nratts, nbatts, nbdef, inputid;
9252
701k
    int i, j, nbNs, attval;
9253
701k
    size_t cur;
9254
701k
    int nsNr = ctxt->nsNr;
9255
9256
701k
    if (RAW != '<') return(NULL);
9257
701k
    NEXT1;
9258
9259
701k
    cur = ctxt->input->cur - ctxt->input->base;
9260
701k
    inputid = ctxt->input->id;
9261
701k
    nbatts = 0;
9262
701k
    nratts = 0;
9263
701k
    nbdef = 0;
9264
701k
    nbNs = 0;
9265
701k
    attval = 0;
9266
    /* Forget any namespaces added during an earlier parse of this element. */
9267
701k
    ctxt->nsNr = nsNr;
9268
9269
701k
    localname = xmlParseQName(ctxt, &prefix);
9270
701k
    if (localname == NULL) {
9271
162
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9272
162
           "StartTag: invalid element name\n");
9273
162
        return(NULL);
9274
162
    }
9275
701k
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9276
9277
    /*
9278
     * Now parse the attributes, it ends up with the ending
9279
     *
9280
     * (S Attribute)* S?
9281
     */
9282
701k
    SKIP_BLANKS;
9283
701k
    GROW;
9284
9285
850k
    while (((RAW != '>') &&
9286
850k
     ((RAW != '/') || (NXT(1) != '>')) &&
9287
850k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9288
200k
  int len = -1, alloc = 0;
9289
9290
200k
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9291
200k
                               &aprefix, &attvalue, &len, &alloc);
9292
200k
        if (attname == NULL) {
9293
507
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9294
507
           "xmlParseStartTag: problem parsing attributes\n");
9295
507
      break;
9296
507
  }
9297
200k
        if (attvalue == NULL)
9298
359
            goto next_attr;
9299
199k
  if (len < 0) len = xmlStrlen(attvalue);
9300
9301
199k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9302
23.2k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9303
23.2k
            xmlURIPtr uri;
9304
9305
23.2k
            if (URL == NULL) {
9306
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9307
0
                if ((attvalue != NULL) && (alloc != 0))
9308
0
                    xmlFree(attvalue);
9309
0
                localname = NULL;
9310
0
                goto done;
9311
0
            }
9312
23.2k
            if (*URL != 0) {
9313
22.9k
                uri = xmlParseURI((const char *) URL);
9314
22.9k
                if (uri == NULL) {
9315
15.0k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9316
15.0k
                             "xmlns: '%s' is not a valid URI\n",
9317
15.0k
                                       URL, NULL, NULL);
9318
15.0k
                } else {
9319
7.88k
                    if (uri->scheme == NULL) {
9320
2.97k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9321
2.97k
                                  "xmlns: URI %s is not absolute\n",
9322
2.97k
                                  URL, NULL, NULL);
9323
2.97k
                    }
9324
7.88k
                    xmlFreeURI(uri);
9325
7.88k
                }
9326
22.9k
                if (URL == ctxt->str_xml_ns) {
9327
149
                    if (attname != ctxt->str_xml) {
9328
149
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9329
149
                     "xml namespace URI cannot be the default namespace\n",
9330
149
                                 NULL, NULL, NULL);
9331
149
                    }
9332
149
                    goto next_attr;
9333
149
                }
9334
22.7k
                if ((len == 29) &&
9335
22.7k
                    (xmlStrEqual(URL,
9336
1.45k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9337
53
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9338
53
                         "reuse of the xmlns namespace name is forbidden\n",
9339
53
                             NULL, NULL, NULL);
9340
53
                    goto next_attr;
9341
53
                }
9342
22.7k
            }
9343
            /*
9344
             * check that it's not a defined namespace
9345
             */
9346
25.3k
            for (j = 1;j <= nbNs;j++)
9347
19.6k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9348
17.3k
                    break;
9349
23.0k
            if (j <= nbNs)
9350
17.3k
                xmlErrAttributeDup(ctxt, NULL, attname);
9351
5.69k
            else
9352
5.69k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9353
9354
176k
        } else if (aprefix == ctxt->str_xmlns) {
9355
22.2k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9356
22.2k
            xmlURIPtr uri;
9357
9358
22.2k
            if (attname == ctxt->str_xml) {
9359
70
                if (URL != ctxt->str_xml_ns) {
9360
62
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9361
62
                             "xml namespace prefix mapped to wrong URI\n",
9362
62
                             NULL, NULL, NULL);
9363
62
                }
9364
                /*
9365
                 * Do not keep a namespace definition node
9366
                 */
9367
70
                goto next_attr;
9368
70
            }
9369
22.2k
            if (URL == ctxt->str_xml_ns) {
9370
337
                if (attname != ctxt->str_xml) {
9371
337
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9372
337
                             "xml namespace URI mapped to wrong prefix\n",
9373
337
                             NULL, NULL, NULL);
9374
337
                }
9375
337
                goto next_attr;
9376
337
            }
9377
21.8k
            if (attname == ctxt->str_xmlns) {
9378
294
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9379
294
                         "redefinition of the xmlns prefix is forbidden\n",
9380
294
                         NULL, NULL, NULL);
9381
294
                goto next_attr;
9382
294
            }
9383
21.5k
            if ((len == 29) &&
9384
21.5k
                (xmlStrEqual(URL,
9385
833
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9386
273
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9387
273
                         "reuse of the xmlns namespace name is forbidden\n",
9388
273
                         NULL, NULL, NULL);
9389
273
                goto next_attr;
9390
273
            }
9391
21.3k
            if ((URL == NULL) || (URL[0] == 0)) {
9392
904
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9393
904
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9394
904
                              attname, NULL, NULL);
9395
904
                goto next_attr;
9396
20.4k
            } else {
9397
20.4k
                uri = xmlParseURI((const char *) URL);
9398
20.4k
                if (uri == NULL) {
9399
15.3k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9400
15.3k
                         "xmlns:%s: '%s' is not a valid URI\n",
9401
15.3k
                                       attname, URL, NULL);
9402
15.3k
                } else {
9403
5.10k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9404
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9405
0
                                  "xmlns:%s: URI %s is not absolute\n",
9406
0
                                  attname, URL, NULL);
9407
0
                    }
9408
5.10k
                    xmlFreeURI(uri);
9409
5.10k
                }
9410
20.4k
            }
9411
9412
            /*
9413
             * check that it's not a defined namespace
9414
             */
9415
52.8k
            for (j = 1;j <= nbNs;j++)
9416
50.2k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9417
17.8k
                    break;
9418
20.4k
            if (j <= nbNs)
9419
17.8k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9420
2.59k
            else
9421
2.59k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9422
9423
154k
        } else {
9424
            /*
9425
             * Add the pair to atts
9426
             */
9427
154k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9428
2.23k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9429
0
                    goto next_attr;
9430
0
                }
9431
2.23k
                maxatts = ctxt->maxatts;
9432
2.23k
                atts = ctxt->atts;
9433
2.23k
            }
9434
154k
            ctxt->attallocs[nratts++] = alloc;
9435
154k
            atts[nbatts++] = attname;
9436
154k
            atts[nbatts++] = aprefix;
9437
            /*
9438
             * The namespace URI field is used temporarily to point at the
9439
             * base of the current input buffer for non-alloced attributes.
9440
             * When the input buffer is reallocated, all the pointers become
9441
             * invalid, but they can be reconstructed later.
9442
             */
9443
154k
            if (alloc)
9444
6.48k
                atts[nbatts++] = NULL;
9445
147k
            else
9446
147k
                atts[nbatts++] = ctxt->input->base;
9447
154k
            atts[nbatts++] = attvalue;
9448
154k
            attvalue += len;
9449
154k
            atts[nbatts++] = attvalue;
9450
            /*
9451
             * tag if some deallocation is needed
9452
             */
9453
154k
            if (alloc != 0) attval = 1;
9454
154k
            attvalue = NULL; /* moved into atts */
9455
154k
        }
9456
9457
200k
next_attr:
9458
200k
        if ((attvalue != NULL) && (alloc != 0)) {
9459
22.4k
            xmlFree(attvalue);
9460
22.4k
            attvalue = NULL;
9461
22.4k
        }
9462
9463
200k
  GROW
9464
200k
        if (ctxt->instate == XML_PARSER_EOF)
9465
0
            break;
9466
200k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9467
47.6k
      break;
9468
152k
  if (SKIP_BLANKS == 0) {
9469
2.67k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9470
2.67k
         "attributes construct error\n");
9471
2.67k
      break;
9472
2.67k
  }
9473
149k
        GROW;
9474
149k
    }
9475
9476
701k
    if (ctxt->input->id != inputid) {
9477
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9478
0
                    "Unexpected change of input\n");
9479
0
        localname = NULL;
9480
0
        goto done;
9481
0
    }
9482
9483
    /* Reconstruct attribute value pointers. */
9484
855k
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9485
154k
        if (atts[i+2] != NULL) {
9486
            /*
9487
             * Arithmetic on dangling pointers is technically undefined
9488
             * behavior, but well...
9489
             */
9490
147k
            const xmlChar *old = atts[i+2];
9491
147k
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9492
147k
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9493
147k
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9494
147k
        }
9495
154k
    }
9496
9497
    /*
9498
     * The attributes defaulting
9499
     */
9500
701k
    if (ctxt->attsDefault != NULL) {
9501
10.1k
        xmlDefAttrsPtr defaults;
9502
9503
10.1k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9504
10.1k
  if (defaults != NULL) {
9505
18.9k
      for (i = 0;i < defaults->nbAttrs;i++) {
9506
15.2k
          attname = defaults->values[5 * i];
9507
15.2k
    aprefix = defaults->values[5 * i + 1];
9508
9509
                /*
9510
     * special work for namespaces defaulted defs
9511
     */
9512
15.2k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9513
        /*
9514
         * check that it's not a defined namespace
9515
         */
9516
3.00k
        for (j = 1;j <= nbNs;j++)
9517
1.90k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9518
18
          break;
9519
1.11k
              if (j <= nbNs) continue;
9520
9521
1.09k
        nsname = xmlGetNamespace(ctxt, NULL);
9522
1.09k
        if (nsname != defaults->values[5 * i + 2]) {
9523
791
      if (nsPush(ctxt, NULL,
9524
791
                 defaults->values[5 * i + 2]) > 0)
9525
618
          nbNs++;
9526
791
        }
9527
14.1k
    } else if (aprefix == ctxt->str_xmlns) {
9528
        /*
9529
         * check that it's not a defined namespace
9530
         */
9531
9.76k
        for (j = 1;j <= nbNs;j++)
9532
5.37k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9533
5
          break;
9534
4.39k
              if (j <= nbNs) continue;
9535
9536
4.39k
        nsname = xmlGetNamespace(ctxt, attname);
9537
4.39k
        if (nsname != defaults->values[5 * i + 2]) {
9538
2.65k
      if (nsPush(ctxt, attname,
9539
2.65k
                 defaults->values[5 * i + 2]) > 0)
9540
2.61k
          nbNs++;
9541
2.65k
        }
9542
9.70k
    } else {
9543
        /*
9544
         * check that it's not a defined attribute
9545
         */
9546
22.3k
        for (j = 0;j < nbatts;j+=5) {
9547
13.9k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9548
1.32k
          break;
9549
13.9k
        }
9550
9.70k
        if (j < nbatts) continue;
9551
9552
8.38k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9553
121
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9554
0
                            localname = NULL;
9555
0
                            goto done;
9556
0
      }
9557
121
      maxatts = ctxt->maxatts;
9558
121
      atts = ctxt->atts;
9559
121
        }
9560
8.38k
        atts[nbatts++] = attname;
9561
8.38k
        atts[nbatts++] = aprefix;
9562
8.38k
        if (aprefix == NULL)
9563
6.11k
      atts[nbatts++] = NULL;
9564
2.26k
        else
9565
2.26k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9566
8.38k
        atts[nbatts++] = defaults->values[5 * i + 2];
9567
8.38k
        atts[nbatts++] = defaults->values[5 * i + 3];
9568
8.38k
        if ((ctxt->standalone == 1) &&
9569
8.38k
            (defaults->values[5 * i + 4] != NULL)) {
9570
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9571
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9572
0
                                   attname, localname);
9573
0
        }
9574
8.38k
        nbdef++;
9575
8.38k
    }
9576
15.2k
      }
9577
3.71k
  }
9578
10.1k
    }
9579
9580
    /*
9581
     * The attributes checkings
9582
     */
9583
863k
    for (i = 0; i < nbatts;i += 5) {
9584
        /*
9585
  * The default namespace does not apply to attribute names.
9586
  */
9587
162k
  if (atts[i + 1] != NULL) {
9588
7.16k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9589
7.16k
      if (nsname == NULL) {
9590
5.15k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9591
5.15k
        "Namespace prefix %s for %s on %s is not defined\n",
9592
5.15k
        atts[i + 1], atts[i], localname);
9593
5.15k
      }
9594
7.16k
      atts[i + 2] = nsname;
9595
7.16k
  } else
9596
155k
      nsname = NULL;
9597
  /*
9598
   * [ WFC: Unique Att Spec ]
9599
   * No attribute name may appear more than once in the same
9600
   * start-tag or empty-element tag.
9601
   * As extended by the Namespace in XML REC.
9602
   */
9603
16.8M
        for (j = 0; j < i;j += 5) {
9604
16.6M
      if (atts[i] == atts[j]) {
9605
54.0k
          if (atts[i+1] == atts[j+1]) {
9606
48.6k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9607
48.6k
        break;
9608
48.6k
    }
9609
5.33k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9610
0
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9611
0
           "Namespaced Attribute %s in '%s' redefined\n",
9612
0
           atts[i], nsname, NULL);
9613
0
        break;
9614
0
    }
9615
5.33k
      }
9616
16.6M
  }
9617
162k
    }
9618
9619
701k
    nsname = xmlGetNamespace(ctxt, prefix);
9620
701k
    if ((prefix != NULL) && (nsname == NULL)) {
9621
1.93k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9622
1.93k
           "Namespace prefix %s on %s is not defined\n",
9623
1.93k
     prefix, localname, NULL);
9624
1.93k
    }
9625
701k
    *pref = prefix;
9626
701k
    *URI = nsname;
9627
9628
    /*
9629
     * SAX: Start of Element !
9630
     */
9631
701k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9632
701k
  (!ctxt->disableSAX)) {
9633
697k
  if (nbNs > 0)
9634
3.25k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9635
3.25k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9636
3.25k
        nbatts / 5, nbdef, atts);
9637
694k
  else
9638
694k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9639
694k
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9640
697k
    }
9641
9642
701k
done:
9643
    /*
9644
     * Free up attribute allocated strings if needed
9645
     */
9646
701k
    if (attval != 0) {
9647
54.4k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9648
51.7k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9649
6.48k
          xmlFree((xmlChar *) atts[i]);
9650
2.64k
    }
9651
9652
701k
    return(localname);
9653
701k
}
9654
9655
/**
9656
 * xmlParseEndTag2:
9657
 * @ctxt:  an XML parser context
9658
 * @line:  line of the start tag
9659
 * @nsNr:  number of namespaces on the start tag
9660
 *
9661
 * Parse an end tag. Always consumes '</'.
9662
 *
9663
 * [42] ETag ::= '</' Name S? '>'
9664
 *
9665
 * With namespace
9666
 *
9667
 * [NS 9] ETag ::= '</' QName S? '>'
9668
 */
9669
9670
static void
9671
28.1k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9672
28.1k
    const xmlChar *name;
9673
9674
28.1k
    GROW;
9675
28.1k
    if ((RAW != '<') || (NXT(1) != '/')) {
9676
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9677
0
  return;
9678
0
    }
9679
28.1k
    SKIP(2);
9680
9681
28.1k
    if (tag->prefix == NULL)
9682
27.6k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9683
570
    else
9684
570
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9685
9686
    /*
9687
     * We should definitely be at the ending "S? '>'" part
9688
     */
9689
28.1k
    GROW;
9690
28.1k
    if (ctxt->instate == XML_PARSER_EOF)
9691
0
        return;
9692
28.1k
    SKIP_BLANKS;
9693
28.1k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9694
126
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9695
126
    } else
9696
28.0k
  NEXT1;
9697
9698
    /*
9699
     * [ WFC: Element Type Match ]
9700
     * The Name in an element's end-tag must match the element type in the
9701
     * start-tag.
9702
     *
9703
     */
9704
28.1k
    if (name != (xmlChar*)1) {
9705
164
        if (name == NULL) name = BAD_CAST "unparsable";
9706
164
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9707
164
         "Opening and ending tag mismatch: %s line %d and %s\n",
9708
164
                    ctxt->name, tag->line, name);
9709
164
    }
9710
9711
    /*
9712
     * SAX: End of Tag
9713
     */
9714
28.1k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9715
28.1k
  (!ctxt->disableSAX))
9716
27.9k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9717
27.9k
                                tag->URI);
9718
9719
28.1k
    spacePop(ctxt);
9720
28.1k
    if (tag->nsNr != 0)
9721
284
  nsPop(ctxt, tag->nsNr);
9722
28.1k
}
9723
9724
/**
9725
 * xmlParseCDSect:
9726
 * @ctxt:  an XML parser context
9727
 *
9728
 * DEPRECATED: Internal function, don't use.
9729
 *
9730
 * Parse escaped pure raw content. Always consumes '<!['.
9731
 *
9732
 * [18] CDSect ::= CDStart CData CDEnd
9733
 *
9734
 * [19] CDStart ::= '<![CDATA['
9735
 *
9736
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9737
 *
9738
 * [21] CDEnd ::= ']]>'
9739
 */
9740
void
9741
0
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9742
0
    xmlChar *buf = NULL;
9743
0
    int len = 0;
9744
0
    int size = XML_PARSER_BUFFER_SIZE;
9745
0
    int r, rl;
9746
0
    int s, sl;
9747
0
    int cur, l;
9748
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9749
0
                    XML_MAX_HUGE_LENGTH :
9750
0
                    XML_MAX_TEXT_LENGTH;
9751
9752
0
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9753
0
        return;
9754
0
    SKIP(3);
9755
9756
0
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9757
0
        return;
9758
0
    SKIP(6);
9759
9760
0
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9761
0
    r = CUR_CHAR(rl);
9762
0
    if (!IS_CHAR(r)) {
9763
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9764
0
        goto out;
9765
0
    }
9766
0
    NEXTL(rl);
9767
0
    s = CUR_CHAR(sl);
9768
0
    if (!IS_CHAR(s)) {
9769
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9770
0
        goto out;
9771
0
    }
9772
0
    NEXTL(sl);
9773
0
    cur = CUR_CHAR(l);
9774
0
    buf = (xmlChar *) xmlMallocAtomic(size);
9775
0
    if (buf == NULL) {
9776
0
  xmlErrMemory(ctxt, NULL);
9777
0
        goto out;
9778
0
    }
9779
0
    while (IS_CHAR(cur) &&
9780
0
           ((r != ']') || (s != ']') || (cur != '>'))) {
9781
0
  if (len + 5 >= size) {
9782
0
      xmlChar *tmp;
9783
9784
0
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9785
0
      if (tmp == NULL) {
9786
0
    xmlErrMemory(ctxt, NULL);
9787
0
                goto out;
9788
0
      }
9789
0
      buf = tmp;
9790
0
      size *= 2;
9791
0
  }
9792
0
  COPY_BUF(rl,buf,len,r);
9793
0
        if (len > maxLength) {
9794
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9795
0
                           "CData section too big found\n");
9796
0
            goto out;
9797
0
        }
9798
0
  r = s;
9799
0
  rl = sl;
9800
0
  s = cur;
9801
0
  sl = l;
9802
0
  NEXTL(l);
9803
0
  cur = CUR_CHAR(l);
9804
0
    }
9805
0
    buf[len] = 0;
9806
0
    if (ctxt->instate == XML_PARSER_EOF) {
9807
0
        xmlFree(buf);
9808
0
        return;
9809
0
    }
9810
0
    if (cur != '>') {
9811
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9812
0
                       "CData section not finished\n%.50s\n", buf);
9813
0
        goto out;
9814
0
    }
9815
0
    NEXTL(l);
9816
9817
    /*
9818
     * OK the buffer is to be consumed as cdata.
9819
     */
9820
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9821
0
  if (ctxt->sax->cdataBlock != NULL)
9822
0
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9823
0
  else if (ctxt->sax->characters != NULL)
9824
0
      ctxt->sax->characters(ctxt->userData, buf, len);
9825
0
    }
9826
9827
0
out:
9828
0
    if (ctxt->instate != XML_PARSER_EOF)
9829
0
        ctxt->instate = XML_PARSER_CONTENT;
9830
0
    xmlFree(buf);
9831
0
}
9832
9833
/**
9834
 * xmlParseContentInternal:
9835
 * @ctxt:  an XML parser context
9836
 *
9837
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9838
 * unexpected EOF to the caller.
9839
 */
9840
9841
static void
9842
0
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9843
0
    int nameNr = ctxt->nameNr;
9844
9845
0
    GROW;
9846
0
    while ((RAW != 0) &&
9847
0
     (ctxt->instate != XML_PARSER_EOF)) {
9848
0
  const xmlChar *cur = ctxt->input->cur;
9849
9850
  /*
9851
   * First case : a Processing Instruction.
9852
   */
9853
0
  if ((*cur == '<') && (cur[1] == '?')) {
9854
0
      xmlParsePI(ctxt);
9855
0
  }
9856
9857
  /*
9858
   * Second case : a CDSection
9859
   */
9860
  /* 2.6.0 test was *cur not RAW */
9861
0
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9862
0
      xmlParseCDSect(ctxt);
9863
0
  }
9864
9865
  /*
9866
   * Third case :  a comment
9867
   */
9868
0
  else if ((*cur == '<') && (NXT(1) == '!') &&
9869
0
     (NXT(2) == '-') && (NXT(3) == '-')) {
9870
0
      xmlParseComment(ctxt);
9871
0
      ctxt->instate = XML_PARSER_CONTENT;
9872
0
  }
9873
9874
  /*
9875
   * Fourth case :  a sub-element.
9876
   */
9877
0
  else if (*cur == '<') {
9878
0
            if (NXT(1) == '/') {
9879
0
                if (ctxt->nameNr <= nameNr)
9880
0
                    break;
9881
0
          xmlParseElementEnd(ctxt);
9882
0
            } else {
9883
0
          xmlParseElementStart(ctxt);
9884
0
            }
9885
0
  }
9886
9887
  /*
9888
   * Fifth case : a reference. If if has not been resolved,
9889
   *    parsing returns it's Name, create the node
9890
   */
9891
9892
0
  else if (*cur == '&') {
9893
0
      xmlParseReference(ctxt);
9894
0
  }
9895
9896
  /*
9897
   * Last case, text. Note that References are handled directly.
9898
   */
9899
0
  else {
9900
0
      xmlParseCharDataInternal(ctxt, 0);
9901
0
  }
9902
9903
0
  SHRINK;
9904
0
  GROW;
9905
0
    }
9906
0
}
9907
9908
/**
9909
 * xmlParseContent:
9910
 * @ctxt:  an XML parser context
9911
 *
9912
 * Parse a content sequence. Stops at EOF or '</'.
9913
 *
9914
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9915
 */
9916
9917
void
9918
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9919
0
    int nameNr = ctxt->nameNr;
9920
9921
0
    xmlParseContentInternal(ctxt);
9922
9923
0
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9924
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9925
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9926
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9927
0
                "Premature end of data in tag %s line %d\n",
9928
0
    name, line, NULL);
9929
0
    }
9930
0
}
9931
9932
/**
9933
 * xmlParseElement:
9934
 * @ctxt:  an XML parser context
9935
 *
9936
 * DEPRECATED: Internal function, don't use.
9937
 *
9938
 * parse an XML element
9939
 *
9940
 * [39] element ::= EmptyElemTag | STag content ETag
9941
 *
9942
 * [ WFC: Element Type Match ]
9943
 * The Name in an element's end-tag must match the element type in the
9944
 * start-tag.
9945
 *
9946
 */
9947
9948
void
9949
0
xmlParseElement(xmlParserCtxtPtr ctxt) {
9950
0
    if (xmlParseElementStart(ctxt) != 0)
9951
0
        return;
9952
9953
0
    xmlParseContentInternal(ctxt);
9954
0
    if (ctxt->instate == XML_PARSER_EOF)
9955
0
  return;
9956
9957
0
    if (CUR == 0) {
9958
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9959
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9960
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9961
0
                "Premature end of data in tag %s line %d\n",
9962
0
    name, line, NULL);
9963
0
        return;
9964
0
    }
9965
9966
0
    xmlParseElementEnd(ctxt);
9967
0
}
9968
9969
/**
9970
 * xmlParseElementStart:
9971
 * @ctxt:  an XML parser context
9972
 *
9973
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9974
 * opening tag was parsed, 1 if an empty element was parsed.
9975
 *
9976
 * Always consumes '<'.
9977
 */
9978
static int
9979
0
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9980
0
    const xmlChar *name;
9981
0
    const xmlChar *prefix = NULL;
9982
0
    const xmlChar *URI = NULL;
9983
0
    xmlParserNodeInfo node_info;
9984
0
    int line, tlen = 0;
9985
0
    xmlNodePtr cur;
9986
0
    int nsNr = ctxt->nsNr;
9987
9988
0
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9989
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9990
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9991
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9992
0
        xmlParserMaxDepth);
9993
0
  xmlHaltParser(ctxt);
9994
0
  return(-1);
9995
0
    }
9996
9997
    /* Capture start position */
9998
0
    if (ctxt->record_info) {
9999
0
        node_info.begin_pos = ctxt->input->consumed +
10000
0
                          (CUR_PTR - ctxt->input->base);
10001
0
  node_info.begin_line = ctxt->input->line;
10002
0
    }
10003
10004
0
    if (ctxt->spaceNr == 0)
10005
0
  spacePush(ctxt, -1);
10006
0
    else if (*ctxt->space == -2)
10007
0
  spacePush(ctxt, -1);
10008
0
    else
10009
0
  spacePush(ctxt, *ctxt->space);
10010
10011
0
    line = ctxt->input->line;
10012
0
#ifdef LIBXML_SAX1_ENABLED
10013
0
    if (ctxt->sax2)
10014
0
#endif /* LIBXML_SAX1_ENABLED */
10015
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10016
0
#ifdef LIBXML_SAX1_ENABLED
10017
0
    else
10018
0
  name = xmlParseStartTag(ctxt);
10019
0
#endif /* LIBXML_SAX1_ENABLED */
10020
0
    if (ctxt->instate == XML_PARSER_EOF)
10021
0
  return(-1);
10022
0
    if (name == NULL) {
10023
0
  spacePop(ctxt);
10024
0
        return(-1);
10025
0
    }
10026
0
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10027
0
    cur = ctxt->node;
10028
10029
0
#ifdef LIBXML_VALID_ENABLED
10030
    /*
10031
     * [ VC: Root Element Type ]
10032
     * The Name in the document type declaration must match the element
10033
     * type of the root element.
10034
     */
10035
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10036
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10037
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10038
0
#endif /* LIBXML_VALID_ENABLED */
10039
10040
    /*
10041
     * Check for an Empty Element.
10042
     */
10043
0
    if ((RAW == '/') && (NXT(1) == '>')) {
10044
0
        SKIP(2);
10045
0
  if (ctxt->sax2) {
10046
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10047
0
    (!ctxt->disableSAX))
10048
0
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10049
0
#ifdef LIBXML_SAX1_ENABLED
10050
0
  } else {
10051
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10052
0
    (!ctxt->disableSAX))
10053
0
    ctxt->sax->endElement(ctxt->userData, name);
10054
0
#endif /* LIBXML_SAX1_ENABLED */
10055
0
  }
10056
0
  namePop(ctxt);
10057
0
  spacePop(ctxt);
10058
0
  if (nsNr != ctxt->nsNr)
10059
0
      nsPop(ctxt, ctxt->nsNr - nsNr);
10060
0
  if (cur != NULL && ctxt->record_info) {
10061
0
            node_info.node = cur;
10062
0
            node_info.end_pos = ctxt->input->consumed +
10063
0
                                (CUR_PTR - ctxt->input->base);
10064
0
            node_info.end_line = ctxt->input->line;
10065
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10066
0
  }
10067
0
  return(1);
10068
0
    }
10069
0
    if (RAW == '>') {
10070
0
        NEXT1;
10071
0
        if (cur != NULL && ctxt->record_info) {
10072
0
            node_info.node = cur;
10073
0
            node_info.end_pos = 0;
10074
0
            node_info.end_line = 0;
10075
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10076
0
        }
10077
0
    } else {
10078
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10079
0
         "Couldn't find end of Start Tag %s line %d\n",
10080
0
                    name, line, NULL);
10081
10082
  /*
10083
   * end of parsing of this node.
10084
   */
10085
0
  nodePop(ctxt);
10086
0
  namePop(ctxt);
10087
0
  spacePop(ctxt);
10088
0
  if (nsNr != ctxt->nsNr)
10089
0
      nsPop(ctxt, ctxt->nsNr - nsNr);
10090
0
  return(-1);
10091
0
    }
10092
10093
0
    return(0);
10094
0
}
10095
10096
/**
10097
 * xmlParseElementEnd:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the end of an XML element. Always consumes '</'.
10101
 */
10102
static void
10103
0
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10104
0
    xmlNodePtr cur = ctxt->node;
10105
10106
0
    if (ctxt->nameNr <= 0) {
10107
0
        if ((RAW == '<') && (NXT(1) == '/'))
10108
0
            SKIP(2);
10109
0
        return;
10110
0
    }
10111
10112
    /*
10113
     * parse the end of tag: '</' should be here.
10114
     */
10115
0
    if (ctxt->sax2) {
10116
0
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10117
0
  namePop(ctxt);
10118
0
    }
10119
0
#ifdef LIBXML_SAX1_ENABLED
10120
0
    else
10121
0
  xmlParseEndTag1(ctxt, 0);
10122
0
#endif /* LIBXML_SAX1_ENABLED */
10123
10124
    /*
10125
     * Capture end position
10126
     */
10127
0
    if (cur != NULL && ctxt->record_info) {
10128
0
        xmlParserNodeInfoPtr node_info;
10129
10130
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10131
0
        if (node_info != NULL) {
10132
0
            node_info->end_pos = ctxt->input->consumed +
10133
0
                                 (CUR_PTR - ctxt->input->base);
10134
0
            node_info->end_line = ctxt->input->line;
10135
0
        }
10136
0
    }
10137
0
}
10138
10139
/**
10140
 * xmlParseVersionNum:
10141
 * @ctxt:  an XML parser context
10142
 *
10143
 * DEPRECATED: Internal function, don't use.
10144
 *
10145
 * parse the XML version value.
10146
 *
10147
 * [26] VersionNum ::= '1.' [0-9]+
10148
 *
10149
 * In practice allow [0-9].[0-9]+ at that level
10150
 *
10151
 * Returns the string giving the XML version number, or NULL
10152
 */
10153
xmlChar *
10154
1.74k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10155
1.74k
    xmlChar *buf = NULL;
10156
1.74k
    int len = 0;
10157
1.74k
    int size = 10;
10158
1.74k
    xmlChar cur;
10159
10160
1.74k
    buf = (xmlChar *) xmlMallocAtomic(size);
10161
1.74k
    if (buf == NULL) {
10162
0
  xmlErrMemory(ctxt, NULL);
10163
0
  return(NULL);
10164
0
    }
10165
1.74k
    cur = CUR;
10166
1.74k
    if (!((cur >= '0') && (cur <= '9'))) {
10167
33
  xmlFree(buf);
10168
33
  return(NULL);
10169
33
    }
10170
1.70k
    buf[len++] = cur;
10171
1.70k
    NEXT;
10172
1.70k
    cur=CUR;
10173
1.70k
    if (cur != '.') {
10174
8
  xmlFree(buf);
10175
8
  return(NULL);
10176
8
    }
10177
1.69k
    buf[len++] = cur;
10178
1.69k
    NEXT;
10179
1.69k
    cur=CUR;
10180
184k
    while ((cur >= '0') && (cur <= '9')) {
10181
183k
  if (len + 1 >= size) {
10182
146
      xmlChar *tmp;
10183
10184
146
      size *= 2;
10185
146
      tmp = (xmlChar *) xmlRealloc(buf, size);
10186
146
      if (tmp == NULL) {
10187
0
          xmlFree(buf);
10188
0
    xmlErrMemory(ctxt, NULL);
10189
0
    return(NULL);
10190
0
      }
10191
146
      buf = tmp;
10192
146
  }
10193
183k
  buf[len++] = cur;
10194
183k
  NEXT;
10195
183k
  cur=CUR;
10196
183k
    }
10197
1.69k
    buf[len] = 0;
10198
1.69k
    return(buf);
10199
1.69k
}
10200
10201
/**
10202
 * xmlParseVersionInfo:
10203
 * @ctxt:  an XML parser context
10204
 *
10205
 * DEPRECATED: Internal function, don't use.
10206
 *
10207
 * parse the XML version.
10208
 *
10209
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10210
 *
10211
 * [25] Eq ::= S? '=' S?
10212
 *
10213
 * Returns the version string, e.g. "1.0"
10214
 */
10215
10216
xmlChar *
10217
2.23k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10218
2.23k
    xmlChar *version = NULL;
10219
10220
2.23k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10221
1.78k
  SKIP(7);
10222
1.78k
  SKIP_BLANKS;
10223
1.78k
  if (RAW != '=') {
10224
25
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10225
25
      return(NULL);
10226
25
        }
10227
1.75k
  NEXT;
10228
1.75k
  SKIP_BLANKS;
10229
1.75k
  if (RAW == '"') {
10230
1.64k
      NEXT;
10231
1.64k
      version = xmlParseVersionNum(ctxt);
10232
1.64k
      if (RAW != '"') {
10233
49
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10234
49
      } else
10235
1.59k
          NEXT;
10236
1.64k
  } else if (RAW == '\''){
10237
100
      NEXT;
10238
100
      version = xmlParseVersionNum(ctxt);
10239
100
      if (RAW != '\'') {
10240
15
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10241
15
      } else
10242
85
          NEXT;
10243
100
  } else {
10244
16
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10245
16
  }
10246
1.75k
    }
10247
2.20k
    return(version);
10248
2.23k
}
10249
10250
/**
10251
 * xmlParseEncName:
10252
 * @ctxt:  an XML parser context
10253
 *
10254
 * DEPRECATED: Internal function, don't use.
10255
 *
10256
 * parse the XML encoding name
10257
 *
10258
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10259
 *
10260
 * Returns the encoding name value or NULL
10261
 */
10262
xmlChar *
10263
1.96k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10264
1.96k
    xmlChar *buf = NULL;
10265
1.96k
    int len = 0;
10266
1.96k
    int size = 10;
10267
1.96k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10268
1.96k
                    XML_MAX_TEXT_LENGTH :
10269
1.96k
                    XML_MAX_NAME_LENGTH;
10270
1.96k
    xmlChar cur;
10271
10272
1.96k
    cur = CUR;
10273
1.96k
    if (((cur >= 'a') && (cur <= 'z')) ||
10274
1.96k
        ((cur >= 'A') && (cur <= 'Z'))) {
10275
1.95k
  buf = (xmlChar *) xmlMallocAtomic(size);
10276
1.95k
  if (buf == NULL) {
10277
0
      xmlErrMemory(ctxt, NULL);
10278
0
      return(NULL);
10279
0
  }
10280
10281
1.95k
  buf[len++] = cur;
10282
1.95k
  NEXT;
10283
1.95k
  cur = CUR;
10284
312k
  while (((cur >= 'a') && (cur <= 'z')) ||
10285
312k
         ((cur >= 'A') && (cur <= 'Z')) ||
10286
312k
         ((cur >= '0') && (cur <= '9')) ||
10287
312k
         (cur == '.') || (cur == '_') ||
10288
312k
         (cur == '-')) {
10289
310k
      if (len + 1 >= size) {
10290
387
          xmlChar *tmp;
10291
10292
387
    size *= 2;
10293
387
    tmp = (xmlChar *) xmlRealloc(buf, size);
10294
387
    if (tmp == NULL) {
10295
0
        xmlErrMemory(ctxt, NULL);
10296
0
        xmlFree(buf);
10297
0
        return(NULL);
10298
0
    }
10299
387
    buf = tmp;
10300
387
      }
10301
310k
      buf[len++] = cur;
10302
310k
            if (len > maxLength) {
10303
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10304
0
                xmlFree(buf);
10305
0
                return(NULL);
10306
0
            }
10307
310k
      NEXT;
10308
310k
      cur = CUR;
10309
310k
        }
10310
1.95k
  buf[len] = 0;
10311
1.95k
    } else {
10312
4
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10313
4
    }
10314
1.96k
    return(buf);
10315
1.96k
}
10316
10317
/**
10318
 * xmlParseEncodingDecl:
10319
 * @ctxt:  an XML parser context
10320
 *
10321
 * DEPRECATED: Internal function, don't use.
10322
 *
10323
 * parse the XML encoding declaration
10324
 *
10325
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10326
 *
10327
 * this setups the conversion filters.
10328
 *
10329
 * Returns the encoding value or NULL
10330
 */
10331
10332
const xmlChar *
10333
2.21k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10334
2.21k
    xmlChar *encoding = NULL;
10335
10336
2.21k
    SKIP_BLANKS;
10337
2.21k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10338
1.98k
  SKIP(8);
10339
1.98k
  SKIP_BLANKS;
10340
1.98k
  if (RAW != '=') {
10341
13
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10342
13
      return(NULL);
10343
13
        }
10344
1.96k
  NEXT;
10345
1.96k
  SKIP_BLANKS;
10346
1.96k
  if (RAW == '"') {
10347
1.85k
      NEXT;
10348
1.85k
      encoding = xmlParseEncName(ctxt);
10349
1.85k
      if (RAW != '"') {
10350
53
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10351
53
    xmlFree((xmlChar *) encoding);
10352
53
    return(NULL);
10353
53
      } else
10354
1.80k
          NEXT;
10355
1.85k
  } else if (RAW == '\''){
10356
104
      NEXT;
10357
104
      encoding = xmlParseEncName(ctxt);
10358
104
      if (RAW != '\'') {
10359
3
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10360
3
    xmlFree((xmlChar *) encoding);
10361
3
    return(NULL);
10362
3
      } else
10363
101
          NEXT;
10364
104
  } else {
10365
8
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10366
8
  }
10367
10368
        /*
10369
         * Non standard parsing, allowing the user to ignore encoding
10370
         */
10371
1.91k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10372
0
      xmlFree((xmlChar *) encoding);
10373
0
            return(NULL);
10374
0
  }
10375
10376
  /*
10377
   * UTF-16 encoding switch has already taken place at this stage,
10378
   * more over the little-endian/big-endian selection is already done
10379
   */
10380
1.91k
        if ((encoding != NULL) &&
10381
1.91k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10382
1.90k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10383
      /*
10384
       * If no encoding was passed to the parser, that we are
10385
       * using UTF-16 and no decoder is present i.e. the
10386
       * document is apparently UTF-8 compatible, then raise an
10387
       * encoding mismatch fatal error
10388
       */
10389
2
      if ((ctxt->encoding == NULL) &&
10390
2
          (ctxt->input->buf != NULL) &&
10391
2
          (ctxt->input->buf->encoder == NULL)) {
10392
0
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10393
0
      "Document labelled UTF-16 but has UTF-8 content\n");
10394
0
      }
10395
2
      if (ctxt->encoding != NULL)
10396
0
    xmlFree((xmlChar *) ctxt->encoding);
10397
2
      ctxt->encoding = encoding;
10398
2
  }
10399
  /*
10400
   * UTF-8 encoding is handled natively
10401
   */
10402
1.91k
        else if ((encoding != NULL) &&
10403
1.91k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10404
1.90k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10405
            /* TODO: Check for encoding mismatch. */
10406
782
      if (ctxt->encoding != NULL)
10407
0
    xmlFree((xmlChar *) ctxt->encoding);
10408
782
      ctxt->encoding = encoding;
10409
782
  }
10410
1.12k
  else if (encoding != NULL) {
10411
1.12k
      xmlCharEncodingHandlerPtr handler;
10412
10413
1.12k
      if (ctxt->input->encoding != NULL)
10414
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10415
1.12k
      ctxt->input->encoding = encoding;
10416
10417
1.12k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10418
1.12k
      if (handler != NULL) {
10419
845
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10420
        /* failed to convert */
10421
2
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10422
2
        return(NULL);
10423
2
    }
10424
845
      } else {
10425
275
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10426
275
      "Unsupported encoding %s\n", encoding);
10427
275
    return(NULL);
10428
275
      }
10429
1.12k
  }
10430
1.91k
    }
10431
1.86k
    return(encoding);
10432
2.21k
}
10433
10434
/**
10435
 * xmlParseSDDecl:
10436
 * @ctxt:  an XML parser context
10437
 *
10438
 * DEPRECATED: Internal function, don't use.
10439
 *
10440
 * parse the XML standalone declaration
10441
 *
10442
 * [32] SDDecl ::= S 'standalone' Eq
10443
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10444
 *
10445
 * [ VC: Standalone Document Declaration ]
10446
 * TODO The standalone document declaration must have the value "no"
10447
 * if any external markup declarations contain declarations of:
10448
 *  - attributes with default values, if elements to which these
10449
 *    attributes apply appear in the document without specifications
10450
 *    of values for these attributes, or
10451
 *  - entities (other than amp, lt, gt, apos, quot), if references
10452
 *    to those entities appear in the document, or
10453
 *  - attributes with values subject to normalization, where the
10454
 *    attribute appears in the document with a value which will change
10455
 *    as a result of normalization, or
10456
 *  - element types with element content, if white space occurs directly
10457
 *    within any instance of those types.
10458
 *
10459
 * Returns:
10460
 *   1 if standalone="yes"
10461
 *   0 if standalone="no"
10462
 *  -2 if standalone attribute is missing or invalid
10463
 *    (A standalone value of -2 means that the XML declaration was found,
10464
 *     but no value was specified for the standalone attribute).
10465
 */
10466
10467
int
10468
1.19k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10469
1.19k
    int standalone = -2;
10470
10471
1.19k
    SKIP_BLANKS;
10472
1.19k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10473
58
  SKIP(10);
10474
58
        SKIP_BLANKS;
10475
58
  if (RAW != '=') {
10476
2
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10477
2
      return(standalone);
10478
2
        }
10479
56
  NEXT;
10480
56
  SKIP_BLANKS;
10481
56
        if (RAW == '\''){
10482
8
      NEXT;
10483
8
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10484
0
          standalone = 0;
10485
0
                SKIP(2);
10486
8
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10487
8
                 (NXT(2) == 's')) {
10488
2
          standalone = 1;
10489
2
    SKIP(3);
10490
6
            } else {
10491
6
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10492
6
      }
10493
8
      if (RAW != '\'') {
10494
8
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10495
8
      } else
10496
0
          NEXT;
10497
48
  } else if (RAW == '"'){
10498
39
      NEXT;
10499
39
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10500
6
          standalone = 0;
10501
6
    SKIP(2);
10502
33
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10503
33
                 (NXT(2) == 's')) {
10504
25
          standalone = 1;
10505
25
                SKIP(3);
10506
25
            } else {
10507
8
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10508
8
      }
10509
39
      if (RAW != '"') {
10510
10
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10511
10
      } else
10512
29
          NEXT;
10513
39
  } else {
10514
9
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10515
9
        }
10516
56
    }
10517
1.19k
    return(standalone);
10518
1.19k
}
10519
10520
/**
10521
 * xmlParseXMLDecl:
10522
 * @ctxt:  an XML parser context
10523
 *
10524
 * DEPRECATED: Internal function, don't use.
10525
 *
10526
 * parse an XML declaration header
10527
 *
10528
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10529
 */
10530
10531
void
10532
2.23k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10533
2.23k
    xmlChar *version;
10534
10535
    /*
10536
     * This value for standalone indicates that the document has an
10537
     * XML declaration but it does not have a standalone attribute.
10538
     * It will be overwritten later if a standalone attribute is found.
10539
     */
10540
2.23k
    ctxt->input->standalone = -2;
10541
10542
    /*
10543
     * We know that '<?xml' is here.
10544
     */
10545
2.23k
    SKIP(5);
10546
10547
2.23k
    if (!IS_BLANK_CH(RAW)) {
10548
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10549
0
                 "Blank needed after '<?xml'\n");
10550
0
    }
10551
2.23k
    SKIP_BLANKS;
10552
10553
    /*
10554
     * We must have the VersionInfo here.
10555
     */
10556
2.23k
    version = xmlParseVersionInfo(ctxt);
10557
2.23k
    if (version == NULL) {
10558
532
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10559
1.69k
    } else {
10560
1.69k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10561
      /*
10562
       * Changed here for XML-1.0 5th edition
10563
       */
10564
145
      if (ctxt->options & XML_PARSE_OLD10) {
10565
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10566
0
                "Unsupported version '%s'\n",
10567
0
                version);
10568
145
      } else {
10569
145
          if ((version[0] == '1') && ((version[1] == '.'))) {
10570
114
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10571
114
                      "Unsupported version '%s'\n",
10572
114
          version, NULL);
10573
114
    } else {
10574
31
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10575
31
              "Unsupported version '%s'\n",
10576
31
              version);
10577
31
    }
10578
145
      }
10579
145
  }
10580
1.69k
  if (ctxt->version != NULL)
10581
0
      xmlFree((void *) ctxt->version);
10582
1.69k
  ctxt->version = version;
10583
1.69k
    }
10584
10585
    /*
10586
     * We may have the encoding declaration
10587
     */
10588
2.23k
    if (!IS_BLANK_CH(RAW)) {
10589
553
        if ((RAW == '?') && (NXT(1) == '>')) {
10590
19
      SKIP(2);
10591
19
      return;
10592
19
  }
10593
534
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10594
534
    }
10595
2.21k
    xmlParseEncodingDecl(ctxt);
10596
2.21k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10597
2.21k
         (ctxt->instate == XML_PARSER_EOF)) {
10598
  /*
10599
   * The XML REC instructs us to stop parsing right here
10600
   */
10601
277
        return;
10602
277
    }
10603
10604
    /*
10605
     * We may have the standalone status.
10606
     */
10607
1.93k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10608
817
        if ((RAW == '?') && (NXT(1) == '>')) {
10609
740
      SKIP(2);
10610
740
      return;
10611
740
  }
10612
77
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10613
77
    }
10614
10615
    /*
10616
     * We can grow the input buffer freely at that point
10617
     */
10618
1.19k
    GROW;
10619
10620
1.19k
    SKIP_BLANKS;
10621
1.19k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10622
10623
1.19k
    SKIP_BLANKS;
10624
1.19k
    if ((RAW == '?') && (NXT(1) == '>')) {
10625
810
        SKIP(2);
10626
810
    } else if (RAW == '>') {
10627
        /* Deprecated old WD ... */
10628
9
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10629
9
  NEXT;
10630
376
    } else {
10631
376
        int c;
10632
10633
376
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10634
269k
        while ((c = CUR) != 0) {
10635
268k
            NEXT;
10636
268k
            if (c == '>')
10637
31
                break;
10638
268k
        }
10639
376
    }
10640
1.19k
}
10641
10642
/**
10643
 * xmlParseMisc:
10644
 * @ctxt:  an XML parser context
10645
 *
10646
 * DEPRECATED: Internal function, don't use.
10647
 *
10648
 * parse an XML Misc* optional field.
10649
 *
10650
 * [27] Misc ::= Comment | PI |  S
10651
 */
10652
10653
void
10654
0
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10655
0
    while (ctxt->instate != XML_PARSER_EOF) {
10656
0
        SKIP_BLANKS;
10657
0
        GROW;
10658
0
        if ((RAW == '<') && (NXT(1) == '?')) {
10659
0
      xmlParsePI(ctxt);
10660
0
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10661
0
      xmlParseComment(ctxt);
10662
0
        } else {
10663
0
            break;
10664
0
        }
10665
0
    }
10666
0
}
10667
10668
/**
10669
 * xmlParseDocument:
10670
 * @ctxt:  an XML parser context
10671
 *
10672
 * parse an XML document (and build a tree if using the standard SAX
10673
 * interface).
10674
 *
10675
 * [1] document ::= prolog element Misc*
10676
 *
10677
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10678
 *
10679
 * Returns 0, -1 in case of error. the parser context is augmented
10680
 *                as a result of the parsing.
10681
 */
10682
10683
int
10684
0
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10685
0
    xmlChar start[4];
10686
0
    xmlCharEncoding enc;
10687
10688
0
    xmlInitParser();
10689
10690
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10691
0
        return(-1);
10692
10693
0
    GROW;
10694
10695
    /*
10696
     * SAX: detecting the level.
10697
     */
10698
0
    xmlDetectSAX2(ctxt);
10699
10700
    /*
10701
     * SAX: beginning of the document processing.
10702
     */
10703
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10704
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10705
0
    if (ctxt->instate == XML_PARSER_EOF)
10706
0
  return(-1);
10707
10708
0
    if ((ctxt->encoding == NULL) &&
10709
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10710
  /*
10711
   * Get the 4 first bytes and decode the charset
10712
   * if enc != XML_CHAR_ENCODING_NONE
10713
   * plug some encoding conversion routines.
10714
   */
10715
0
  start[0] = RAW;
10716
0
  start[1] = NXT(1);
10717
0
  start[2] = NXT(2);
10718
0
  start[3] = NXT(3);
10719
0
  enc = xmlDetectCharEncoding(&start[0], 4);
10720
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10721
0
      xmlSwitchEncoding(ctxt, enc);
10722
0
  }
10723
0
    }
10724
10725
10726
0
    if (CUR == 0) {
10727
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10728
0
  return(-1);
10729
0
    }
10730
10731
0
    GROW;
10732
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10733
10734
  /*
10735
   * Note that we will switch encoding on the fly.
10736
   */
10737
0
  xmlParseXMLDecl(ctxt);
10738
0
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10739
0
      (ctxt->instate == XML_PARSER_EOF)) {
10740
      /*
10741
       * The XML REC instructs us to stop parsing right here
10742
       */
10743
0
      return(-1);
10744
0
  }
10745
0
  ctxt->standalone = ctxt->input->standalone;
10746
0
  SKIP_BLANKS;
10747
0
    } else {
10748
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10749
0
    }
10750
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10751
0
        ctxt->sax->startDocument(ctxt->userData);
10752
0
    if (ctxt->instate == XML_PARSER_EOF)
10753
0
  return(-1);
10754
0
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10755
0
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10756
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10757
0
    }
10758
10759
    /*
10760
     * The Misc part of the Prolog
10761
     */
10762
0
    xmlParseMisc(ctxt);
10763
10764
    /*
10765
     * Then possibly doc type declaration(s) and more Misc
10766
     * (doctypedecl Misc*)?
10767
     */
10768
0
    GROW;
10769
0
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10770
10771
0
  ctxt->inSubset = 1;
10772
0
  xmlParseDocTypeDecl(ctxt);
10773
0
  if (RAW == '[') {
10774
0
      ctxt->instate = XML_PARSER_DTD;
10775
0
      xmlParseInternalSubset(ctxt);
10776
0
      if (ctxt->instate == XML_PARSER_EOF)
10777
0
    return(-1);
10778
0
  }
10779
10780
  /*
10781
   * Create and update the external subset.
10782
   */
10783
0
  ctxt->inSubset = 2;
10784
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10785
0
      (!ctxt->disableSAX))
10786
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10787
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10788
0
  if (ctxt->instate == XML_PARSER_EOF)
10789
0
      return(-1);
10790
0
  ctxt->inSubset = 0;
10791
10792
0
        xmlCleanSpecialAttr(ctxt);
10793
10794
0
  ctxt->instate = XML_PARSER_PROLOG;
10795
0
  xmlParseMisc(ctxt);
10796
0
    }
10797
10798
    /*
10799
     * Time to start parsing the tree itself
10800
     */
10801
0
    GROW;
10802
0
    if (RAW != '<') {
10803
0
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10804
0
           "Start tag expected, '<' not found\n");
10805
0
    } else {
10806
0
  ctxt->instate = XML_PARSER_CONTENT;
10807
0
  xmlParseElement(ctxt);
10808
0
  ctxt->instate = XML_PARSER_EPILOG;
10809
10810
10811
  /*
10812
   * The Misc part at the end
10813
   */
10814
0
  xmlParseMisc(ctxt);
10815
10816
0
  if (RAW != 0) {
10817
0
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10818
0
  }
10819
0
  ctxt->instate = XML_PARSER_EOF;
10820
0
    }
10821
10822
    /*
10823
     * SAX: end of the document processing.
10824
     */
10825
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10826
0
        ctxt->sax->endDocument(ctxt->userData);
10827
10828
    /*
10829
     * Remove locally kept entity definitions if the tree was not built
10830
     */
10831
0
    if ((ctxt->myDoc != NULL) &&
10832
0
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10833
0
  xmlFreeDoc(ctxt->myDoc);
10834
0
  ctxt->myDoc = NULL;
10835
0
    }
10836
10837
0
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10838
0
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10839
0
  if (ctxt->valid)
10840
0
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10841
0
  if (ctxt->nsWellFormed)
10842
0
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10843
0
  if (ctxt->options & XML_PARSE_OLD10)
10844
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10845
0
    }
10846
0
    if (! ctxt->wellFormed) {
10847
0
  ctxt->valid = 0;
10848
0
  return(-1);
10849
0
    }
10850
0
    return(0);
10851
0
}
10852
10853
/**
10854
 * xmlParseExtParsedEnt:
10855
 * @ctxt:  an XML parser context
10856
 *
10857
 * parse a general parsed entity
10858
 * An external general parsed entity is well-formed if it matches the
10859
 * production labeled extParsedEnt.
10860
 *
10861
 * [78] extParsedEnt ::= TextDecl? content
10862
 *
10863
 * Returns 0, -1 in case of error. the parser context is augmented
10864
 *                as a result of the parsing.
10865
 */
10866
10867
int
10868
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10869
0
    xmlChar start[4];
10870
0
    xmlCharEncoding enc;
10871
10872
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10873
0
        return(-1);
10874
10875
0
    xmlDetectSAX2(ctxt);
10876
10877
0
    GROW;
10878
10879
    /*
10880
     * SAX: beginning of the document processing.
10881
     */
10882
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10883
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10884
10885
    /*
10886
     * Get the 4 first bytes and decode the charset
10887
     * if enc != XML_CHAR_ENCODING_NONE
10888
     * plug some encoding conversion routines.
10889
     */
10890
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10891
0
  start[0] = RAW;
10892
0
  start[1] = NXT(1);
10893
0
  start[2] = NXT(2);
10894
0
  start[3] = NXT(3);
10895
0
  enc = xmlDetectCharEncoding(start, 4);
10896
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10897
0
      xmlSwitchEncoding(ctxt, enc);
10898
0
  }
10899
0
    }
10900
10901
10902
0
    if (CUR == 0) {
10903
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10904
0
    }
10905
10906
    /*
10907
     * Check for the XMLDecl in the Prolog.
10908
     */
10909
0
    GROW;
10910
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10911
10912
  /*
10913
   * Note that we will switch encoding on the fly.
10914
   */
10915
0
  xmlParseXMLDecl(ctxt);
10916
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10917
      /*
10918
       * The XML REC instructs us to stop parsing right here
10919
       */
10920
0
      return(-1);
10921
0
  }
10922
0
  SKIP_BLANKS;
10923
0
    } else {
10924
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10925
0
    }
10926
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10927
0
        ctxt->sax->startDocument(ctxt->userData);
10928
0
    if (ctxt->instate == XML_PARSER_EOF)
10929
0
  return(-1);
10930
10931
    /*
10932
     * Doing validity checking on chunk doesn't make sense
10933
     */
10934
0
    ctxt->instate = XML_PARSER_CONTENT;
10935
0
    ctxt->validate = 0;
10936
0
    ctxt->loadsubset = 0;
10937
0
    ctxt->depth = 0;
10938
10939
0
    xmlParseContent(ctxt);
10940
0
    if (ctxt->instate == XML_PARSER_EOF)
10941
0
  return(-1);
10942
10943
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10944
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10945
0
    } else if (RAW != 0) {
10946
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10947
0
    }
10948
10949
    /*
10950
     * SAX: end of the document processing.
10951
     */
10952
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10953
0
        ctxt->sax->endDocument(ctxt->userData);
10954
10955
0
    if (! ctxt->wellFormed) return(-1);
10956
0
    return(0);
10957
0
}
10958
10959
#ifdef LIBXML_PUSH_ENABLED
10960
/************************************************************************
10961
 *                  *
10962
 *    Progressive parsing interfaces        *
10963
 *                  *
10964
 ************************************************************************/
10965
10966
/**
10967
 * xmlParseLookupChar:
10968
 * @ctxt:  an XML parser context
10969
 * @c:  character
10970
 *
10971
 * Check whether the input buffer contains a character.
10972
 */
10973
static int
10974
29.1k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10975
29.1k
    const xmlChar *cur;
10976
10977
29.1k
    if (ctxt->checkIndex == 0) {
10978
28.2k
        cur = ctxt->input->cur + 1;
10979
28.2k
    } else {
10980
874
        cur = ctxt->input->cur + ctxt->checkIndex;
10981
874
    }
10982
10983
29.1k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10984
905
        size_t index = ctxt->input->end - ctxt->input->cur;
10985
10986
905
        if (index > LONG_MAX) {
10987
0
            ctxt->checkIndex = 0;
10988
0
            return(1);
10989
0
        }
10990
905
        ctxt->checkIndex = index;
10991
905
        return(0);
10992
28.2k
    } else {
10993
28.2k
        ctxt->checkIndex = 0;
10994
28.2k
        return(1);
10995
28.2k
    }
10996
29.1k
}
10997
10998
/**
10999
 * xmlParseLookupString:
11000
 * @ctxt:  an XML parser context
11001
 * @startDelta: delta to apply at the start
11002
 * @str:  string
11003
 * @strLen:  length of string
11004
 *
11005
 * Check whether the input buffer contains a string.
11006
 */
11007
static const xmlChar *
11008
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11009
72.4k
                     const char *str, size_t strLen) {
11010
72.4k
    const xmlChar *cur, *term;
11011
11012
72.4k
    if (ctxt->checkIndex == 0) {
11013
66.8k
        cur = ctxt->input->cur + startDelta;
11014
66.8k
    } else {
11015
5.62k
        cur = ctxt->input->cur + ctxt->checkIndex;
11016
5.62k
    }
11017
11018
72.4k
    term = BAD_CAST strstr((const char *) cur, str);
11019
72.4k
    if (term == NULL) {
11020
16.6k
        const xmlChar *end = ctxt->input->end;
11021
16.6k
        size_t index;
11022
11023
        /* Rescan (strLen - 1) characters. */
11024
16.6k
        if ((size_t) (end - cur) < strLen)
11025
62
            end = cur;
11026
16.5k
        else
11027
16.5k
            end -= strLen - 1;
11028
16.6k
        index = end - ctxt->input->cur;
11029
16.6k
        if (index > LONG_MAX) {
11030
0
            ctxt->checkIndex = 0;
11031
0
            return(ctxt->input->end - strLen);
11032
0
        }
11033
16.6k
        ctxt->checkIndex = index;
11034
55.8k
    } else {
11035
55.8k
        ctxt->checkIndex = 0;
11036
55.8k
    }
11037
11038
72.4k
    return(term);
11039
72.4k
}
11040
11041
/**
11042
 * xmlParseLookupCharData:
11043
 * @ctxt:  an XML parser context
11044
 *
11045
 * Check whether the input buffer contains terminated char data.
11046
 */
11047
static int
11048
12.0k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11049
12.0k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11050
12.0k
    const xmlChar *end = ctxt->input->end;
11051
12.0k
    size_t index;
11052
11053
188k
    while (cur < end) {
11054
187k
        if ((*cur == '<') || (*cur == '&')) {
11055
11.3k
            ctxt->checkIndex = 0;
11056
11.3k
            return(1);
11057
11.3k
        }
11058
175k
        cur++;
11059
175k
    }
11060
11061
695
    index = cur - ctxt->input->cur;
11062
695
    if (index > LONG_MAX) {
11063
0
        ctxt->checkIndex = 0;
11064
0
        return(1);
11065
0
    }
11066
695
    ctxt->checkIndex = index;
11067
695
    return(0);
11068
695
}
11069
11070
/**
11071
 * xmlParseLookupGt:
11072
 * @ctxt:  an XML parser context
11073
 *
11074
 * Check whether there's enough data in the input buffer to finish parsing
11075
 * a start tag. This has to take quotes into account.
11076
 */
11077
static int
11078
681k
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11079
681k
    const xmlChar *cur;
11080
681k
    const xmlChar *end = ctxt->input->end;
11081
681k
    int state = ctxt->endCheckState;
11082
681k
    size_t index;
11083
11084
681k
    if (ctxt->checkIndex == 0)
11085
668k
        cur = ctxt->input->cur + 1;
11086
13.1k
    else
11087
13.1k
        cur = ctxt->input->cur + ctxt->checkIndex;
11088
11089
411M
    while (cur < end) {
11090
411M
        if (state) {
11091
119M
            if (*cur == state)
11092
178k
                state = 0;
11093
291M
        } else if (*cur == '\'' || *cur == '"') {
11094
178k
            state = *cur;
11095
291M
        } else if (*cur == '>') {
11096
668k
            ctxt->checkIndex = 0;
11097
668k
            ctxt->endCheckState = 0;
11098
668k
            return(1);
11099
668k
        }
11100
410M
        cur++;
11101
410M
    }
11102
11103
13.5k
    index = cur - ctxt->input->cur;
11104
13.5k
    if (index > LONG_MAX) {
11105
0
        ctxt->checkIndex = 0;
11106
0
        ctxt->endCheckState = 0;
11107
0
        return(1);
11108
0
    }
11109
13.5k
    ctxt->checkIndex = index;
11110
13.5k
    ctxt->endCheckState = state;
11111
13.5k
    return(0);
11112
13.5k
}
11113
11114
/**
11115
 * xmlParseLookupInternalSubset:
11116
 * @ctxt:  an XML parser context
11117
 *
11118
 * Check whether there's enough data in the input buffer to finish parsing
11119
 * the internal subset.
11120
 */
11121
static int
11122
3.02k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11123
    /*
11124
     * Sorry, but progressive parsing of the internal subset is not
11125
     * supported. We first check that the full content of the internal
11126
     * subset is available and parsing is launched only at that point.
11127
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11128
     * not in a ']]>' sequence which are conditional sections.
11129
     */
11130
3.02k
    const xmlChar *cur, *start;
11131
3.02k
    const xmlChar *end = ctxt->input->end;
11132
3.02k
    int state = ctxt->endCheckState;
11133
3.02k
    size_t index;
11134
11135
3.02k
    if (ctxt->checkIndex == 0) {
11136
276
        cur = ctxt->input->cur + 1;
11137
2.74k
    } else {
11138
2.74k
        cur = ctxt->input->cur + ctxt->checkIndex;
11139
2.74k
    }
11140
3.02k
    start = cur;
11141
11142
77.3M
    while (cur < end) {
11143
77.3M
        if (state == '-') {
11144
1.84M
            if ((*cur == '-') &&
11145
1.84M
                (cur[1] == '-') &&
11146
1.84M
                (cur[2] == '>')) {
11147
3.07k
                state = 0;
11148
3.07k
                cur += 3;
11149
3.07k
                start = cur;
11150
3.07k
                continue;
11151
3.07k
            }
11152
1.84M
        }
11153
75.4M
        else if (state == ']') {
11154
8.04k
            if (*cur == '>') {
11155
71
                ctxt->checkIndex = 0;
11156
71
                ctxt->endCheckState = 0;
11157
71
                return(1);
11158
71
            }
11159
7.97k
            if (IS_BLANK_CH(*cur)) {
11160
828
                state = ' ';
11161
7.14k
            } else if (*cur != ']') {
11162
1.03k
                state = 0;
11163
1.03k
                start = cur;
11164
1.03k
                continue;
11165
1.03k
            }
11166
7.97k
        }
11167
75.4M
        else if (state == ' ') {
11168
9.18k
            if (*cur == '>') {
11169
0
                ctxt->checkIndex = 0;
11170
0
                ctxt->endCheckState = 0;
11171
0
                return(1);
11172
0
            }
11173
9.18k
            if (!IS_BLANK_CH(*cur)) {
11174
827
                state = 0;
11175
827
                start = cur;
11176
827
                continue;
11177
827
            }
11178
9.18k
        }
11179
75.4M
        else if (state != 0) {
11180
53.4M
            if (*cur == state) {
11181
110k
                state = 0;
11182
110k
                start = cur + 1;
11183
110k
            }
11184
53.4M
        }
11185
22.0M
        else if (*cur == '<') {
11186
39.2k
            if ((cur[1] == '!') &&
11187
39.2k
                (cur[2] == '-') &&
11188
39.2k
                (cur[3] == '-')) {
11189
3.10k
                state = '-';
11190
3.10k
                cur += 4;
11191
                /* Don't treat <!--> as comment */
11192
3.10k
                start = cur;
11193
3.10k
                continue;
11194
3.10k
            }
11195
39.2k
        }
11196
21.9M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11197
112k
            state = *cur;
11198
112k
        }
11199
11200
77.3M
        cur++;
11201
77.3M
    }
11202
11203
    /*
11204
     * Rescan the three last characters to detect "<!--" and "-->"
11205
     * split across chunks.
11206
     */
11207
2.95k
    if ((state == 0) || (state == '-')) {
11208
1.29k
        if (cur - start < 3)
11209
61
            cur = start;
11210
1.23k
        else
11211
1.23k
            cur -= 3;
11212
1.29k
    }
11213
2.95k
    index = cur - ctxt->input->cur;
11214
2.95k
    if (index > LONG_MAX) {
11215
0
        ctxt->checkIndex = 0;
11216
0
        ctxt->endCheckState = 0;
11217
0
        return(1);
11218
0
    }
11219
2.95k
    ctxt->checkIndex = index;
11220
2.95k
    ctxt->endCheckState = state;
11221
2.95k
    return(0);
11222
2.95k
}
11223
11224
/**
11225
 * xmlCheckCdataPush:
11226
 * @cur: pointer to the block of characters
11227
 * @len: length of the block in bytes
11228
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11229
 *
11230
 * Check that the block of characters is okay as SCdata content [20]
11231
 *
11232
 * Returns the number of bytes to pass if okay, a negative index where an
11233
 *         UTF-8 error occurred otherwise
11234
 */
11235
static int
11236
14.3k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11237
14.3k
    int ix;
11238
14.3k
    unsigned char c;
11239
14.3k
    int codepoint;
11240
11241
14.3k
    if ((utf == NULL) || (len <= 0))
11242
575
        return(0);
11243
11244
2.79M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11245
2.78M
        c = utf[ix];
11246
2.78M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11247
1.58M
      if (c >= 0x20)
11248
1.51M
    ix++;
11249
73.7k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11250
73.6k
          ix++;
11251
52
      else
11252
52
          return(-ix);
11253
1.58M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11254
296k
      if (ix + 2 > len) return(complete ? -ix : ix);
11255
295k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11256
2
          return(-ix);
11257
295k
      codepoint = (utf[ix] & 0x1f) << 6;
11258
295k
      codepoint |= utf[ix+1] & 0x3f;
11259
295k
      if (!xmlIsCharQ(codepoint))
11260
1
          return(-ix);
11261
295k
      ix += 2;
11262
904k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11263
897k
      if (ix + 3 > len) return(complete ? -ix : ix);
11264
895k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11265
895k
          ((utf[ix+2] & 0xc0) != 0x80))
11266
6
        return(-ix);
11267
895k
      codepoint = (utf[ix] & 0xf) << 12;
11268
895k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11269
895k
      codepoint |= utf[ix+2] & 0x3f;
11270
895k
      if (!xmlIsCharQ(codepoint))
11271
10
          return(-ix);
11272
895k
      ix += 3;
11273
895k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11274
6.74k
      if (ix + 4 > len) return(complete ? -ix : ix);
11275
6.66k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11276
6.66k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11277
6.66k
    ((utf[ix+3] & 0xc0) != 0x80))
11278
12
        return(-ix);
11279
6.65k
      codepoint = (utf[ix] & 0x7) << 18;
11280
6.65k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11281
6.65k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11282
6.65k
      codepoint |= utf[ix+3] & 0x3f;
11283
6.65k
      if (!xmlIsCharQ(codepoint))
11284
14
          return(-ix);
11285
6.63k
      ix += 4;
11286
6.63k
  } else       /* unknown encoding */
11287
34
      return(-ix);
11288
2.78M
      }
11289
10.9k
      return(ix);
11290
13.7k
}
11291
11292
/**
11293
 * xmlParseTryOrFinish:
11294
 * @ctxt:  an XML parser context
11295
 * @terminate:  last chunk indicator
11296
 *
11297
 * Try to progress on parsing
11298
 *
11299
 * Returns zero if no parsing was possible
11300
 */
11301
static int
11302
34.7k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11303
34.7k
    int ret = 0;
11304
34.7k
    int tlen;
11305
34.7k
    size_t avail;
11306
34.7k
    xmlChar cur, next;
11307
11308
34.7k
    if (ctxt->input == NULL)
11309
0
        return(0);
11310
11311
#ifdef DEBUG_PUSH
11312
    switch (ctxt->instate) {
11313
  case XML_PARSER_EOF:
11314
      xmlGenericError(xmlGenericErrorContext,
11315
        "PP: try EOF\n"); break;
11316
  case XML_PARSER_START:
11317
      xmlGenericError(xmlGenericErrorContext,
11318
        "PP: try START\n"); break;
11319
  case XML_PARSER_MISC:
11320
      xmlGenericError(xmlGenericErrorContext,
11321
        "PP: try MISC\n");break;
11322
  case XML_PARSER_COMMENT:
11323
      xmlGenericError(xmlGenericErrorContext,
11324
        "PP: try COMMENT\n");break;
11325
  case XML_PARSER_PROLOG:
11326
      xmlGenericError(xmlGenericErrorContext,
11327
        "PP: try PROLOG\n");break;
11328
  case XML_PARSER_START_TAG:
11329
      xmlGenericError(xmlGenericErrorContext,
11330
        "PP: try START_TAG\n");break;
11331
  case XML_PARSER_CONTENT:
11332
      xmlGenericError(xmlGenericErrorContext,
11333
        "PP: try CONTENT\n");break;
11334
  case XML_PARSER_CDATA_SECTION:
11335
      xmlGenericError(xmlGenericErrorContext,
11336
        "PP: try CDATA_SECTION\n");break;
11337
  case XML_PARSER_END_TAG:
11338
      xmlGenericError(xmlGenericErrorContext,
11339
        "PP: try END_TAG\n");break;
11340
  case XML_PARSER_ENTITY_DECL:
11341
      xmlGenericError(xmlGenericErrorContext,
11342
        "PP: try ENTITY_DECL\n");break;
11343
  case XML_PARSER_ENTITY_VALUE:
11344
      xmlGenericError(xmlGenericErrorContext,
11345
        "PP: try ENTITY_VALUE\n");break;
11346
  case XML_PARSER_ATTRIBUTE_VALUE:
11347
      xmlGenericError(xmlGenericErrorContext,
11348
        "PP: try ATTRIBUTE_VALUE\n");break;
11349
  case XML_PARSER_DTD:
11350
      xmlGenericError(xmlGenericErrorContext,
11351
        "PP: try DTD\n");break;
11352
  case XML_PARSER_EPILOG:
11353
      xmlGenericError(xmlGenericErrorContext,
11354
        "PP: try EPILOG\n");break;
11355
  case XML_PARSER_PI:
11356
      xmlGenericError(xmlGenericErrorContext,
11357
        "PP: try PI\n");break;
11358
        case XML_PARSER_IGNORE:
11359
            xmlGenericError(xmlGenericErrorContext,
11360
        "PP: try IGNORE\n");break;
11361
    }
11362
#endif
11363
11364
34.7k
    if ((ctxt->input != NULL) &&
11365
34.7k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11366
2.77k
        xmlParserShrink(ctxt);
11367
2.77k
    }
11368
11369
1.79M
    while (ctxt->instate != XML_PARSER_EOF) {
11370
1.79M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11371
6.06k
      return(0);
11372
11373
1.79M
  if (ctxt->input == NULL) break;
11374
1.79M
  if (ctxt->input->buf != NULL) {
11375
      /*
11376
       * If we are operating on converted input, try to flush
11377
       * remaining chars to avoid them stalling in the non-converted
11378
       * buffer.
11379
       */
11380
1.79M
      if ((ctxt->input->buf->raw != NULL) &&
11381
1.79M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11382
5.52k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11383
5.52k
                                                 ctxt->input);
11384
5.52k
    size_t current = ctxt->input->cur - ctxt->input->base;
11385
11386
5.52k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11387
5.52k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11388
5.52k
                                      base, current);
11389
5.52k
      }
11390
1.79M
  }
11391
1.79M
        avail = ctxt->input->end - ctxt->input->cur;
11392
1.79M
        if (avail < 1)
11393
1.27k
      goto done;
11394
1.79M
        switch (ctxt->instate) {
11395
0
            case XML_PARSER_EOF:
11396
          /*
11397
     * Document parsing is done !
11398
     */
11399
0
          goto done;
11400
22.8k
            case XML_PARSER_START:
11401
22.8k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11402
9.88k
        xmlChar start[4];
11403
9.88k
        xmlCharEncoding enc;
11404
11405
        /*
11406
         * Very first chars read from the document flow.
11407
         */
11408
9.88k
        if (avail < 4)
11409
3
      goto done;
11410
11411
        /*
11412
         * Get the 4 first bytes and decode the charset
11413
         * if enc != XML_CHAR_ENCODING_NONE
11414
         * plug some encoding conversion routines,
11415
         * else xmlSwitchEncoding will set to (default)
11416
         * UTF8.
11417
         */
11418
9.88k
        start[0] = RAW;
11419
9.88k
        start[1] = NXT(1);
11420
9.88k
        start[2] = NXT(2);
11421
9.88k
        start[3] = NXT(3);
11422
9.88k
        enc = xmlDetectCharEncoding(start, 4);
11423
                    /*
11424
                     * We need more bytes to detect EBCDIC code pages.
11425
                     * See xmlDetectEBCDIC.
11426
                     */
11427
9.88k
                    if ((enc == XML_CHAR_ENCODING_EBCDIC) &&
11428
9.88k
                        (!terminate) && (avail < 200))
11429
0
                        goto done;
11430
9.88k
        xmlSwitchEncoding(ctxt, enc);
11431
9.88k
        break;
11432
9.88k
    }
11433
11434
12.9k
    if (avail < 2)
11435
8
        goto done;
11436
12.9k
    cur = ctxt->input->cur[0];
11437
12.9k
    next = ctxt->input->cur[1];
11438
12.9k
    if (cur == 0) {
11439
36
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11440
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11441
0
                  &xmlDefaultSAXLocator);
11442
36
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11443
36
        xmlHaltParser(ctxt);
11444
#ifdef DEBUG_PUSH
11445
        xmlGenericError(xmlGenericErrorContext,
11446
          "PP: entering EOF\n");
11447
#endif
11448
36
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11449
0
      ctxt->sax->endDocument(ctxt->userData);
11450
36
        goto done;
11451
36
    }
11452
12.9k
          if ((cur == '<') && (next == '?')) {
11453
        /* PI or XML decl */
11454
6.31k
        if (avail < 5) goto done;
11455
6.30k
        if ((!terminate) &&
11456
6.30k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11457
3.08k
      goto done;
11458
3.22k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11459
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11460
0
                  &xmlDefaultSAXLocator);
11461
3.22k
        if ((ctxt->input->cur[2] == 'x') &&
11462
3.22k
      (ctxt->input->cur[3] == 'm') &&
11463
3.22k
      (ctxt->input->cur[4] == 'l') &&
11464
3.22k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11465
2.23k
      ret += 5;
11466
#ifdef DEBUG_PUSH
11467
      xmlGenericError(xmlGenericErrorContext,
11468
        "PP: Parsing XML Decl\n");
11469
#endif
11470
2.23k
      xmlParseXMLDecl(ctxt);
11471
2.23k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11472
          /*
11473
           * The XML REC instructs us to stop parsing right
11474
           * here
11475
           */
11476
277
          xmlHaltParser(ctxt);
11477
277
          return(0);
11478
277
      }
11479
1.95k
      ctxt->standalone = ctxt->input->standalone;
11480
1.95k
      if ((ctxt->encoding == NULL) &&
11481
1.95k
          (ctxt->input->encoding != NULL))
11482
843
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11483
1.95k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11484
1.95k
          (!ctxt->disableSAX))
11485
1.55k
          ctxt->sax->startDocument(ctxt->userData);
11486
1.95k
      ctxt->instate = XML_PARSER_MISC;
11487
#ifdef DEBUG_PUSH
11488
      xmlGenericError(xmlGenericErrorContext,
11489
        "PP: entering MISC\n");
11490
#endif
11491
1.95k
        } else {
11492
990
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11493
990
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11494
990
          (!ctxt->disableSAX))
11495
990
          ctxt->sax->startDocument(ctxt->userData);
11496
990
      ctxt->instate = XML_PARSER_MISC;
11497
#ifdef DEBUG_PUSH
11498
      xmlGenericError(xmlGenericErrorContext,
11499
        "PP: entering MISC\n");
11500
#endif
11501
990
        }
11502
6.58k
    } else {
11503
6.58k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11504
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11505
0
                  &xmlDefaultSAXLocator);
11506
6.58k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11507
6.58k
        if (ctxt->version == NULL) {
11508
0
            xmlErrMemory(ctxt, NULL);
11509
0
      break;
11510
0
        }
11511
6.58k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11512
6.58k
            (!ctxt->disableSAX))
11513
6.58k
      ctxt->sax->startDocument(ctxt->userData);
11514
6.58k
        ctxt->instate = XML_PARSER_MISC;
11515
#ifdef DEBUG_PUSH
11516
        xmlGenericError(xmlGenericErrorContext,
11517
          "PP: entering MISC\n");
11518
#endif
11519
6.58k
    }
11520
9.53k
    break;
11521
711k
            case XML_PARSER_START_TAG: {
11522
711k
          const xmlChar *name;
11523
711k
    const xmlChar *prefix = NULL;
11524
711k
    const xmlChar *URI = NULL;
11525
711k
                int line = ctxt->input->line;
11526
711k
    int nsNr = ctxt->nsNr;
11527
11528
711k
    if ((avail < 2) && (ctxt->inputNr == 1))
11529
0
        goto done;
11530
711k
    cur = ctxt->input->cur[0];
11531
711k
          if (cur != '<') {
11532
133
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11533
133
        xmlHaltParser(ctxt);
11534
133
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11535
0
      ctxt->sax->endDocument(ctxt->userData);
11536
133
        goto done;
11537
133
    }
11538
711k
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11539
10.0k
                    goto done;
11540
701k
    if (ctxt->spaceNr == 0)
11541
0
        spacePush(ctxt, -1);
11542
701k
    else if (*ctxt->space == -2)
11543
61.9k
        spacePush(ctxt, -1);
11544
639k
    else
11545
639k
        spacePush(ctxt, *ctxt->space);
11546
701k
#ifdef LIBXML_SAX1_ENABLED
11547
701k
    if (ctxt->sax2)
11548
701k
#endif /* LIBXML_SAX1_ENABLED */
11549
701k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11550
0
#ifdef LIBXML_SAX1_ENABLED
11551
0
    else
11552
0
        name = xmlParseStartTag(ctxt);
11553
701k
#endif /* LIBXML_SAX1_ENABLED */
11554
701k
    if (ctxt->instate == XML_PARSER_EOF)
11555
0
        goto done;
11556
701k
    if (name == NULL) {
11557
162
        spacePop(ctxt);
11558
162
        xmlHaltParser(ctxt);
11559
162
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11560
0
      ctxt->sax->endDocument(ctxt->userData);
11561
162
        goto done;
11562
162
    }
11563
701k
#ifdef LIBXML_VALID_ENABLED
11564
    /*
11565
     * [ VC: Root Element Type ]
11566
     * The Name in the document type declaration must match
11567
     * the element type of the root element.
11568
     */
11569
701k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11570
701k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11571
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11572
701k
#endif /* LIBXML_VALID_ENABLED */
11573
11574
    /*
11575
     * Check for an Empty Element.
11576
     */
11577
701k
    if ((RAW == '/') && (NXT(1) == '>')) {
11578
19.8k
        SKIP(2);
11579
11580
19.8k
        if (ctxt->sax2) {
11581
19.8k
      if ((ctxt->sax != NULL) &&
11582
19.8k
          (ctxt->sax->endElementNs != NULL) &&
11583
19.8k
          (!ctxt->disableSAX))
11584
19.8k
          ctxt->sax->endElementNs(ctxt->userData, name,
11585
19.8k
                                  prefix, URI);
11586
19.8k
      if (ctxt->nsNr - nsNr > 0)
11587
988
          nsPop(ctxt, ctxt->nsNr - nsNr);
11588
19.8k
#ifdef LIBXML_SAX1_ENABLED
11589
19.8k
        } else {
11590
0
      if ((ctxt->sax != NULL) &&
11591
0
          (ctxt->sax->endElement != NULL) &&
11592
0
          (!ctxt->disableSAX))
11593
0
          ctxt->sax->endElement(ctxt->userData, name);
11594
0
#endif /* LIBXML_SAX1_ENABLED */
11595
0
        }
11596
19.8k
        if (ctxt->instate == XML_PARSER_EOF)
11597
0
      goto done;
11598
19.8k
        spacePop(ctxt);
11599
19.8k
        if (ctxt->nameNr == 0) {
11600
20
      ctxt->instate = XML_PARSER_EPILOG;
11601
19.8k
        } else {
11602
19.8k
      ctxt->instate = XML_PARSER_CONTENT;
11603
19.8k
        }
11604
19.8k
        break;
11605
19.8k
    }
11606
681k
    if (RAW == '>') {
11607
677k
        NEXT;
11608
677k
    } else {
11609
3.71k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11610
3.71k
           "Couldn't find end of Start Tag %s\n",
11611
3.71k
           name);
11612
3.71k
        nodePop(ctxt);
11613
3.71k
        spacePop(ctxt);
11614
3.71k
    }
11615
681k
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11616
11617
681k
    ctxt->instate = XML_PARSER_CONTENT;
11618
681k
                break;
11619
701k
      }
11620
990k
            case XML_PARSER_CONTENT: {
11621
990k
    if ((avail < 2) && (ctxt->inputNr == 1))
11622
321
        goto done;
11623
989k
    cur = ctxt->input->cur[0];
11624
989k
    next = ctxt->input->cur[1];
11625
11626
989k
    if ((cur == '<') && (next == '/')) {
11627
28.1k
        ctxt->instate = XML_PARSER_END_TAG;
11628
28.1k
        break;
11629
961k
          } else if ((cur == '<') && (next == '?')) {
11630
38.0k
        if ((!terminate) &&
11631
38.0k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11632
706
      goto done;
11633
37.3k
        xmlParsePI(ctxt);
11634
37.3k
        ctxt->instate = XML_PARSER_CONTENT;
11635
923k
    } else if ((cur == '<') && (next != '!')) {
11636
695k
        ctxt->instate = XML_PARSER_START_TAG;
11637
695k
        break;
11638
695k
    } else if ((cur == '<') && (next == '!') &&
11639
228k
               (ctxt->input->cur[2] == '-') &&
11640
228k
         (ctxt->input->cur[3] == '-')) {
11641
15.6k
        if ((!terminate) &&
11642
15.6k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11643
943
      goto done;
11644
14.6k
        xmlParseComment(ctxt);
11645
14.6k
        ctxt->instate = XML_PARSER_CONTENT;
11646
212k
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11647
212k
        (ctxt->input->cur[2] == '[') &&
11648
212k
        (ctxt->input->cur[3] == 'C') &&
11649
212k
        (ctxt->input->cur[4] == 'D') &&
11650
212k
        (ctxt->input->cur[5] == 'A') &&
11651
212k
        (ctxt->input->cur[6] == 'T') &&
11652
212k
        (ctxt->input->cur[7] == 'A') &&
11653
212k
        (ctxt->input->cur[8] == '[')) {
11654
3.60k
        SKIP(9);
11655
3.60k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11656
3.60k
        break;
11657
209k
    } else if ((cur == '<') && (next == '!') &&
11658
209k
               (avail < 9)) {
11659
72
        goto done;
11660
208k
    } else if (cur == '<') {
11661
24
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11662
24
                    "detected an error in element content\n");
11663
24
                    SKIP(1);
11664
208k
    } else if (cur == '&') {
11665
7.98k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11666
508
      goto done;
11667
7.48k
        xmlParseReference(ctxt);
11668
200k
    } else {
11669
        /* TODO Avoid the extra copy, handle directly !!! */
11670
        /*
11671
         * Goal of the following test is:
11672
         *  - minimize calls to the SAX 'character' callback
11673
         *    when they are mergeable
11674
         *  - handle a problem for isBlank when we only parse
11675
         *    a sequence of blank chars and the next one is
11676
         *    not available to check against '<' presence.
11677
         *  - tries to homogenize the differences in SAX
11678
         *    callbacks between the push and pull versions
11679
         *    of the parser.
11680
         */
11681
200k
        if ((ctxt->inputNr == 1) &&
11682
200k
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11683
19.8k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11684
695
          goto done;
11685
19.8k
                    }
11686
200k
                    ctxt->checkIndex = 0;
11687
200k
        xmlParseCharDataInternal(ctxt, !terminate);
11688
200k
    }
11689
259k
    break;
11690
989k
      }
11691
259k
            case XML_PARSER_END_TAG:
11692
28.5k
    if (avail < 2)
11693
0
        goto done;
11694
28.5k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11695
397
        goto done;
11696
28.1k
    if (ctxt->sax2) {
11697
28.1k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11698
28.1k
        nameNsPop(ctxt);
11699
28.1k
    }
11700
0
#ifdef LIBXML_SAX1_ENABLED
11701
0
      else
11702
0
        xmlParseEndTag1(ctxt, 0);
11703
28.1k
#endif /* LIBXML_SAX1_ENABLED */
11704
28.1k
    if (ctxt->instate == XML_PARSER_EOF) {
11705
        /* Nothing */
11706
28.1k
    } else if (ctxt->nameNr == 0) {
11707
332
        ctxt->instate = XML_PARSER_EPILOG;
11708
27.8k
    } else {
11709
27.8k
        ctxt->instate = XML_PARSER_CONTENT;
11710
27.8k
    }
11711
28.1k
    break;
11712
14.8k
            case XML_PARSER_CDATA_SECTION: {
11713
          /*
11714
     * The Push mode need to have the SAX callback for
11715
     * cdataBlock merge back contiguous callbacks.
11716
     */
11717
14.8k
    const xmlChar *term;
11718
11719
14.8k
                if (terminate) {
11720
                    /*
11721
                     * Don't call xmlParseLookupString. If 'terminate'
11722
                     * is set, checkIndex is invalid.
11723
                     */
11724
614
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11725
614
                                           "]]>");
11726
14.1k
                } else {
11727
14.1k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11728
14.1k
                }
11729
11730
14.8k
    if (term == NULL) {
11731
11.4k
        int tmp, size;
11732
11733
11.4k
                    if (terminate) {
11734
                        /* Unfinished CDATA section */
11735
281
                        size = ctxt->input->end - ctxt->input->cur;
11736
11.2k
                    } else {
11737
11.2k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11738
435
                            goto done;
11739
10.7k
                        ctxt->checkIndex = 0;
11740
                        /* XXX: Why don't we pass the full buffer? */
11741
10.7k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11742
10.7k
                    }
11743
11.0k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11744
11.0k
                    if (tmp <= 0) {
11745
134
                        tmp = -tmp;
11746
134
                        ctxt->input->cur += tmp;
11747
134
                        goto encoding_error;
11748
134
                    }
11749
10.9k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11750
10.9k
                        if (ctxt->sax->cdataBlock != NULL)
11751
0
                            ctxt->sax->cdataBlock(ctxt->userData,
11752
0
                                                  ctxt->input->cur, tmp);
11753
10.9k
                        else if (ctxt->sax->characters != NULL)
11754
10.9k
                            ctxt->sax->characters(ctxt->userData,
11755
10.9k
                                                  ctxt->input->cur, tmp);
11756
10.9k
                    }
11757
10.9k
                    if (ctxt->instate == XML_PARSER_EOF)
11758
0
                        goto done;
11759
10.9k
                    SKIPL(tmp);
11760
10.9k
    } else {
11761
3.32k
                    int base = term - CUR_PTR;
11762
3.32k
        int tmp;
11763
11764
3.32k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11765
3.32k
        if ((tmp < 0) || (tmp != base)) {
11766
24
      tmp = -tmp;
11767
24
      ctxt->input->cur += tmp;
11768
24
      goto encoding_error;
11769
24
        }
11770
3.29k
        if ((ctxt->sax != NULL) && (base == 0) &&
11771
3.29k
            (ctxt->sax->cdataBlock != NULL) &&
11772
3.29k
            (!ctxt->disableSAX)) {
11773
      /*
11774
       * Special case to provide identical behaviour
11775
       * between pull and push parsers on empty CDATA
11776
       * sections
11777
       */
11778
0
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11779
0
           (!strncmp((const char *)&ctxt->input->cur[-9],
11780
0
                     "<![CDATA[", 9)))
11781
0
           ctxt->sax->cdataBlock(ctxt->userData,
11782
0
                                 BAD_CAST "", 0);
11783
3.29k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11784
3.29k
      (!ctxt->disableSAX)) {
11785
2.72k
      if (ctxt->sax->cdataBlock != NULL)
11786
0
          ctxt->sax->cdataBlock(ctxt->userData,
11787
0
              ctxt->input->cur, base);
11788
2.72k
      else if (ctxt->sax->characters != NULL)
11789
2.72k
          ctxt->sax->characters(ctxt->userData,
11790
2.72k
              ctxt->input->cur, base);
11791
2.72k
        }
11792
3.29k
        if (ctxt->instate == XML_PARSER_EOF)
11793
0
      goto done;
11794
3.29k
        SKIPL(base + 3);
11795
3.29k
        ctxt->instate = XML_PARSER_CONTENT;
11796
#ifdef DEBUG_PUSH
11797
        xmlGenericError(xmlGenericErrorContext,
11798
          "PP: entering CONTENT\n");
11799
#endif
11800
3.29k
    }
11801
14.2k
    break;
11802
14.8k
      }
11803
17.3k
            case XML_PARSER_MISC:
11804
17.8k
            case XML_PARSER_PROLOG:
11805
18.1k
            case XML_PARSER_EPILOG:
11806
18.1k
    SKIP_BLANKS;
11807
18.1k
                avail = ctxt->input->end - ctxt->input->cur;
11808
18.1k
    if (avail < 2)
11809
370
        goto done;
11810
17.7k
    cur = ctxt->input->cur[0];
11811
17.7k
    next = ctxt->input->cur[1];
11812
17.7k
          if ((cur == '<') && (next == '?')) {
11813
4.23k
        if ((!terminate) &&
11814
4.23k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11815
318
      goto done;
11816
#ifdef DEBUG_PUSH
11817
        xmlGenericError(xmlGenericErrorContext,
11818
          "PP: Parsing PI\n");
11819
#endif
11820
3.91k
        xmlParsePI(ctxt);
11821
3.91k
        if (ctxt->instate == XML_PARSER_EOF)
11822
0
      goto done;
11823
13.5k
    } else if ((cur == '<') && (next == '!') &&
11824
13.5k
        (ctxt->input->cur[2] == '-') &&
11825
13.5k
        (ctxt->input->cur[3] == '-')) {
11826
1.59k
        if ((!terminate) &&
11827
1.59k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11828
352
      goto done;
11829
#ifdef DEBUG_PUSH
11830
        xmlGenericError(xmlGenericErrorContext,
11831
          "PP: Parsing Comment\n");
11832
#endif
11833
1.23k
        xmlParseComment(ctxt);
11834
1.23k
        if (ctxt->instate == XML_PARSER_EOF)
11835
0
      goto done;
11836
11.9k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11837
11.9k
                    (cur == '<') && (next == '!') &&
11838
11.9k
        (ctxt->input->cur[2] == 'D') &&
11839
11.9k
        (ctxt->input->cur[3] == 'O') &&
11840
11.9k
        (ctxt->input->cur[4] == 'C') &&
11841
11.9k
        (ctxt->input->cur[5] == 'T') &&
11842
11.9k
        (ctxt->input->cur[6] == 'Y') &&
11843
11.9k
        (ctxt->input->cur[7] == 'P') &&
11844
11.9k
        (ctxt->input->cur[8] == 'E')) {
11845
5.96k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11846
3.48k
                        goto done;
11847
#ifdef DEBUG_PUSH
11848
        xmlGenericError(xmlGenericErrorContext,
11849
          "PP: Parsing internal subset\n");
11850
#endif
11851
2.48k
        ctxt->inSubset = 1;
11852
2.48k
        xmlParseDocTypeDecl(ctxt);
11853
2.48k
        if (ctxt->instate == XML_PARSER_EOF)
11854
0
      goto done;
11855
2.48k
        if (RAW == '[') {
11856
2.38k
      ctxt->instate = XML_PARSER_DTD;
11857
#ifdef DEBUG_PUSH
11858
      xmlGenericError(xmlGenericErrorContext,
11859
        "PP: entering DTD\n");
11860
#endif
11861
2.38k
        } else {
11862
      /*
11863
       * Create and update the external subset.
11864
       */
11865
104
      ctxt->inSubset = 2;
11866
104
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11867
104
          (ctxt->sax->externalSubset != NULL))
11868
0
          ctxt->sax->externalSubset(ctxt->userData,
11869
0
            ctxt->intSubName, ctxt->extSubSystem,
11870
0
            ctxt->extSubURI);
11871
104
      ctxt->inSubset = 0;
11872
104
      xmlCleanSpecialAttr(ctxt);
11873
104
      ctxt->instate = XML_PARSER_PROLOG;
11874
#ifdef DEBUG_PUSH
11875
      xmlGenericError(xmlGenericErrorContext,
11876
        "PP: entering PROLOG\n");
11877
#endif
11878
104
        }
11879
6.00k
    } else if ((cur == '<') && (next == '!') &&
11880
6.00k
               (avail <
11881
63
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11882
47
        goto done;
11883
5.96k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11884
8
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11885
8
        xmlHaltParser(ctxt);
11886
#ifdef DEBUG_PUSH
11887
        xmlGenericError(xmlGenericErrorContext,
11888
          "PP: entering EOF\n");
11889
#endif
11890
8
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11891
0
      ctxt->sax->endDocument(ctxt->userData);
11892
8
        goto done;
11893
5.95k
                } else {
11894
5.95k
        ctxt->instate = XML_PARSER_START_TAG;
11895
#ifdef DEBUG_PUSH
11896
        xmlGenericError(xmlGenericErrorContext,
11897
          "PP: entering START_TAG\n");
11898
#endif
11899
5.95k
    }
11900
13.5k
    break;
11901
13.5k
            case XML_PARSER_DTD: {
11902
5.33k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11903
2.95k
                    goto done;
11904
2.38k
    xmlParseInternalSubset(ctxt);
11905
2.38k
    if (ctxt->instate == XML_PARSER_EOF)
11906
1.90k
        goto done;
11907
476
    ctxt->inSubset = 2;
11908
476
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909
476
        (ctxt->sax->externalSubset != NULL))
11910
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11911
0
          ctxt->extSubSystem, ctxt->extSubURI);
11912
476
    ctxt->inSubset = 0;
11913
476
    xmlCleanSpecialAttr(ctxt);
11914
476
    if (ctxt->instate == XML_PARSER_EOF)
11915
0
        goto done;
11916
476
    ctxt->instate = XML_PARSER_PROLOG;
11917
#ifdef DEBUG_PUSH
11918
    xmlGenericError(xmlGenericErrorContext,
11919
      "PP: entering PROLOG\n");
11920
#endif
11921
476
                break;
11922
476
      }
11923
0
            case XML_PARSER_COMMENT:
11924
0
    xmlGenericError(xmlGenericErrorContext,
11925
0
      "PP: internal error, state == COMMENT\n");
11926
0
    ctxt->instate = XML_PARSER_CONTENT;
11927
#ifdef DEBUG_PUSH
11928
    xmlGenericError(xmlGenericErrorContext,
11929
      "PP: entering CONTENT\n");
11930
#endif
11931
0
    break;
11932
0
            case XML_PARSER_IGNORE:
11933
0
    xmlGenericError(xmlGenericErrorContext,
11934
0
      "PP: internal error, state == IGNORE");
11935
0
          ctxt->instate = XML_PARSER_DTD;
11936
#ifdef DEBUG_PUSH
11937
    xmlGenericError(xmlGenericErrorContext,
11938
      "PP: entering DTD\n");
11939
#endif
11940
0
          break;
11941
0
            case XML_PARSER_PI:
11942
0
    xmlGenericError(xmlGenericErrorContext,
11943
0
      "PP: internal error, state == PI\n");
11944
0
    ctxt->instate = XML_PARSER_CONTENT;
11945
#ifdef DEBUG_PUSH
11946
    xmlGenericError(xmlGenericErrorContext,
11947
      "PP: entering CONTENT\n");
11948
#endif
11949
0
    break;
11950
0
            case XML_PARSER_ENTITY_DECL:
11951
0
    xmlGenericError(xmlGenericErrorContext,
11952
0
      "PP: internal error, state == ENTITY_DECL\n");
11953
0
    ctxt->instate = XML_PARSER_DTD;
11954
#ifdef DEBUG_PUSH
11955
    xmlGenericError(xmlGenericErrorContext,
11956
      "PP: entering DTD\n");
11957
#endif
11958
0
    break;
11959
0
            case XML_PARSER_ENTITY_VALUE:
11960
0
    xmlGenericError(xmlGenericErrorContext,
11961
0
      "PP: internal error, state == ENTITY_VALUE\n");
11962
0
    ctxt->instate = XML_PARSER_CONTENT;
11963
#ifdef DEBUG_PUSH
11964
    xmlGenericError(xmlGenericErrorContext,
11965
      "PP: entering DTD\n");
11966
#endif
11967
0
    break;
11968
0
            case XML_PARSER_ATTRIBUTE_VALUE:
11969
0
    xmlGenericError(xmlGenericErrorContext,
11970
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
11971
0
    ctxt->instate = XML_PARSER_START_TAG;
11972
#ifdef DEBUG_PUSH
11973
    xmlGenericError(xmlGenericErrorContext,
11974
      "PP: entering START_TAG\n");
11975
#endif
11976
0
    break;
11977
0
            case XML_PARSER_SYSTEM_LITERAL:
11978
0
    xmlGenericError(xmlGenericErrorContext,
11979
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
11980
0
    ctxt->instate = XML_PARSER_START_TAG;
11981
#ifdef DEBUG_PUSH
11982
    xmlGenericError(xmlGenericErrorContext,
11983
      "PP: entering START_TAG\n");
11984
#endif
11985
0
    break;
11986
0
            case XML_PARSER_PUBLIC_LITERAL:
11987
0
    xmlGenericError(xmlGenericErrorContext,
11988
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
11989
0
    ctxt->instate = XML_PARSER_START_TAG;
11990
#ifdef DEBUG_PUSH
11991
    xmlGenericError(xmlGenericErrorContext,
11992
      "PP: entering START_TAG\n");
11993
#endif
11994
0
    break;
11995
1.79M
  }
11996
1.79M
    }
11997
28.2k
done:
11998
#ifdef DEBUG_PUSH
11999
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12000
#endif
12001
28.2k
    return(ret);
12002
158
encoding_error:
12003
158
    if (ctxt->input->end - ctxt->input->cur < 4) {
12004
53
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12005
53
         "Input is not proper UTF-8, indicate encoding !\n",
12006
53
         NULL, NULL);
12007
105
    } else {
12008
105
        char buffer[150];
12009
12010
105
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12011
105
      ctxt->input->cur[0], ctxt->input->cur[1],
12012
105
      ctxt->input->cur[2], ctxt->input->cur[3]);
12013
105
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12014
105
         "Input is not proper UTF-8, indicate encoding !\n%s",
12015
105
         BAD_CAST buffer, NULL);
12016
105
    }
12017
158
    return(0);
12018
34.7k
}
12019
12020
/**
12021
 * xmlParseChunk:
12022
 * @ctxt:  an XML parser context
12023
 * @chunk:  an char array
12024
 * @size:  the size in byte of the chunk
12025
 * @terminate:  last chunk indicator
12026
 *
12027
 * Parse a Chunk of memory
12028
 *
12029
 * Returns zero if no error, the xmlParserErrors otherwise.
12030
 */
12031
int
12032
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12033
35.2k
              int terminate) {
12034
35.2k
    int end_in_lf = 0;
12035
12036
35.2k
    if (ctxt == NULL)
12037
0
        return(XML_ERR_INTERNAL_ERROR);
12038
35.2k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12039
448
        return(ctxt->errNo);
12040
34.8k
    if (ctxt->instate == XML_PARSER_EOF)
12041
0
        return(-1);
12042
34.8k
    if (ctxt->input == NULL)
12043
0
        return(-1);
12044
12045
34.8k
    ctxt->progressive = 1;
12046
34.8k
    if (ctxt->instate == XML_PARSER_START)
12047
12.9k
        xmlDetectSAX2(ctxt);
12048
34.8k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12049
34.8k
        (chunk[size - 1] == '\r')) {
12050
3.08k
  end_in_lf = 1;
12051
3.08k
  size--;
12052
3.08k
    }
12053
12054
34.8k
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12055
34.8k
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12056
25.3k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12057
25.3k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12058
25.3k
  int res;
12059
12060
25.3k
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12061
25.3k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12062
25.3k
  if (res < 0) {
12063
32
      ctxt->errNo = XML_PARSER_EOF;
12064
32
      xmlHaltParser(ctxt);
12065
32
      return (XML_PARSER_EOF);
12066
32
  }
12067
#ifdef DEBUG_PUSH
12068
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12069
#endif
12070
12071
25.3k
    } else if (ctxt->instate != XML_PARSER_EOF) {
12072
9.49k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12073
9.49k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12074
9.49k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12075
9.49k
        (in->raw != NULL)) {
12076
252
    int nbchars;
12077
252
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12078
252
    size_t current = ctxt->input->cur - ctxt->input->base;
12079
12080
252
    nbchars = xmlCharEncInput(in, terminate);
12081
252
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12082
252
    if (nbchars < 0) {
12083
        /* TODO 2.6.0 */
12084
3
        xmlGenericError(xmlGenericErrorContext,
12085
3
            "xmlParseChunk: encoder error\n");
12086
3
                    xmlHaltParser(ctxt);
12087
3
        return(XML_ERR_INVALID_ENCODING);
12088
3
    }
12089
252
      }
12090
9.49k
  }
12091
9.49k
    }
12092
12093
34.7k
    xmlParseTryOrFinish(ctxt, terminate);
12094
34.7k
    if (ctxt->instate == XML_PARSER_EOF)
12095
2.53k
        return(ctxt->errNo);
12096
12097
32.2k
    if ((ctxt->input != NULL) &&
12098
32.2k
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12099
32.2k
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12100
32.2k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12101
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12102
0
        xmlHaltParser(ctxt);
12103
0
    }
12104
32.2k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12105
6.22k
        return(ctxt->errNo);
12106
12107
26.0k
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12108
26.0k
        (ctxt->input->buf != NULL)) {
12109
3.07k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12110
3.07k
           ctxt->input);
12111
3.07k
  size_t current = ctxt->input->cur - ctxt->input->base;
12112
12113
3.07k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12114
12115
3.07k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12116
3.07k
            base, current);
12117
3.07k
    }
12118
26.0k
    if (terminate) {
12119
  /*
12120
   * Check for termination
12121
   */
12122
1.09k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12123
1.09k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12124
811
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12125
811
  }
12126
1.09k
  if ((ctxt->instate == XML_PARSER_EPILOG) &&
12127
1.09k
            (ctxt->input->cur < ctxt->input->end)) {
12128
3
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12129
3
  }
12130
1.09k
  if (ctxt->instate != XML_PARSER_EOF) {
12131
1.09k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12132
0
    ctxt->sax->endDocument(ctxt->userData);
12133
1.09k
  }
12134
1.09k
  ctxt->instate = XML_PARSER_EOF;
12135
1.09k
    }
12136
26.0k
    if (ctxt->wellFormed == 0)
12137
814
  return((xmlParserErrors) ctxt->errNo);
12138
25.1k
    else
12139
25.1k
        return(0);
12140
26.0k
}
12141
12142
/************************************************************************
12143
 *                  *
12144
 *    I/O front end functions to the parser     *
12145
 *                  *
12146
 ************************************************************************/
12147
12148
/**
12149
 * xmlCreatePushParserCtxt:
12150
 * @sax:  a SAX handler
12151
 * @user_data:  The user data returned on SAX callbacks
12152
 * @chunk:  a pointer to an array of chars
12153
 * @size:  number of chars in the array
12154
 * @filename:  an optional file name or URI
12155
 *
12156
 * Create a parser context for using the XML parser in push mode.
12157
 * If @buffer and @size are non-NULL, the data is used to detect
12158
 * the encoding.  The remaining characters will be parsed so they
12159
 * don't need to be fed in again through xmlParseChunk.
12160
 * To allow content encoding detection, @size should be >= 4
12161
 * The value of @filename is used for fetching external entities
12162
 * and error/warning reports.
12163
 *
12164
 * Returns the new parser context or NULL
12165
 */
12166
12167
xmlParserCtxtPtr
12168
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12169
9.88k
                        const char *chunk, int size, const char *filename) {
12170
9.88k
    xmlParserCtxtPtr ctxt;
12171
9.88k
    xmlParserInputPtr inputStream;
12172
9.88k
    xmlParserInputBufferPtr buf;
12173
12174
9.88k
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
12175
9.88k
    if (buf == NULL) return(NULL);
12176
12177
9.88k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12178
9.88k
    if (ctxt == NULL) {
12179
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12180
0
  xmlFreeParserInputBuffer(buf);
12181
0
  return(NULL);
12182
0
    }
12183
9.88k
    ctxt->dictNames = 1;
12184
9.88k
    if (filename == NULL) {
12185
9.88k
  ctxt->directory = NULL;
12186
9.88k
    } else {
12187
0
        ctxt->directory = xmlParserGetDirectory(filename);
12188
0
    }
12189
12190
9.88k
    inputStream = xmlNewInputStream(ctxt);
12191
9.88k
    if (inputStream == NULL) {
12192
0
  xmlFreeParserCtxt(ctxt);
12193
0
  xmlFreeParserInputBuffer(buf);
12194
0
  return(NULL);
12195
0
    }
12196
12197
9.88k
    if (filename == NULL)
12198
9.88k
  inputStream->filename = NULL;
12199
0
    else {
12200
0
  inputStream->filename = (char *)
12201
0
      xmlCanonicPath((const xmlChar *) filename);
12202
0
  if (inputStream->filename == NULL) {
12203
0
            xmlFreeInputStream(inputStream);
12204
0
      xmlFreeParserCtxt(ctxt);
12205
0
      xmlFreeParserInputBuffer(buf);
12206
0
      return(NULL);
12207
0
  }
12208
0
    }
12209
9.88k
    inputStream->buf = buf;
12210
9.88k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12211
9.88k
    inputPush(ctxt, inputStream);
12212
12213
    /*
12214
     * If the caller didn't provide an initial 'chunk' for determining
12215
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12216
     * that it can be automatically determined later
12217
     */
12218
9.88k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12219
12220
9.88k
    if ((size != 0) && (chunk != NULL) &&
12221
9.88k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12222
9.88k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12223
9.88k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12224
12225
9.88k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12226
12227
9.88k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12228
#ifdef DEBUG_PUSH
12229
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12230
#endif
12231
9.88k
    }
12232
12233
9.88k
    return(ctxt);
12234
9.88k
}
12235
#endif /* LIBXML_PUSH_ENABLED */
12236
12237
/**
12238
 * xmlStopParser:
12239
 * @ctxt:  an XML parser context
12240
 *
12241
 * Blocks further parser processing
12242
 */
12243
void
12244
15
xmlStopParser(xmlParserCtxtPtr ctxt) {
12245
15
    if (ctxt == NULL)
12246
0
        return;
12247
15
    xmlHaltParser(ctxt);
12248
15
    ctxt->errNo = XML_ERR_USER_STOP;
12249
15
}
12250
12251
/**
12252
 * xmlCreateIOParserCtxt:
12253
 * @sax:  a SAX handler
12254
 * @user_data:  The user data returned on SAX callbacks
12255
 * @ioread:  an I/O read function
12256
 * @ioclose:  an I/O close function
12257
 * @ioctx:  an I/O handler
12258
 * @enc:  the charset encoding if known
12259
 *
12260
 * Create a parser context for using the XML parser with an existing
12261
 * I/O stream
12262
 *
12263
 * Returns the new parser context or NULL
12264
 */
12265
xmlParserCtxtPtr
12266
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12267
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12268
0
  void *ioctx, xmlCharEncoding enc) {
12269
0
    xmlParserCtxtPtr ctxt;
12270
0
    xmlParserInputPtr inputStream;
12271
0
    xmlParserInputBufferPtr buf;
12272
12273
0
    if (ioread == NULL) return(NULL);
12274
12275
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12276
0
    if (buf == NULL) {
12277
0
        if (ioclose != NULL)
12278
0
            ioclose(ioctx);
12279
0
        return (NULL);
12280
0
    }
12281
12282
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12283
0
    if (ctxt == NULL) {
12284
0
  xmlFreeParserInputBuffer(buf);
12285
0
  return(NULL);
12286
0
    }
12287
12288
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12289
0
    if (inputStream == NULL) {
12290
0
  xmlFreeParserCtxt(ctxt);
12291
0
  return(NULL);
12292
0
    }
12293
0
    inputPush(ctxt, inputStream);
12294
12295
0
    return(ctxt);
12296
0
}
12297
12298
#ifdef LIBXML_VALID_ENABLED
12299
/************************************************************************
12300
 *                  *
12301
 *    Front ends when parsing a DTD       *
12302
 *                  *
12303
 ************************************************************************/
12304
12305
/**
12306
 * xmlIOParseDTD:
12307
 * @sax:  the SAX handler block or NULL
12308
 * @input:  an Input Buffer
12309
 * @enc:  the charset encoding if known
12310
 *
12311
 * Load and parse a DTD
12312
 *
12313
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12314
 * @input will be freed by the function in any case.
12315
 */
12316
12317
xmlDtdPtr
12318
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12319
0
        xmlCharEncoding enc) {
12320
0
    xmlDtdPtr ret = NULL;
12321
0
    xmlParserCtxtPtr ctxt;
12322
0
    xmlParserInputPtr pinput = NULL;
12323
0
    xmlChar start[4];
12324
12325
0
    if (input == NULL)
12326
0
  return(NULL);
12327
12328
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12329
0
    if (ctxt == NULL) {
12330
0
        xmlFreeParserInputBuffer(input);
12331
0
  return(NULL);
12332
0
    }
12333
12334
    /* We are loading a DTD */
12335
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12336
12337
0
    xmlDetectSAX2(ctxt);
12338
12339
    /*
12340
     * generate a parser input from the I/O handler
12341
     */
12342
12343
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12344
0
    if (pinput == NULL) {
12345
0
        xmlFreeParserInputBuffer(input);
12346
0
  xmlFreeParserCtxt(ctxt);
12347
0
  return(NULL);
12348
0
    }
12349
12350
    /*
12351
     * plug some encoding conversion routines here.
12352
     */
12353
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12354
0
  xmlFreeParserCtxt(ctxt);
12355
0
  return(NULL);
12356
0
    }
12357
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12358
0
        xmlSwitchEncoding(ctxt, enc);
12359
0
    }
12360
12361
0
    pinput->filename = NULL;
12362
0
    pinput->line = 1;
12363
0
    pinput->col = 1;
12364
0
    pinput->base = ctxt->input->cur;
12365
0
    pinput->cur = ctxt->input->cur;
12366
0
    pinput->free = NULL;
12367
12368
    /*
12369
     * let's parse that entity knowing it's an external subset.
12370
     */
12371
0
    ctxt->inSubset = 2;
12372
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12373
0
    if (ctxt->myDoc == NULL) {
12374
0
  xmlErrMemory(ctxt, "New Doc failed");
12375
0
  return(NULL);
12376
0
    }
12377
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12378
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12379
0
                                 BAD_CAST "none", BAD_CAST "none");
12380
12381
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12382
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12383
  /*
12384
   * Get the 4 first bytes and decode the charset
12385
   * if enc != XML_CHAR_ENCODING_NONE
12386
   * plug some encoding conversion routines.
12387
   */
12388
0
  start[0] = RAW;
12389
0
  start[1] = NXT(1);
12390
0
  start[2] = NXT(2);
12391
0
  start[3] = NXT(3);
12392
0
  enc = xmlDetectCharEncoding(start, 4);
12393
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12394
0
      xmlSwitchEncoding(ctxt, enc);
12395
0
  }
12396
0
    }
12397
12398
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12399
12400
0
    if (ctxt->myDoc != NULL) {
12401
0
  if (ctxt->wellFormed) {
12402
0
      ret = ctxt->myDoc->extSubset;
12403
0
      ctxt->myDoc->extSubset = NULL;
12404
0
      if (ret != NULL) {
12405
0
    xmlNodePtr tmp;
12406
12407
0
    ret->doc = NULL;
12408
0
    tmp = ret->children;
12409
0
    while (tmp != NULL) {
12410
0
        tmp->doc = NULL;
12411
0
        tmp = tmp->next;
12412
0
    }
12413
0
      }
12414
0
  } else {
12415
0
      ret = NULL;
12416
0
  }
12417
0
        xmlFreeDoc(ctxt->myDoc);
12418
0
        ctxt->myDoc = NULL;
12419
0
    }
12420
0
    xmlFreeParserCtxt(ctxt);
12421
12422
0
    return(ret);
12423
0
}
12424
12425
/**
12426
 * xmlSAXParseDTD:
12427
 * @sax:  the SAX handler block
12428
 * @ExternalID:  a NAME* containing the External ID of the DTD
12429
 * @SystemID:  a NAME* containing the URL to the DTD
12430
 *
12431
 * DEPRECATED: Don't use.
12432
 *
12433
 * Load and parse an external subset.
12434
 *
12435
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12436
 */
12437
12438
xmlDtdPtr
12439
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12440
0
                          const xmlChar *SystemID) {
12441
0
    xmlDtdPtr ret = NULL;
12442
0
    xmlParserCtxtPtr ctxt;
12443
0
    xmlParserInputPtr input = NULL;
12444
0
    xmlCharEncoding enc;
12445
0
    xmlChar* systemIdCanonic;
12446
12447
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12448
12449
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12450
0
    if (ctxt == NULL) {
12451
0
  return(NULL);
12452
0
    }
12453
12454
    /* We are loading a DTD */
12455
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12456
12457
    /*
12458
     * Canonicalise the system ID
12459
     */
12460
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12461
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12462
0
  xmlFreeParserCtxt(ctxt);
12463
0
  return(NULL);
12464
0
    }
12465
12466
    /*
12467
     * Ask the Entity resolver to load the damn thing
12468
     */
12469
12470
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12471
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12472
0
                                   systemIdCanonic);
12473
0
    if (input == NULL) {
12474
0
  xmlFreeParserCtxt(ctxt);
12475
0
  if (systemIdCanonic != NULL)
12476
0
      xmlFree(systemIdCanonic);
12477
0
  return(NULL);
12478
0
    }
12479
12480
    /*
12481
     * plug some encoding conversion routines here.
12482
     */
12483
0
    if (xmlPushInput(ctxt, input) < 0) {
12484
0
  xmlFreeParserCtxt(ctxt);
12485
0
  if (systemIdCanonic != NULL)
12486
0
      xmlFree(systemIdCanonic);
12487
0
  return(NULL);
12488
0
    }
12489
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12490
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12491
0
  xmlSwitchEncoding(ctxt, enc);
12492
0
    }
12493
12494
0
    if (input->filename == NULL)
12495
0
  input->filename = (char *) systemIdCanonic;
12496
0
    else
12497
0
  xmlFree(systemIdCanonic);
12498
0
    input->line = 1;
12499
0
    input->col = 1;
12500
0
    input->base = ctxt->input->cur;
12501
0
    input->cur = ctxt->input->cur;
12502
0
    input->free = NULL;
12503
12504
    /*
12505
     * let's parse that entity knowing it's an external subset.
12506
     */
12507
0
    ctxt->inSubset = 2;
12508
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12509
0
    if (ctxt->myDoc == NULL) {
12510
0
  xmlErrMemory(ctxt, "New Doc failed");
12511
0
  xmlFreeParserCtxt(ctxt);
12512
0
  return(NULL);
12513
0
    }
12514
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12515
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12516
0
                                 ExternalID, SystemID);
12517
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12518
12519
0
    if (ctxt->myDoc != NULL) {
12520
0
  if (ctxt->wellFormed) {
12521
0
      ret = ctxt->myDoc->extSubset;
12522
0
      ctxt->myDoc->extSubset = NULL;
12523
0
      if (ret != NULL) {
12524
0
    xmlNodePtr tmp;
12525
12526
0
    ret->doc = NULL;
12527
0
    tmp = ret->children;
12528
0
    while (tmp != NULL) {
12529
0
        tmp->doc = NULL;
12530
0
        tmp = tmp->next;
12531
0
    }
12532
0
      }
12533
0
  } else {
12534
0
      ret = NULL;
12535
0
  }
12536
0
        xmlFreeDoc(ctxt->myDoc);
12537
0
        ctxt->myDoc = NULL;
12538
0
    }
12539
0
    xmlFreeParserCtxt(ctxt);
12540
12541
0
    return(ret);
12542
0
}
12543
12544
12545
/**
12546
 * xmlParseDTD:
12547
 * @ExternalID:  a NAME* containing the External ID of the DTD
12548
 * @SystemID:  a NAME* containing the URL to the DTD
12549
 *
12550
 * Load and parse an external subset.
12551
 *
12552
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12553
 */
12554
12555
xmlDtdPtr
12556
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12557
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12558
0
}
12559
#endif /* LIBXML_VALID_ENABLED */
12560
12561
/************************************************************************
12562
 *                  *
12563
 *    Front ends when parsing an Entity     *
12564
 *                  *
12565
 ************************************************************************/
12566
12567
/**
12568
 * xmlParseCtxtExternalEntity:
12569
 * @ctx:  the existing parsing context
12570
 * @URL:  the URL for the entity to load
12571
 * @ID:  the System ID for the entity to load
12572
 * @lst:  the return value for the set of parsed nodes
12573
 *
12574
 * Parse an external general entity within an existing parsing context
12575
 * An external general parsed entity is well-formed if it matches the
12576
 * production labeled extParsedEnt.
12577
 *
12578
 * [78] extParsedEnt ::= TextDecl? content
12579
 *
12580
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12581
 *    the parser error code otherwise
12582
 */
12583
12584
int
12585
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12586
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12587
0
    void *userData;
12588
12589
0
    if (ctx == NULL) return(-1);
12590
    /*
12591
     * If the user provided their own SAX callbacks, then reuse the
12592
     * userData callback field, otherwise the expected setup in a
12593
     * DOM builder is to have userData == ctxt
12594
     */
12595
0
    if (ctx->userData == ctx)
12596
0
        userData = NULL;
12597
0
    else
12598
0
        userData = ctx->userData;
12599
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12600
0
                                         userData, ctx->depth + 1,
12601
0
                                         URL, ID, lst);
12602
0
}
12603
12604
/**
12605
 * xmlParseExternalEntityPrivate:
12606
 * @doc:  the document the chunk pertains to
12607
 * @oldctxt:  the previous parser context if available
12608
 * @sax:  the SAX handler block (possibly NULL)
12609
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12610
 * @depth:  Used for loop detection, use 0
12611
 * @URL:  the URL for the entity to load
12612
 * @ID:  the System ID for the entity to load
12613
 * @list:  the return value for the set of parsed nodes
12614
 *
12615
 * Private version of xmlParseExternalEntity()
12616
 *
12617
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12618
 *    the parser error code otherwise
12619
 */
12620
12621
static xmlParserErrors
12622
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12623
                xmlSAXHandlerPtr sax,
12624
          void *user_data, int depth, const xmlChar *URL,
12625
0
          const xmlChar *ID, xmlNodePtr *list) {
12626
0
    xmlParserCtxtPtr ctxt;
12627
0
    xmlDocPtr newDoc;
12628
0
    xmlNodePtr newRoot;
12629
0
    xmlParserErrors ret = XML_ERR_OK;
12630
0
    xmlChar start[4];
12631
0
    xmlCharEncoding enc;
12632
12633
0
    if (((depth > 40) &&
12634
0
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12635
0
  (depth > 100)) {
12636
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12637
0
                       "Maximum entity nesting depth exceeded");
12638
0
        return(XML_ERR_ENTITY_LOOP);
12639
0
    }
12640
12641
0
    if (list != NULL)
12642
0
        *list = NULL;
12643
0
    if ((URL == NULL) && (ID == NULL))
12644
0
  return(XML_ERR_INTERNAL_ERROR);
12645
0
    if (doc == NULL)
12646
0
  return(XML_ERR_INTERNAL_ERROR);
12647
12648
0
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12649
0
                                             oldctxt);
12650
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12651
0
    if (oldctxt != NULL) {
12652
0
        ctxt->nbErrors = oldctxt->nbErrors;
12653
0
        ctxt->nbWarnings = oldctxt->nbWarnings;
12654
0
    }
12655
0
    xmlDetectSAX2(ctxt);
12656
12657
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12658
0
    if (newDoc == NULL) {
12659
0
  xmlFreeParserCtxt(ctxt);
12660
0
  return(XML_ERR_INTERNAL_ERROR);
12661
0
    }
12662
0
    newDoc->properties = XML_DOC_INTERNAL;
12663
0
    if (doc) {
12664
0
        newDoc->intSubset = doc->intSubset;
12665
0
        newDoc->extSubset = doc->extSubset;
12666
0
        if (doc->dict) {
12667
0
            newDoc->dict = doc->dict;
12668
0
            xmlDictReference(newDoc->dict);
12669
0
        }
12670
0
        if (doc->URL != NULL) {
12671
0
            newDoc->URL = xmlStrdup(doc->URL);
12672
0
        }
12673
0
    }
12674
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12675
0
    if (newRoot == NULL) {
12676
0
  if (sax != NULL)
12677
0
  xmlFreeParserCtxt(ctxt);
12678
0
  newDoc->intSubset = NULL;
12679
0
  newDoc->extSubset = NULL;
12680
0
        xmlFreeDoc(newDoc);
12681
0
  return(XML_ERR_INTERNAL_ERROR);
12682
0
    }
12683
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12684
0
    nodePush(ctxt, newDoc->children);
12685
0
    if (doc == NULL) {
12686
0
        ctxt->myDoc = newDoc;
12687
0
    } else {
12688
0
        ctxt->myDoc = doc;
12689
0
        newRoot->doc = doc;
12690
0
    }
12691
12692
    /*
12693
     * Get the 4 first bytes and decode the charset
12694
     * if enc != XML_CHAR_ENCODING_NONE
12695
     * plug some encoding conversion routines.
12696
     */
12697
0
    GROW;
12698
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12699
0
  start[0] = RAW;
12700
0
  start[1] = NXT(1);
12701
0
  start[2] = NXT(2);
12702
0
  start[3] = NXT(3);
12703
0
  enc = xmlDetectCharEncoding(start, 4);
12704
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12705
0
      xmlSwitchEncoding(ctxt, enc);
12706
0
  }
12707
0
    }
12708
12709
    /*
12710
     * Parse a possible text declaration first
12711
     */
12712
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12713
0
  xmlParseTextDecl(ctxt);
12714
        /*
12715
         * An XML-1.0 document can't reference an entity not XML-1.0
12716
         */
12717
0
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12718
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12719
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12720
0
                           "Version mismatch between document and entity\n");
12721
0
        }
12722
0
    }
12723
12724
0
    ctxt->instate = XML_PARSER_CONTENT;
12725
0
    ctxt->depth = depth;
12726
0
    if (oldctxt != NULL) {
12727
0
  ctxt->_private = oldctxt->_private;
12728
0
  ctxt->loadsubset = oldctxt->loadsubset;
12729
0
  ctxt->validate = oldctxt->validate;
12730
0
  ctxt->valid = oldctxt->valid;
12731
0
  ctxt->replaceEntities = oldctxt->replaceEntities;
12732
0
        if (oldctxt->validate) {
12733
0
            ctxt->vctxt.error = oldctxt->vctxt.error;
12734
0
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12735
0
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12736
0
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12737
0
        }
12738
0
  ctxt->external = oldctxt->external;
12739
0
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12740
0
        ctxt->dict = oldctxt->dict;
12741
0
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12742
0
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12743
0
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12744
0
        ctxt->dictNames = oldctxt->dictNames;
12745
0
        ctxt->attsDefault = oldctxt->attsDefault;
12746
0
        ctxt->attsSpecial = oldctxt->attsSpecial;
12747
0
        ctxt->linenumbers = oldctxt->linenumbers;
12748
0
  ctxt->record_info = oldctxt->record_info;
12749
0
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12750
0
  ctxt->node_seq.length = oldctxt->node_seq.length;
12751
0
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12752
0
    } else {
12753
  /*
12754
   * Doing validity checking on chunk without context
12755
   * doesn't make sense
12756
   */
12757
0
  ctxt->_private = NULL;
12758
0
  ctxt->validate = 0;
12759
0
  ctxt->external = 2;
12760
0
  ctxt->loadsubset = 0;
12761
0
    }
12762
12763
0
    xmlParseContent(ctxt);
12764
12765
0
    if ((RAW == '<') && (NXT(1) == '/')) {
12766
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12767
0
    } else if (RAW != 0) {
12768
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12769
0
    }
12770
0
    if (ctxt->node != newDoc->children) {
12771
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12772
0
    }
12773
12774
0
    if (!ctxt->wellFormed) {
12775
0
  ret = (xmlParserErrors)ctxt->errNo;
12776
0
        if (oldctxt != NULL) {
12777
0
            oldctxt->errNo = ctxt->errNo;
12778
0
            oldctxt->wellFormed = 0;
12779
0
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12780
0
        }
12781
0
    } else {
12782
0
  if (list != NULL) {
12783
0
      xmlNodePtr cur;
12784
12785
      /*
12786
       * Return the newly created nodeset after unlinking it from
12787
       * they pseudo parent.
12788
       */
12789
0
      cur = newDoc->children->children;
12790
0
      *list = cur;
12791
0
      while (cur != NULL) {
12792
0
    cur->parent = NULL;
12793
0
    cur = cur->next;
12794
0
      }
12795
0
            newDoc->children->children = NULL;
12796
0
  }
12797
0
  ret = XML_ERR_OK;
12798
0
    }
12799
12800
    /*
12801
     * Also record the size of the entity parsed
12802
     */
12803
0
    if (ctxt->input != NULL && oldctxt != NULL) {
12804
0
        unsigned long consumed = ctxt->input->consumed;
12805
12806
0
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12807
12808
0
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12809
0
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12810
12811
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12812
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12813
0
    }
12814
12815
0
    if (oldctxt != NULL) {
12816
0
        ctxt->dict = NULL;
12817
0
        ctxt->attsDefault = NULL;
12818
0
        ctxt->attsSpecial = NULL;
12819
0
        oldctxt->nbErrors = ctxt->nbErrors;
12820
0
        oldctxt->nbWarnings = ctxt->nbWarnings;
12821
0
        oldctxt->validate = ctxt->validate;
12822
0
        oldctxt->valid = ctxt->valid;
12823
0
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12824
0
        oldctxt->node_seq.length = ctxt->node_seq.length;
12825
0
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12826
0
    }
12827
0
    ctxt->node_seq.maximum = 0;
12828
0
    ctxt->node_seq.length = 0;
12829
0
    ctxt->node_seq.buffer = NULL;
12830
0
    xmlFreeParserCtxt(ctxt);
12831
0
    newDoc->intSubset = NULL;
12832
0
    newDoc->extSubset = NULL;
12833
0
    xmlFreeDoc(newDoc);
12834
12835
0
    return(ret);
12836
0
}
12837
12838
#ifdef LIBXML_SAX1_ENABLED
12839
/**
12840
 * xmlParseExternalEntity:
12841
 * @doc:  the document the chunk pertains to
12842
 * @sax:  the SAX handler block (possibly NULL)
12843
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12844
 * @depth:  Used for loop detection, use 0
12845
 * @URL:  the URL for the entity to load
12846
 * @ID:  the System ID for the entity to load
12847
 * @lst:  the return value for the set of parsed nodes
12848
 *
12849
 * Parse an external general entity
12850
 * An external general parsed entity is well-formed if it matches the
12851
 * production labeled extParsedEnt.
12852
 *
12853
 * [78] extParsedEnt ::= TextDecl? content
12854
 *
12855
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12856
 *    the parser error code otherwise
12857
 */
12858
12859
int
12860
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12861
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12862
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12863
0
                           ID, lst));
12864
0
}
12865
12866
/**
12867
 * xmlParseBalancedChunkMemory:
12868
 * @doc:  the document the chunk pertains to (must not be NULL)
12869
 * @sax:  the SAX handler block (possibly NULL)
12870
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12871
 * @depth:  Used for loop detection, use 0
12872
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12873
 * @lst:  the return value for the set of parsed nodes
12874
 *
12875
 * Parse a well-balanced chunk of an XML document
12876
 * called by the parser
12877
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12878
 * the content production in the XML grammar:
12879
 *
12880
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12881
 *
12882
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12883
 *    the parser error code otherwise
12884
 */
12885
12886
int
12887
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12888
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12889
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12890
0
                                                depth, string, lst, 0 );
12891
0
}
12892
#endif /* LIBXML_SAX1_ENABLED */
12893
12894
/**
12895
 * xmlParseBalancedChunkMemoryInternal:
12896
 * @oldctxt:  the existing parsing context
12897
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12898
 * @user_data:  the user data field for the parser context
12899
 * @lst:  the return value for the set of parsed nodes
12900
 *
12901
 *
12902
 * Parse a well-balanced chunk of an XML document
12903
 * called by the parser
12904
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12905
 * the content production in the XML grammar:
12906
 *
12907
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12908
 *
12909
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12910
 * error code otherwise
12911
 *
12912
 * In case recover is set to 1, the nodelist will not be empty even if
12913
 * the parsed chunk is not well balanced.
12914
 */
12915
static xmlParserErrors
12916
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12917
0
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12918
0
    xmlParserCtxtPtr ctxt;
12919
0
    xmlDocPtr newDoc = NULL;
12920
0
    xmlNodePtr newRoot;
12921
0
    xmlSAXHandlerPtr oldsax = NULL;
12922
0
    xmlNodePtr content = NULL;
12923
0
    xmlNodePtr last = NULL;
12924
0
    int size;
12925
0
    xmlParserErrors ret = XML_ERR_OK;
12926
0
#ifdef SAX2
12927
0
    int i;
12928
0
#endif
12929
12930
0
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12931
0
        (oldctxt->depth >  100)) {
12932
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12933
0
                       "Maximum entity nesting depth exceeded");
12934
0
  return(XML_ERR_ENTITY_LOOP);
12935
0
    }
12936
12937
12938
0
    if (lst != NULL)
12939
0
        *lst = NULL;
12940
0
    if (string == NULL)
12941
0
        return(XML_ERR_INTERNAL_ERROR);
12942
12943
0
    size = xmlStrlen(string);
12944
12945
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12946
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12947
0
    ctxt->nbErrors = oldctxt->nbErrors;
12948
0
    ctxt->nbWarnings = oldctxt->nbWarnings;
12949
0
    if (user_data != NULL)
12950
0
  ctxt->userData = user_data;
12951
0
    else
12952
0
  ctxt->userData = ctxt;
12953
0
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12954
0
    ctxt->dict = oldctxt->dict;
12955
0
    ctxt->input_id = oldctxt->input_id;
12956
0
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12957
0
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12958
0
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12959
12960
0
#ifdef SAX2
12961
    /* propagate namespaces down the entity */
12962
0
    for (i = 0;i < oldctxt->nsNr;i += 2) {
12963
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12964
0
    }
12965
0
#endif
12966
12967
0
    oldsax = ctxt->sax;
12968
0
    ctxt->sax = oldctxt->sax;
12969
0
    xmlDetectSAX2(ctxt);
12970
0
    ctxt->replaceEntities = oldctxt->replaceEntities;
12971
0
    ctxt->options = oldctxt->options;
12972
12973
0
    ctxt->_private = oldctxt->_private;
12974
0
    if (oldctxt->myDoc == NULL) {
12975
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
12976
0
  if (newDoc == NULL) {
12977
0
      ctxt->sax = oldsax;
12978
0
      ctxt->dict = NULL;
12979
0
      xmlFreeParserCtxt(ctxt);
12980
0
      return(XML_ERR_INTERNAL_ERROR);
12981
0
  }
12982
0
  newDoc->properties = XML_DOC_INTERNAL;
12983
0
  newDoc->dict = ctxt->dict;
12984
0
  xmlDictReference(newDoc->dict);
12985
0
  ctxt->myDoc = newDoc;
12986
0
    } else {
12987
0
  ctxt->myDoc = oldctxt->myDoc;
12988
0
        content = ctxt->myDoc->children;
12989
0
  last = ctxt->myDoc->last;
12990
0
    }
12991
0
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12992
0
    if (newRoot == NULL) {
12993
0
  ctxt->sax = oldsax;
12994
0
  ctxt->dict = NULL;
12995
0
  xmlFreeParserCtxt(ctxt);
12996
0
  if (newDoc != NULL) {
12997
0
      xmlFreeDoc(newDoc);
12998
0
  }
12999
0
  return(XML_ERR_INTERNAL_ERROR);
13000
0
    }
13001
0
    ctxt->myDoc->children = NULL;
13002
0
    ctxt->myDoc->last = NULL;
13003
0
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13004
0
    nodePush(ctxt, ctxt->myDoc->children);
13005
0
    ctxt->instate = XML_PARSER_CONTENT;
13006
0
    ctxt->depth = oldctxt->depth;
13007
13008
0
    ctxt->validate = 0;
13009
0
    ctxt->loadsubset = oldctxt->loadsubset;
13010
0
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13011
  /*
13012
   * ID/IDREF registration will be done in xmlValidateElement below
13013
   */
13014
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13015
0
    }
13016
0
    ctxt->dictNames = oldctxt->dictNames;
13017
0
    ctxt->attsDefault = oldctxt->attsDefault;
13018
0
    ctxt->attsSpecial = oldctxt->attsSpecial;
13019
13020
0
    xmlParseContent(ctxt);
13021
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13022
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13023
0
    } else if (RAW != 0) {
13024
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13025
0
    }
13026
0
    if (ctxt->node != ctxt->myDoc->children) {
13027
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13028
0
    }
13029
13030
0
    if (!ctxt->wellFormed) {
13031
0
  ret = (xmlParserErrors)ctxt->errNo;
13032
0
        oldctxt->errNo = ctxt->errNo;
13033
0
        oldctxt->wellFormed = 0;
13034
0
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13035
0
    } else {
13036
0
        ret = XML_ERR_OK;
13037
0
    }
13038
13039
0
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13040
0
  xmlNodePtr cur;
13041
13042
  /*
13043
   * Return the newly created nodeset after unlinking it from
13044
   * they pseudo parent.
13045
   */
13046
0
  cur = ctxt->myDoc->children->children;
13047
0
  *lst = cur;
13048
0
  while (cur != NULL) {
13049
0
#ifdef LIBXML_VALID_ENABLED
13050
0
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13051
0
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13052
0
    (cur->type == XML_ELEMENT_NODE)) {
13053
0
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13054
0
      oldctxt->myDoc, cur);
13055
0
      }
13056
0
#endif /* LIBXML_VALID_ENABLED */
13057
0
      cur->parent = NULL;
13058
0
      cur = cur->next;
13059
0
  }
13060
0
  ctxt->myDoc->children->children = NULL;
13061
0
    }
13062
0
    if (ctxt->myDoc != NULL) {
13063
0
  xmlFreeNode(ctxt->myDoc->children);
13064
0
        ctxt->myDoc->children = content;
13065
0
        ctxt->myDoc->last = last;
13066
0
    }
13067
13068
    /*
13069
     * Also record the size of the entity parsed
13070
     */
13071
0
    if (ctxt->input != NULL && oldctxt != NULL) {
13072
0
        unsigned long consumed = ctxt->input->consumed;
13073
13074
0
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13075
13076
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13077
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13078
0
    }
13079
13080
0
    oldctxt->nbErrors = ctxt->nbErrors;
13081
0
    oldctxt->nbWarnings = ctxt->nbWarnings;
13082
0
    ctxt->sax = oldsax;
13083
0
    ctxt->dict = NULL;
13084
0
    ctxt->attsDefault = NULL;
13085
0
    ctxt->attsSpecial = NULL;
13086
0
    xmlFreeParserCtxt(ctxt);
13087
0
    if (newDoc != NULL) {
13088
0
  xmlFreeDoc(newDoc);
13089
0
    }
13090
13091
0
    return(ret);
13092
0
}
13093
13094
/**
13095
 * xmlParseInNodeContext:
13096
 * @node:  the context node
13097
 * @data:  the input string
13098
 * @datalen:  the input string length in bytes
13099
 * @options:  a combination of xmlParserOption
13100
 * @lst:  the return value for the set of parsed nodes
13101
 *
13102
 * Parse a well-balanced chunk of an XML document
13103
 * within the context (DTD, namespaces, etc ...) of the given node.
13104
 *
13105
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13106
 * the content production in the XML grammar:
13107
 *
13108
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13109
 *
13110
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13111
 * error code otherwise
13112
 */
13113
xmlParserErrors
13114
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13115
0
                      int options, xmlNodePtr *lst) {
13116
0
#ifdef SAX2
13117
0
    xmlParserCtxtPtr ctxt;
13118
0
    xmlDocPtr doc = NULL;
13119
0
    xmlNodePtr fake, cur;
13120
0
    int nsnr = 0;
13121
13122
0
    xmlParserErrors ret = XML_ERR_OK;
13123
13124
    /*
13125
     * check all input parameters, grab the document
13126
     */
13127
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13128
0
        return(XML_ERR_INTERNAL_ERROR);
13129
0
    switch (node->type) {
13130
0
        case XML_ELEMENT_NODE:
13131
0
        case XML_ATTRIBUTE_NODE:
13132
0
        case XML_TEXT_NODE:
13133
0
        case XML_CDATA_SECTION_NODE:
13134
0
        case XML_ENTITY_REF_NODE:
13135
0
        case XML_PI_NODE:
13136
0
        case XML_COMMENT_NODE:
13137
0
        case XML_DOCUMENT_NODE:
13138
0
        case XML_HTML_DOCUMENT_NODE:
13139
0
      break;
13140
0
  default:
13141
0
      return(XML_ERR_INTERNAL_ERROR);
13142
13143
0
    }
13144
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13145
0
           (node->type != XML_DOCUMENT_NODE) &&
13146
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13147
0
  node = node->parent;
13148
0
    if (node == NULL)
13149
0
  return(XML_ERR_INTERNAL_ERROR);
13150
0
    if (node->type == XML_ELEMENT_NODE)
13151
0
  doc = node->doc;
13152
0
    else
13153
0
        doc = (xmlDocPtr) node;
13154
0
    if (doc == NULL)
13155
0
  return(XML_ERR_INTERNAL_ERROR);
13156
13157
    /*
13158
     * allocate a context and set-up everything not related to the
13159
     * node position in the tree
13160
     */
13161
0
    if (doc->type == XML_DOCUMENT_NODE)
13162
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13163
0
#ifdef LIBXML_HTML_ENABLED
13164
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13165
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13166
        /*
13167
         * When parsing in context, it makes no sense to add implied
13168
         * elements like html/body/etc...
13169
         */
13170
0
        options |= HTML_PARSE_NOIMPLIED;
13171
0
    }
13172
0
#endif
13173
0
    else
13174
0
        return(XML_ERR_INTERNAL_ERROR);
13175
13176
0
    if (ctxt == NULL)
13177
0
        return(XML_ERR_NO_MEMORY);
13178
13179
    /*
13180
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13181
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13182
     * we must wait until the last moment to free the original one.
13183
     */
13184
0
    if (doc->dict != NULL) {
13185
0
        if (ctxt->dict != NULL)
13186
0
      xmlDictFree(ctxt->dict);
13187
0
  ctxt->dict = doc->dict;
13188
0
    } else
13189
0
        options |= XML_PARSE_NODICT;
13190
13191
0
    if (doc->encoding != NULL) {
13192
0
        xmlCharEncodingHandlerPtr hdlr;
13193
13194
0
        if (ctxt->encoding != NULL)
13195
0
      xmlFree((xmlChar *) ctxt->encoding);
13196
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13197
13198
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13199
0
        if (hdlr != NULL) {
13200
0
            xmlSwitchToEncoding(ctxt, hdlr);
13201
0
  } else {
13202
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13203
0
        }
13204
0
    }
13205
13206
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13207
0
    xmlDetectSAX2(ctxt);
13208
0
    ctxt->myDoc = doc;
13209
    /* parsing in context, i.e. as within existing content */
13210
0
    ctxt->input_id = 2;
13211
0
    ctxt->instate = XML_PARSER_CONTENT;
13212
13213
0
    fake = xmlNewDocComment(node->doc, NULL);
13214
0
    if (fake == NULL) {
13215
0
        xmlFreeParserCtxt(ctxt);
13216
0
  return(XML_ERR_NO_MEMORY);
13217
0
    }
13218
0
    xmlAddChild(node, fake);
13219
13220
0
    if (node->type == XML_ELEMENT_NODE) {
13221
0
  nodePush(ctxt, node);
13222
  /*
13223
   * initialize the SAX2 namespaces stack
13224
   */
13225
0
  cur = node;
13226
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13227
0
      xmlNsPtr ns = cur->nsDef;
13228
0
      const xmlChar *iprefix, *ihref;
13229
13230
0
      while (ns != NULL) {
13231
0
    if (ctxt->dict) {
13232
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13233
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13234
0
    } else {
13235
0
        iprefix = ns->prefix;
13236
0
        ihref = ns->href;
13237
0
    }
13238
13239
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13240
0
        nsPush(ctxt, iprefix, ihref);
13241
0
        nsnr++;
13242
0
    }
13243
0
    ns = ns->next;
13244
0
      }
13245
0
      cur = cur->parent;
13246
0
  }
13247
0
    }
13248
13249
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13250
  /*
13251
   * ID/IDREF registration will be done in xmlValidateElement below
13252
   */
13253
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13254
0
    }
13255
13256
0
#ifdef LIBXML_HTML_ENABLED
13257
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13258
0
        __htmlParseContent(ctxt);
13259
0
    else
13260
0
#endif
13261
0
  xmlParseContent(ctxt);
13262
13263
0
    nsPop(ctxt, nsnr);
13264
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13265
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13266
0
    } else if (RAW != 0) {
13267
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13268
0
    }
13269
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13270
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13271
0
  ctxt->wellFormed = 0;
13272
0
    }
13273
13274
0
    if (!ctxt->wellFormed) {
13275
0
        if (ctxt->errNo == 0)
13276
0
      ret = XML_ERR_INTERNAL_ERROR;
13277
0
  else
13278
0
      ret = (xmlParserErrors)ctxt->errNo;
13279
0
    } else {
13280
0
        ret = XML_ERR_OK;
13281
0
    }
13282
13283
    /*
13284
     * Return the newly created nodeset after unlinking it from
13285
     * the pseudo sibling.
13286
     */
13287
13288
0
    cur = fake->next;
13289
0
    fake->next = NULL;
13290
0
    node->last = fake;
13291
13292
0
    if (cur != NULL) {
13293
0
  cur->prev = NULL;
13294
0
    }
13295
13296
0
    *lst = cur;
13297
13298
0
    while (cur != NULL) {
13299
0
  cur->parent = NULL;
13300
0
  cur = cur->next;
13301
0
    }
13302
13303
0
    xmlUnlinkNode(fake);
13304
0
    xmlFreeNode(fake);
13305
13306
13307
0
    if (ret != XML_ERR_OK) {
13308
0
        xmlFreeNodeList(*lst);
13309
0
  *lst = NULL;
13310
0
    }
13311
13312
0
    if (doc->dict != NULL)
13313
0
        ctxt->dict = NULL;
13314
0
    xmlFreeParserCtxt(ctxt);
13315
13316
0
    return(ret);
13317
#else /* !SAX2 */
13318
    return(XML_ERR_INTERNAL_ERROR);
13319
#endif
13320
0
}
13321
13322
#ifdef LIBXML_SAX1_ENABLED
13323
/**
13324
 * xmlParseBalancedChunkMemoryRecover:
13325
 * @doc:  the document the chunk pertains to (must not be NULL)
13326
 * @sax:  the SAX handler block (possibly NULL)
13327
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13328
 * @depth:  Used for loop detection, use 0
13329
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13330
 * @lst:  the return value for the set of parsed nodes
13331
 * @recover: return nodes even if the data is broken (use 0)
13332
 *
13333
 *
13334
 * Parse a well-balanced chunk of an XML document
13335
 * called by the parser
13336
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13337
 * the content production in the XML grammar:
13338
 *
13339
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13340
 *
13341
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13342
 *    the parser error code otherwise
13343
 *
13344
 * In case recover is set to 1, the nodelist will not be empty even if
13345
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13346
 * some extent.
13347
 */
13348
int
13349
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13350
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13351
0
     int recover) {
13352
0
    xmlParserCtxtPtr ctxt;
13353
0
    xmlDocPtr newDoc;
13354
0
    xmlSAXHandlerPtr oldsax = NULL;
13355
0
    xmlNodePtr content, newRoot;
13356
0
    int size;
13357
0
    int ret = 0;
13358
13359
0
    if (depth > 40) {
13360
0
  return(XML_ERR_ENTITY_LOOP);
13361
0
    }
13362
13363
13364
0
    if (lst != NULL)
13365
0
        *lst = NULL;
13366
0
    if (string == NULL)
13367
0
        return(-1);
13368
13369
0
    size = xmlStrlen(string);
13370
13371
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13372
0
    if (ctxt == NULL) return(-1);
13373
0
    ctxt->userData = ctxt;
13374
0
    if (sax != NULL) {
13375
0
  oldsax = ctxt->sax;
13376
0
        ctxt->sax = sax;
13377
0
  if (user_data != NULL)
13378
0
      ctxt->userData = user_data;
13379
0
    }
13380
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13381
0
    if (newDoc == NULL) {
13382
0
  xmlFreeParserCtxt(ctxt);
13383
0
  return(-1);
13384
0
    }
13385
0
    newDoc->properties = XML_DOC_INTERNAL;
13386
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13387
0
        xmlDictFree(ctxt->dict);
13388
0
  ctxt->dict = doc->dict;
13389
0
  xmlDictReference(ctxt->dict);
13390
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13391
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13392
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13393
0
  ctxt->dictNames = 1;
13394
0
    } else {
13395
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13396
0
    }
13397
    /* doc == NULL is only supported for historic reasons */
13398
0
    if (doc != NULL) {
13399
0
  newDoc->intSubset = doc->intSubset;
13400
0
  newDoc->extSubset = doc->extSubset;
13401
0
    }
13402
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13403
0
    if (newRoot == NULL) {
13404
0
  if (sax != NULL)
13405
0
      ctxt->sax = oldsax;
13406
0
  xmlFreeParserCtxt(ctxt);
13407
0
  newDoc->intSubset = NULL;
13408
0
  newDoc->extSubset = NULL;
13409
0
        xmlFreeDoc(newDoc);
13410
0
  return(-1);
13411
0
    }
13412
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13413
0
    nodePush(ctxt, newRoot);
13414
    /* doc == NULL is only supported for historic reasons */
13415
0
    if (doc == NULL) {
13416
0
  ctxt->myDoc = newDoc;
13417
0
    } else {
13418
0
  ctxt->myDoc = newDoc;
13419
0
  newDoc->children->doc = doc;
13420
  /* Ensure that doc has XML spec namespace */
13421
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13422
0
  newDoc->oldNs = doc->oldNs;
13423
0
    }
13424
0
    ctxt->instate = XML_PARSER_CONTENT;
13425
0
    ctxt->input_id = 2;
13426
0
    ctxt->depth = depth;
13427
13428
    /*
13429
     * Doing validity checking on chunk doesn't make sense
13430
     */
13431
0
    ctxt->validate = 0;
13432
0
    ctxt->loadsubset = 0;
13433
0
    xmlDetectSAX2(ctxt);
13434
13435
0
    if ( doc != NULL ){
13436
0
        content = doc->children;
13437
0
        doc->children = NULL;
13438
0
        xmlParseContent(ctxt);
13439
0
        doc->children = content;
13440
0
    }
13441
0
    else {
13442
0
        xmlParseContent(ctxt);
13443
0
    }
13444
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13445
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13446
0
    } else if (RAW != 0) {
13447
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13448
0
    }
13449
0
    if (ctxt->node != newDoc->children) {
13450
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13451
0
    }
13452
13453
0
    if (!ctxt->wellFormed) {
13454
0
        if (ctxt->errNo == 0)
13455
0
      ret = 1;
13456
0
  else
13457
0
      ret = ctxt->errNo;
13458
0
    } else {
13459
0
      ret = 0;
13460
0
    }
13461
13462
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13463
0
  xmlNodePtr cur;
13464
13465
  /*
13466
   * Return the newly created nodeset after unlinking it from
13467
   * they pseudo parent.
13468
   */
13469
0
  cur = newDoc->children->children;
13470
0
  *lst = cur;
13471
0
  while (cur != NULL) {
13472
0
      xmlSetTreeDoc(cur, doc);
13473
0
      cur->parent = NULL;
13474
0
      cur = cur->next;
13475
0
  }
13476
0
  newDoc->children->children = NULL;
13477
0
    }
13478
13479
0
    if (sax != NULL)
13480
0
  ctxt->sax = oldsax;
13481
0
    xmlFreeParserCtxt(ctxt);
13482
0
    newDoc->intSubset = NULL;
13483
0
    newDoc->extSubset = NULL;
13484
    /* This leaks the namespace list if doc == NULL */
13485
0
    newDoc->oldNs = NULL;
13486
0
    xmlFreeDoc(newDoc);
13487
13488
0
    return(ret);
13489
0
}
13490
13491
/**
13492
 * xmlSAXParseEntity:
13493
 * @sax:  the SAX handler block
13494
 * @filename:  the filename
13495
 *
13496
 * DEPRECATED: Don't use.
13497
 *
13498
 * parse an XML external entity out of context and build a tree.
13499
 * It use the given SAX function block to handle the parsing callback.
13500
 * If sax is NULL, fallback to the default DOM tree building routines.
13501
 *
13502
 * [78] extParsedEnt ::= TextDecl? content
13503
 *
13504
 * This correspond to a "Well Balanced" chunk
13505
 *
13506
 * Returns the resulting document tree
13507
 */
13508
13509
xmlDocPtr
13510
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13511
0
    xmlDocPtr ret;
13512
0
    xmlParserCtxtPtr ctxt;
13513
13514
0
    ctxt = xmlCreateFileParserCtxt(filename);
13515
0
    if (ctxt == NULL) {
13516
0
  return(NULL);
13517
0
    }
13518
0
    if (sax != NULL) {
13519
0
  if (ctxt->sax != NULL)
13520
0
      xmlFree(ctxt->sax);
13521
0
        ctxt->sax = sax;
13522
0
        ctxt->userData = NULL;
13523
0
    }
13524
13525
0
    xmlParseExtParsedEnt(ctxt);
13526
13527
0
    if (ctxt->wellFormed)
13528
0
  ret = ctxt->myDoc;
13529
0
    else {
13530
0
        ret = NULL;
13531
0
        xmlFreeDoc(ctxt->myDoc);
13532
0
        ctxt->myDoc = NULL;
13533
0
    }
13534
0
    if (sax != NULL)
13535
0
        ctxt->sax = NULL;
13536
0
    xmlFreeParserCtxt(ctxt);
13537
13538
0
    return(ret);
13539
0
}
13540
13541
/**
13542
 * xmlParseEntity:
13543
 * @filename:  the filename
13544
 *
13545
 * parse an XML external entity out of context and build a tree.
13546
 *
13547
 * [78] extParsedEnt ::= TextDecl? content
13548
 *
13549
 * This correspond to a "Well Balanced" chunk
13550
 *
13551
 * Returns the resulting document tree
13552
 */
13553
13554
xmlDocPtr
13555
0
xmlParseEntity(const char *filename) {
13556
0
    return(xmlSAXParseEntity(NULL, filename));
13557
0
}
13558
#endif /* LIBXML_SAX1_ENABLED */
13559
13560
/**
13561
 * xmlCreateEntityParserCtxtInternal:
13562
 * @URL:  the entity URL
13563
 * @ID:  the entity PUBLIC ID
13564
 * @base:  a possible base for the target URI
13565
 * @pctx:  parser context used to set options on new context
13566
 *
13567
 * Create a parser context for an external entity
13568
 * Automatic support for ZLIB/Compress compressed document is provided
13569
 * by default if found at compile-time.
13570
 *
13571
 * Returns the new parser context or NULL
13572
 */
13573
static xmlParserCtxtPtr
13574
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13575
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13576
0
        xmlParserCtxtPtr pctx) {
13577
0
    xmlParserCtxtPtr ctxt;
13578
0
    xmlParserInputPtr inputStream;
13579
0
    char *directory = NULL;
13580
0
    xmlChar *uri;
13581
13582
0
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13583
0
    if (ctxt == NULL) {
13584
0
  return(NULL);
13585
0
    }
13586
13587
0
    if (pctx != NULL) {
13588
0
        ctxt->options = pctx->options;
13589
0
        ctxt->_private = pctx->_private;
13590
0
  ctxt->input_id = pctx->input_id;
13591
0
    }
13592
13593
    /* Don't read from stdin. */
13594
0
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13595
0
        URL = BAD_CAST "./-";
13596
13597
0
    uri = xmlBuildURI(URL, base);
13598
13599
0
    if (uri == NULL) {
13600
0
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13601
0
  if (inputStream == NULL) {
13602
0
      xmlFreeParserCtxt(ctxt);
13603
0
      return(NULL);
13604
0
  }
13605
13606
0
  inputPush(ctxt, inputStream);
13607
13608
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13609
0
      directory = xmlParserGetDirectory((char *)URL);
13610
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13611
0
      ctxt->directory = directory;
13612
0
    } else {
13613
0
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13614
0
  if (inputStream == NULL) {
13615
0
      xmlFree(uri);
13616
0
      xmlFreeParserCtxt(ctxt);
13617
0
      return(NULL);
13618
0
  }
13619
13620
0
  inputPush(ctxt, inputStream);
13621
13622
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13623
0
      directory = xmlParserGetDirectory((char *)uri);
13624
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13625
0
      ctxt->directory = directory;
13626
0
  xmlFree(uri);
13627
0
    }
13628
0
    return(ctxt);
13629
0
}
13630
13631
/**
13632
 * xmlCreateEntityParserCtxt:
13633
 * @URL:  the entity URL
13634
 * @ID:  the entity PUBLIC ID
13635
 * @base:  a possible base for the target URI
13636
 *
13637
 * Create a parser context for an external entity
13638
 * Automatic support for ZLIB/Compress compressed document is provided
13639
 * by default if found at compile-time.
13640
 *
13641
 * Returns the new parser context or NULL
13642
 */
13643
xmlParserCtxtPtr
13644
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13645
0
                    const xmlChar *base) {
13646
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13647
13648
0
}
13649
13650
/************************************************************************
13651
 *                  *
13652
 *    Front ends when parsing from a file     *
13653
 *                  *
13654
 ************************************************************************/
13655
13656
/**
13657
 * xmlCreateURLParserCtxt:
13658
 * @filename:  the filename or URL
13659
 * @options:  a combination of xmlParserOption
13660
 *
13661
 * Create a parser context for a file or URL content.
13662
 * Automatic support for ZLIB/Compress compressed document is provided
13663
 * by default if found at compile-time and for file accesses
13664
 *
13665
 * Returns the new parser context or NULL
13666
 */
13667
xmlParserCtxtPtr
13668
xmlCreateURLParserCtxt(const char *filename, int options)
13669
0
{
13670
0
    xmlParserCtxtPtr ctxt;
13671
0
    xmlParserInputPtr inputStream;
13672
0
    char *directory = NULL;
13673
13674
0
    ctxt = xmlNewParserCtxt();
13675
0
    if (ctxt == NULL) {
13676
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13677
0
  return(NULL);
13678
0
    }
13679
13680
0
    if (options)
13681
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13682
0
    ctxt->linenumbers = 1;
13683
13684
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13685
0
    if (inputStream == NULL) {
13686
0
  xmlFreeParserCtxt(ctxt);
13687
0
  return(NULL);
13688
0
    }
13689
13690
0
    inputPush(ctxt, inputStream);
13691
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13692
0
        directory = xmlParserGetDirectory(filename);
13693
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13694
0
        ctxt->directory = directory;
13695
13696
0
    return(ctxt);
13697
0
}
13698
13699
/**
13700
 * xmlCreateFileParserCtxt:
13701
 * @filename:  the filename
13702
 *
13703
 * Create a parser context for a file content.
13704
 * Automatic support for ZLIB/Compress compressed document is provided
13705
 * by default if found at compile-time.
13706
 *
13707
 * Returns the new parser context or NULL
13708
 */
13709
xmlParserCtxtPtr
13710
xmlCreateFileParserCtxt(const char *filename)
13711
0
{
13712
0
    return(xmlCreateURLParserCtxt(filename, 0));
13713
0
}
13714
13715
#ifdef LIBXML_SAX1_ENABLED
13716
/**
13717
 * xmlSAXParseFileWithData:
13718
 * @sax:  the SAX handler block
13719
 * @filename:  the filename
13720
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13721
 *             documents
13722
 * @data:  the userdata
13723
 *
13724
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13725
 *
13726
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13727
 * compressed document is provided by default if found at compile-time.
13728
 * It use the given SAX function block to handle the parsing callback.
13729
 * If sax is NULL, fallback to the default DOM tree building routines.
13730
 *
13731
 * User data (void *) is stored within the parser context in the
13732
 * context's _private member, so it is available nearly everywhere in libxml
13733
 *
13734
 * Returns the resulting document tree
13735
 */
13736
13737
xmlDocPtr
13738
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13739
0
                        int recovery, void *data) {
13740
0
    xmlDocPtr ret;
13741
0
    xmlParserCtxtPtr ctxt;
13742
13743
0
    xmlInitParser();
13744
13745
0
    ctxt = xmlCreateFileParserCtxt(filename);
13746
0
    if (ctxt == NULL) {
13747
0
  return(NULL);
13748
0
    }
13749
0
    if (sax != NULL) {
13750
0
  if (ctxt->sax != NULL)
13751
0
      xmlFree(ctxt->sax);
13752
0
        ctxt->sax = sax;
13753
0
    }
13754
0
    xmlDetectSAX2(ctxt);
13755
0
    if (data!=NULL) {
13756
0
  ctxt->_private = data;
13757
0
    }
13758
13759
0
    if (ctxt->directory == NULL)
13760
0
        ctxt->directory = xmlParserGetDirectory(filename);
13761
13762
0
    ctxt->recovery = recovery;
13763
13764
0
    xmlParseDocument(ctxt);
13765
13766
0
    if ((ctxt->wellFormed) || recovery) {
13767
0
        ret = ctxt->myDoc;
13768
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13769
0
      if (ctxt->input->buf->compressed > 0)
13770
0
    ret->compression = 9;
13771
0
      else
13772
0
    ret->compression = ctxt->input->buf->compressed;
13773
0
  }
13774
0
    }
13775
0
    else {
13776
0
       ret = NULL;
13777
0
       xmlFreeDoc(ctxt->myDoc);
13778
0
       ctxt->myDoc = NULL;
13779
0
    }
13780
0
    if (sax != NULL)
13781
0
        ctxt->sax = NULL;
13782
0
    xmlFreeParserCtxt(ctxt);
13783
13784
0
    return(ret);
13785
0
}
13786
13787
/**
13788
 * xmlSAXParseFile:
13789
 * @sax:  the SAX handler block
13790
 * @filename:  the filename
13791
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13792
 *             documents
13793
 *
13794
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13795
 *
13796
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13797
 * compressed document is provided by default if found at compile-time.
13798
 * It use the given SAX function block to handle the parsing callback.
13799
 * If sax is NULL, fallback to the default DOM tree building routines.
13800
 *
13801
 * Returns the resulting document tree
13802
 */
13803
13804
xmlDocPtr
13805
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13806
0
                          int recovery) {
13807
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13808
0
}
13809
13810
/**
13811
 * xmlRecoverDoc:
13812
 * @cur:  a pointer to an array of xmlChar
13813
 *
13814
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13815
 *
13816
 * parse an XML in-memory document and build a tree.
13817
 * In the case the document is not Well Formed, a attempt to build a
13818
 * tree is tried anyway
13819
 *
13820
 * Returns the resulting document tree or NULL in case of failure
13821
 */
13822
13823
xmlDocPtr
13824
0
xmlRecoverDoc(const xmlChar *cur) {
13825
0
    return(xmlSAXParseDoc(NULL, cur, 1));
13826
0
}
13827
13828
/**
13829
 * xmlParseFile:
13830
 * @filename:  the filename
13831
 *
13832
 * DEPRECATED: Use xmlReadFile.
13833
 *
13834
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13835
 * compressed document is provided by default if found at compile-time.
13836
 *
13837
 * Returns the resulting document tree if the file was wellformed,
13838
 * NULL otherwise.
13839
 */
13840
13841
xmlDocPtr
13842
0
xmlParseFile(const char *filename) {
13843
0
    return(xmlSAXParseFile(NULL, filename, 0));
13844
0
}
13845
13846
/**
13847
 * xmlRecoverFile:
13848
 * @filename:  the filename
13849
 *
13850
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13851
 *
13852
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13853
 * compressed document is provided by default if found at compile-time.
13854
 * In the case the document is not Well Formed, it attempts to build
13855
 * a tree anyway
13856
 *
13857
 * Returns the resulting document tree or NULL in case of failure
13858
 */
13859
13860
xmlDocPtr
13861
0
xmlRecoverFile(const char *filename) {
13862
0
    return(xmlSAXParseFile(NULL, filename, 1));
13863
0
}
13864
13865
13866
/**
13867
 * xmlSetupParserForBuffer:
13868
 * @ctxt:  an XML parser context
13869
 * @buffer:  a xmlChar * buffer
13870
 * @filename:  a file name
13871
 *
13872
 * DEPRECATED: Don't use.
13873
 *
13874
 * Setup the parser context to parse a new buffer; Clears any prior
13875
 * contents from the parser context. The buffer parameter must not be
13876
 * NULL, but the filename parameter can be
13877
 */
13878
void
13879
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13880
                             const char* filename)
13881
0
{
13882
0
    xmlParserInputPtr input;
13883
13884
0
    if ((ctxt == NULL) || (buffer == NULL))
13885
0
        return;
13886
13887
0
    input = xmlNewInputStream(ctxt);
13888
0
    if (input == NULL) {
13889
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13890
0
        xmlClearParserCtxt(ctxt);
13891
0
        return;
13892
0
    }
13893
13894
0
    xmlClearParserCtxt(ctxt);
13895
0
    if (filename != NULL)
13896
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13897
0
    input->base = buffer;
13898
0
    input->cur = buffer;
13899
0
    input->end = &buffer[xmlStrlen(buffer)];
13900
0
    inputPush(ctxt, input);
13901
0
}
13902
13903
/**
13904
 * xmlSAXUserParseFile:
13905
 * @sax:  a SAX handler
13906
 * @user_data:  The user data returned on SAX callbacks
13907
 * @filename:  a file name
13908
 *
13909
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13910
 *
13911
 * parse an XML file and call the given SAX handler routines.
13912
 * Automatic support for ZLIB/Compress compressed document is provided
13913
 *
13914
 * Returns 0 in case of success or a error number otherwise
13915
 */
13916
int
13917
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13918
0
                    const char *filename) {
13919
0
    int ret = 0;
13920
0
    xmlParserCtxtPtr ctxt;
13921
13922
0
    ctxt = xmlCreateFileParserCtxt(filename);
13923
0
    if (ctxt == NULL) return -1;
13924
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13925
0
  xmlFree(ctxt->sax);
13926
0
    ctxt->sax = sax;
13927
0
    xmlDetectSAX2(ctxt);
13928
13929
0
    if (user_data != NULL)
13930
0
  ctxt->userData = user_data;
13931
13932
0
    xmlParseDocument(ctxt);
13933
13934
0
    if (ctxt->wellFormed)
13935
0
  ret = 0;
13936
0
    else {
13937
0
        if (ctxt->errNo != 0)
13938
0
      ret = ctxt->errNo;
13939
0
  else
13940
0
      ret = -1;
13941
0
    }
13942
0
    if (sax != NULL)
13943
0
  ctxt->sax = NULL;
13944
0
    if (ctxt->myDoc != NULL) {
13945
0
        xmlFreeDoc(ctxt->myDoc);
13946
0
  ctxt->myDoc = NULL;
13947
0
    }
13948
0
    xmlFreeParserCtxt(ctxt);
13949
13950
0
    return ret;
13951
0
}
13952
#endif /* LIBXML_SAX1_ENABLED */
13953
13954
/************************************************************************
13955
 *                  *
13956
 *    Front ends when parsing from memory     *
13957
 *                  *
13958
 ************************************************************************/
13959
13960
/**
13961
 * xmlCreateMemoryParserCtxt:
13962
 * @buffer:  a pointer to a char array
13963
 * @size:  the size of the array
13964
 *
13965
 * Create a parser context for an XML in-memory document.
13966
 *
13967
 * Returns the new parser context or NULL
13968
 */
13969
xmlParserCtxtPtr
13970
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13971
0
    xmlParserCtxtPtr ctxt;
13972
0
    xmlParserInputPtr input;
13973
0
    xmlParserInputBufferPtr buf;
13974
13975
0
    if (buffer == NULL)
13976
0
  return(NULL);
13977
0
    if (size <= 0)
13978
0
  return(NULL);
13979
13980
0
    ctxt = xmlNewParserCtxt();
13981
0
    if (ctxt == NULL)
13982
0
  return(NULL);
13983
13984
0
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13985
0
    if (buf == NULL) {
13986
0
  xmlFreeParserCtxt(ctxt);
13987
0
  return(NULL);
13988
0
    }
13989
13990
0
    input = xmlNewInputStream(ctxt);
13991
0
    if (input == NULL) {
13992
0
  xmlFreeParserInputBuffer(buf);
13993
0
  xmlFreeParserCtxt(ctxt);
13994
0
  return(NULL);
13995
0
    }
13996
13997
0
    input->filename = NULL;
13998
0
    input->buf = buf;
13999
0
    xmlBufResetInput(input->buf->buffer, input);
14000
14001
0
    inputPush(ctxt, input);
14002
0
    return(ctxt);
14003
0
}
14004
14005
#ifdef LIBXML_SAX1_ENABLED
14006
/**
14007
 * xmlSAXParseMemoryWithData:
14008
 * @sax:  the SAX handler block
14009
 * @buffer:  an pointer to a char array
14010
 * @size:  the size of the array
14011
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14012
 *             documents
14013
 * @data:  the userdata
14014
 *
14015
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14016
 *
14017
 * parse an XML in-memory block and use the given SAX function block
14018
 * to handle the parsing callback. If sax is NULL, fallback to the default
14019
 * DOM tree building routines.
14020
 *
14021
 * User data (void *) is stored within the parser context in the
14022
 * context's _private member, so it is available nearly everywhere in libxml
14023
 *
14024
 * Returns the resulting document tree
14025
 */
14026
14027
xmlDocPtr
14028
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14029
0
            int size, int recovery, void *data) {
14030
0
    xmlDocPtr ret;
14031
0
    xmlParserCtxtPtr ctxt;
14032
14033
0
    xmlInitParser();
14034
14035
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14036
0
    if (ctxt == NULL) return(NULL);
14037
0
    if (sax != NULL) {
14038
0
  if (ctxt->sax != NULL)
14039
0
      xmlFree(ctxt->sax);
14040
0
        ctxt->sax = sax;
14041
0
    }
14042
0
    xmlDetectSAX2(ctxt);
14043
0
    if (data!=NULL) {
14044
0
  ctxt->_private=data;
14045
0
    }
14046
14047
0
    ctxt->recovery = recovery;
14048
14049
0
    xmlParseDocument(ctxt);
14050
14051
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14052
0
    else {
14053
0
       ret = NULL;
14054
0
       xmlFreeDoc(ctxt->myDoc);
14055
0
       ctxt->myDoc = NULL;
14056
0
    }
14057
0
    if (sax != NULL)
14058
0
  ctxt->sax = NULL;
14059
0
    xmlFreeParserCtxt(ctxt);
14060
14061
0
    return(ret);
14062
0
}
14063
14064
/**
14065
 * xmlSAXParseMemory:
14066
 * @sax:  the SAX handler block
14067
 * @buffer:  an pointer to a char array
14068
 * @size:  the size of the array
14069
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14070
 *             documents
14071
 *
14072
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14073
 *
14074
 * parse an XML in-memory block and use the given SAX function block
14075
 * to handle the parsing callback. If sax is NULL, fallback to the default
14076
 * DOM tree building routines.
14077
 *
14078
 * Returns the resulting document tree
14079
 */
14080
xmlDocPtr
14081
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14082
0
            int size, int recovery) {
14083
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14084
0
}
14085
14086
/**
14087
 * xmlParseMemory:
14088
 * @buffer:  an pointer to a char array
14089
 * @size:  the size of the array
14090
 *
14091
 * DEPRECATED: Use xmlReadMemory.
14092
 *
14093
 * parse an XML in-memory block and build a tree.
14094
 *
14095
 * Returns the resulting document tree
14096
 */
14097
14098
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14099
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14100
0
}
14101
14102
/**
14103
 * xmlRecoverMemory:
14104
 * @buffer:  an pointer to a char array
14105
 * @size:  the size of the array
14106
 *
14107
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14108
 *
14109
 * parse an XML in-memory block and build a tree.
14110
 * In the case the document is not Well Formed, an attempt to
14111
 * build a tree is tried anyway
14112
 *
14113
 * Returns the resulting document tree or NULL in case of error
14114
 */
14115
14116
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14117
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14118
0
}
14119
14120
/**
14121
 * xmlSAXUserParseMemory:
14122
 * @sax:  a SAX handler
14123
 * @user_data:  The user data returned on SAX callbacks
14124
 * @buffer:  an in-memory XML document input
14125
 * @size:  the length of the XML document in bytes
14126
 *
14127
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14128
 *
14129
 * parse an XML in-memory buffer and call the given SAX handler routines.
14130
 *
14131
 * Returns 0 in case of success or a error number otherwise
14132
 */
14133
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14134
0
        const char *buffer, int size) {
14135
0
    int ret = 0;
14136
0
    xmlParserCtxtPtr ctxt;
14137
14138
0
    xmlInitParser();
14139
14140
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14141
0
    if (ctxt == NULL) return -1;
14142
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14143
0
        xmlFree(ctxt->sax);
14144
0
    ctxt->sax = sax;
14145
0
    xmlDetectSAX2(ctxt);
14146
14147
0
    if (user_data != NULL)
14148
0
  ctxt->userData = user_data;
14149
14150
0
    xmlParseDocument(ctxt);
14151
14152
0
    if (ctxt->wellFormed)
14153
0
  ret = 0;
14154
0
    else {
14155
0
        if (ctxt->errNo != 0)
14156
0
      ret = ctxt->errNo;
14157
0
  else
14158
0
      ret = -1;
14159
0
    }
14160
0
    if (sax != NULL)
14161
0
        ctxt->sax = NULL;
14162
0
    if (ctxt->myDoc != NULL) {
14163
0
        xmlFreeDoc(ctxt->myDoc);
14164
0
  ctxt->myDoc = NULL;
14165
0
    }
14166
0
    xmlFreeParserCtxt(ctxt);
14167
14168
0
    return ret;
14169
0
}
14170
#endif /* LIBXML_SAX1_ENABLED */
14171
14172
/**
14173
 * xmlCreateDocParserCtxt:
14174
 * @cur:  a pointer to an array of xmlChar
14175
 *
14176
 * Creates a parser context for an XML in-memory document.
14177
 *
14178
 * Returns the new parser context or NULL
14179
 */
14180
xmlParserCtxtPtr
14181
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14182
0
    int len;
14183
14184
0
    if (cur == NULL)
14185
0
  return(NULL);
14186
0
    len = xmlStrlen(cur);
14187
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14188
0
}
14189
14190
#ifdef LIBXML_SAX1_ENABLED
14191
/**
14192
 * xmlSAXParseDoc:
14193
 * @sax:  the SAX handler block
14194
 * @cur:  a pointer to an array of xmlChar
14195
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14196
 *             documents
14197
 *
14198
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14199
 *
14200
 * parse an XML in-memory document and build a tree.
14201
 * It use the given SAX function block to handle the parsing callback.
14202
 * If sax is NULL, fallback to the default DOM tree building routines.
14203
 *
14204
 * Returns the resulting document tree
14205
 */
14206
14207
xmlDocPtr
14208
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14209
0
    xmlDocPtr ret;
14210
0
    xmlParserCtxtPtr ctxt;
14211
0
    xmlSAXHandlerPtr oldsax = NULL;
14212
14213
0
    if (cur == NULL) return(NULL);
14214
14215
14216
0
    ctxt = xmlCreateDocParserCtxt(cur);
14217
0
    if (ctxt == NULL) return(NULL);
14218
0
    if (sax != NULL) {
14219
0
        oldsax = ctxt->sax;
14220
0
        ctxt->sax = sax;
14221
0
        ctxt->userData = NULL;
14222
0
    }
14223
0
    xmlDetectSAX2(ctxt);
14224
14225
0
    xmlParseDocument(ctxt);
14226
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14227
0
    else {
14228
0
       ret = NULL;
14229
0
       xmlFreeDoc(ctxt->myDoc);
14230
0
       ctxt->myDoc = NULL;
14231
0
    }
14232
0
    if (sax != NULL)
14233
0
  ctxt->sax = oldsax;
14234
0
    xmlFreeParserCtxt(ctxt);
14235
14236
0
    return(ret);
14237
0
}
14238
14239
/**
14240
 * xmlParseDoc:
14241
 * @cur:  a pointer to an array of xmlChar
14242
 *
14243
 * DEPRECATED: Use xmlReadDoc.
14244
 *
14245
 * parse an XML in-memory document and build a tree.
14246
 *
14247
 * Returns the resulting document tree
14248
 */
14249
14250
xmlDocPtr
14251
0
xmlParseDoc(const xmlChar *cur) {
14252
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14253
0
}
14254
#endif /* LIBXML_SAX1_ENABLED */
14255
14256
#ifdef LIBXML_LEGACY_ENABLED
14257
/************************************************************************
14258
 *                  *
14259
 *  Specific function to keep track of entities references    *
14260
 *  and used by the XSLT debugger         *
14261
 *                  *
14262
 ************************************************************************/
14263
14264
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14265
14266
/**
14267
 * xmlAddEntityReference:
14268
 * @ent : A valid entity
14269
 * @firstNode : A valid first node for children of entity
14270
 * @lastNode : A valid last node of children entity
14271
 *
14272
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14273
 */
14274
static void
14275
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14276
                      xmlNodePtr lastNode)
14277
{
14278
    if (xmlEntityRefFunc != NULL) {
14279
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14280
    }
14281
}
14282
14283
14284
/**
14285
 * xmlSetEntityReferenceFunc:
14286
 * @func: A valid function
14287
 *
14288
 * Set the function to call call back when a xml reference has been made
14289
 */
14290
void
14291
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14292
{
14293
    xmlEntityRefFunc = func;
14294
}
14295
#endif /* LIBXML_LEGACY_ENABLED */
14296
14297
/************************************************************************
14298
 *                  *
14299
 *        Miscellaneous       *
14300
 *                  *
14301
 ************************************************************************/
14302
14303
static int xmlParserInitialized = 0;
14304
14305
/**
14306
 * xmlInitParser:
14307
 *
14308
 * Initialization function for the XML parser.
14309
 * This is not reentrant. Call once before processing in case of
14310
 * use in multithreaded programs.
14311
 */
14312
14313
void
14314
2.39M
xmlInitParser(void) {
14315
    /*
14316
     * Note that the initialization code must not make memory allocations.
14317
     */
14318
2.39M
    /* Already initialized: nothing to do. */
    if (xmlParserInitialized != 0)
14319
2.39M
  return;
14320
14321
1
    /*
     * With threads enabled, take the global init mutex and check the
     * flag a second time before running the initializers.
     */
#ifdef LIBXML_THREAD_ENABLED
14322
1
    __xmlGlobalInitMutexLock();
14323
1
    if (xmlParserInitialized == 0) {
14324
1
#endif
14325
    /*
     * On these Windows builds, register xmlCleanupParser with atexit(),
     * but only while xmlFree is still the C library free().
     */
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14326
        if (xmlFree == free)
14327
            atexit(xmlCleanupParser);
14328
#endif
14329
14330
1
  /* Run the per-module initializers in this fixed order. */
  xmlInitThreadsInternal();
14331
1
  xmlInitGlobalsInternal();
14332
1
  xmlInitMemoryInternal();
14333
1
        __xmlInitializeDict();
14334
1
  xmlInitEncodingInternal();
14335
1
  xmlRegisterDefaultInputCallbacks();
14336
1
#ifdef LIBXML_OUTPUT_ENABLED
14337
1
  xmlRegisterDefaultOutputCallbacks();
14338
1
#endif /* LIBXML_OUTPUT_ENABLED */
14339
1
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14340
1
  xmlInitXPathInternal();
14341
1
#endif
14342
1
  /* Mark initialization as done so later calls return early. */
  xmlParserInitialized = 1;
14343
1
#ifdef LIBXML_THREAD_ENABLED
14344
1
    }
14345
1
    __xmlGlobalInitMutexUnlock();
14346
1
#endif
14347
1
}
14348
14349
/**
14350
 * xmlCleanupParser:
14351
 *
14352
 * This function name is somewhat misleading. It does not clean up
14353
 * parser state, it cleans up memory allocated by the library itself.
14354
 * It is a cleanup function for the XML library. It tries to reclaim all
14355
 * related global memory allocated for the library processing.
14356
 * It doesn't deallocate any document related memory. One should
14357
 * call xmlCleanupParser() only when the process has finished using
14358
 * the library and all XML/HTML documents built with it.
14359
 * See also xmlInitParser() which has the opposite function of preparing
14360
 * the library for operations.
14361
 *
14362
 * WARNING: if your application is multithreaded or has plugin support
14363
 *          calling this may crash the application if another thread or
14364
 *          a plugin is still using libxml2. It's sometimes very hard to
14365
 *          guess if libxml2 is in use in the application, some libraries
14366
 *          or plugins may use it without notice. In case of doubt abstain
14367
 *          from calling this function or do it just before calling exit()
14368
 *          to avoid leak reports from valgrind !
14369
 */
14370
14371
void
14372
0
xmlCleanupParser(void) {
14373
0
    /* Nothing to release if the library was never initialized. */
    if (!xmlParserInitialized)
14374
0
  return;
14375
14376
0
    /* Tear down each module; catalog, output and schema cleanup are
     * compiled in only when the matching feature is enabled. */
    xmlCleanupCharEncodingHandlers();
14377
0
#ifdef LIBXML_CATALOG_ENABLED
14378
0
    xmlCatalogCleanup();
14379
0
#endif
14380
0
    xmlCleanupDictInternal();
14381
0
    xmlCleanupInputCallbacks();
14382
0
#ifdef LIBXML_OUTPUT_ENABLED
14383
0
    xmlCleanupOutputCallbacks();
14384
0
#endif
14385
0
#ifdef LIBXML_SCHEMAS_ENABLED
14386
0
    xmlSchemaCleanupTypes();
14387
0
    xmlRelaxNGCleanupTypes();
14388
0
#endif
14389
0
    xmlCleanupGlobalsInternal();
14390
0
    xmlCleanupThreadsInternal();
14391
0
    xmlCleanupMemoryInternal();
14392
0
    /* Clear the flag so that xmlInitParser() runs the initializers again. */
    xmlParserInitialized = 0;
14393
0
}
14394
14395
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14396
    !defined(_WIN32)
14397
static void
14398
ATTRIBUTE_DESTRUCTOR
14399
xmlDestructor(void) {
14400
    /*
14401
     * Calling custom deallocation functions in a destructor can cause
14402
     * problems, for example with Nokogiri.
14403
     */
14404
    if (xmlFree == free)
14405
        xmlCleanupParser();
14406
}
14407
#endif
14408
14409
/************************************************************************
14410
 *                  *
14411
 *  New set (2.6.0) of simpler and more flexible APIs   *
14412
 *                  *
14413
 ************************************************************************/
14414
14415
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is used; a NULL @str is ignored.
 *
 * The expansion is wrapped in do { } while (0) so that "DICT_FREE(x);"
 * is exactly one statement and can be used safely as the body of an
 * unbraced if/else. Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)                                            \
    do {                                                          \
        if ((str) && ((!dict) ||                                  \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))    \
            xmlFree((char *)(str));                               \
    } while (0)
14426
14427
/**
14428
 * xmlCtxtReset:
14429
 * @ctxt: an XML parser context
14430
 *
14431
 * Reset a parser context
14432
 */
14433
void
14434
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14435
0
{
14436
0
    xmlParserInputPtr input;
14437
0
    xmlDictPtr dict;
14438
14439
0
    if (ctxt == NULL)
14440
0
        return;
14441
14442
0
    dict = ctxt->dict;
14443
14444
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14445
0
        xmlFreeInputStream(input);
14446
0
    }
14447
0
    ctxt->inputNr = 0;
14448
0
    ctxt->input = NULL;
14449
14450
0
    ctxt->spaceNr = 0;
14451
0
    if (ctxt->spaceTab != NULL) {
14452
0
  ctxt->spaceTab[0] = -1;
14453
0
  ctxt->space = &ctxt->spaceTab[0];
14454
0
    } else {
14455
0
        ctxt->space = NULL;
14456
0
    }
14457
14458
14459
0
    ctxt->nodeNr = 0;
14460
0
    ctxt->node = NULL;
14461
14462
0
    ctxt->nameNr = 0;
14463
0
    ctxt->name = NULL;
14464
14465
0
    ctxt->nsNr = 0;
14466
14467
0
    DICT_FREE(ctxt->version);
14468
0
    ctxt->version = NULL;
14469
0
    DICT_FREE(ctxt->encoding);
14470
0
    ctxt->encoding = NULL;
14471
0
    DICT_FREE(ctxt->directory);
14472
0
    ctxt->directory = NULL;
14473
0
    DICT_FREE(ctxt->extSubURI);
14474
0
    ctxt->extSubURI = NULL;
14475
0
    DICT_FREE(ctxt->extSubSystem);
14476
0
    ctxt->extSubSystem = NULL;
14477
0
    if (ctxt->myDoc != NULL)
14478
0
        xmlFreeDoc(ctxt->myDoc);
14479
0
    ctxt->myDoc = NULL;
14480
14481
0
    ctxt->standalone = -1;
14482
0
    ctxt->hasExternalSubset = 0;
14483
0
    ctxt->hasPErefs = 0;
14484
0
    ctxt->html = 0;
14485
0
    ctxt->external = 0;
14486
0
    ctxt->instate = XML_PARSER_START;
14487
0
    ctxt->token = 0;
14488
14489
0
    ctxt->wellFormed = 1;
14490
0
    ctxt->nsWellFormed = 1;
14491
0
    ctxt->disableSAX = 0;
14492
0
    ctxt->valid = 1;
14493
#if 0
14494
    ctxt->vctxt.userData = ctxt;
14495
    ctxt->vctxt.error = xmlParserValidityError;
14496
    ctxt->vctxt.warning = xmlParserValidityWarning;
14497
#endif
14498
0
    ctxt->record_info = 0;
14499
0
    ctxt->checkIndex = 0;
14500
0
    ctxt->endCheckState = 0;
14501
0
    ctxt->inSubset = 0;
14502
0
    ctxt->errNo = XML_ERR_OK;
14503
0
    ctxt->depth = 0;
14504
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14505
0
    ctxt->catalogs = NULL;
14506
0
    ctxt->sizeentities = 0;
14507
0
    ctxt->sizeentcopy = 0;
14508
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14509
14510
0
    if (ctxt->attsDefault != NULL) {
14511
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14512
0
        ctxt->attsDefault = NULL;
14513
0
    }
14514
0
    if (ctxt->attsSpecial != NULL) {
14515
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14516
0
        ctxt->attsSpecial = NULL;
14517
0
    }
14518
14519
0
#ifdef LIBXML_CATALOG_ENABLED
14520
0
    if (ctxt->catalogs != NULL)
14521
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14522
0
#endif
14523
0
    ctxt->nbErrors = 0;
14524
0
    ctxt->nbWarnings = 0;
14525
0
    if (ctxt->lastError.code != XML_ERR_OK)
14526
0
        xmlResetError(&ctxt->lastError);
14527
0
}
14528
14529
/**
14530
 * xmlCtxtResetPush:
14531
 * @ctxt: an XML parser context
14532
 * @chunk:  a pointer to an array of chars
14533
 * @size:  number of chars in the array
14534
 * @filename:  an optional file name or URI
14535
 * @encoding:  the document encoding, or NULL
14536
 *
14537
 * Reset a push parser context
14538
 *
14539
 * Returns 0 in case of success and 1 in case of error
14540
 */
14541
int
14542
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14543
                 int size, const char *filename, const char *encoding)
14544
0
{
14545
0
    xmlParserInputPtr inputStream;
14546
0
    xmlParserInputBufferPtr buf;
14547
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14548
14549
0
    if (ctxt == NULL)
14550
0
        return(1);
14551
14552
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14553
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14554
14555
0
    buf = xmlAllocParserInputBuffer(enc);
14556
0
    if (buf == NULL)
14557
0
        return(1);
14558
14559
0
    if (ctxt == NULL) {
14560
0
        xmlFreeParserInputBuffer(buf);
14561
0
        return(1);
14562
0
    }
14563
14564
0
    xmlCtxtReset(ctxt);
14565
14566
0
    if (filename == NULL) {
14567
0
        ctxt->directory = NULL;
14568
0
    } else {
14569
0
        ctxt->directory = xmlParserGetDirectory(filename);
14570
0
    }
14571
14572
0
    inputStream = xmlNewInputStream(ctxt);
14573
0
    if (inputStream == NULL) {
14574
0
        xmlFreeParserInputBuffer(buf);
14575
0
        return(1);
14576
0
    }
14577
14578
0
    if (filename == NULL)
14579
0
        inputStream->filename = NULL;
14580
0
    else
14581
0
        inputStream->filename = (char *)
14582
0
            xmlCanonicPath((const xmlChar *) filename);
14583
0
    inputStream->buf = buf;
14584
0
    xmlBufResetInput(buf->buffer, inputStream);
14585
14586
0
    inputPush(ctxt, inputStream);
14587
14588
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14589
0
        (ctxt->input->buf != NULL)) {
14590
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14591
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14592
14593
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14594
14595
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14596
#ifdef DEBUG_PUSH
14597
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14598
#endif
14599
0
    }
14600
14601
0
    if (encoding != NULL) {
14602
0
        xmlCharEncodingHandlerPtr hdlr;
14603
14604
0
        if (ctxt->encoding != NULL)
14605
0
      xmlFree((xmlChar *) ctxt->encoding);
14606
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14607
14608
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14609
0
        if (hdlr != NULL) {
14610
0
            xmlSwitchToEncoding(ctxt, hdlr);
14611
0
  } else {
14612
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14613
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14614
0
        }
14615
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14616
0
        xmlSwitchEncoding(ctxt, enc);
14617
0
    }
14618
14619
0
    return(0);
14620
0
}
14621
14622
14623
/**
14624
 * xmlCtxtUseOptionsInternal:
14625
 * @ctxt: an XML parser context
14626
 * @options:  a combination of xmlParserOption
14627
 * @encoding:  the user provided encoding to use
14628
 *
14629
 * Applies the options to the parser context
14630
 *
14631
 * Returns 0 in case of success, the set of unknown or unimplemented options
14632
 *         in case of error.
14633
 */
14634
static int
14635
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14636
9.88k
{
14637
9.88k
    if (ctxt == NULL)
14638
0
        return(-1);
14639
9.88k
    if (encoding != NULL) {
14640
0
        if (ctxt->encoding != NULL)
14641
0
      xmlFree((xmlChar *) ctxt->encoding);
14642
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14643
0
    }
14644
9.88k
    if (options & XML_PARSE_RECOVER) {
14645
0
        ctxt->recovery = 1;
14646
0
        options -= XML_PARSE_RECOVER;
14647
0
  ctxt->options |= XML_PARSE_RECOVER;
14648
0
    } else
14649
9.88k
        ctxt->recovery = 0;
14650
9.88k
    if (options & XML_PARSE_DTDLOAD) {
14651
0
        ctxt->loadsubset = XML_DETECT_IDS;
14652
0
        options -= XML_PARSE_DTDLOAD;
14653
0
  ctxt->options |= XML_PARSE_DTDLOAD;
14654
0
    } else
14655
9.88k
        ctxt->loadsubset = 0;
14656
9.88k
    if (options & XML_PARSE_DTDATTR) {
14657
0
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14658
0
        options -= XML_PARSE_DTDATTR;
14659
0
  ctxt->options |= XML_PARSE_DTDATTR;
14660
0
    }
14661
9.88k
    if (options & XML_PARSE_NOENT) {
14662
0
        ctxt->replaceEntities = 1;
14663
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14664
0
        options -= XML_PARSE_NOENT;
14665
0
  ctxt->options |= XML_PARSE_NOENT;
14666
0
    } else
14667
9.88k
        ctxt->replaceEntities = 0;
14668
9.88k
    if (options & XML_PARSE_PEDANTIC) {
14669
0
        ctxt->pedantic = 1;
14670
0
        options -= XML_PARSE_PEDANTIC;
14671
0
  ctxt->options |= XML_PARSE_PEDANTIC;
14672
0
    } else
14673
9.88k
        ctxt->pedantic = 0;
14674
9.88k
    if (options & XML_PARSE_NOBLANKS) {
14675
9.88k
        ctxt->keepBlanks = 0;
14676
9.88k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14677
9.88k
        options -= XML_PARSE_NOBLANKS;
14678
9.88k
  ctxt->options |= XML_PARSE_NOBLANKS;
14679
9.88k
    } else
14680
0
        ctxt->keepBlanks = 1;
14681
9.88k
    if (options & XML_PARSE_DTDVALID) {
14682
0
        ctxt->validate = 1;
14683
0
        if (options & XML_PARSE_NOWARNING)
14684
0
            ctxt->vctxt.warning = NULL;
14685
0
        if (options & XML_PARSE_NOERROR)
14686
0
            ctxt->vctxt.error = NULL;
14687
0
        options -= XML_PARSE_DTDVALID;
14688
0
  ctxt->options |= XML_PARSE_DTDVALID;
14689
0
    } else
14690
9.88k
        ctxt->validate = 0;
14691
9.88k
    if (options & XML_PARSE_NOWARNING) {
14692
0
        ctxt->sax->warning = NULL;
14693
0
        options -= XML_PARSE_NOWARNING;
14694
0
    }
14695
9.88k
    if (options & XML_PARSE_NOERROR) {
14696
0
        ctxt->sax->error = NULL;
14697
0
        ctxt->sax->fatalError = NULL;
14698
0
        options -= XML_PARSE_NOERROR;
14699
0
    }
14700
9.88k
#ifdef LIBXML_SAX1_ENABLED
14701
9.88k
    if (options & XML_PARSE_SAX1) {
14702
0
        ctxt->sax->startElement = xmlSAX2StartElement;
14703
0
        ctxt->sax->endElement = xmlSAX2EndElement;
14704
0
        ctxt->sax->startElementNs = NULL;
14705
0
        ctxt->sax->endElementNs = NULL;
14706
0
        ctxt->sax->initialized = 1;
14707
0
        options -= XML_PARSE_SAX1;
14708
0
  ctxt->options |= XML_PARSE_SAX1;
14709
0
    }
14710
9.88k
#endif /* LIBXML_SAX1_ENABLED */
14711
9.88k
    if (options & XML_PARSE_NODICT) {
14712
0
        ctxt->dictNames = 0;
14713
0
        options -= XML_PARSE_NODICT;
14714
0
  ctxt->options |= XML_PARSE_NODICT;
14715
9.88k
    } else {
14716
9.88k
        ctxt->dictNames = 1;
14717
9.88k
    }
14718
9.88k
    if (options & XML_PARSE_NOCDATA) {
14719
9.88k
        ctxt->sax->cdataBlock = NULL;
14720
9.88k
        options -= XML_PARSE_NOCDATA;
14721
9.88k
  ctxt->options |= XML_PARSE_NOCDATA;
14722
9.88k
    }
14723
9.88k
    if (options & XML_PARSE_NSCLEAN) {
14724
9.88k
  ctxt->options |= XML_PARSE_NSCLEAN;
14725
9.88k
        options -= XML_PARSE_NSCLEAN;
14726
9.88k
    }
14727
9.88k
    if (options & XML_PARSE_NONET) {
14728
9.88k
  ctxt->options |= XML_PARSE_NONET;
14729
9.88k
        options -= XML_PARSE_NONET;
14730
9.88k
    }
14731
9.88k
    if (options & XML_PARSE_COMPACT) {
14732
0
  ctxt->options |= XML_PARSE_COMPACT;
14733
0
        options -= XML_PARSE_COMPACT;
14734
0
    }
14735
9.88k
    if (options & XML_PARSE_OLD10) {
14736
0
  ctxt->options |= XML_PARSE_OLD10;
14737
0
        options -= XML_PARSE_OLD10;
14738
0
    }
14739
9.88k
    if (options & XML_PARSE_NOBASEFIX) {
14740
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
14741
0
        options -= XML_PARSE_NOBASEFIX;
14742
0
    }
14743
9.88k
    if (options & XML_PARSE_HUGE) {
14744
9.88k
  ctxt->options |= XML_PARSE_HUGE;
14745
9.88k
        options -= XML_PARSE_HUGE;
14746
9.88k
        if (ctxt->dict != NULL)
14747
9.88k
            xmlDictSetLimit(ctxt->dict, 0);
14748
9.88k
    }
14749
9.88k
    if (options & XML_PARSE_OLDSAX) {
14750
0
  ctxt->options |= XML_PARSE_OLDSAX;
14751
0
        options -= XML_PARSE_OLDSAX;
14752
0
    }
14753
9.88k
    if (options & XML_PARSE_IGNORE_ENC) {
14754
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14755
0
        options -= XML_PARSE_IGNORE_ENC;
14756
0
    }
14757
9.88k
    if (options & XML_PARSE_BIG_LINES) {
14758
0
  ctxt->options |= XML_PARSE_BIG_LINES;
14759
0
        options -= XML_PARSE_BIG_LINES;
14760
0
    }
14761
9.88k
    ctxt->linenumbers = 1;
14762
9.88k
    return (options);
14763
9.88k
}
14764
14765
/**
14766
 * xmlCtxtUseOptions:
14767
 * @ctxt: an XML parser context
14768
 * @options:  a combination of xmlParserOption
14769
 *
14770
 * Applies the options to the parser context
14771
 *
14772
 * Returns 0 in case of success, the set of unknown or unimplemented options
14773
 *         in case of error.
14774
 */
14775
int
14776
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14777
9.88k
{
14778
9.88k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14779
9.88k
}
14780
14781
/**
14782
 * xmlDoRead:
14783
 * @ctxt:  an XML parser context
14784
 * @URL:  the base URL to use for the document
14785
 * @encoding:  the document encoding, or NULL
14786
 * @options:  a combination of xmlParserOption
14787
 * @reuse:  keep the context for reuse
14788
 *
14789
 * Common front-end for the xmlRead functions
14790
 *
14791
 * Returns the resulting document tree or NULL
14792
 */
14793
static xmlDocPtr
14794
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14795
          int options, int reuse)
14796
0
{
14797
0
    xmlDocPtr ret;
14798
14799
0
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14800
0
    if (encoding != NULL) {
14801
0
        xmlCharEncodingHandlerPtr hdlr;
14802
14803
        /*
14804
         * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14805
         * caller provided an encoding. Otherwise, we might switch to
14806
         * the encoding from the XML declaration which is likely to
14807
         * break things. Also see xmlSwitchInputEncoding.
14808
         */
14809
0
  hdlr = xmlFindCharEncodingHandler(encoding);
14810
0
  if (hdlr != NULL)
14811
0
      xmlSwitchToEncoding(ctxt, hdlr);
14812
0
    }
14813
0
    if ((URL != NULL) && (ctxt->input != NULL) &&
14814
0
        (ctxt->input->filename == NULL))
14815
0
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14816
0
    xmlParseDocument(ctxt);
14817
0
    if ((ctxt->wellFormed) || ctxt->recovery)
14818
0
        ret = ctxt->myDoc;
14819
0
    else {
14820
0
        ret = NULL;
14821
0
  if (ctxt->myDoc != NULL) {
14822
0
      xmlFreeDoc(ctxt->myDoc);
14823
0
  }
14824
0
    }
14825
0
    ctxt->myDoc = NULL;
14826
0
    if (!reuse) {
14827
0
  xmlFreeParserCtxt(ctxt);
14828
0
    }
14829
14830
0
    return (ret);
14831
0
}
14832
14833
/**
14834
 * xmlReadDoc:
14835
 * @cur:  a pointer to a zero terminated string
14836
 * @URL:  the base URL to use for the document
14837
 * @encoding:  the document encoding, or NULL
14838
 * @options:  a combination of xmlParserOption
14839
 *
14840
 * parse an XML in-memory document and build a tree.
14841
 *
14842
 * Returns the resulting document tree
14843
 */
14844
xmlDocPtr
14845
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14846
0
{
14847
0
    xmlParserCtxtPtr ctxt;
14848
14849
0
    if (cur == NULL)
14850
0
        return (NULL);
14851
0
    xmlInitParser();
14852
14853
0
    ctxt = xmlCreateDocParserCtxt(cur);
14854
0
    if (ctxt == NULL)
14855
0
        return (NULL);
14856
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14857
0
}
14858
14859
/**
14860
 * xmlReadFile:
14861
 * @filename:  a file or URL
14862
 * @encoding:  the document encoding, or NULL
14863
 * @options:  a combination of xmlParserOption
14864
 *
14865
 * parse an XML file from the filesystem or the network.
14866
 *
14867
 * Returns the resulting document tree
14868
 */
14869
xmlDocPtr
14870
xmlReadFile(const char *filename, const char *encoding, int options)
14871
0
{
14872
0
    xmlParserCtxtPtr ctxt;
14873
14874
0
    xmlInitParser();
14875
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
14876
0
    if (ctxt == NULL)
14877
0
        return (NULL);
14878
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14879
0
}
14880
14881
/**
14882
 * xmlReadMemory:
14883
 * @buffer:  a pointer to a char array
14884
 * @size:  the size of the array
14885
 * @URL:  the base URL to use for the document
14886
 * @encoding:  the document encoding, or NULL
14887
 * @options:  a combination of xmlParserOption
14888
 *
14889
 * parse an XML in-memory document and build a tree.
14890
 *
14891
 * Returns the resulting document tree
14892
 */
14893
xmlDocPtr
14894
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14895
0
{
14896
0
    xmlParserCtxtPtr ctxt;
14897
14898
0
    xmlInitParser();
14899
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14900
0
    if (ctxt == NULL)
14901
0
        return (NULL);
14902
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14903
0
}
14904
14905
/**
14906
 * xmlReadFd:
14907
 * @fd:  an open file descriptor
14908
 * @URL:  the base URL to use for the document
14909
 * @encoding:  the document encoding, or NULL
14910
 * @options:  a combination of xmlParserOption
14911
 *
14912
 * parse an XML from a file descriptor and build a tree.
14913
 * NOTE that the file descriptor will not be closed when the
14914
 *      reader is closed or reset.
14915
 *
14916
 * Returns the resulting document tree
14917
 */
14918
xmlDocPtr
14919
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14920
0
{
14921
0
    xmlParserCtxtPtr ctxt;
14922
0
    xmlParserInputBufferPtr input;
14923
0
    xmlParserInputPtr stream;
14924
14925
0
    if (fd < 0)
14926
0
        return (NULL);
14927
0
    xmlInitParser();
14928
14929
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14930
0
    if (input == NULL)
14931
0
        return (NULL);
14932
0
    input->closecallback = NULL;
14933
0
    ctxt = xmlNewParserCtxt();
14934
0
    if (ctxt == NULL) {
14935
0
        xmlFreeParserInputBuffer(input);
14936
0
        return (NULL);
14937
0
    }
14938
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14939
0
    if (stream == NULL) {
14940
0
        xmlFreeParserInputBuffer(input);
14941
0
  xmlFreeParserCtxt(ctxt);
14942
0
        return (NULL);
14943
0
    }
14944
0
    inputPush(ctxt, stream);
14945
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14946
0
}
14947
14948
/**
14949
 * xmlReadIO:
14950
 * @ioread:  an I/O read function
14951
 * @ioclose:  an I/O close function
14952
 * @ioctx:  an I/O handler
14953
 * @URL:  the base URL to use for the document
14954
 * @encoding:  the document encoding, or NULL
14955
 * @options:  a combination of xmlParserOption
14956
 *
14957
 * parse an XML document from I/O functions and source and build a tree.
14958
 *
14959
 * Returns the resulting document tree
14960
 */
14961
xmlDocPtr
14962
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14963
          void *ioctx, const char *URL, const char *encoding, int options)
14964
0
{
14965
0
    xmlParserCtxtPtr ctxt;
14966
0
    xmlParserInputBufferPtr input;
14967
0
    xmlParserInputPtr stream;
14968
14969
0
    if (ioread == NULL)
14970
0
        return (NULL);
14971
0
    xmlInitParser();
14972
14973
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14974
0
                                         XML_CHAR_ENCODING_NONE);
14975
0
    if (input == NULL) {
14976
0
        if (ioclose != NULL)
14977
0
            ioclose(ioctx);
14978
0
        return (NULL);
14979
0
    }
14980
0
    ctxt = xmlNewParserCtxt();
14981
0
    if (ctxt == NULL) {
14982
0
        xmlFreeParserInputBuffer(input);
14983
0
        return (NULL);
14984
0
    }
14985
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14986
0
    if (stream == NULL) {
14987
0
        xmlFreeParserInputBuffer(input);
14988
0
  xmlFreeParserCtxt(ctxt);
14989
0
        return (NULL);
14990
0
    }
14991
0
    inputPush(ctxt, stream);
14992
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14993
0
}
14994
14995
/**
14996
 * xmlCtxtReadDoc:
14997
 * @ctxt:  an XML parser context
14998
 * @cur:  a pointer to a zero terminated string
14999
 * @URL:  the base URL to use for the document
15000
 * @encoding:  the document encoding, or NULL
15001
 * @options:  a combination of xmlParserOption
15002
 *
15003
 * parse an XML in-memory document and build a tree.
15004
 * This reuses the existing @ctxt parser context
15005
 *
15006
 * Returns the resulting document tree
15007
 */
15008
xmlDocPtr
15009
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15010
               const char *URL, const char *encoding, int options)
15011
0
{
15012
0
    if (cur == NULL)
15013
0
        return (NULL);
15014
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15015
0
                              encoding, options));
15016
0
}
15017
15018
/**
15019
 * xmlCtxtReadFile:
15020
 * @ctxt:  an XML parser context
15021
 * @filename:  a file or URL
15022
 * @encoding:  the document encoding, or NULL
15023
 * @options:  a combination of xmlParserOption
15024
 *
15025
 * parse an XML file from the filesystem or the network.
15026
 * This reuses the existing @ctxt parser context
15027
 *
15028
 * Returns the resulting document tree
15029
 */
15030
xmlDocPtr
15031
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15032
                const char *encoding, int options)
15033
0
{
15034
0
    xmlParserInputPtr stream;
15035
15036
0
    if (filename == NULL)
15037
0
        return (NULL);
15038
0
    if (ctxt == NULL)
15039
0
        return (NULL);
15040
0
    xmlInitParser();
15041
15042
0
    xmlCtxtReset(ctxt);
15043
15044
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15045
0
    if (stream == NULL) {
15046
0
        return (NULL);
15047
0
    }
15048
0
    inputPush(ctxt, stream);
15049
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15050
0
}
15051
15052
/**
15053
 * xmlCtxtReadMemory:
15054
 * @ctxt:  an XML parser context
15055
 * @buffer:  a pointer to a char array
15056
 * @size:  the size of the array
15057
 * @URL:  the base URL to use for the document
15058
 * @encoding:  the document encoding, or NULL
15059
 * @options:  a combination of xmlParserOption
15060
 *
15061
 * parse an XML in-memory document and build a tree.
15062
 * This reuses the existing @ctxt parser context
15063
 *
15064
 * Returns the resulting document tree
15065
 */
15066
xmlDocPtr
15067
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15068
                  const char *URL, const char *encoding, int options)
15069
0
{
15070
0
    xmlParserInputBufferPtr input;
15071
0
    xmlParserInputPtr stream;
15072
15073
0
    if (ctxt == NULL)
15074
0
        return (NULL);
15075
0
    if (buffer == NULL)
15076
0
        return (NULL);
15077
0
    xmlInitParser();
15078
15079
0
    xmlCtxtReset(ctxt);
15080
15081
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15082
0
    if (input == NULL) {
15083
0
  return(NULL);
15084
0
    }
15085
15086
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15087
0
    if (stream == NULL) {
15088
0
  xmlFreeParserInputBuffer(input);
15089
0
  return(NULL);
15090
0
    }
15091
15092
0
    inputPush(ctxt, stream);
15093
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15094
0
}
15095
15096
/**
15097
 * xmlCtxtReadFd:
15098
 * @ctxt:  an XML parser context
15099
 * @fd:  an open file descriptor
15100
 * @URL:  the base URL to use for the document
15101
 * @encoding:  the document encoding, or NULL
15102
 * @options:  a combination of xmlParserOption
15103
 *
15104
 * parse an XML from a file descriptor and build a tree.
15105
 * This reuses the existing @ctxt parser context
15106
 * NOTE that the file descriptor will not be closed when the
15107
 *      reader is closed or reset.
15108
 *
15109
 * Returns the resulting document tree
15110
 */
15111
xmlDocPtr
15112
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15113
              const char *URL, const char *encoding, int options)
15114
0
{
15115
0
    xmlParserInputBufferPtr input;
15116
0
    xmlParserInputPtr stream;
15117
15118
0
    if (fd < 0)
15119
0
        return (NULL);
15120
0
    if (ctxt == NULL)
15121
0
        return (NULL);
15122
0
    xmlInitParser();
15123
15124
0
    xmlCtxtReset(ctxt);
15125
15126
15127
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15128
0
    if (input == NULL)
15129
0
        return (NULL);
15130
0
    input->closecallback = NULL;
15131
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15132
0
    if (stream == NULL) {
15133
0
        xmlFreeParserInputBuffer(input);
15134
0
        return (NULL);
15135
0
    }
15136
0
    inputPush(ctxt, stream);
15137
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15138
0
}
15139
15140
/**
15141
 * xmlCtxtReadIO:
15142
 * @ctxt:  an XML parser context
15143
 * @ioread:  an I/O read function
15144
 * @ioclose:  an I/O close function
15145
 * @ioctx:  an I/O handler
15146
 * @URL:  the base URL to use for the document
15147
 * @encoding:  the document encoding, or NULL
15148
 * @options:  a combination of xmlParserOption
15149
 *
15150
 * parse an XML document from I/O functions and source and build a tree.
15151
 * This reuses the existing @ctxt parser context
15152
 *
15153
 * Returns the resulting document tree
15154
 */
15155
xmlDocPtr
15156
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15157
              xmlInputCloseCallback ioclose, void *ioctx,
15158
        const char *URL,
15159
              const char *encoding, int options)
15160
0
{
15161
0
    xmlParserInputBufferPtr input;
15162
0
    xmlParserInputPtr stream;
15163
15164
0
    if (ioread == NULL)
15165
0
        return (NULL);
15166
0
    if (ctxt == NULL)
15167
0
        return (NULL);
15168
0
    xmlInitParser();
15169
15170
0
    xmlCtxtReset(ctxt);
15171
15172
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15173
0
                                         XML_CHAR_ENCODING_NONE);
15174
0
    if (input == NULL) {
15175
0
        if (ioclose != NULL)
15176
0
            ioclose(ioctx);
15177
0
        return (NULL);
15178
0
    }
15179
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15180
0
    if (stream == NULL) {
15181
0
        xmlFreeParserInputBuffer(input);
15182
0
        return (NULL);
15183
0
    }
15184
0
    inputPush(ctxt, stream);
15185
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15186
0
}
15187