Coverage Report

Created: 2023-11-19 06:13

/src/libxml2-2.11.5/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record made of two xmlChar string pointers (prefix, URI) and two
 * integers (line, nsNr).
 * NOTE(review): presumably one record is kept per open element start tag
 * (namespace prefix/URI, source line, number of namespace bindings) so the
 * matching end tag can be checked -- confirm against the users of this
 * struct, which are not visible here.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
#define XML_PARSER_BIG_ENTITY 1000
116
#define XML_PARSER_LOT_ENTITY 5000
117
118
/*
119
 * Constants for protection against abusive entity expansion
120
 * ("billion laughs").
121
 */
122
123
/*
124
 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
125
 * of serialized output after entity expansion.
126
 */
127
0
#define XML_PARSER_NON_LINEAR 5
128
129
/*
130
 * A certain amount is always allowed.
131
 */
132
0
#define XML_PARSER_ALLOWED_EXPANSION 1000000
133
134
/*
135
 * Fixed cost for each entity reference. This crudely models processing time
136
 * as well to protect, for example, against exponential expansion of empty
137
 * or very short entities.
138
 */
139
0
#define XML_ENT_FIXED_COST 20
140
141
/**
142
 * xmlParserMaxDepth:
143
 *
144
 * arbitrary depth limit for the XML documents that we allow to
145
 * process. This is not a limitation of the parser but a safety
146
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
147
 * parser option.
148
 */
149
unsigned int xmlParserMaxDepth = 256;
150
151
152
153
#define SAX2 1
154
4.64M
#define XML_PARSER_BIG_BUFFER_SIZE 300
155
24.5M
#define XML_PARSER_BUFFER_SIZE 100
156
8.21k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
157
158
/**
159
 * XML_PARSER_CHUNK_SIZE
160
 *
161
 * When calling GROW that's the minimal amount of data
162
 * the parser expected to have received. It is not a hard
163
 * limit but an optimization when reading strings like Names
164
 * It is not strictly needed as long as inputs available characters
165
 * are followed by 0, which should be provided by the I/O level
166
 */
167
#define XML_PARSER_CHUNK_SIZE 100
168
169
/*
170
 * List of XML prefixed PI allowed by W3C specs
171
 */
172
173
static const char* const xmlW3CPIs[] = {
174
    "xml-stylesheet",
175
    "xml-model",
176
    NULL
177
};
178
179
180
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
182
                                              const xmlChar **str);
183
184
static xmlParserErrors
185
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
186
                xmlSAXHandlerPtr sax,
187
          void *user_data, int depth, const xmlChar *URL,
188
          const xmlChar *ID, xmlNodePtr *list);
189
190
static int
191
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
192
                          const char *encoding);
193
#ifdef LIBXML_LEGACY_ENABLED
194
static void
195
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
196
                      xmlNodePtr lastNode);
197
#endif /* LIBXML_LEGACY_ENABLED */
198
199
static xmlParserErrors
200
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
201
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
202
203
static int
204
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
205
206
/************************************************************************
207
 *                  *
208
 *    Some factorized error routines        *
209
 *                  *
210
 ************************************************************************/
211
212
/**
213
 * xmlErrAttributeDup:
214
 * @ctxt:  an XML parser context
215
 * @prefix:  the attribute prefix
216
 * @localname:  the attribute localname
217
 *
218
 * Handle a redefinition of attribute error
219
 */
220
static void
221
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
222
                   const xmlChar * localname)
223
41.5k
{
224
41.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
225
41.5k
        (ctxt->instate == XML_PARSER_EOF))
226
0
  return;
227
41.5k
    if (ctxt != NULL)
228
41.5k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
229
230
41.5k
    if (prefix == NULL)
231
29.4k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
232
29.4k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
233
29.4k
                        (const char *) localname, NULL, NULL, 0, 0,
234
29.4k
                        "Attribute %s redefined\n", localname);
235
12.0k
    else
236
12.0k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
237
12.0k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
238
12.0k
                        (const char *) prefix, (const char *) localname,
239
12.0k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
240
12.0k
                        localname);
241
41.5k
    if (ctxt != NULL) {
242
41.5k
  ctxt->wellFormed = 0;
243
41.5k
  if (ctxt->recovery == 0)
244
41.5k
      ctxt->disableSAX = 1;
245
41.5k
    }
246
41.5k
}
247
248
/**
249
 * xmlFatalErr:
250
 * @ctxt:  an XML parser context
251
 * @error:  the error number
252
 * @extra:  extra information string
253
 *
254
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
255
 */
256
static void
257
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
258
87.5k
{
259
87.5k
    const char *errmsg;
260
261
87.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
262
87.5k
        (ctxt->instate == XML_PARSER_EOF))
263
728
  return;
264
86.7k
    switch (error) {
265
5.47k
        case XML_ERR_INVALID_HEX_CHARREF:
266
5.47k
            errmsg = "CharRef: invalid hexadecimal value";
267
5.47k
            break;
268
8.37k
        case XML_ERR_INVALID_DEC_CHARREF:
269
8.37k
            errmsg = "CharRef: invalid decimal value";
270
8.37k
            break;
271
0
        case XML_ERR_INVALID_CHARREF:
272
0
            errmsg = "CharRef: invalid value";
273
0
            break;
274
2.28k
        case XML_ERR_INTERNAL_ERROR:
275
2.28k
            errmsg = "internal error";
276
2.28k
            break;
277
0
        case XML_ERR_PEREF_AT_EOF:
278
0
            errmsg = "PEReference at end of document";
279
0
            break;
280
0
        case XML_ERR_PEREF_IN_PROLOG:
281
0
            errmsg = "PEReference in prolog";
282
0
            break;
283
0
        case XML_ERR_PEREF_IN_EPILOG:
284
0
            errmsg = "PEReference in epilog";
285
0
            break;
286
0
        case XML_ERR_PEREF_NO_NAME:
287
0
            errmsg = "PEReference: no name";
288
0
            break;
289
1.11k
        case XML_ERR_PEREF_SEMICOL_MISSING:
290
1.11k
            errmsg = "PEReference: expecting ';'";
291
1.11k
            break;
292
0
        case XML_ERR_ENTITY_LOOP:
293
0
            errmsg = "Detected an entity reference loop";
294
0
            break;
295
0
        case XML_ERR_ENTITY_NOT_STARTED:
296
0
            errmsg = "EntityValue: \" or ' expected";
297
0
            break;
298
87
        case XML_ERR_ENTITY_PE_INTERNAL:
299
87
            errmsg = "PEReferences forbidden in internal subset";
300
87
            break;
301
121
        case XML_ERR_ENTITY_NOT_FINISHED:
302
121
            errmsg = "EntityValue: \" or ' expected";
303
121
            break;
304
371
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
305
371
            errmsg = "AttValue: \" or ' expected";
306
371
            break;
307
267
        case XML_ERR_LT_IN_ATTRIBUTE:
308
267
            errmsg = "Unescaped '<' not allowed in attributes values";
309
267
            break;
310
1.23k
        case XML_ERR_LITERAL_NOT_STARTED:
311
1.23k
            errmsg = "SystemLiteral \" or ' expected";
312
1.23k
            break;
313
634
        case XML_ERR_LITERAL_NOT_FINISHED:
314
634
            errmsg = "Unfinished System or Public ID \" or ' expected";
315
634
            break;
316
314
        case XML_ERR_MISPLACED_CDATA_END:
317
314
            errmsg = "Sequence ']]>' not allowed in content";
318
314
            break;
319
935
        case XML_ERR_URI_REQUIRED:
320
935
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
321
935
            break;
322
302
        case XML_ERR_PUBID_REQUIRED:
323
302
            errmsg = "PUBLIC, the Public Identifier is missing";
324
302
            break;
325
875
        case XML_ERR_HYPHEN_IN_COMMENT:
326
875
            errmsg = "Comment must not contain '--' (double-hyphen)";
327
875
            break;
328
345
        case XML_ERR_PI_NOT_STARTED:
329
345
            errmsg = "xmlParsePI : no target name";
330
345
            break;
331
229
        case XML_ERR_RESERVED_XML_NAME:
332
229
            errmsg = "Invalid PI name";
333
229
            break;
334
575
        case XML_ERR_NOTATION_NOT_STARTED:
335
575
            errmsg = "NOTATION: Name expected here";
336
575
            break;
337
1.32k
        case XML_ERR_NOTATION_NOT_FINISHED:
338
1.32k
            errmsg = "'>' required to close NOTATION declaration";
339
1.32k
            break;
340
855
        case XML_ERR_VALUE_REQUIRED:
341
855
            errmsg = "Entity value required";
342
855
            break;
343
73
        case XML_ERR_URI_FRAGMENT:
344
73
            errmsg = "Fragment not allowed";
345
73
            break;
346
419
        case XML_ERR_ATTLIST_NOT_STARTED:
347
419
            errmsg = "'(' required to start ATTLIST enumeration";
348
419
            break;
349
62
        case XML_ERR_NMTOKEN_REQUIRED:
350
62
            errmsg = "NmToken expected in ATTLIST enumeration";
351
62
            break;
352
131
        case XML_ERR_ATTLIST_NOT_FINISHED:
353
131
            errmsg = "')' required to finish ATTLIST enumeration";
354
131
            break;
355
55
        case XML_ERR_MIXED_NOT_STARTED:
356
55
            errmsg = "MixedContentDecl : '|' or ')*' expected";
357
55
            break;
358
0
        case XML_ERR_PCDATA_REQUIRED:
359
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
360
0
            break;
361
852
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
362
852
            errmsg = "ContentDecl : Name or '(' expected";
363
852
            break;
364
760
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
365
760
            errmsg = "ContentDecl : ',' '|' or ')' expected";
366
760
            break;
367
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
368
0
            errmsg =
369
0
                "PEReference: forbidden within markup decl in internal subset";
370
0
            break;
371
2.00k
        case XML_ERR_GT_REQUIRED:
372
2.00k
            errmsg = "expected '>'";
373
2.00k
            break;
374
0
        case XML_ERR_CONDSEC_INVALID:
375
0
            errmsg = "XML conditional section '[' expected";
376
0
            break;
377
0
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
378
0
            errmsg = "Content error in the external subset";
379
0
            break;
380
0
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
381
0
            errmsg =
382
0
                "conditional section INCLUDE or IGNORE keyword expected";
383
0
            break;
384
0
        case XML_ERR_CONDSEC_NOT_FINISHED:
385
0
            errmsg = "XML conditional section not closed";
386
0
            break;
387
0
        case XML_ERR_XMLDECL_NOT_STARTED:
388
0
            errmsg = "Text declaration '<?xml' required";
389
0
            break;
390
430
        case XML_ERR_XMLDECL_NOT_FINISHED:
391
430
            errmsg = "parsing XML declaration: '?>' expected";
392
430
            break;
393
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
394
0
            errmsg = "external parsed entities cannot be standalone";
395
0
            break;
396
53.8k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
397
53.8k
            errmsg = "EntityRef: expecting ';'";
398
53.8k
            break;
399
175
        case XML_ERR_DOCTYPE_NOT_FINISHED:
400
175
            errmsg = "DOCTYPE improperly terminated";
401
175
            break;
402
0
        case XML_ERR_LTSLASH_REQUIRED:
403
0
            errmsg = "EndTag: '</' not found";
404
0
            break;
405
29
        case XML_ERR_EQUAL_REQUIRED:
406
29
            errmsg = "expected '='";
407
29
            break;
408
138
        case XML_ERR_STRING_NOT_CLOSED:
409
138
            errmsg = "String not closed expecting \" or '";
410
138
            break;
411
48
        case XML_ERR_STRING_NOT_STARTED:
412
48
            errmsg = "String not started expecting ' or \"";
413
48
            break;
414
5
        case XML_ERR_ENCODING_NAME:
415
5
            errmsg = "Invalid XML encoding name";
416
5
            break;
417
23
        case XML_ERR_STANDALONE_VALUE:
418
23
            errmsg = "standalone accepts only 'yes' or 'no'";
419
23
            break;
420
221
        case XML_ERR_DOCUMENT_EMPTY:
421
221
            errmsg = "Document is empty";
422
221
            break;
423
1.18k
        case XML_ERR_DOCUMENT_END:
424
1.18k
            errmsg = "Extra content at the end of the document";
425
1.18k
            break;
426
0
        case XML_ERR_NOT_WELL_BALANCED:
427
0
            errmsg = "chunk is not well balanced";
428
0
            break;
429
0
        case XML_ERR_EXTRA_CONTENT:
430
0
            errmsg = "extra content at the end of well balanced chunk";
431
0
            break;
432
608
        case XML_ERR_VERSION_MISSING:
433
608
            errmsg = "Malformed declaration expecting version";
434
608
            break;
435
12
        case XML_ERR_NAME_TOO_LONG:
436
12
            errmsg = "Name too long";
437
12
            break;
438
#if 0
439
        case:
440
            errmsg = "";
441
            break;
442
#endif
443
0
        default:
444
0
            errmsg = "Unregistered error message";
445
86.7k
    }
446
86.7k
    if (ctxt != NULL)
447
86.7k
  ctxt->errNo = error;
448
86.7k
    if (info == NULL) {
449
84.4k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
450
84.4k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
451
84.4k
                        errmsg);
452
84.4k
    } else {
453
2.29k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
454
2.29k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
455
2.29k
                        errmsg, info);
456
2.29k
    }
457
86.7k
    if (ctxt != NULL) {
458
86.7k
  ctxt->wellFormed = 0;
459
86.7k
  if (ctxt->recovery == 0)
460
86.7k
      ctxt->disableSAX = 1;
461
86.7k
    }
462
86.7k
}
463
464
/**
465
 * xmlFatalErrMsg:
466
 * @ctxt:  an XML parser context
467
 * @error:  the error number
468
 * @msg:  the error message
469
 *
470
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
471
 */
472
static void LIBXML_ATTR_FORMAT(3,0)
473
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
474
               const char *msg)
475
120k
{
476
120k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
477
120k
        (ctxt->instate == XML_PARSER_EOF))
478
0
  return;
479
120k
    if (ctxt != NULL)
480
120k
  ctxt->errNo = error;
481
120k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
482
120k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
483
120k
    if (ctxt != NULL) {
484
120k
  ctxt->wellFormed = 0;
485
120k
  if (ctxt->recovery == 0)
486
120k
      ctxt->disableSAX = 1;
487
120k
    }
488
120k
}
489
490
/**
491
 * xmlWarningMsg:
492
 * @ctxt:  an XML parser context
493
 * @error:  the error number
494
 * @msg:  the error message
495
 * @str1:  extra data
496
 * @str2:  extra data
497
 *
498
 * Handle a warning.
499
 */
500
static void LIBXML_ATTR_FORMAT(3,0)
501
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
502
              const char *msg, const xmlChar *str1, const xmlChar *str2)
503
3.28k
{
504
3.28k
    xmlStructuredErrorFunc schannel = NULL;
505
506
3.28k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
507
3.28k
        (ctxt->instate == XML_PARSER_EOF))
508
0
  return;
509
3.28k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
510
3.28k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
511
3.28k
        schannel = ctxt->sax->serror;
512
3.28k
    if (ctxt != NULL) {
513
3.28k
        __xmlRaiseError(schannel,
514
3.28k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
515
3.28k
                    ctxt->userData,
516
3.28k
                    ctxt, NULL, XML_FROM_PARSER, error,
517
3.28k
                    XML_ERR_WARNING, NULL, 0,
518
3.28k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
519
3.28k
        msg, (const char *) str1, (const char *) str2);
520
3.28k
    } else {
521
0
        __xmlRaiseError(schannel, NULL, NULL,
522
0
                    ctxt, NULL, XML_FROM_PARSER, error,
523
0
                    XML_ERR_WARNING, NULL, 0,
524
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
525
0
        msg, (const char *) str1, (const char *) str2);
526
0
    }
527
3.28k
}
528
529
/**
530
 * xmlValidityError:
531
 * @ctxt:  an XML parser context
532
 * @error:  the error number
533
 * @msg:  the error message
534
 * @str1:  extra data
535
 *
536
 * Handle a validity error.
537
 */
538
static void LIBXML_ATTR_FORMAT(3,0)
539
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
540
              const char *msg, const xmlChar *str1, const xmlChar *str2)
541
1.31k
{
542
1.31k
    xmlStructuredErrorFunc schannel = NULL;
543
544
1.31k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545
1.31k
        (ctxt->instate == XML_PARSER_EOF))
546
0
  return;
547
1.31k
    if (ctxt != NULL) {
548
1.31k
  ctxt->errNo = error;
549
1.31k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
550
1.31k
      schannel = ctxt->sax->serror;
551
1.31k
    }
552
1.31k
    if (ctxt != NULL) {
553
1.31k
        __xmlRaiseError(schannel,
554
1.31k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
555
1.31k
                    ctxt, NULL, XML_FROM_DTD, error,
556
1.31k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
557
1.31k
        (const char *) str2, NULL, 0, 0,
558
1.31k
        msg, (const char *) str1, (const char *) str2);
559
1.31k
  ctxt->valid = 0;
560
1.31k
    } else {
561
0
        __xmlRaiseError(schannel, NULL, NULL,
562
0
                    ctxt, NULL, XML_FROM_DTD, error,
563
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
564
0
        (const char *) str2, NULL, 0, 0,
565
0
        msg, (const char *) str1, (const char *) str2);
566
0
    }
567
1.31k
}
568
569
/**
570
 * xmlFatalErrMsgInt:
571
 * @ctxt:  an XML parser context
572
 * @error:  the error number
573
 * @msg:  the error message
574
 * @val:  an integer value
575
 *
576
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577
 */
578
static void LIBXML_ATTR_FORMAT(3,0)
579
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
580
                  const char *msg, int val)
581
21.1k
{
582
21.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583
21.1k
        (ctxt->instate == XML_PARSER_EOF))
584
0
  return;
585
21.1k
    if (ctxt != NULL)
586
21.1k
  ctxt->errNo = error;
587
21.1k
    __xmlRaiseError(NULL, NULL, NULL,
588
21.1k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589
21.1k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
590
21.1k
    if (ctxt != NULL) {
591
21.1k
  ctxt->wellFormed = 0;
592
21.1k
  if (ctxt->recovery == 0)
593
21.1k
      ctxt->disableSAX = 1;
594
21.1k
    }
595
21.1k
}
596
597
/**
598
 * xmlFatalErrMsgStrIntStr:
599
 * @ctxt:  an XML parser context
600
 * @error:  the error number
601
 * @msg:  the error message
602
 * @str1:  an string info
603
 * @val:  an integer value
604
 * @str2:  an string info
605
 *
606
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607
 */
608
static void LIBXML_ATTR_FORMAT(3,0)
609
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610
                  const char *msg, const xmlChar *str1, int val,
611
      const xmlChar *str2)
612
180
{
613
180
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614
180
        (ctxt->instate == XML_PARSER_EOF))
615
0
  return;
616
180
    if (ctxt != NULL)
617
180
  ctxt->errNo = error;
618
180
    __xmlRaiseError(NULL, NULL, NULL,
619
180
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620
180
                    NULL, 0, (const char *) str1, (const char *) str2,
621
180
        NULL, val, 0, msg, str1, val, str2);
622
180
    if (ctxt != NULL) {
623
180
  ctxt->wellFormed = 0;
624
180
  if (ctxt->recovery == 0)
625
180
      ctxt->disableSAX = 1;
626
180
    }
627
180
}
628
629
/**
630
 * xmlFatalErrMsgStr:
631
 * @ctxt:  an XML parser context
632
 * @error:  the error number
633
 * @msg:  the error message
634
 * @val:  a string value
635
 *
636
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637
 */
638
static void LIBXML_ATTR_FORMAT(3,0)
639
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
640
                  const char *msg, const xmlChar * val)
641
22.4k
{
642
22.4k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643
22.4k
        (ctxt->instate == XML_PARSER_EOF))
644
0
  return;
645
22.4k
    if (ctxt != NULL)
646
22.4k
  ctxt->errNo = error;
647
22.4k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
648
22.4k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
649
22.4k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650
22.4k
                    val);
651
22.4k
    if (ctxt != NULL) {
652
22.4k
  ctxt->wellFormed = 0;
653
22.4k
  if (ctxt->recovery == 0)
654
22.4k
      ctxt->disableSAX = 1;
655
22.4k
    }
656
22.4k
}
657
658
/**
659
 * xmlErrMsgStr:
660
 * @ctxt:  an XML parser context
661
 * @error:  the error number
662
 * @msg:  the error message
663
 * @val:  a string value
664
 *
665
 * Handle a non fatal parser error
666
 */
667
static void LIBXML_ATTR_FORMAT(3,0)
668
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669
                  const char *msg, const xmlChar * val)
670
765
{
671
765
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672
765
        (ctxt->instate == XML_PARSER_EOF))
673
0
  return;
674
765
    if (ctxt != NULL)
675
765
  ctxt->errNo = error;
676
765
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
677
765
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
678
765
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679
765
                    val);
680
765
}
681
682
/**
683
 * xmlNsErr:
684
 * @ctxt:  an XML parser context
685
 * @error:  the error number
686
 * @msg:  the message
687
 * @info1:  extra information string
688
 * @info2:  extra information string
689
 *
690
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691
 */
692
static void LIBXML_ATTR_FORMAT(3,0)
693
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694
         const char *msg,
695
         const xmlChar * info1, const xmlChar * info2,
696
         const xmlChar * info3)
697
47.1k
{
698
47.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699
47.1k
        (ctxt->instate == XML_PARSER_EOF))
700
0
  return;
701
47.1k
    if (ctxt != NULL)
702
47.1k
  ctxt->errNo = error;
703
47.1k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
704
47.1k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
705
47.1k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
706
47.1k
                    info1, info2, info3);
707
47.1k
    if (ctxt != NULL)
708
47.1k
  ctxt->nsWellFormed = 0;
709
47.1k
}
710
711
/**
712
 * xmlNsWarn
713
 * @ctxt:  an XML parser context
714
 * @error:  the error number
715
 * @msg:  the message
716
 * @info1:  extra information string
717
 * @info2:  extra information string
718
 *
719
 * Handle a namespace warning error
720
 */
721
static void LIBXML_ATTR_FORMAT(3,0)
722
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723
         const char *msg,
724
         const xmlChar * info1, const xmlChar * info2,
725
         const xmlChar * info3)
726
6.13k
{
727
6.13k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728
6.13k
        (ctxt->instate == XML_PARSER_EOF))
729
0
  return;
730
6.13k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731
6.13k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
732
6.13k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
733
6.13k
                    info1, info2, info3);
734
6.13k
}
735
736
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
743
744
/*
 * Add the size_t quantity @val to *@dst, clamping the result to
 * ULONG_MAX instead of wrapping around.
 *
 * @val is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long, the value is compared before being narrowed and a
 * huge value saturates instead of being silently truncated.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: checked just above */
}
751
752
/**
753
 * xmlParserEntityCheck:
754
 * @ctxt:  parser context
755
 * @extra:  sum of unexpanded entity sizes
756
 *
757
 * Check for non-linear entity expansion behaviour.
758
 *
759
 * In some cases like xmlStringDecodeEntities, this function is called
760
 * for each, possibly nested entity and its unexpanded content length.
761
 *
762
 * In other cases like xmlParseReference, it's only called for each
763
 * top-level entity with its unexpanded content length plus the sum of
764
 * the unexpanded content lengths (plus fixed cost) of all nested
765
 * entities.
766
 *
767
 * Summing the unexpanded lengths also adds the length of the reference.
768
 * This is by design. Taking the length of the entity name into account
769
 * discourages attacks that try to waste CPU time with abusively long
770
 * entity names. See test/recurse/lol6.xml for example. Each call also
771
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
772
 * short entities.
773
 *
774
 * Returns 1 on error, 0 on success.
775
 */
776
static int
777
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
778
0
{
779
0
    unsigned long consumed;
780
0
    xmlParserInputPtr input = ctxt->input;
781
0
    xmlEntityPtr entity = input->entity;
782
783
    /*
784
     * Compute total consumed bytes so far, including input streams of
785
     * external entities.
786
     */
787
0
    consumed = input->parentConsumed;
788
0
    if ((entity == NULL) ||
789
0
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
790
0
         ((entity->flags & XML_ENT_PARSED) == 0))) {
791
0
        xmlSaturatedAdd(&consumed, input->consumed);
792
0
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
793
0
    }
794
0
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
795
796
    /*
797
     * Add extra cost and some fixed cost.
798
     */
799
0
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
800
0
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
801
802
    /*
803
     * It's important to always use saturation arithmetic when tracking
804
     * entity sizes to make the size checks reliable. If "sizeentcopy"
805
     * overflows, we have to abort.
806
     */
807
0
    if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
808
0
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
809
0
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
810
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
811
0
                       "Maximum entity amplification factor exceeded");
812
0
        xmlHaltParser(ctxt);
813
0
        return(1);
814
0
    }
815
816
0
    return(0);
817
0
}
818
819
/************************************************************************
820
 *                  *
821
 *    Library wide options          *
822
 *                  *
823
 ************************************************************************/
824
825
/**
826
  * xmlHasFeature:
827
  * @feature: the feature to be examined
828
  *
829
  * Examines if the library has been compiled with a given feature.
830
  *
831
  * Returns a non-zero value if the feature exists, otherwise zero.
832
  * Returns zero (0) if the feature does not exist or an unknown
833
  * feature is requested, non-zero otherwise.
834
  */
835
int
836
xmlHasFeature(xmlFeature feature)
837
0
{
838
0
    switch (feature) {
839
0
  case XML_WITH_THREAD:
840
0
#ifdef LIBXML_THREAD_ENABLED
841
0
      return(1);
842
#else
843
      return(0);
844
#endif
845
0
        case XML_WITH_TREE:
846
0
#ifdef LIBXML_TREE_ENABLED
847
0
            return(1);
848
#else
849
            return(0);
850
#endif
851
0
        case XML_WITH_OUTPUT:
852
0
#ifdef LIBXML_OUTPUT_ENABLED
853
0
            return(1);
854
#else
855
            return(0);
856
#endif
857
0
        case XML_WITH_PUSH:
858
0
#ifdef LIBXML_PUSH_ENABLED
859
0
            return(1);
860
#else
861
            return(0);
862
#endif
863
0
        case XML_WITH_READER:
864
0
#ifdef LIBXML_READER_ENABLED
865
0
            return(1);
866
#else
867
            return(0);
868
#endif
869
0
        case XML_WITH_PATTERN:
870
0
#ifdef LIBXML_PATTERN_ENABLED
871
0
            return(1);
872
#else
873
            return(0);
874
#endif
875
0
        case XML_WITH_WRITER:
876
0
#ifdef LIBXML_WRITER_ENABLED
877
0
            return(1);
878
#else
879
            return(0);
880
#endif
881
0
        case XML_WITH_SAX1:
882
0
#ifdef LIBXML_SAX1_ENABLED
883
0
            return(1);
884
#else
885
            return(0);
886
#endif
887
0
        case XML_WITH_FTP:
888
#ifdef LIBXML_FTP_ENABLED
889
            return(1);
890
#else
891
0
            return(0);
892
0
#endif
893
0
        case XML_WITH_HTTP:
894
0
#ifdef LIBXML_HTTP_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_VALID:
900
0
#ifdef LIBXML_VALID_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_HTML:
906
0
#ifdef LIBXML_HTML_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_LEGACY:
912
#ifdef LIBXML_LEGACY_ENABLED
913
            return(1);
914
#else
915
0
            return(0);
916
0
#endif
917
0
        case XML_WITH_C14N:
918
0
#ifdef LIBXML_C14N_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_CATALOG:
924
0
#ifdef LIBXML_CATALOG_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_XPATH:
930
0
#ifdef LIBXML_XPATH_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_XPTR:
936
0
#ifdef LIBXML_XPTR_ENABLED
937
0
            return(1);
938
#else
939
            return(0);
940
#endif
941
0
        case XML_WITH_XINCLUDE:
942
0
#ifdef LIBXML_XINCLUDE_ENABLED
943
0
            return(1);
944
#else
945
            return(0);
946
#endif
947
0
        case XML_WITH_ICONV:
948
0
#ifdef LIBXML_ICONV_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_ISO8859X:
954
0
#ifdef LIBXML_ISO8859X_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_UNICODE:
960
0
#ifdef LIBXML_UNICODE_ENABLED
961
0
            return(1);
962
#else
963
            return(0);
964
#endif
965
0
        case XML_WITH_REGEXP:
966
0
#ifdef LIBXML_REGEXP_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_AUTOMATA:
972
0
#ifdef LIBXML_AUTOMATA_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_EXPR:
978
#ifdef LIBXML_EXPR_ENABLED
979
            return(1);
980
#else
981
0
            return(0);
982
0
#endif
983
0
        case XML_WITH_SCHEMAS:
984
0
#ifdef LIBXML_SCHEMAS_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_SCHEMATRON:
990
0
#ifdef LIBXML_SCHEMATRON_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_MODULES:
996
#ifdef LIBXML_MODULES_ENABLED
997
            return(1);
998
#else
999
0
            return(0);
1000
0
#endif
1001
0
        case XML_WITH_DEBUG:
1002
0
#ifdef LIBXML_DEBUG_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_DEBUG_MEM:
1008
#ifdef DEBUG_MEMORY_LOCATION
1009
            return(1);
1010
#else
1011
0
            return(0);
1012
0
#endif
1013
0
        case XML_WITH_DEBUG_RUN:
1014
0
            return(0);
1015
0
        case XML_WITH_ZLIB:
1016
#ifdef LIBXML_ZLIB_ENABLED
1017
            return(1);
1018
#else
1019
0
            return(0);
1020
0
#endif
1021
0
        case XML_WITH_LZMA:
1022
#ifdef LIBXML_LZMA_ENABLED
1023
            return(1);
1024
#else
1025
0
            return(0);
1026
0
#endif
1027
0
        case XML_WITH_ICU:
1028
#ifdef LIBXML_ICU_ENABLED
1029
            return(1);
1030
#else
1031
0
            return(0);
1032
0
#endif
1033
0
        default:
1034
0
      break;
1035
0
     }
1036
0
     return(0);
1037
0
}
1038
1039
/************************************************************************
1040
 *                  *
1041
 *    SAX2 defaulted attributes handling      *
1042
 *                  *
1043
 ************************************************************************/
1044
1045
/**
1046
 * xmlDetectSAX2:
1047
 * @ctxt:  an XML parser context
1048
 *
1049
 * Do the SAX2 detection and specific initialization
1050
 */
1051
static void
1052
15.7k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1053
15.7k
    xmlSAXHandlerPtr sax;
1054
1055
    /* Avoid unused variable warning if features are disabled. */
1056
15.7k
    (void) sax;
1057
1058
15.7k
    if (ctxt == NULL) return;
1059
15.7k
    sax = ctxt->sax;
1060
15.7k
#ifdef LIBXML_SAX1_ENABLED
1061
15.7k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1062
15.7k
        ((sax->startElementNs != NULL) ||
1063
15.7k
         (sax->endElementNs != NULL) ||
1064
15.7k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1065
15.7k
        ctxt->sax2 = 1;
1066
#else
1067
    ctxt->sax2 = 1;
1068
#endif /* LIBXML_SAX1_ENABLED */
1069
1070
15.7k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1071
15.7k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1072
15.7k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1073
15.7k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1074
15.7k
    (ctxt->str_xml_ns == NULL)) {
1075
0
        xmlErrMemory(ctxt, NULL);
1076
0
    }
1077
15.7k
}
1078
1079
typedef struct _xmlDefAttrs xmlDefAttrs;
1080
typedef xmlDefAttrs *xmlDefAttrsPtr;
1081
struct _xmlDefAttrs {
1082
    int nbAttrs;  /* number of defaulted attributes on that element */
1083
    int maxAttrs;       /* the size of the array */
1084
#if __STDC_VERSION__ >= 199901L
1085
    /* Using a C99 flexible array member avoids UBSan errors. */
1086
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1087
#else
1088
    const xmlChar *values[5];
1089
#endif
1090
};
1091
1092
/**
1093
 * xmlAttrNormalizeSpace:
1094
 * @src: the source string
1095
 * @dst: the target string
1096
 *
1097
 * Normalize the space in non CDATA attribute values:
1098
 * If the attribute type is not CDATA, then the XML processor MUST further
1099
 * process the normalized attribute value by discarding any leading and
1100
 * trailing space (#x20) characters, and by replacing sequences of space
1101
 * (#x20) characters by a single space (#x20) character.
1102
 * Note that the size of dst need to be at least src, and if one doesn't need
1103
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1104
 * passing src as dst is just fine.
1105
 *
1106
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1107
 *         is needed.
1108
 */
1109
static xmlChar *
1110
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1111
12.5k
{
1112
12.5k
    if ((src == NULL) || (dst == NULL))
1113
0
        return(NULL);
1114
1115
13.4k
    while (*src == 0x20) src++;
1116
2.04M
    while (*src != 0) {
1117
2.03M
  if (*src == 0x20) {
1118
4.77k
      while (*src == 0x20) src++;
1119
1.63k
      if (*src != 0)
1120
775
    *dst++ = 0x20;
1121
2.03M
  } else {
1122
2.03M
      *dst++ = *src++;
1123
2.03M
  }
1124
2.03M
    }
1125
12.5k
    *dst = 0;
1126
12.5k
    if (dst == src)
1127
11.3k
       return(NULL);
1128
1.16k
    return(dst);
1129
12.5k
}
1130
1131
/**
1132
 * xmlAttrNormalizeSpace2:
1133
 * @src: the source string
1134
 *
1135
 * Normalize the space in non CDATA attribute values, a slightly more complex
1136
 * front end to avoid allocation problems when running on attribute values
1137
 * coming from the input.
1138
 *
1139
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1140
 *         is needed.
1141
 */
1142
static const xmlChar *
1143
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1144
2.48k
{
1145
2.48k
    int i;
1146
2.48k
    int remove_head = 0;
1147
2.48k
    int need_realloc = 0;
1148
2.48k
    const xmlChar *cur;
1149
1150
2.48k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1151
0
        return(NULL);
1152
2.48k
    i = *len;
1153
2.48k
    if (i <= 0)
1154
161
        return(NULL);
1155
1156
2.32k
    cur = src;
1157
2.98k
    while (*cur == 0x20) {
1158
660
        cur++;
1159
660
  remove_head++;
1160
660
    }
1161
2.13M
    while (*cur != 0) {
1162
2.12M
  if (*cur == 0x20) {
1163
3.40k
      cur++;
1164
3.40k
      if ((*cur == 0x20) || (*cur == 0)) {
1165
135
          need_realloc = 1;
1166
135
    break;
1167
135
      }
1168
3.40k
  } else
1169
2.12M
      cur++;
1170
2.12M
    }
1171
2.32k
    if (need_realloc) {
1172
135
        xmlChar *ret;
1173
1174
135
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1175
135
  if (ret == NULL) {
1176
0
      xmlErrMemory(ctxt, NULL);
1177
0
      return(NULL);
1178
0
  }
1179
135
  xmlAttrNormalizeSpace(ret, ret);
1180
135
  *len = strlen((const char *)ret);
1181
135
        return(ret);
1182
2.18k
    } else if (remove_head) {
1183
648
        *len -= remove_head;
1184
648
        memmove(src, src + remove_head, 1 + *len);
1185
648
  return(src);
1186
648
    }
1187
1.54k
    return(NULL);
1188
2.32k
}
1189
1190
/**
1191
 * xmlAddDefAttrs:
1192
 * @ctxt:  an XML parser context
1193
 * @fullname:  the element fullname
1194
 * @fullattr:  the attribute fullname
1195
 * @value:  the attribute value
1196
 *
1197
 * Add a defaulted attribute for an element
1198
 */
1199
static void
1200
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1201
               const xmlChar *fullname,
1202
               const xmlChar *fullattr,
1203
13.0k
               const xmlChar *value) {
1204
13.0k
    xmlDefAttrsPtr defaults;
1205
13.0k
    int len;
1206
13.0k
    const xmlChar *name;
1207
13.0k
    const xmlChar *prefix;
1208
1209
    /*
1210
     * Allows to detect attribute redefinitions
1211
     */
1212
13.0k
    if (ctxt->attsSpecial != NULL) {
1213
12.2k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1214
2.20k
      return;
1215
12.2k
    }
1216
1217
10.8k
    if (ctxt->attsDefault == NULL) {
1218
814
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1219
814
  if (ctxt->attsDefault == NULL)
1220
0
      goto mem_error;
1221
814
    }
1222
1223
    /*
1224
     * split the element name into prefix:localname , the string found
1225
     * are within the DTD and then not associated to namespace names.
1226
     */
1227
10.8k
    name = xmlSplitQName3(fullname, &len);
1228
10.8k
    if (name == NULL) {
1229
8.66k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1230
8.66k
  prefix = NULL;
1231
8.66k
    } else {
1232
2.20k
        name = xmlDictLookup(ctxt->dict, name, -1);
1233
2.20k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1234
2.20k
    }
1235
1236
    /*
1237
     * make sure there is some storage
1238
     */
1239
10.8k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1240
10.8k
    if (defaults == NULL) {
1241
1.78k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1242
1.78k
                     (4 * 5) * sizeof(const xmlChar *));
1243
1.78k
  if (defaults == NULL)
1244
0
      goto mem_error;
1245
1.78k
  defaults->nbAttrs = 0;
1246
1.78k
  defaults->maxAttrs = 4;
1247
1.78k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1248
1.78k
                          defaults, NULL) < 0) {
1249
0
      xmlFree(defaults);
1250
0
      goto mem_error;
1251
0
  }
1252
9.08k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1253
1.44k
        xmlDefAttrsPtr temp;
1254
1255
1.44k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1256
1.44k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1257
1.44k
  if (temp == NULL)
1258
0
      goto mem_error;
1259
1.44k
  defaults = temp;
1260
1.44k
  defaults->maxAttrs *= 2;
1261
1.44k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1262
1.44k
                          defaults, NULL) < 0) {
1263
0
      xmlFree(defaults);
1264
0
      goto mem_error;
1265
0
  }
1266
1.44k
    }
1267
1268
    /*
1269
     * Split the element name into prefix:localname , the string found
1270
     * are within the DTD and hen not associated to namespace names.
1271
     */
1272
10.8k
    name = xmlSplitQName3(fullattr, &len);
1273
10.8k
    if (name == NULL) {
1274
8.57k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1275
8.57k
  prefix = NULL;
1276
8.57k
    } else {
1277
2.29k
        name = xmlDictLookup(ctxt->dict, name, -1);
1278
2.29k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1279
2.29k
    }
1280
1281
10.8k
    defaults->values[5 * defaults->nbAttrs] = name;
1282
10.8k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1283
    /* intern the string and precompute the end */
1284
10.8k
    len = xmlStrlen(value);
1285
10.8k
    value = xmlDictLookup(ctxt->dict, value, len);
1286
10.8k
    if (value == NULL)
1287
0
        goto mem_error;
1288
10.8k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1289
10.8k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1290
10.8k
    if (ctxt->external)
1291
0
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1292
10.8k
    else
1293
10.8k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1294
10.8k
    defaults->nbAttrs++;
1295
1296
10.8k
    return;
1297
1298
0
mem_error:
1299
0
    xmlErrMemory(ctxt, NULL);
1300
0
    return;
1301
10.8k
}
1302
1303
/**
1304
 * xmlAddSpecialAttr:
1305
 * @ctxt:  an XML parser context
1306
 * @fullname:  the element fullname
1307
 * @fullattr:  the attribute fullname
1308
 * @type:  the attribute type
1309
 *
1310
 * Register this attribute type
1311
 */
1312
static void
1313
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1314
      const xmlChar *fullname,
1315
      const xmlChar *fullattr,
1316
      int type)
1317
13.3k
{
1318
13.3k
    if (ctxt->attsSpecial == NULL) {
1319
931
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1320
931
  if (ctxt->attsSpecial == NULL)
1321
0
      goto mem_error;
1322
931
    }
1323
1324
13.3k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1325
2.22k
        return;
1326
1327
11.0k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1328
11.0k
                     (void *) (ptrdiff_t) type);
1329
11.0k
    return;
1330
1331
0
mem_error:
1332
0
    xmlErrMemory(ctxt, NULL);
1333
0
    return;
1334
13.3k
}
1335
1336
/**
1337
 * xmlCleanSpecialAttrCallback:
1338
 *
1339
 * Removes CDATA attributes from the special attribute table
1340
 */
1341
static void
1342
xmlCleanSpecialAttrCallback(void *payload, void *data,
1343
                            const xmlChar *fullname, const xmlChar *fullattr,
1344
2.68k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1345
2.68k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1346
1347
2.68k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1348
477
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1349
477
    }
1350
2.68k
}
1351
1352
/**
1353
 * xmlCleanSpecialAttr:
1354
 * @ctxt:  an XML parser context
1355
 *
1356
 * Trim the list of attributes defined to remove all those of type
1357
 * CDATA as they are not special. This call should be done when finishing
1358
 * to parse the DTD and before starting to parse the document root.
1359
 */
1360
static void
1361
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1362
778
{
1363
778
    if (ctxt->attsSpecial == NULL)
1364
139
        return;
1365
1366
639
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1367
1368
639
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1369
24
        xmlHashFree(ctxt->attsSpecial, NULL);
1370
24
        ctxt->attsSpecial = NULL;
1371
24
    }
1372
639
    return;
1373
778
}
1374
1375
/**
1376
 * xmlCheckLanguageID:
1377
 * @lang:  pointer to the string value
1378
 *
1379
 * DEPRECATED: Internal function, do not use.
1380
 *
1381
 * Checks that the value conforms to the LanguageID production:
1382
 *
1383
 * NOTE: this is somewhat deprecated, those productions were removed from
1384
 *       the XML Second edition.
1385
 *
1386
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1387
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1388
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1389
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1390
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1391
 * [38] Subcode ::= ([a-z] | [A-Z])+
1392
 *
1393
 * The current REC reference the successors of RFC 1766, currently 5646
1394
 *
1395
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1396
 * langtag       = language
1397
 *                 ["-" script]
1398
 *                 ["-" region]
1399
 *                 *("-" variant)
1400
 *                 *("-" extension)
1401
 *                 ["-" privateuse]
1402
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1403
 *                 ["-" extlang]       ; sometimes followed by
1404
 *                                     ; extended language subtags
1405
 *               / 4ALPHA              ; or reserved for future use
1406
 *               / 5*8ALPHA            ; or registered language subtag
1407
 *
1408
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1409
 *                 *2("-" 3ALPHA)      ; permanently reserved
1410
 *
1411
 * script        = 4ALPHA              ; ISO 15924 code
1412
 *
1413
 * region        = 2ALPHA              ; ISO 3166-1 code
1414
 *               / 3DIGIT              ; UN M.49 code
1415
 *
1416
 * variant       = 5*8alphanum         ; registered variants
1417
 *               / (DIGIT 3alphanum)
1418
 *
1419
 * extension     = singleton 1*("-" (2*8alphanum))
1420
 *
1421
 *                                     ; Single alphanumerics
1422
 *                                     ; "x" reserved for private use
1423
 * singleton     = DIGIT               ; 0 - 9
1424
 *               / %x41-57             ; A - W
1425
 *               / %x59-5A             ; Y - Z
1426
 *               / %x61-77             ; a - w
1427
 *               / %x79-7A             ; y - z
1428
 *
1429
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1430
 * The parser below doesn't try to cope with extension or privateuse
1431
 * that could be added but that's not interoperable anyway
1432
 *
1433
 * Returns 1 if correct 0 otherwise
1434
 **/
1435
int
1436
xmlCheckLanguageID(const xmlChar * lang)
1437
0
{
1438
0
    const xmlChar *cur = lang, *nxt;
1439
1440
0
    if (cur == NULL)
1441
0
        return (0);
1442
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1443
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1444
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1445
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1446
        /*
1447
         * Still allow IANA code and user code which were coming
1448
         * from the previous version of the XML-1.0 specification
1449
         * it's deprecated but we should not fail
1450
         */
1451
0
        cur += 2;
1452
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1453
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1454
0
            cur++;
1455
0
        return(cur[0] == 0);
1456
0
    }
1457
0
    nxt = cur;
1458
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1459
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1460
0
           nxt++;
1461
0
    if (nxt - cur >= 4) {
1462
        /*
1463
         * Reserved
1464
         */
1465
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1466
0
            return(0);
1467
0
        return(1);
1468
0
    }
1469
0
    if (nxt - cur < 2)
1470
0
        return(0);
1471
    /* we got an ISO 639 code */
1472
0
    if (nxt[0] == 0)
1473
0
        return(1);
1474
0
    if (nxt[0] != '-')
1475
0
        return(0);
1476
1477
0
    nxt++;
1478
0
    cur = nxt;
1479
    /* now we can have extlang or script or region or variant */
1480
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1481
0
        goto region_m49;
1482
1483
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1484
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1485
0
           nxt++;
1486
0
    if (nxt - cur == 4)
1487
0
        goto script;
1488
0
    if (nxt - cur == 2)
1489
0
        goto region;
1490
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1491
0
        goto variant;
1492
0
    if (nxt - cur != 3)
1493
0
        return(0);
1494
    /* we parsed an extlang */
1495
0
    if (nxt[0] == 0)
1496
0
        return(1);
1497
0
    if (nxt[0] != '-')
1498
0
        return(0);
1499
1500
0
    nxt++;
1501
0
    cur = nxt;
1502
    /* now we can have script or region or variant */
1503
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1504
0
        goto region_m49;
1505
1506
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1507
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1508
0
           nxt++;
1509
0
    if (nxt - cur == 2)
1510
0
        goto region;
1511
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1512
0
        goto variant;
1513
0
    if (nxt - cur != 4)
1514
0
        return(0);
1515
    /* we parsed a script */
1516
0
script:
1517
0
    if (nxt[0] == 0)
1518
0
        return(1);
1519
0
    if (nxt[0] != '-')
1520
0
        return(0);
1521
1522
0
    nxt++;
1523
0
    cur = nxt;
1524
    /* now we can have region or variant */
1525
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1526
0
        goto region_m49;
1527
1528
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1529
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1530
0
           nxt++;
1531
1532
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1533
0
        goto variant;
1534
0
    if (nxt - cur != 2)
1535
0
        return(0);
1536
    /* we parsed a region */
1537
0
region:
1538
0
    if (nxt[0] == 0)
1539
0
        return(1);
1540
0
    if (nxt[0] != '-')
1541
0
        return(0);
1542
1543
0
    nxt++;
1544
0
    cur = nxt;
1545
    /* now we can just have a variant */
1546
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1547
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1548
0
           nxt++;
1549
1550
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1551
0
        return(0);
1552
1553
    /* we parsed a variant */
1554
0
variant:
1555
0
    if (nxt[0] == 0)
1556
0
        return(1);
1557
0
    if (nxt[0] != '-')
1558
0
        return(0);
1559
    /* extensions and private use subtags not checked */
1560
0
    return (1);
1561
1562
0
region_m49:
1563
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1564
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1565
0
        nxt += 3;
1566
0
        goto region;
1567
0
    }
1568
0
    return(0);
1569
0
}
1570
1571
/************************************************************************
1572
 *                  *
1573
 *    Parser stacks related functions and macros    *
1574
 *                  *
1575
 ************************************************************************/
1576
1577
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1578
                                            const xmlChar ** str);
1579
1580
#ifdef SAX2
1581
/**
1582
 * nsPush:
1583
 * @ctxt:  an XML parser context
1584
 * @prefix:  the namespace prefix or NULL
1585
 * @URL:  the namespace name
1586
 *
1587
 * Pushes a new parser namespace on top of the ns stack
1588
 *
1589
 * Returns -1 in case of error, -2 if the namespace should be discarded
1590
 *     and the index in the stack otherwise.
1591
 */
1592
static int
1593
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1594
20.3k
{
1595
20.3k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1596
20.3k
        int i;
1597
52.5k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1598
45.6k
      if (ctxt->nsTab[i] == prefix) {
1599
    /* in scope */
1600
13.3k
          if (ctxt->nsTab[i + 1] == URL)
1601
4.69k
        return(-2);
1602
    /* out of scope keep it */
1603
8.68k
    break;
1604
13.3k
      }
1605
45.6k
  }
1606
20.3k
    }
1607
15.6k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1608
2.69k
  ctxt->nsMax = 10;
1609
2.69k
  ctxt->nsNr = 0;
1610
2.69k
  ctxt->nsTab = (const xmlChar **)
1611
2.69k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1612
2.69k
  if (ctxt->nsTab == NULL) {
1613
0
      xmlErrMemory(ctxt, NULL);
1614
0
      ctxt->nsMax = 0;
1615
0
            return (-1);
1616
0
  }
1617
12.9k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1618
238
        const xmlChar ** tmp;
1619
238
        ctxt->nsMax *= 2;
1620
238
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1621
238
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1622
238
        if (tmp == NULL) {
1623
0
            xmlErrMemory(ctxt, NULL);
1624
0
      ctxt->nsMax /= 2;
1625
0
            return (-1);
1626
0
        }
1627
238
  ctxt->nsTab = tmp;
1628
238
    }
1629
15.6k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1630
15.6k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1631
15.6k
    return (ctxt->nsNr);
1632
15.6k
}
1633
/**
1634
 * nsPop:
1635
 * @ctxt: an XML parser context
1636
 * @nr:  the number to pop
1637
 *
1638
 * Pops the top @nr parser prefix/namespace from the ns stack
1639
 *
1640
 * Returns the number of namespaces removed
1641
 */
1642
static int
1643
nsPop(xmlParserCtxtPtr ctxt, int nr)
1644
1.65k
{
1645
1.65k
    int i;
1646
1647
1.65k
    if (ctxt->nsTab == NULL) return(0);
1648
1.65k
    if (ctxt->nsNr < nr) {
1649
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1650
0
        nr = ctxt->nsNr;
1651
0
    }
1652
1.65k
    if (ctxt->nsNr <= 0)
1653
0
        return (0);
1654
1655
9.52k
    for (i = 0;i < nr;i++) {
1656
7.87k
         ctxt->nsNr--;
1657
7.87k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1658
7.87k
    }
1659
1.65k
    return(nr);
1660
1.65k
}
1661
#endif
1662
1663
static int
1664
3.12k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1665
3.12k
    const xmlChar **atts;
1666
3.12k
    int *attallocs;
1667
3.12k
    int maxatts;
1668
1669
3.12k
    if (nr + 5 > ctxt->maxatts) {
1670
3.12k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1671
3.12k
  atts = (const xmlChar **) xmlMalloc(
1672
3.12k
             maxatts * sizeof(const xmlChar *));
1673
3.12k
  if (atts == NULL) goto mem_error;
1674
3.12k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1675
3.12k
                               (maxatts / 5) * sizeof(int));
1676
3.12k
  if (attallocs == NULL) {
1677
0
            xmlFree(atts);
1678
0
            goto mem_error;
1679
0
        }
1680
3.12k
        if (ctxt->maxatts > 0)
1681
291
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1682
3.12k
        xmlFree(ctxt->atts);
1683
3.12k
  ctxt->atts = atts;
1684
3.12k
  ctxt->attallocs = attallocs;
1685
3.12k
  ctxt->maxatts = maxatts;
1686
3.12k
    }
1687
3.12k
    return(ctxt->maxatts);
1688
0
mem_error:
1689
0
    xmlErrMemory(ctxt, NULL);
1690
0
    return(-1);
1691
3.12k
}
1692
1693
/**
1694
 * inputPush:
1695
 * @ctxt:  an XML parser context
1696
 * @value:  the parser input
1697
 *
1698
 * Pushes a new parser input on top of the input stack
1699
 *
1700
 * Returns -1 in case of error, the index in the stack otherwise
1701
 */
1702
int
1703
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1704
12.4k
{
1705
12.4k
    if ((ctxt == NULL) || (value == NULL))
1706
0
        return(-1);
1707
12.4k
    if (ctxt->inputNr >= ctxt->inputMax) {
1708
0
        size_t newSize = ctxt->inputMax * 2;
1709
0
        xmlParserInputPtr *tmp;
1710
1711
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1712
0
                                               newSize * sizeof(*tmp));
1713
0
        if (tmp == NULL) {
1714
0
            xmlErrMemory(ctxt, NULL);
1715
0
            return (-1);
1716
0
        }
1717
0
        ctxt->inputTab = tmp;
1718
0
        ctxt->inputMax = newSize;
1719
0
    }
1720
12.4k
    ctxt->inputTab[ctxt->inputNr] = value;
1721
12.4k
    ctxt->input = value;
1722
12.4k
    return (ctxt->inputNr++);
1723
12.4k
}
1724
/**
1725
 * inputPop:
1726
 * @ctxt: an XML parser context
1727
 *
1728
 * Pops the top parser input from the input stack
1729
 *
1730
 * Returns the input just removed
1731
 */
1732
xmlParserInputPtr
1733
inputPop(xmlParserCtxtPtr ctxt)
1734
37.3k
{
1735
37.3k
    xmlParserInputPtr ret;
1736
1737
37.3k
    if (ctxt == NULL)
1738
0
        return(NULL);
1739
37.3k
    if (ctxt->inputNr <= 0)
1740
24.9k
        return (NULL);
1741
12.4k
    ctxt->inputNr--;
1742
12.4k
    if (ctxt->inputNr > 0)
1743
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1744
12.4k
    else
1745
12.4k
        ctxt->input = NULL;
1746
12.4k
    ret = ctxt->inputTab[ctxt->inputNr];
1747
12.4k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1748
12.4k
    return (ret);
1749
37.3k
}
1750
/**
1751
 * nodePush:
1752
 * @ctxt:  an XML parser context
1753
 * @value:  the element node
1754
 *
1755
 * DEPRECATED: Internal function, do not use.
1756
 *
1757
 * Pushes a new element node on top of the node stack
1758
 *
1759
 * Returns -1 in case of error, the index in the stack otherwise
1760
 */
1761
int
1762
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1763
0
{
1764
0
    if (ctxt == NULL) return(0);
1765
0
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1766
0
        xmlNodePtr *tmp;
1767
1768
0
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1769
0
                                      ctxt->nodeMax * 2 *
1770
0
                                      sizeof(ctxt->nodeTab[0]));
1771
0
        if (tmp == NULL) {
1772
0
            xmlErrMemory(ctxt, NULL);
1773
0
            return (-1);
1774
0
        }
1775
0
        ctxt->nodeTab = tmp;
1776
0
  ctxt->nodeMax *= 2;
1777
0
    }
1778
0
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1779
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1780
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1781
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1782
0
        xmlParserMaxDepth);
1783
0
  xmlHaltParser(ctxt);
1784
0
  return(-1);
1785
0
    }
1786
0
    ctxt->nodeTab[ctxt->nodeNr] = value;
1787
0
    ctxt->node = value;
1788
0
    return (ctxt->nodeNr++);
1789
0
}
1790
1791
/**
1792
 * nodePop:
1793
 * @ctxt: an XML parser context
1794
 *
1795
 * DEPRECATED: Internal function, do not use.
1796
 *
1797
 * Pops the top element node from the node stack
1798
 *
1799
 * Returns the node just removed
1800
 */
1801
xmlNodePtr
1802
nodePop(xmlParserCtxtPtr ctxt)
1803
4.95k
{
1804
4.95k
    xmlNodePtr ret;
1805
1806
4.95k
    if (ctxt == NULL) return(NULL);
1807
4.95k
    if (ctxt->nodeNr <= 0)
1808
4.95k
        return (NULL);
1809
0
    ctxt->nodeNr--;
1810
0
    if (ctxt->nodeNr > 0)
1811
0
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1812
0
    else
1813
0
        ctxt->node = NULL;
1814
0
    ret = ctxt->nodeTab[ctxt->nodeNr];
1815
0
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1816
0
    return (ret);
1817
4.95k
}
1818
1819
/**
1820
 * nameNsPush:
1821
 * @ctxt:  an XML parser context
1822
 * @value:  the element name
1823
 * @prefix:  the element prefix
1824
 * @URI:  the element namespace name
1825
 * @line:  the current line number for error messages
1826
 * @nsNr:  the number of namespaces pushed on the namespace table
1827
 *
1828
 * Pushes a new element name/prefix/URL on top of the name stack
1829
 *
1830
 * Returns -1 in case of error, the index in the stack otherwise
1831
 */
1832
static int
1833
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1834
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1835
1.77M
{
1836
1.77M
    xmlStartTag *tag;
1837
1838
1.77M
    if (ctxt->nameNr >= ctxt->nameMax) {
1839
1.35k
        const xmlChar * *tmp;
1840
1.35k
        xmlStartTag *tmp2;
1841
1.35k
        ctxt->nameMax *= 2;
1842
1.35k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1843
1.35k
                                    ctxt->nameMax *
1844
1.35k
                                    sizeof(ctxt->nameTab[0]));
1845
1.35k
        if (tmp == NULL) {
1846
0
      ctxt->nameMax /= 2;
1847
0
      goto mem_error;
1848
0
        }
1849
1.35k
  ctxt->nameTab = tmp;
1850
1.35k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1851
1.35k
                                    ctxt->nameMax *
1852
1.35k
                                    sizeof(ctxt->pushTab[0]));
1853
1.35k
        if (tmp2 == NULL) {
1854
0
      ctxt->nameMax /= 2;
1855
0
      goto mem_error;
1856
0
        }
1857
1.35k
  ctxt->pushTab = tmp2;
1858
1.77M
    } else if (ctxt->pushTab == NULL) {
1859
7.45k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1860
7.45k
                                            sizeof(ctxt->pushTab[0]));
1861
7.45k
        if (ctxt->pushTab == NULL)
1862
0
            goto mem_error;
1863
7.45k
    }
1864
1.77M
    ctxt->nameTab[ctxt->nameNr] = value;
1865
1.77M
    ctxt->name = value;
1866
1.77M
    tag = &ctxt->pushTab[ctxt->nameNr];
1867
1.77M
    tag->prefix = prefix;
1868
1.77M
    tag->URI = URI;
1869
1.77M
    tag->line = line;
1870
1.77M
    tag->nsNr = nsNr;
1871
1.77M
    return (ctxt->nameNr++);
1872
0
mem_error:
1873
0
    xmlErrMemory(ctxt, NULL);
1874
0
    return (-1);
1875
1.77M
}
1876
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost entry from the element name stack. The name below
 * it, if there is one, becomes the current name; otherwise the current
 * name is reset to NULL.
 *
 * Returns the name just removed, or NULL when the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    popped = ctxt->nameTab[top];
    /* Clear the vacated slot so no stale pointer stays in the table. */
    ctxt->nameTab[top] = NULL;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1902
1903
/**
1904
 * namePush:
1905
 * @ctxt:  an XML parser context
1906
 * @value:  the element name
1907
 *
1908
 * DEPRECATED: Internal function, do not use.
1909
 *
1910
 * Pushes a new element name on top of the name stack
1911
 *
1912
 * Returns -1 in case of error, the index in the stack otherwise
1913
 */
1914
int
1915
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1916
0
{
1917
0
    if (ctxt == NULL) return (-1);
1918
1919
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1920
0
        const xmlChar * *tmp;
1921
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1922
0
                                    ctxt->nameMax * 2 *
1923
0
                                    sizeof(ctxt->nameTab[0]));
1924
0
        if (tmp == NULL) {
1925
0
      goto mem_error;
1926
0
        }
1927
0
  ctxt->nameTab = tmp;
1928
0
        ctxt->nameMax *= 2;
1929
0
    }
1930
0
    ctxt->nameTab[ctxt->nameNr] = value;
1931
0
    ctxt->name = value;
1932
0
    return (ctxt->nameNr++);
1933
0
mem_error:
1934
0
    xmlErrMemory(ctxt, NULL);
1935
0
    return (-1);
1936
0
}
1937
1938
/**
1939
 * namePop:
1940
 * @ctxt: an XML parser context
1941
 *
1942
 * DEPRECATED: Internal function, do not use.
1943
 *
1944
 * Pops the top element name from the name stack
1945
 *
1946
 * Returns the name just removed
1947
 */
1948
const xmlChar *
1949
namePop(xmlParserCtxtPtr ctxt)
1950
0
{
1951
0
    const xmlChar *ret;
1952
1953
0
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1954
0
        return (NULL);
1955
0
    ctxt->nameNr--;
1956
0
    if (ctxt->nameNr > 0)
1957
0
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1958
0
    else
1959
0
        ctxt->name = NULL;
1960
0
    ret = ctxt->nameTab[ctxt->nameNr];
1961
0
    ctxt->nameTab[ctxt->nameNr] = NULL;
1962
0
    return (ret);
1963
0
}
1964
1965
1.83M
/*
 * spacePush:
 * Pushes @val on top of the space stack, doubling the table first when
 * it is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* The table kept its old size: undo the capacity change. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1983
1984
135k
/*
 * spacePop:
 * Removes the top entry of the space stack and marks the vacated slot
 * with -1. ctxt->space is moved to the entry below, or to the first
 * slot of the table when the stack becomes empty.
 *
 * Returns the value just removed, or 0 when the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    return(popped);
}
1996
1997
/*
1998
 * Macros for accessing the content. Those should be used only by the parser,
1999
 * and not exported.
2000
 *
2001
 * Dirty macros, i.e. one often needs to make assumptions about the context to
2002
 * use them
2003
 *
2004
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2005
 *           To be used with extreme caution since operations consuming
2006
 *           characters may move the input buffer to a different location !
2007
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2008
 *           This should be used internally by the parser
2009
 *           only to compare to ASCII values otherwise it would break when
2010
 *           running with UTF-8 encoding.
2011
 *   RAW     same as CUR but in the input buffer, bypass any token
2012
 *           extraction that may have been done
2013
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2014
 *           to compare on ASCII based substring.
2015
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2016
 *           strings without newlines within the parser.
2017
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2018
 *           defined char within the parser.
2019
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
2020
 *
2021
 *   NEXT    Skip to the next character, this does the proper decoding
2022
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2023
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2024
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2025
 *           to the number of xmlChars used for the encoding [0-5].
2026
 *   CUR_SCHAR  same but operate on a string instead of the context
2027
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2028
 *            the index
2029
 *   GROW, SHRINK  handling of input buffers
2030
 */
2031
2032
10.6M
/*
 * All macros below expect a variable named ctxt (the parser context) to
 * be in scope at the point of use.
 */

/* Byte at the current input position; only compare it to ASCII values. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located (val) positions after the current one; no bounds check. */
#define NXT(val) (ctxt->input->cur[(val)])
/* Current position and start of the active input buffer. */
#define CUR_PTR (ctxt->input->cur)
#define BASE_PTR (ctxt->input->base)

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s equal c1..cn.
 * Evaluation stops at the first mismatch. s is evaluated several times,
 * so it must be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * Advance by (val) bytes and columns without looking for newlines, then
 * ask for more input if a 0 byte was reached. (val) is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * Advance by (val) bytes one at a time, keeping line and column in sync
 * with any newline skipped. (val) is evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW expand to a bare "if (...) call;" statement, so they
 * must be used as a full statement and never directly in front of an
 * "else". The shape is kept as is for the existing call sites.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one byte and one column, then ask for more input if
 * a 0 byte was reached. Expands to a brace block, not an expression.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

/*
 * Step over the current character, which is (l) bytes long, updating the
 * line/column counters. Does not ask for more input.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Store character v in buffer b at index i and advance i: a length l of
 * 1 stores v directly as one byte, any other length goes through
 * xmlCopyCharMultiByte and advances i by its return value.
 * Expands to an unbraced if/else; use it as a full statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2107
2108
/**
2109
 * xmlSkipBlankChars:
2110
 * @ctxt:  the XML parser context
2111
 *
2112
 * DEPRECATED: Internal function, do not use.
2113
 *
2114
 * skip all blanks character found at that point in the input streams.
2115
 * It pops up finished entities in the process if allowable at that point.
2116
 *
2117
 * Returns the number of space chars skipped
2118
 */
2119
2120
int
2121
3.25M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2122
3.25M
    int res = 0;
2123
2124
    /*
2125
     * It's Okay to use CUR/NEXT here since all the blanks are on
2126
     * the ASCII range.
2127
     */
2128
3.25M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2129
3.25M
        (ctxt->instate == XML_PARSER_START)) {
2130
2.98M
  const xmlChar *cur;
2131
  /*
2132
   * if we are in the document content, go really fast
2133
   */
2134
2.98M
  cur = ctxt->input->cur;
2135
2.98M
  while (IS_BLANK_CH(*cur)) {
2136
1.47M
      if (*cur == '\n') {
2137
63.7k
    ctxt->input->line++; ctxt->input->col = 1;
2138
1.41M
      } else {
2139
1.41M
    ctxt->input->col++;
2140
1.41M
      }
2141
1.47M
      cur++;
2142
1.47M
      if (res < INT_MAX)
2143
1.47M
    res++;
2144
1.47M
      if (*cur == 0) {
2145
775
    ctxt->input->cur = cur;
2146
775
    xmlParserGrow(ctxt);
2147
775
    cur = ctxt->input->cur;
2148
775
      }
2149
1.47M
  }
2150
2.98M
  ctxt->input->cur = cur;
2151
2.98M
    } else {
2152
273k
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2153
2154
2.86M
  while (ctxt->instate != XML_PARSER_EOF) {
2155
2.86M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2156
2.59M
    NEXT;
2157
2.59M
      } else if (CUR == '%') {
2158
                /*
2159
                 * Need to handle support of entities branching here
2160
                 */
2161
15.6k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2162
15.6k
                    break;
2163
0
          xmlParsePEReference(ctxt);
2164
257k
            } else if (CUR == 0) {
2165
1.65k
                unsigned long consumed;
2166
1.65k
                xmlEntityPtr ent;
2167
2168
1.65k
                if (ctxt->inputNr <= 1)
2169
1.65k
                    break;
2170
2171
0
                consumed = ctxt->input->consumed;
2172
0
                xmlSaturatedAddSizeT(&consumed,
2173
0
                                     ctxt->input->cur - ctxt->input->base);
2174
2175
                /*
2176
                 * Add to sizeentities when parsing an external entity
2177
                 * for the first time.
2178
                 */
2179
0
                ent = ctxt->input->entity;
2180
0
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2181
0
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2182
0
                    ent->flags |= XML_ENT_PARSED;
2183
2184
0
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2185
0
                }
2186
2187
0
                xmlParserEntityCheck(ctxt, consumed);
2188
2189
0
                xmlPopInput(ctxt);
2190
255k
            } else {
2191
255k
                break;
2192
255k
            }
2193
2194
            /*
2195
             * Also increase the counter when entering or exiting a PERef.
2196
             * The spec says: "When a parameter-entity reference is recognized
2197
             * in the DTD and included, its replacement text MUST be enlarged
2198
             * by the attachment of one leading and one following space (#x20)
2199
             * character."
2200
             */
2201
2.59M
      if (res < INT_MAX)
2202
2.59M
    res++;
2203
2.59M
        }
2204
273k
    }
2205
3.25M
    return(res);
2206
3.25M
}
2207
2208
/************************************************************************
2209
 *                  *
2210
 *    Commodity functions to handle entities      *
2211
 *                  *
2212
 ************************************************************************/
2213
2214
/**
2215
 * xmlPopInput:
2216
 * @ctxt:  an XML parser context
2217
 *
2218
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2219
 *          pop it and return the next char.
2220
 *
2221
 * Returns the current xmlChar in the parser context
2222
 */
2223
xmlChar
2224
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2225
0
    xmlParserInputPtr input;
2226
2227
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2228
0
    if (xmlParserDebugEntities)
2229
0
  xmlGenericError(xmlGenericErrorContext,
2230
0
    "Popping input %d\n", ctxt->inputNr);
2231
0
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2232
0
        (ctxt->instate != XML_PARSER_EOF))
2233
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2234
0
                    "Unfinished entity outside the DTD");
2235
0
    input = inputPop(ctxt);
2236
0
    if (input->entity != NULL)
2237
0
        input->entity->flags &= ~XML_ENT_EXPANDING;
2238
0
    xmlFreeInputStream(input);
2239
0
    if (*ctxt->input->cur == 0)
2240
0
        xmlParserGrow(ctxt);
2241
0
    return(CUR);
2242
0
}
2243
2244
/**
2245
 * xmlPushInput:
2246
 * @ctxt:  an XML parser context
2247
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2248
 *
2249
 * xmlPushInput: switch to a new input stream which is stacked on top
2250
 *               of the previous one(s).
2251
 * Returns -1 in case of error or the index in the input stack
2252
 */
2253
int
2254
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2255
0
    int ret;
2256
0
    if (input == NULL) return(-1);
2257
2258
0
    if (xmlParserDebugEntities) {
2259
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2260
0
      xmlGenericError(xmlGenericErrorContext,
2261
0
        "%s(%d): ", ctxt->input->filename,
2262
0
        ctxt->input->line);
2263
0
  xmlGenericError(xmlGenericErrorContext,
2264
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2265
0
    }
2266
0
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2267
0
        (ctxt->inputNr > 100)) {
2268
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2269
0
        while (ctxt->inputNr > 1)
2270
0
            xmlFreeInputStream(inputPop(ctxt));
2271
0
  return(-1);
2272
0
    }
2273
0
    ret = inputPush(ctxt, input);
2274
0
    if (ctxt->instate == XML_PARSER_EOF)
2275
0
        return(-1);
2276
0
    GROW;
2277
0
    return(ret);
2278
0
}
2279
2280
/**
2281
 * xmlParseCharRef:
2282
 * @ctxt:  an XML parser context
2283
 *
2284
 * DEPRECATED: Internal function, don't use.
2285
 *
2286
 * Parse a numeric character reference. Always consumes '&'.
2287
 *
2288
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2289
 *                  '&#x' [0-9a-fA-F]+ ';'
2290
 *
2291
 * [ WFC: Legal Character ]
2292
 * Characters referred to using character references must match the
2293
 * production for Char.
2294
 *
2295
 * Returns the value parsed (as an int), 0 in case of error
2296
 */
2297
int
2298
33.4k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2299
33.4k
    int val = 0;
2300
33.4k
    int count = 0;
2301
2302
    /*
2303
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2304
     */
2305
33.4k
    if ((RAW == '&') && (NXT(1) == '#') &&
2306
33.4k
        (NXT(2) == 'x')) {
2307
23.4k
  SKIP(3);
2308
23.4k
  GROW;
2309
107k
  while (RAW != ';') { /* loop blocked by count */
2310
88.8k
      if (count++ > 20) {
2311
1.28k
    count = 0;
2312
1.28k
    GROW;
2313
1.28k
                if (ctxt->instate == XML_PARSER_EOF)
2314
0
                    return(0);
2315
1.28k
      }
2316
88.8k
      if ((RAW >= '0') && (RAW <= '9'))
2317
49.7k
          val = val * 16 + (CUR - '0');
2318
39.0k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2319
20.2k
          val = val * 16 + (CUR - 'a') + 10;
2320
18.8k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2321
14.1k
          val = val * 16 + (CUR - 'A') + 10;
2322
4.67k
      else {
2323
4.67k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2324
4.67k
    val = 0;
2325
4.67k
    break;
2326
4.67k
      }
2327
84.1k
      if (val > 0x110000)
2328
16.0k
          val = 0x110000;
2329
2330
84.1k
      NEXT;
2331
84.1k
      count++;
2332
84.1k
  }
2333
23.4k
  if (RAW == ';') {
2334
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2335
18.7k
      ctxt->input->col++;
2336
18.7k
      ctxt->input->cur++;
2337
18.7k
  }
2338
23.4k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2339
9.97k
  SKIP(2);
2340
9.97k
  GROW;
2341
23.0k
  while (RAW != ';') { /* loop blocked by count */
2342
19.4k
      if (count++ > 20) {
2343
375
    count = 0;
2344
375
    GROW;
2345
375
                if (ctxt->instate == XML_PARSER_EOF)
2346
0
                    return(0);
2347
375
      }
2348
19.4k
      if ((RAW >= '0') && (RAW <= '9'))
2349
13.0k
          val = val * 10 + (CUR - '0');
2350
6.42k
      else {
2351
6.42k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2352
6.42k
    val = 0;
2353
6.42k
    break;
2354
6.42k
      }
2355
13.0k
      if (val > 0x110000)
2356
2.56k
          val = 0x110000;
2357
2358
13.0k
      NEXT;
2359
13.0k
      count++;
2360
13.0k
  }
2361
9.97k
  if (RAW == ';') {
2362
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2363
3.55k
      ctxt->input->col++;
2364
3.55k
      ctxt->input->cur++;
2365
3.55k
  }
2366
9.97k
    } else {
2367
0
        if (RAW == '&')
2368
0
            SKIP(1);
2369
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2370
0
    }
2371
2372
    /*
2373
     * [ WFC: Legal Character ]
2374
     * Characters referred to using character references must match the
2375
     * production for Char.
2376
     */
2377
33.4k
    if (val >= 0x110000) {
2378
674
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2379
674
                "xmlParseCharRef: character reference out of bounds\n",
2380
674
          val);
2381
32.7k
    } else if (IS_CHAR(val)) {
2382
19.5k
        return(val);
2383
19.5k
    } else {
2384
13.1k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2385
13.1k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2386
13.1k
                    val);
2387
13.1k
    }
2388
13.8k
    return(0);
2389
33.4k
}
2390
2391
/**
2392
 * xmlParseStringCharRef:
2393
 * @ctxt:  an XML parser context
2394
 * @str:  a pointer to an index in the string
2395
 *
2396
 * parse Reference declarations, variant parsing from a string rather
2397
 * than an an input flow.
2398
 *
2399
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2400
 *                  '&#x' [0-9a-fA-F]+ ';'
2401
 *
2402
 * [ WFC: Legal Character ]
2403
 * Characters referred to using character references must match the
2404
 * production for Char.
2405
 *
2406
 * Returns the value parsed (as an int), 0 in case of error, str will be
2407
 *         updated to the current value of the index
2408
 */
2409
static int
2410
5.60k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2411
5.60k
    const xmlChar *ptr;
2412
5.60k
    xmlChar cur;
2413
5.60k
    int val = 0;
2414
2415
5.60k
    if ((str == NULL) || (*str == NULL)) return(0);
2416
5.60k
    ptr = *str;
2417
5.60k
    cur = *ptr;
2418
5.60k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2419
2.87k
  ptr += 3;
2420
2.87k
  cur = *ptr;
2421
11.4k
  while (cur != ';') { /* Non input consuming loop */
2422
9.38k
      if ((cur >= '0') && (cur <= '9'))
2423
2.00k
          val = val * 16 + (cur - '0');
2424
7.38k
      else if ((cur >= 'a') && (cur <= 'f'))
2425
2.05k
          val = val * 16 + (cur - 'a') + 10;
2426
5.33k
      else if ((cur >= 'A') && (cur <= 'F'))
2427
4.53k
          val = val * 16 + (cur - 'A') + 10;
2428
803
      else {
2429
803
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2430
803
    val = 0;
2431
803
    break;
2432
803
      }
2433
8.58k
      if (val > 0x110000)
2434
488
          val = 0x110000;
2435
2436
8.58k
      ptr++;
2437
8.58k
      cur = *ptr;
2438
8.58k
  }
2439
2.87k
  if (cur == ';')
2440
2.06k
      ptr++;
2441
2.87k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2442
2.73k
  ptr += 2;
2443
2.73k
  cur = *ptr;
2444
9.13k
  while (cur != ';') { /* Non input consuming loops */
2445
8.35k
      if ((cur >= '0') && (cur <= '9'))
2446
6.40k
          val = val * 10 + (cur - '0');
2447
1.95k
      else {
2448
1.95k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2449
1.95k
    val = 0;
2450
1.95k
    break;
2451
1.95k
      }
2452
6.40k
      if (val > 0x110000)
2453
151
          val = 0x110000;
2454
2455
6.40k
      ptr++;
2456
6.40k
      cur = *ptr;
2457
6.40k
  }
2458
2.73k
  if (cur == ';')
2459
783
      ptr++;
2460
2.73k
    } else {
2461
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2462
0
  return(0);
2463
0
    }
2464
5.60k
    *str = ptr;
2465
2466
    /*
2467
     * [ WFC: Legal Character ]
2468
     * Characters referred to using character references must match the
2469
     * production for Char.
2470
     */
2471
5.60k
    if (val >= 0x110000) {
2472
61
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2473
61
                "xmlParseStringCharRef: character reference out of bounds\n",
2474
61
                val);
2475
5.54k
    } else if (IS_CHAR(val)) {
2476
2.03k
        return(val);
2477
3.51k
    } else {
2478
3.51k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2479
3.51k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2480
3.51k
        val);
2481
3.51k
    }
2482
3.57k
    return(0);
2483
5.60k
}
2484
2485
/**
2486
 * xmlParserHandlePEReference:
2487
 * @ctxt:  the parser context
2488
 *
2489
 * DEPRECATED: Internal function, do not use.
2490
 *
2491
 * [69] PEReference ::= '%' Name ';'
2492
 *
2493
 * [ WFC: No Recursion ]
2494
 * A parsed entity must not contain a recursive
2495
 * reference to itself, either directly or indirectly.
2496
 *
2497
 * [ WFC: Entity Declared ]
2498
 * In a document without any DTD, a document with only an internal DTD
2499
 * subset which contains no parameter entity references, or a document
2500
 * with "standalone='yes'", ...  ... The declaration of a parameter
2501
 * entity must precede any reference to it...
2502
 *
2503
 * [ VC: Entity Declared ]
2504
 * In a document with an external subset or external parameter entities
2505
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2506
 * must precede any reference to it...
2507
 *
2508
 * [ WFC: In DTD ]
2509
 * Parameter-entity references may only appear in the DTD.
2510
 * NOTE: misleading but this is handled.
2511
 *
2512
 * A PEReference may have been detected in the current input stream
2513
 * the handling is done accordingly to
2514
 *      http://www.w3.org/TR/REC-xml#entproc
2515
 * i.e.
2516
 *   - Included in literal in entity values
2517
 *   - Included as Parameter Entity reference within DTDs
2518
 */
2519
void
2520
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2521
0
    switch(ctxt->instate) {
2522
0
  case XML_PARSER_CDATA_SECTION:
2523
0
      return;
2524
0
        case XML_PARSER_COMMENT:
2525
0
      return;
2526
0
  case XML_PARSER_START_TAG:
2527
0
      return;
2528
0
  case XML_PARSER_END_TAG:
2529
0
      return;
2530
0
        case XML_PARSER_EOF:
2531
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2532
0
      return;
2533
0
        case XML_PARSER_PROLOG:
2534
0
  case XML_PARSER_START:
2535
0
  case XML_PARSER_MISC:
2536
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2537
0
      return;
2538
0
  case XML_PARSER_ENTITY_DECL:
2539
0
        case XML_PARSER_CONTENT:
2540
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2541
0
        case XML_PARSER_PI:
2542
0
  case XML_PARSER_SYSTEM_LITERAL:
2543
0
  case XML_PARSER_PUBLIC_LITERAL:
2544
      /* we just ignore it there */
2545
0
      return;
2546
0
        case XML_PARSER_EPILOG:
2547
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2548
0
      return;
2549
0
  case XML_PARSER_ENTITY_VALUE:
2550
      /*
2551
       * NOTE: in the case of entity values, we don't do the
2552
       *       substitution here since we need the literal
2553
       *       entity value to be able to save the internal
2554
       *       subset of the document.
2555
       *       This will be handled by xmlStringDecodeEntities
2556
       */
2557
0
      return;
2558
0
        case XML_PARSER_DTD:
2559
      /*
2560
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2561
       * In the internal DTD subset, parameter-entity references
2562
       * can occur only where markup declarations can occur, not
2563
       * within markup declarations.
2564
       * In that case this is handled in xmlParseMarkupDecl
2565
       */
2566
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2567
0
    return;
2568
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2569
0
    return;
2570
0
            break;
2571
0
        case XML_PARSER_IGNORE:
2572
0
            return;
2573
0
    }
2574
2575
0
    xmlParsePEReference(ctxt);
2576
0
}
2577
2578
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * also the target when the new size wraps around. On failure buffer
 * and buffer##_size are left unchanged.
 * Expands to a brace block, not an expression.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2592
2593
/**
2594
 * xmlStringDecodeEntitiesInt:
2595
 * @ctxt:  the parser context
2596
 * @str:  the input string
2597
 * @len: the string length
2598
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2599
 * @end:  an end marker xmlChar, 0 if none
2600
 * @end2:  an end marker xmlChar, 0 if none
2601
 * @end3:  an end marker xmlChar, 0 if none
2602
 * @check:  whether to perform entity checks
2603
 */
2604
static xmlChar *
2605
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2606
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2607
5.96k
                           int check) {
2608
5.96k
    xmlChar *buffer = NULL;
2609
5.96k
    size_t buffer_size = 0;
2610
5.96k
    size_t nbchars = 0;
2611
2612
5.96k
    xmlChar *current = NULL;
2613
5.96k
    xmlChar *rep = NULL;
2614
5.96k
    const xmlChar *last;
2615
5.96k
    xmlEntityPtr ent;
2616
5.96k
    int c,l;
2617
2618
5.96k
    if (str == NULL)
2619
0
        return(NULL);
2620
5.96k
    last = str + len;
2621
2622
5.96k
    if (((ctxt->depth > 40) &&
2623
5.96k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2624
5.96k
  (ctxt->depth > 100)) {
2625
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2626
0
                       "Maximum entity nesting depth exceeded");
2627
0
  return(NULL);
2628
0
    }
2629
2630
    /*
2631
     * allocate a translation buffer.
2632
     */
2633
5.96k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2634
5.96k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2635
5.96k
    if (buffer == NULL) goto mem_error;
2636
2637
    /*
2638
     * OK loop until we reach one of the ending char or a size limit.
2639
     * we are operating on already parsed values.
2640
     */
2641
5.96k
    if (str < last)
2642
5.39k
  c = CUR_SCHAR(str, l);
2643
572
    else
2644
572
        c = 0;
2645
24.2M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2646
24.2M
           (c != end2) && (c != end3) &&
2647
24.2M
           (ctxt->instate != XML_PARSER_EOF)) {
2648
2649
24.2M
  if (c == 0) break;
2650
24.2M
        if ((c == '&') && (str[1] == '#')) {
2651
5.60k
      int val = xmlParseStringCharRef(ctxt, &str);
2652
5.60k
      if (val == 0)
2653
3.57k
                goto int_error;
2654
2.03k
      COPY_BUF(0,buffer,nbchars,val);
2655
2.03k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2656
450
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2657
450
      }
2658
24.2M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2659
0
      if (xmlParserDebugEntities)
2660
0
    xmlGenericError(xmlGenericErrorContext,
2661
0
      "String decoding Entity Reference: %.30s\n",
2662
0
      str);
2663
0
      ent = xmlParseStringEntityRef(ctxt, &str);
2664
0
      if ((ent != NULL) &&
2665
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2666
0
    if (ent->content != NULL) {
2667
0
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2668
0
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2669
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2670
0
        }
2671
0
    } else {
2672
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2673
0
          "predefined entity has no content\n");
2674
0
                    goto int_error;
2675
0
    }
2676
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
2677
0
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2678
0
                    goto int_error;
2679
2680
0
                if (ent->flags & XML_ENT_EXPANDING) {
2681
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2682
0
                    xmlHaltParser(ctxt);
2683
0
                    ent->content[0] = 0;
2684
0
                    goto int_error;
2685
0
                }
2686
2687
0
                ent->flags |= XML_ENT_EXPANDING;
2688
0
    ctxt->depth++;
2689
0
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2690
0
                        ent->length, what, 0, 0, 0, check);
2691
0
    ctxt->depth--;
2692
0
                ent->flags &= ~XML_ENT_EXPANDING;
2693
2694
0
    if (rep == NULL) {
2695
0
                    ent->content[0] = 0;
2696
0
                    goto int_error;
2697
0
                }
2698
2699
0
                current = rep;
2700
0
                while (*current != 0) { /* non input consuming loop */
2701
0
                    buffer[nbchars++] = *current++;
2702
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2703
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2704
0
                    }
2705
0
                }
2706
0
                xmlFree(rep);
2707
0
                rep = NULL;
2708
0
      } else if (ent != NULL) {
2709
0
    int i = xmlStrlen(ent->name);
2710
0
    const xmlChar *cur = ent->name;
2711
2712
0
    buffer[nbchars++] = '&';
2713
0
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2714
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2715
0
    }
2716
0
    for (;i > 0;i--)
2717
0
        buffer[nbchars++] = *cur++;
2718
0
    buffer[nbchars++] = ';';
2719
0
      }
2720
24.2M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2721
0
      if (xmlParserDebugEntities)
2722
0
    xmlGenericError(xmlGenericErrorContext,
2723
0
      "String decoding PE Reference: %.30s\n", str);
2724
0
      ent = xmlParseStringPEReference(ctxt, &str);
2725
0
      if (ent != NULL) {
2726
0
                if (ent->content == NULL) {
2727
        /*
2728
         * Note: external parsed entities will not be loaded,
2729
         * it is not required for a non-validating parser to
2730
         * complete external PEReferences coming from the
2731
         * internal subset
2732
         */
2733
0
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2734
0
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2735
0
      (ctxt->validate != 0)) {
2736
0
      xmlLoadEntityContent(ctxt, ent);
2737
0
        } else {
2738
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2739
0
      "not validating will not read content for PE entity %s\n",
2740
0
                          ent->name, NULL);
2741
0
        }
2742
0
    }
2743
2744
0
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2745
0
                    goto int_error;
2746
2747
0
                if (ent->flags & XML_ENT_EXPANDING) {
2748
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2749
0
                    xmlHaltParser(ctxt);
2750
0
                    if (ent->content != NULL)
2751
0
                        ent->content[0] = 0;
2752
0
                    goto int_error;
2753
0
                }
2754
2755
0
                ent->flags |= XML_ENT_EXPANDING;
2756
0
    ctxt->depth++;
2757
0
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2758
0
                        ent->length, what, 0, 0, 0, check);
2759
0
    ctxt->depth--;
2760
0
                ent->flags &= ~XML_ENT_EXPANDING;
2761
2762
0
    if (rep == NULL) {
2763
0
                    if (ent->content != NULL)
2764
0
                        ent->content[0] = 0;
2765
0
                    goto int_error;
2766
0
                }
2767
0
                current = rep;
2768
0
                while (*current != 0) { /* non input consuming loop */
2769
0
                    buffer[nbchars++] = *current++;
2770
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2771
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2772
0
                    }
2773
0
                }
2774
0
                xmlFree(rep);
2775
0
                rep = NULL;
2776
0
      }
2777
24.2M
  } else {
2778
24.2M
      COPY_BUF(l,buffer,nbchars,c);
2779
24.2M
      str += l;
2780
24.2M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2781
1.58k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2782
1.58k
      }
2783
24.2M
  }
2784
24.2M
  if (str < last)
2785
24.2M
      c = CUR_SCHAR(str, l);
2786
1.82k
  else
2787
1.82k
      c = 0;
2788
24.2M
    }
2789
2.39k
    buffer[nbchars] = 0;
2790
2.39k
    return(buffer);
2791
2792
0
mem_error:
2793
0
    xmlErrMemory(ctxt, NULL);
2794
3.57k
int_error:
2795
3.57k
    if (rep != NULL)
2796
0
        xmlFree(rep);
2797
3.57k
    if (buffer != NULL)
2798
3.57k
        xmlFree(buffer);
2799
3.57k
    return(NULL);
2800
0
}
2801
2802
/**
2803
 * xmlStringLenDecodeEntities:
2804
 * @ctxt:  the parser context
2805
 * @str:  the input string
2806
 * @len: the string length
2807
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2808
 * @end:  an end marker xmlChar, 0 if none
2809
 * @end2:  an end marker xmlChar, 0 if none
2810
 * @end3:  an end marker xmlChar, 0 if none
2811
 *
2812
 * DEPRECATED: Internal function, don't use.
2813
 *
2814
 * Takes a entity string content and process to do the adequate substitutions.
2815
 *
2816
 * [67] Reference ::= EntityRef | CharRef
2817
 *
2818
 * [69] PEReference ::= '%' Name ';'
2819
 *
2820
 * Returns A newly allocated string with the substitution done. The caller
2821
 *      must deallocate it !
2822
 */
2823
xmlChar *
2824
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2825
                           int what, xmlChar end, xmlChar  end2,
2826
0
                           xmlChar end3) {
2827
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2828
0
        return(NULL);
2829
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2830
0
                                      end, end2, end3, 0));
2831
0
}
2832
2833
/**
2834
 * xmlStringDecodeEntities:
2835
 * @ctxt:  the parser context
2836
 * @str:  the input string
2837
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2838
 * @end:  an end marker xmlChar, 0 if none
2839
 * @end2:  an end marker xmlChar, 0 if none
2840
 * @end3:  an end marker xmlChar, 0 if none
2841
 *
2842
 * DEPRECATED: Internal function, don't use.
2843
 *
2844
 * Takes a entity string content and process to do the adequate substitutions.
2845
 *
2846
 * [67] Reference ::= EntityRef | CharRef
2847
 *
2848
 * [69] PEReference ::= '%' Name ';'
2849
 *
2850
 * Returns A newly allocated string with the substitution done. The caller
2851
 *      must deallocate it !
2852
 */
2853
xmlChar *
2854
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2855
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2856
0
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2857
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2858
0
                                      end, end2, end3, 0));
2859
0
}
2860
2861
/************************************************************************
2862
 *                  *
2863
 *    Commodity functions, cleanup needed ?     *
2864
 *                  *
2865
 ************************************************************************/
2866
2867
/**
2868
 * areBlanks:
2869
 * @ctxt:  an XML parser context
2870
 * @str:  a xmlChar *
2871
 * @len:  the size of @str
2872
 * @blank_chars: we know the chars are blanks
2873
 *
2874
 * Is this a sequence of blank chars that one can ignore ?
2875
 *
2876
 * Returns 1 if ignorable 0 otherwise.
2877
 */
2878
2879
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2880
491k
                     int blank_chars) {
2881
491k
    int i, ret;
2882
491k
    xmlNodePtr lastChild;
2883
2884
    /*
2885
     * Don't spend time trying to differentiate them, the same callback is
2886
     * used !
2887
     */
2888
491k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2889
0
  return(0);
2890
2891
    /*
2892
     * Check for xml:space value.
2893
     */
2894
491k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2895
491k
        (*(ctxt->space) == -2))
2896
445k
  return(0);
2897
2898
    /*
2899
     * Check that the string is made of blanks
2900
     */
2901
45.3k
    if (blank_chars == 0) {
2902
78.2k
  for (i = 0;i < len;i++)
2903
75.1k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2904
22.4k
    }
2905
2906
    /*
2907
     * Look if the element is mixed content in the DTD if available
2908
     */
2909
25.9k
    if (ctxt->node == NULL) return(0);
2910
0
    if (ctxt->myDoc != NULL) {
2911
0
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2912
0
        if (ret == 0) return(1);
2913
0
        if (ret == 1) return(0);
2914
0
    }
2915
2916
    /*
2917
     * Otherwise, heuristic :-\
2918
     */
2919
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2920
0
    if ((ctxt->node->children == NULL) &&
2921
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2922
2923
0
    lastChild = xmlGetLastChild(ctxt->node);
2924
0
    if (lastChild == NULL) {
2925
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2926
0
            (ctxt->node->content != NULL)) return(0);
2927
0
    } else if (xmlNodeIsText(lastChild))
2928
0
        return(0);
2929
0
    else if ((ctxt->node->children != NULL) &&
2930
0
             (xmlNodeIsText(ctxt->node->children)))
2931
0
        return(0);
2932
0
    return(1);
2933
0
}
2934
2935
/************************************************************************
2936
 *                  *
2937
 *    Extra stuff for namespace support     *
2938
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2939
 *                  *
2940
 ************************************************************************/
2941
2942
/**
2943
 * xmlSplitQName:
2944
 * @ctxt:  an XML parser context
2945
 * @name:  an XML parser context
2946
 * @prefix:  a xmlChar **
2947
 *
2948
 * parse an UTF8 encoded XML qualified name string
2949
 *
2950
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2951
 *
2952
 * [NS 6] Prefix ::= NCName
2953
 *
2954
 * [NS 7] LocalPart ::= NCName
2955
 *
2956
 * Returns the local part, and prefix is updated
2957
 *   to get the Prefix if any.
2958
 */
2959
2960
xmlChar *
2961
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2962
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2963
0
    xmlChar *buffer = NULL;
2964
0
    int len = 0;
2965
0
    int max = XML_MAX_NAMELEN;
2966
0
    xmlChar *ret = NULL;
2967
0
    const xmlChar *cur = name;
2968
0
    int c;
2969
2970
0
    if (prefix == NULL) return(NULL);
2971
0
    *prefix = NULL;
2972
2973
0
    if (cur == NULL) return(NULL);
2974
2975
#ifndef XML_XML_NAMESPACE
2976
    /* xml: prefix is not really a namespace */
2977
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2978
        (cur[2] == 'l') && (cur[3] == ':'))
2979
  return(xmlStrdup(name));
2980
#endif
2981
2982
    /* nasty but well=formed */
2983
0
    if (cur[0] == ':')
2984
0
  return(xmlStrdup(name));
2985
2986
0
    c = *cur++;
2987
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2988
0
  buf[len++] = c;
2989
0
  c = *cur++;
2990
0
    }
2991
0
    if (len >= max) {
2992
  /*
2993
   * Okay someone managed to make a huge name, so he's ready to pay
2994
   * for the processing speed.
2995
   */
2996
0
  max = len * 2;
2997
2998
0
  buffer = (xmlChar *) xmlMallocAtomic(max);
2999
0
  if (buffer == NULL) {
3000
0
      xmlErrMemory(ctxt, NULL);
3001
0
      return(NULL);
3002
0
  }
3003
0
  memcpy(buffer, buf, len);
3004
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3005
0
      if (len + 10 > max) {
3006
0
          xmlChar *tmp;
3007
3008
0
    max *= 2;
3009
0
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3010
0
    if (tmp == NULL) {
3011
0
        xmlFree(buffer);
3012
0
        xmlErrMemory(ctxt, NULL);
3013
0
        return(NULL);
3014
0
    }
3015
0
    buffer = tmp;
3016
0
      }
3017
0
      buffer[len++] = c;
3018
0
      c = *cur++;
3019
0
  }
3020
0
  buffer[len] = 0;
3021
0
    }
3022
3023
0
    if ((c == ':') && (*cur == 0)) {
3024
0
        if (buffer != NULL)
3025
0
      xmlFree(buffer);
3026
0
  *prefix = NULL;
3027
0
  return(xmlStrdup(name));
3028
0
    }
3029
3030
0
    if (buffer == NULL)
3031
0
  ret = xmlStrndup(buf, len);
3032
0
    else {
3033
0
  ret = buffer;
3034
0
  buffer = NULL;
3035
0
  max = XML_MAX_NAMELEN;
3036
0
    }
3037
3038
3039
0
    if (c == ':') {
3040
0
  c = *cur;
3041
0
        *prefix = ret;
3042
0
  if (c == 0) {
3043
0
      return(xmlStrndup(BAD_CAST "", 0));
3044
0
  }
3045
0
  len = 0;
3046
3047
  /*
3048
   * Check that the first character is proper to start
3049
   * a new name
3050
   */
3051
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3052
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3053
0
        (c == '_') || (c == ':'))) {
3054
0
      int l;
3055
0
      int first = CUR_SCHAR(cur, l);
3056
3057
0
      if (!IS_LETTER(first) && (first != '_')) {
3058
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3059
0
          "Name %s is not XML Namespace compliant\n",
3060
0
          name);
3061
0
      }
3062
0
  }
3063
0
  cur++;
3064
3065
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3066
0
      buf[len++] = c;
3067
0
      c = *cur++;
3068
0
  }
3069
0
  if (len >= max) {
3070
      /*
3071
       * Okay someone managed to make a huge name, so he's ready to pay
3072
       * for the processing speed.
3073
       */
3074
0
      max = len * 2;
3075
3076
0
      buffer = (xmlChar *) xmlMallocAtomic(max);
3077
0
      if (buffer == NULL) {
3078
0
          xmlErrMemory(ctxt, NULL);
3079
0
    return(NULL);
3080
0
      }
3081
0
      memcpy(buffer, buf, len);
3082
0
      while (c != 0) { /* tested bigname2.xml */
3083
0
    if (len + 10 > max) {
3084
0
        xmlChar *tmp;
3085
3086
0
        max *= 2;
3087
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3088
0
        if (tmp == NULL) {
3089
0
      xmlErrMemory(ctxt, NULL);
3090
0
      xmlFree(buffer);
3091
0
      return(NULL);
3092
0
        }
3093
0
        buffer = tmp;
3094
0
    }
3095
0
    buffer[len++] = c;
3096
0
    c = *cur++;
3097
0
      }
3098
0
      buffer[len] = 0;
3099
0
  }
3100
3101
0
  if (buffer == NULL)
3102
0
      ret = xmlStrndup(buf, len);
3103
0
  else {
3104
0
      ret = buffer;
3105
0
  }
3106
0
    }
3107
3108
0
    return(ret);
3109
0
}
3110
3111
/************************************************************************
3112
 *                  *
3113
 *      The parser itself       *
3114
 *  Relates to http://www.w3.org/TR/REC-xml       *
3115
 *                  *
3116
 ************************************************************************/
3117
3118
/************************************************************************
3119
 *                  *
3120
 *  Routines to parse Name, NCName and NmToken      *
3121
 *                  *
3122
 ************************************************************************/
3123
#ifdef DEBUG
/*
 * Call counters, only compiled into DEBUG builds. Each one is bumped on
 * entry to the parsing routine it is named after. Objects with static
 * storage duration start at zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNameComplex;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNmToken;
static unsigned long nbParseStringName;
#endif
3131
3132
/*
3133
 * The two following functions are related to the change of accepted
3134
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3135
 * They correspond to the modified production [4] and the new production [4a]
3136
 * changes in that revision. Also note that the macros used for the
3137
 * productions Letter, Digit, CombiningChar and Extender are not needed
3138
 * anymore.
3139
 * We still keep compatibility to pre-revision5 parsing semantic if the
3140
 * new XML_PARSE_OLD10 option is given to the parser.
3141
 */
3142
static int
3143
29.4k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3144
29.4k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3145
        /*
3146
   * Use the new checks of production [4] [4a] amd [5] of the
3147
   * Update 5 of XML-1.0
3148
   */
3149
29.4k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3150
29.4k
      (((c >= 'a') && (c <= 'z')) ||
3151
28.7k
       ((c >= 'A') && (c <= 'Z')) ||
3152
28.7k
       (c == '_') || (c == ':') ||
3153
28.7k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3154
28.7k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3155
28.7k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3156
28.7k
       ((c >= 0x370) && (c <= 0x37D)) ||
3157
28.7k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3158
28.7k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3159
28.7k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3160
28.7k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3161
28.7k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3162
28.7k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3163
28.7k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3164
28.7k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3165
27.0k
      return(1);
3166
29.4k
    } else {
3167
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3168
0
      return(1);
3169
0
    }
3170
2.47k
    return(0);
3171
29.4k
}
3172
3173
static int
3174
95.0M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3175
95.0M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3176
        /*
3177
   * Use the new checks of production [4] [4a] amd [5] of the
3178
   * Update 5 of XML-1.0
3179
   */
3180
95.0M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3181
95.0M
      (((c >= 'a') && (c <= 'z')) ||
3182
95.0M
       ((c >= 'A') && (c <= 'Z')) ||
3183
95.0M
       ((c >= '0') && (c <= '9')) || /* !start */
3184
95.0M
       (c == '_') || (c == ':') ||
3185
95.0M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3186
95.0M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3187
95.0M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3188
95.0M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3189
95.0M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3190
95.0M
       ((c >= 0x370) && (c <= 0x37D)) ||
3191
95.0M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3192
95.0M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3193
95.0M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3194
95.0M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3195
95.0M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3196
95.0M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3197
95.0M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3198
95.0M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3199
95.0M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3200
95.0M
       return(1);
3201
95.0M
    } else {
3202
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3203
0
            (c == '.') || (c == '-') ||
3204
0
      (c == '_') || (c == ':') ||
3205
0
      (IS_COMBINING(c)) ||
3206
0
      (IS_EXTENDER(c)))
3207
0
      return(1);
3208
0
    }
3209
21.1k
    return(0);
3210
95.0M
}
3211
3212
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3213
                                          int *len, int *alloc, int normalize);
3214
3215
static const xmlChar *
3216
148k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3217
148k
    int len = 0, l;
3218
148k
    int c;
3219
148k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3220
148k
                    XML_MAX_TEXT_LENGTH :
3221
148k
                    XML_MAX_NAME_LENGTH;
3222
3223
#ifdef DEBUG
3224
    nbParseNameComplex++;
3225
#endif
3226
3227
    /*
3228
     * Handler for more complex cases
3229
     */
3230
148k
    c = CUR_CHAR(l);
3231
148k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3232
        /*
3233
   * Use the new checks of production [4] [4a] amd [5] of the
3234
   * Update 5 of XML-1.0
3235
   */
3236
148k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3237
148k
      (!(((c >= 'a') && (c <= 'z')) ||
3238
144k
         ((c >= 'A') && (c <= 'Z')) ||
3239
144k
         (c == '_') || (c == ':') ||
3240
144k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3241
144k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3242
144k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3243
144k
         ((c >= 0x370) && (c <= 0x37D)) ||
3244
144k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3245
144k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3246
144k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3247
144k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3248
144k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3249
144k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3250
144k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3251
144k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3252
88.0k
      return(NULL);
3253
88.0k
  }
3254
60.2k
  len += l;
3255
60.2k
  NEXTL(l);
3256
60.2k
  c = CUR_CHAR(l);
3257
57.1M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3258
57.1M
         (((c >= 'a') && (c <= 'z')) ||
3259
57.1M
          ((c >= 'A') && (c <= 'Z')) ||
3260
57.1M
          ((c >= '0') && (c <= '9')) || /* !start */
3261
57.1M
          (c == '_') || (c == ':') ||
3262
57.1M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3263
57.1M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3264
57.1M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3265
57.1M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3266
57.1M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3267
57.1M
          ((c >= 0x370) && (c <= 0x37D)) ||
3268
57.1M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3269
57.1M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3270
57.1M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3271
57.1M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3272
57.1M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3273
57.1M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3274
57.1M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3275
57.1M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3276
57.1M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3277
57.1M
    )) {
3278
57.1M
            if (len <= INT_MAX - l)
3279
57.1M
          len += l;
3280
57.1M
      NEXTL(l);
3281
57.1M
      c = CUR_CHAR(l);
3282
57.1M
  }
3283
60.2k
    } else {
3284
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3285
0
      (!IS_LETTER(c) && (c != '_') &&
3286
0
       (c != ':'))) {
3287
0
      return(NULL);
3288
0
  }
3289
0
  len += l;
3290
0
  NEXTL(l);
3291
0
  c = CUR_CHAR(l);
3292
3293
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3294
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3295
0
    (c == '.') || (c == '-') ||
3296
0
    (c == '_') || (c == ':') ||
3297
0
    (IS_COMBINING(c)) ||
3298
0
    (IS_EXTENDER(c)))) {
3299
0
            if (len <= INT_MAX - l)
3300
0
          len += l;
3301
0
      NEXTL(l);
3302
0
      c = CUR_CHAR(l);
3303
0
  }
3304
0
    }
3305
60.2k
    if (ctxt->instate == XML_PARSER_EOF)
3306
0
        return(NULL);
3307
60.2k
    if (len > maxLength) {
3308
1
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3309
1
        return(NULL);
3310
1
    }
3311
60.2k
    if (ctxt->input->cur - ctxt->input->base < len) {
3312
        /*
3313
         * There were a couple of bugs where PERefs lead to to a change
3314
         * of the buffer. Check the buffer size to avoid passing an invalid
3315
         * pointer to xmlDictLookup.
3316
         */
3317
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3318
0
                    "unexpected change of input buffer");
3319
0
        return (NULL);
3320
0
    }
3321
60.2k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3322
801
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3323
59.4k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3324
60.2k
}
3325
3326
/**
3327
 * xmlParseName:
3328
 * @ctxt:  an XML parser context
3329
 *
3330
 * DEPRECATED: Internal function, don't use.
3331
 *
3332
 * parse an XML name.
3333
 *
3334
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3335
 *                  CombiningChar | Extender
3336
 *
3337
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3338
 *
3339
 * [6] Names ::= Name (#x20 Name)*
3340
 *
3341
 * Returns the Name parsed or NULL
3342
 */
3343
3344
const xmlChar *
3345
441k
xmlParseName(xmlParserCtxtPtr ctxt) {
3346
441k
    const xmlChar *in;
3347
441k
    const xmlChar *ret;
3348
441k
    size_t count = 0;
3349
441k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3350
441k
                       XML_MAX_TEXT_LENGTH :
3351
441k
                       XML_MAX_NAME_LENGTH;
3352
3353
441k
    GROW;
3354
441k
    if (ctxt->instate == XML_PARSER_EOF)
3355
0
        return(NULL);
3356
3357
#ifdef DEBUG
3358
    nbParseName++;
3359
#endif
3360
3361
    /*
3362
     * Accelerator for simple ASCII names
3363
     */
3364
441k
    in = ctxt->input->cur;
3365
441k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3366
441k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3367
441k
  (*in == '_') || (*in == ':')) {
3368
308k
  in++;
3369
1.15M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3370
1.15M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3371
1.15M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3372
1.15M
         (*in == '_') || (*in == '-') ||
3373
1.15M
         (*in == ':') || (*in == '.'))
3374
847k
      in++;
3375
308k
  if ((*in > 0) && (*in < 0x80)) {
3376
293k
      count = in - ctxt->input->cur;
3377
293k
            if (count > maxLength) {
3378
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3379
0
                return(NULL);
3380
0
            }
3381
293k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3382
293k
      ctxt->input->cur = in;
3383
293k
      ctxt->input->col += count;
3384
293k
      if (ret == NULL)
3385
0
          xmlErrMemory(ctxt, NULL);
3386
293k
      return(ret);
3387
293k
  }
3388
308k
    }
3389
    /* accelerator for special cases */
3390
148k
    return(xmlParseNameComplex(ctxt));
3391
441k
}
3392
3393
static const xmlChar *
3394
15.9k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3395
15.9k
    int len = 0, l;
3396
15.9k
    int c;
3397
15.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3398
15.9k
                    XML_MAX_TEXT_LENGTH :
3399
15.9k
                    XML_MAX_NAME_LENGTH;
3400
15.9k
    size_t startPosition = 0;
3401
3402
#ifdef DEBUG
3403
    nbParseNCNameComplex++;
3404
#endif
3405
3406
    /*
3407
     * Handler for more complex cases
3408
     */
3409
15.9k
    startPosition = CUR_PTR - BASE_PTR;
3410
15.9k
    c = CUR_CHAR(l);
3411
15.9k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3412
15.9k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3413
8.70k
  return(NULL);
3414
8.70k
    }
3415
3416
84.8M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3417
84.8M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3418
84.8M
        if (len <= INT_MAX - l)
3419
84.8M
      len += l;
3420
84.8M
  NEXTL(l);
3421
84.8M
  c = CUR_CHAR(l);
3422
84.8M
    }
3423
7.22k
    if (ctxt->instate == XML_PARSER_EOF)
3424
0
        return(NULL);
3425
7.22k
    if (len > maxLength) {
3426
8
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3427
8
        return(NULL);
3428
8
    }
3429
7.21k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3430
7.22k
}
3431
3432
/**
3433
 * xmlParseNCName:
3434
 * @ctxt:  an XML parser context
3435
 * @len:  length of the string parsed
3436
 *
3437
 * parse an XML name.
3438
 *
3439
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3440
 *                      CombiningChar | Extender
3441
 *
3442
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3443
 *
3444
 * Returns the Name parsed or NULL
3445
 */
3446
3447
static const xmlChar *
3448
2.20M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3449
2.20M
    const xmlChar *in, *e;
3450
2.20M
    const xmlChar *ret;
3451
2.20M
    size_t count = 0;
3452
2.20M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3453
2.20M
                       XML_MAX_TEXT_LENGTH :
3454
2.20M
                       XML_MAX_NAME_LENGTH;
3455
3456
#ifdef DEBUG
3457
    nbParseNCName++;
3458
#endif
3459
3460
    /*
3461
     * Accelerator for simple ASCII names
3462
     */
3463
2.20M
    in = ctxt->input->cur;
3464
2.20M
    e = ctxt->input->end;
3465
2.20M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3466
2.20M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3467
2.20M
   (*in == '_')) && (in < e)) {
3468
2.18M
  in++;
3469
4.22M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3470
4.22M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3471
4.22M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3472
4.22M
          (*in == '_') || (*in == '-') ||
3473
4.22M
          (*in == '.')) && (in < e))
3474
2.04M
      in++;
3475
2.18M
  if (in >= e)
3476
393
      goto complex;
3477
2.18M
  if ((*in > 0) && (*in < 0x80)) {
3478
2.18M
      count = in - ctxt->input->cur;
3479
2.18M
            if (count > maxLength) {
3480
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3481
0
                return(NULL);
3482
0
            }
3483
2.18M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3484
2.18M
      ctxt->input->cur = in;
3485
2.18M
      ctxt->input->col += count;
3486
2.18M
      if (ret == NULL) {
3487
0
          xmlErrMemory(ctxt, NULL);
3488
0
      }
3489
2.18M
      return(ret);
3490
2.18M
  }
3491
2.18M
    }
3492
15.9k
complex:
3493
15.9k
    return(xmlParseNCNameComplex(ctxt));
3494
2.20M
}
3495
3496
/**
3497
 * xmlParseNameAndCompare:
3498
 * @ctxt:  an XML parser context
3499
 *
3500
 * parse an XML name and compares for match
3501
 * (specialized for endtag parsing)
3502
 *
3503
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3504
 * and the name for mismatch
3505
 */
3506
3507
static const xmlChar *
3508
74.3k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3509
74.3k
    register const xmlChar *cmp = other;
3510
74.3k
    register const xmlChar *in;
3511
74.3k
    const xmlChar *ret;
3512
3513
74.3k
    GROW;
3514
74.3k
    if (ctxt->instate == XML_PARSER_EOF)
3515
0
        return(NULL);
3516
3517
74.3k
    in = ctxt->input->cur;
3518
369k
    while (*in != 0 && *in == *cmp) {
3519
295k
  ++in;
3520
295k
  ++cmp;
3521
295k
    }
3522
74.3k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3523
  /* success */
3524
74.1k
  ctxt->input->col += in - ctxt->input->cur;
3525
74.1k
  ctxt->input->cur = in;
3526
74.1k
  return (const xmlChar*) 1;
3527
74.1k
    }
3528
    /* failure (or end of input buffer), check with full function */
3529
160
    ret = xmlParseName (ctxt);
3530
    /* strings coming from the dictionary direct compare possible */
3531
160
    if (ret == other) {
3532
12
  return (const xmlChar*) 1;
3533
12
    }
3534
148
    return ret;
3535
160
}
3536
3537
/**
3538
 * xmlParseStringName:
3539
 * @ctxt:  an XML parser context
3540
 * @str:  a pointer to the string pointer (IN/OUT)
3541
 *
3542
 * parse an XML name.
3543
 *
3544
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3545
 *                  CombiningChar | Extender
3546
 *
3547
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3548
 *
3549
 * [6] Names ::= Name (#x20 Name)*
3550
 *
3551
 * Returns the Name parsed or NULL. The @str pointer
3552
 * is updated to the current location in the string.
3553
 */
3554
3555
static xmlChar *
3556
15.0k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3557
15.0k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3558
15.0k
    const xmlChar *cur = *str;
3559
15.0k
    int len = 0, l;
3560
15.0k
    int c;
3561
15.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3562
15.0k
                    XML_MAX_TEXT_LENGTH :
3563
15.0k
                    XML_MAX_NAME_LENGTH;
3564
3565
#ifdef DEBUG
3566
    nbParseStringName++;
3567
#endif
3568
3569
15.0k
    c = CUR_SCHAR(cur, l);
3570
15.0k
    if (!xmlIsNameStartChar(ctxt, c)) {
3571
993
  return(NULL);
3572
993
    }
3573
3574
14.0k
    COPY_BUF(l,buf,len,c);
3575
14.0k
    cur += l;
3576
14.0k
    c = CUR_SCHAR(cur, l);
3577
63.2k
    while (xmlIsNameChar(ctxt, c)) {
3578
49.9k
  COPY_BUF(l,buf,len,c);
3579
49.9k
  cur += l;
3580
49.9k
  c = CUR_SCHAR(cur, l);
3581
49.9k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3582
      /*
3583
       * Okay someone managed to make a huge name, so he's ready to pay
3584
       * for the processing speed.
3585
       */
3586
809
      xmlChar *buffer;
3587
809
      int max = len * 2;
3588
3589
809
      buffer = (xmlChar *) xmlMallocAtomic(max);
3590
809
      if (buffer == NULL) {
3591
0
          xmlErrMemory(ctxt, NULL);
3592
0
    return(NULL);
3593
0
      }
3594
809
      memcpy(buffer, buf, len);
3595
18.4k
      while (xmlIsNameChar(ctxt, c)) {
3596
17.5k
    if (len + 10 > max) {
3597
159
        xmlChar *tmp;
3598
3599
159
        max *= 2;
3600
159
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3601
159
        if (tmp == NULL) {
3602
0
      xmlErrMemory(ctxt, NULL);
3603
0
      xmlFree(buffer);
3604
0
      return(NULL);
3605
0
        }
3606
159
        buffer = tmp;
3607
159
    }
3608
17.5k
    COPY_BUF(l,buffer,len,c);
3609
17.5k
    cur += l;
3610
17.5k
    c = CUR_SCHAR(cur, l);
3611
17.5k
                if (len > maxLength) {
3612
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3613
0
                    xmlFree(buffer);
3614
0
                    return(NULL);
3615
0
                }
3616
17.5k
      }
3617
809
      buffer[len] = 0;
3618
809
      *str = cur;
3619
809
      return(buffer);
3620
809
  }
3621
49.9k
    }
3622
13.2k
    if (len > maxLength) {
3623
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3624
0
        return(NULL);
3625
0
    }
3626
13.2k
    *str = cur;
3627
13.2k
    return(xmlStrndup(buf, len));
3628
13.2k
}
3629
3630
/**
3631
 * xmlParseNmtoken:
3632
 * @ctxt:  an XML parser context
3633
 *
3634
 * DEPRECATED: Internal function, don't use.
3635
 *
3636
 * parse an XML Nmtoken.
3637
 *
3638
 * [7] Nmtoken ::= (NameChar)+
3639
 *
3640
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3641
 *
3642
 * Returns the Nmtoken parsed or NULL
3643
 */
3644
3645
xmlChar *
3646
5.09k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3647
5.09k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3648
5.09k
    int len = 0, l;
3649
5.09k
    int c;
3650
5.09k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3651
5.09k
                    XML_MAX_TEXT_LENGTH :
3652
5.09k
                    XML_MAX_NAME_LENGTH;
3653
3654
#ifdef DEBUG
3655
    nbParseNmToken++;
3656
#endif
3657
3658
5.09k
    c = CUR_CHAR(l);
3659
3660
55.3k
    while (xmlIsNameChar(ctxt, c)) {
3661
51.3k
  COPY_BUF(l,buf,len,c);
3662
51.3k
  NEXTL(l);
3663
51.3k
  c = CUR_CHAR(l);
3664
51.3k
  if (len >= XML_MAX_NAMELEN) {
3665
      /*
3666
       * Okay someone managed to make a huge token, so he's ready to pay
3667
       * for the processing speed.
3668
       */
3669
1.16k
      xmlChar *buffer;
3670
1.16k
      int max = len * 2;
3671
3672
1.16k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3673
1.16k
      if (buffer == NULL) {
3674
0
          xmlErrMemory(ctxt, NULL);
3675
0
    return(NULL);
3676
0
      }
3677
1.16k
      memcpy(buffer, buf, len);
3678
10.0M
      while (xmlIsNameChar(ctxt, c)) {
3679
10.0M
    if (len + 10 > max) {
3680
1.63k
        xmlChar *tmp;
3681
3682
1.63k
        max *= 2;
3683
1.63k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3684
1.63k
        if (tmp == NULL) {
3685
0
      xmlErrMemory(ctxt, NULL);
3686
0
      xmlFree(buffer);
3687
0
      return(NULL);
3688
0
        }
3689
1.63k
        buffer = tmp;
3690
1.63k
    }
3691
10.0M
    COPY_BUF(l,buffer,len,c);
3692
10.0M
                if (len > maxLength) {
3693
1
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3694
1
                    xmlFree(buffer);
3695
1
                    return(NULL);
3696
1
                }
3697
10.0M
    NEXTL(l);
3698
10.0M
    c = CUR_CHAR(l);
3699
10.0M
      }
3700
1.16k
      buffer[len] = 0;
3701
1.16k
            if (ctxt->instate == XML_PARSER_EOF) {
3702
0
                xmlFree(buffer);
3703
0
                return(NULL);
3704
0
            }
3705
1.16k
      return(buffer);
3706
1.16k
  }
3707
51.3k
    }
3708
3.93k
    if (ctxt->instate == XML_PARSER_EOF)
3709
0
        return(NULL);
3710
3.93k
    if (len == 0)
3711
1.61k
        return(NULL);
3712
2.32k
    if (len > maxLength) {
3713
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3714
0
        return(NULL);
3715
0
    }
3716
2.32k
    return(xmlStrndup(buf, len));
3717
2.32k
}
3718
3719
/**
3720
 * xmlParseEntityValue:
3721
 * @ctxt:  an XML parser context
3722
 * @orig:  if non-NULL store a copy of the original entity value
3723
 *
3724
 * DEPRECATED: Internal function, don't use.
3725
 *
3726
 * parse a value for ENTITY declarations
3727
 *
3728
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3729
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3730
 *
3731
 * Returns the EntityValue parsed with reference substituted or NULL
3732
 */
3733
3734
xmlChar *
3735
9.74k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3736
9.74k
    xmlChar *buf = NULL;
3737
9.74k
    int len = 0;
3738
9.74k
    int size = XML_PARSER_BUFFER_SIZE;
3739
9.74k
    int c, l;
3740
9.74k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3741
9.74k
                    XML_MAX_HUGE_LENGTH :
3742
9.74k
                    XML_MAX_TEXT_LENGTH;
3743
9.74k
    xmlChar stop;
3744
9.74k
    xmlChar *ret = NULL;
3745
9.74k
    const xmlChar *cur = NULL;
3746
9.74k
    xmlParserInputPtr input;
3747
3748
9.74k
    if (RAW == '"') stop = '"';
3749
8.76k
    else if (RAW == '\'') stop = '\'';
3750
0
    else {
3751
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3752
0
  return(NULL);
3753
0
    }
3754
9.74k
    buf = (xmlChar *) xmlMallocAtomic(size);
3755
9.74k
    if (buf == NULL) {
3756
0
  xmlErrMemory(ctxt, NULL);
3757
0
  return(NULL);
3758
0
    }
3759
3760
    /*
3761
     * The content of the entity definition is copied in a buffer.
3762
     */
3763
3764
9.74k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3765
9.74k
    input = ctxt->input;
3766
9.74k
    GROW;
3767
9.74k
    if (ctxt->instate == XML_PARSER_EOF)
3768
0
        goto error;
3769
9.74k
    NEXT;
3770
9.74k
    c = CUR_CHAR(l);
3771
    /*
3772
     * NOTE: 4.4.5 Included in Literal
3773
     * When a parameter entity reference appears in a literal entity
3774
     * value, ... a single or double quote character in the replacement
3775
     * text is always treated as a normal data character and will not
3776
     * terminate the literal.
3777
     * In practice it means we stop the loop only when back at parsing
3778
     * the initial entity and the quote is found
3779
     */
3780
25.3M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3781
25.3M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3782
25.3M
  if (len + 5 >= size) {
3783
3.58k
      xmlChar *tmp;
3784
3785
3.58k
      size *= 2;
3786
3.58k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3787
3.58k
      if (tmp == NULL) {
3788
0
    xmlErrMemory(ctxt, NULL);
3789
0
                goto error;
3790
0
      }
3791
3.58k
      buf = tmp;
3792
3.58k
  }
3793
25.3M
  COPY_BUF(l,buf,len,c);
3794
25.3M
  NEXTL(l);
3795
3796
25.3M
  GROW;
3797
25.3M
  c = CUR_CHAR(l);
3798
25.3M
  if (c == 0) {
3799
70
      GROW;
3800
70
      c = CUR_CHAR(l);
3801
70
  }
3802
3803
25.3M
        if (len > maxLength) {
3804
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3805
0
                           "entity value too long\n");
3806
0
            goto error;
3807
0
        }
3808
25.3M
    }
3809
9.74k
    buf[len] = 0;
3810
9.74k
    if (ctxt->instate == XML_PARSER_EOF)
3811
0
        goto error;
3812
9.74k
    if (c != stop) {
3813
121
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3814
121
        goto error;
3815
121
    }
3816
9.62k
    NEXT;
3817
3818
    /*
3819
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3820
     * reference constructs. Note Charref will be handled in
3821
     * xmlStringDecodeEntities()
3822
     */
3823
9.62k
    cur = buf;
3824
72.2M
    while (*cur != 0) { /* non input consuming */
3825
72.2M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3826
15.0k
      xmlChar *name;
3827
15.0k
      xmlChar tmp = *cur;
3828
15.0k
            int nameOk = 0;
3829
3830
15.0k
      cur++;
3831
15.0k
      name = xmlParseStringName(ctxt, &cur);
3832
15.0k
            if (name != NULL) {
3833
14.0k
                nameOk = 1;
3834
14.0k
                xmlFree(name);
3835
14.0k
            }
3836
15.0k
            if ((nameOk == 0) || (*cur != ';')) {
3837
3.57k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3838
3.57k
      "EntityValue: '%c' forbidden except for entities references\n",
3839
3.57k
                            tmp);
3840
3.57k
                goto error;
3841
3.57k
      }
3842
11.4k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3843
11.4k
    (ctxt->inputNr == 1)) {
3844
87
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3845
87
                goto error;
3846
87
      }
3847
11.3k
      if (*cur == 0)
3848
0
          break;
3849
11.3k
  }
3850
72.2M
  cur++;
3851
72.2M
    }
3852
3853
    /*
3854
     * Then PEReference entities are substituted.
3855
     *
3856
     * NOTE: 4.4.7 Bypassed
3857
     * When a general entity reference appears in the EntityValue in
3858
     * an entity declaration, it is bypassed and left as is.
3859
     * so XML_SUBSTITUTE_REF is not set here.
3860
     */
3861
5.96k
    ++ctxt->depth;
3862
5.96k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3863
5.96k
                                     0, 0, 0, /* check */ 1);
3864
5.96k
    --ctxt->depth;
3865
3866
5.96k
    if (orig != NULL) {
3867
5.96k
        *orig = buf;
3868
5.96k
        buf = NULL;
3869
5.96k
    }
3870
3871
9.74k
error:
3872
9.74k
    if (buf != NULL)
3873
3.78k
        xmlFree(buf);
3874
9.74k
    return(ret);
3875
5.96k
}
3876
3877
/**
3878
 * xmlParseAttValueComplex:
3879
 * @ctxt:  an XML parser context
3880
 * @len:   the resulting attribute len
3881
 * @normalize:  whether to apply the inner normalization
3882
 *
3883
 * parse a value for an attribute, this is the fallback function
3884
 * of xmlParseAttValue() when the attribute parsing requires handling
3885
 * of non-ASCII characters, or normalization compaction.
3886
 *
3887
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3888
 */
3889
static xmlChar *
3890
20.4k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3891
20.4k
    xmlChar limit = 0;
3892
20.4k
    xmlChar *buf = NULL;
3893
20.4k
    xmlChar *rep = NULL;
3894
20.4k
    size_t len = 0;
3895
20.4k
    size_t buf_size = 0;
3896
20.4k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3897
20.4k
                       XML_MAX_HUGE_LENGTH :
3898
20.4k
                       XML_MAX_TEXT_LENGTH;
3899
20.4k
    int c, l, in_space = 0;
3900
20.4k
    xmlChar *current = NULL;
3901
20.4k
    xmlEntityPtr ent;
3902
3903
20.4k
    if (NXT(0) == '"') {
3904
16.9k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3905
16.9k
  limit = '"';
3906
16.9k
        NEXT;
3907
16.9k
    } else if (NXT(0) == '\'') {
3908
3.44k
  limit = '\'';
3909
3.44k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3910
3.44k
        NEXT;
3911
3.44k
    } else {
3912
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3913
0
  return(NULL);
3914
0
    }
3915
3916
    /*
3917
     * allocate a translation buffer.
3918
     */
3919
20.4k
    buf_size = XML_PARSER_BUFFER_SIZE;
3920
20.4k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3921
20.4k
    if (buf == NULL) goto mem_error;
3922
3923
    /*
3924
     * OK loop until we reach one of the ending char or a size limit.
3925
     */
3926
20.4k
    c = CUR_CHAR(l);
3927
36.9M
    while (((NXT(0) != limit) && /* checked */
3928
36.9M
            (IS_CHAR(c)) && (c != '<')) &&
3929
36.9M
            (ctxt->instate != XML_PARSER_EOF)) {
3930
36.8M
  if (c == '&') {
3931
204k
      in_space = 0;
3932
204k
      if (NXT(1) == '#') {
3933
32.8k
    int val = xmlParseCharRef(ctxt);
3934
3935
32.8k
    if (val == '&') {
3936
997
        if (ctxt->replaceEntities) {
3937
0
      if (len + 10 > buf_size) {
3938
0
          growBuffer(buf, 10);
3939
0
      }
3940
0
      buf[len++] = '&';
3941
997
        } else {
3942
      /*
3943
       * The reparsing will be done in xmlStringGetNodeList()
3944
       * called by the attribute() function in SAX.c
3945
       */
3946
997
      if (len + 10 > buf_size) {
3947
98
          growBuffer(buf, 10);
3948
98
      }
3949
997
      buf[len++] = '&';
3950
997
      buf[len++] = '#';
3951
997
      buf[len++] = '3';
3952
997
      buf[len++] = '8';
3953
997
      buf[len++] = ';';
3954
997
        }
3955
31.8k
    } else if (val != 0) {
3956
18.1k
        if (len + 10 > buf_size) {
3957
40
      growBuffer(buf, 10);
3958
40
        }
3959
18.1k
        len += xmlCopyChar(0, &buf[len], val);
3960
18.1k
    }
3961
171k
      } else {
3962
171k
    ent = xmlParseEntityRef(ctxt);
3963
171k
    if ((ent != NULL) &&
3964
171k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3965
30.2k
        if (len + 10 > buf_size) {
3966
1.58k
      growBuffer(buf, 10);
3967
1.58k
        }
3968
30.2k
        if ((ctxt->replaceEntities == 0) &&
3969
30.2k
            (ent->content[0] == '&')) {
3970
26.9k
      buf[len++] = '&';
3971
26.9k
      buf[len++] = '#';
3972
26.9k
      buf[len++] = '3';
3973
26.9k
      buf[len++] = '8';
3974
26.9k
      buf[len++] = ';';
3975
26.9k
        } else {
3976
3.36k
      buf[len++] = ent->content[0];
3977
3.36k
        }
3978
141k
    } else if ((ent != NULL) &&
3979
141k
               (ctxt->replaceEntities != 0)) {
3980
0
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3981
0
                        if (xmlParserEntityCheck(ctxt, ent->length))
3982
0
                            goto error;
3983
3984
0
      ++ctxt->depth;
3985
0
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
3986
0
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
3987
0
                                /* check */ 1);
3988
0
      --ctxt->depth;
3989
0
      if (rep != NULL) {
3990
0
          current = rep;
3991
0
          while (*current != 0) { /* non input consuming */
3992
0
                                if ((*current == 0xD) || (*current == 0xA) ||
3993
0
                                    (*current == 0x9)) {
3994
0
                                    buf[len++] = 0x20;
3995
0
                                    current++;
3996
0
                                } else
3997
0
                                    buf[len++] = *current++;
3998
0
        if (len + 10 > buf_size) {
3999
0
            growBuffer(buf, 10);
4000
0
        }
4001
0
          }
4002
0
          xmlFree(rep);
4003
0
          rep = NULL;
4004
0
      }
4005
0
        } else {
4006
0
      if (len + 10 > buf_size) {
4007
0
          growBuffer(buf, 10);
4008
0
      }
4009
0
      if (ent->content != NULL)
4010
0
          buf[len++] = ent->content[0];
4011
0
        }
4012
141k
    } else if (ent != NULL) {
4013
12.8k
        int i = xmlStrlen(ent->name);
4014
12.8k
        const xmlChar *cur = ent->name;
4015
4016
        /*
4017
                     * We also check for recursion and amplification
4018
                     * when entities are not substituted. They're
4019
                     * often expanded later.
4020
         */
4021
12.8k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4022
12.8k
      (ent->content != NULL)) {
4023
0
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4024
0
                            unsigned long oldCopy = ctxt->sizeentcopy;
4025
4026
0
                            ctxt->sizeentcopy = ent->length;
4027
4028
0
                            ++ctxt->depth;
4029
0
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4030
0
                                    ent->content, ent->length,
4031
0
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4032
0
                                    /* check */ 1);
4033
0
                            --ctxt->depth;
4034
4035
                            /*
4036
                             * If we're parsing DTD content, the entity
4037
                             * might reference other entities which
4038
                             * weren't defined yet, so the check isn't
4039
                             * reliable.
4040
                             */
4041
0
                            if (ctxt->inSubset == 0) {
4042
0
                                ent->flags |= XML_ENT_CHECKED;
4043
0
                                ent->expandedSize = ctxt->sizeentcopy;
4044
0
                            }
4045
4046
0
                            if (rep != NULL) {
4047
0
                                xmlFree(rep);
4048
0
                                rep = NULL;
4049
0
                            } else {
4050
0
                                ent->content[0] = 0;
4051
0
                            }
4052
4053
0
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4054
0
                                goto error;
4055
0
                        } else {
4056
0
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4057
0
                                goto error;
4058
0
                        }
4059
0
        }
4060
4061
        /*
4062
         * Just output the reference
4063
         */
4064
12.8k
        buf[len++] = '&';
4065
13.2k
        while (len + i + 10 > buf_size) {
4066
660
      growBuffer(buf, i + 10);
4067
660
        }
4068
12.8k
        for (;i > 0;i--)
4069
0
      buf[len++] = *cur++;
4070
12.8k
        buf[len++] = ';';
4071
12.8k
    }
4072
171k
      }
4073
36.6M
  } else {
4074
36.6M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4075
531k
          if ((len != 0) || (!normalize)) {
4076
528k
        if ((!normalize) || (!in_space)) {
4077
520k
      COPY_BUF(l,buf,len,0x20);
4078
520k
      while (len + 10 > buf_size) {
4079
918
          growBuffer(buf, 10);
4080
918
      }
4081
520k
        }
4082
528k
        in_space = 1;
4083
528k
    }
4084
36.1M
      } else {
4085
36.1M
          in_space = 0;
4086
36.1M
    COPY_BUF(l,buf,len,c);
4087
36.1M
    if (len + 10 > buf_size) {
4088
11.6k
        growBuffer(buf, 10);
4089
11.6k
    }
4090
36.1M
      }
4091
36.6M
      NEXTL(l);
4092
36.6M
  }
4093
36.8M
  GROW;
4094
36.8M
  c = CUR_CHAR(l);
4095
36.8M
        if (len > maxLength) {
4096
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4097
0
                           "AttValue length too long\n");
4098
0
            goto mem_error;
4099
0
        }
4100
36.8M
    }
4101
20.4k
    if (ctxt->instate == XML_PARSER_EOF)
4102
0
        goto error;
4103
4104
20.4k
    if ((in_space) && (normalize)) {
4105
1.90k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4106
664
    }
4107
20.4k
    buf[len] = 0;
4108
20.4k
    if (RAW == '<') {
4109
267
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4110
20.1k
    } else if (RAW != limit) {
4111
2.51k
  if ((c != 0) && (!IS_CHAR(c))) {
4112
111
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4113
111
         "invalid character in attribute value\n");
4114
2.40k
  } else {
4115
2.40k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4116
2.40k
         "AttValue: ' expected\n");
4117
2.40k
        }
4118
2.51k
    } else
4119
17.6k
  NEXT;
4120
4121
20.4k
    if (attlen != NULL) *attlen = len;
4122
20.4k
    return(buf);
4123
4124
0
mem_error:
4125
0
    xmlErrMemory(ctxt, NULL);
4126
0
error:
4127
0
    if (buf != NULL)
4128
0
        xmlFree(buf);
4129
0
    if (rep != NULL)
4130
0
        xmlFree(rep);
4131
0
    return(NULL);
4132
0
}
4133
4134
/**
4135
 * xmlParseAttValue:
4136
 * @ctxt:  an XML parser context
4137
 *
4138
 * DEPRECATED: Internal function, don't use.
4139
 *
4140
 * parse a value for an attribute
4141
 * Note: the parser won't do substitution of entities here, this
4142
 * will be handled later in xmlStringGetNodeList
4143
 *
4144
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4145
 *                   "'" ([^<&'] | Reference)* "'"
4146
 *
4147
 * 3.3.3 Attribute-Value Normalization:
4148
 * Before the value of an attribute is passed to the application or
4149
 * checked for validity, the XML processor must normalize it as follows:
4150
 * - a character reference is processed by appending the referenced
4151
 *   character to the attribute value
4152
 * - an entity reference is processed by recursively processing the
4153
 *   replacement text of the entity
4154
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4155
 *   appending #x20 to the normalized value, except that only a single
4156
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4157
 *   parsed entity or the literal entity value of an internal parsed entity
4158
 * - other characters are processed by appending them to the normalized value
4159
 * If the declared value is not CDATA, then the XML processor must further
4160
 * process the normalized attribute value by discarding any leading and
4161
 * trailing space (#x20) characters, and by replacing sequences of space
4162
 * (#x20) characters by a single space (#x20) character.
4163
 * All attributes for which no declaration has been read should be treated
4164
 * by a non-validating parser as if declared CDATA.
4165
 *
4166
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4167
 */
4168
4169
4170
xmlChar *
4171
13.5k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4172
13.5k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4173
13.5k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4174
13.5k
}
4175
4176
/**
4177
 * xmlParseSystemLiteral:
4178
 * @ctxt:  an XML parser context
4179
 *
4180
 * DEPRECATED: Internal function, don't use.
4181
 *
4182
 * parse an XML Literal
4183
 *
4184
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4185
 *
4186
 * Returns the SystemLiteral parsed or NULL
4187
 */
4188
4189
xmlChar *
4190
3.27k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4191
3.27k
    xmlChar *buf = NULL;
4192
3.27k
    int len = 0;
4193
3.27k
    int size = XML_PARSER_BUFFER_SIZE;
4194
3.27k
    int cur, l;
4195
3.27k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4196
3.27k
                    XML_MAX_TEXT_LENGTH :
4197
3.27k
                    XML_MAX_NAME_LENGTH;
4198
3.27k
    xmlChar stop;
4199
3.27k
    int state = ctxt->instate;
4200
4201
3.27k
    if (RAW == '"') {
4202
464
        NEXT;
4203
464
  stop = '"';
4204
2.81k
    } else if (RAW == '\'') {
4205
1.88k
        NEXT;
4206
1.88k
  stop = '\'';
4207
1.88k
    } else {
4208
933
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4209
933
  return(NULL);
4210
933
    }
4211
4212
2.34k
    buf = (xmlChar *) xmlMallocAtomic(size);
4213
2.34k
    if (buf == NULL) {
4214
0
        xmlErrMemory(ctxt, NULL);
4215
0
  return(NULL);
4216
0
    }
4217
2.34k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4218
2.34k
    cur = CUR_CHAR(l);
4219
16.8M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4220
16.8M
  if (len + 5 >= size) {
4221
457
      xmlChar *tmp;
4222
4223
457
      size *= 2;
4224
457
      tmp = (xmlChar *) xmlRealloc(buf, size);
4225
457
      if (tmp == NULL) {
4226
0
          xmlFree(buf);
4227
0
    xmlErrMemory(ctxt, NULL);
4228
0
    ctxt->instate = (xmlParserInputState) state;
4229
0
    return(NULL);
4230
0
      }
4231
457
      buf = tmp;
4232
457
  }
4233
16.8M
  COPY_BUF(l,buf,len,cur);
4234
16.8M
        if (len > maxLength) {
4235
2
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4236
2
            xmlFree(buf);
4237
2
            ctxt->instate = (xmlParserInputState) state;
4238
2
            return(NULL);
4239
2
        }
4240
16.8M
  NEXTL(l);
4241
16.8M
  cur = CUR_CHAR(l);
4242
16.8M
    }
4243
2.34k
    buf[len] = 0;
4244
2.34k
    if (ctxt->instate == XML_PARSER_EOF) {
4245
0
        xmlFree(buf);
4246
0
        return(NULL);
4247
0
    }
4248
2.34k
    ctxt->instate = (xmlParserInputState) state;
4249
2.34k
    if (!IS_CHAR(cur)) {
4250
141
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4251
2.20k
    } else {
4252
2.20k
  NEXT;
4253
2.20k
    }
4254
2.34k
    return(buf);
4255
2.34k
}
4256
4257
/**
4258
 * xmlParsePubidLiteral:
4259
 * @ctxt:  an XML parser context
4260
 *
4261
 * DEPRECATED: Internal function, don't use.
4262
 *
4263
 * parse an XML public literal
4264
 *
4265
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4266
 *
4267
 * Returns the PubidLiteral parsed or NULL.
4268
 */
4269
4270
xmlChar *
4271
1.63k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4272
1.63k
    xmlChar *buf = NULL;
4273
1.63k
    int len = 0;
4274
1.63k
    int size = XML_PARSER_BUFFER_SIZE;
4275
1.63k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4276
1.63k
                    XML_MAX_TEXT_LENGTH :
4277
1.63k
                    XML_MAX_NAME_LENGTH;
4278
1.63k
    xmlChar cur;
4279
1.63k
    xmlChar stop;
4280
1.63k
    xmlParserInputState oldstate = ctxt->instate;
4281
4282
1.63k
    if (RAW == '"') {
4283
512
        NEXT;
4284
512
  stop = '"';
4285
1.12k
    } else if (RAW == '\'') {
4286
825
        NEXT;
4287
825
  stop = '\'';
4288
825
    } else {
4289
302
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4290
302
  return(NULL);
4291
302
    }
4292
1.33k
    buf = (xmlChar *) xmlMallocAtomic(size);
4293
1.33k
    if (buf == NULL) {
4294
0
  xmlErrMemory(ctxt, NULL);
4295
0
  return(NULL);
4296
0
    }
4297
1.33k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4298
1.33k
    cur = CUR;
4299
78.4k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4300
77.1k
  if (len + 1 >= size) {
4301
302
      xmlChar *tmp;
4302
4303
302
      size *= 2;
4304
302
      tmp = (xmlChar *) xmlRealloc(buf, size);
4305
302
      if (tmp == NULL) {
4306
0
    xmlErrMemory(ctxt, NULL);
4307
0
    xmlFree(buf);
4308
0
    return(NULL);
4309
0
      }
4310
302
      buf = tmp;
4311
302
  }
4312
77.1k
  buf[len++] = cur;
4313
77.1k
        if (len > maxLength) {
4314
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4315
0
            xmlFree(buf);
4316
0
            return(NULL);
4317
0
        }
4318
77.1k
  NEXT;
4319
77.1k
  cur = CUR;
4320
77.1k
    }
4321
1.33k
    buf[len] = 0;
4322
1.33k
    if (ctxt->instate == XML_PARSER_EOF) {
4323
0
        xmlFree(buf);
4324
0
        return(NULL);
4325
0
    }
4326
1.33k
    if (cur != stop) {
4327
493
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4328
844
    } else {
4329
844
  NEXTL(1);
4330
844
    }
4331
1.33k
    ctxt->instate = oldstate;
4332
1.33k
    return(buf);
4333
1.33k
}
4334
4335
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4336
4337
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. An entry is non-zero (and equal to its own index) for the
 * tab character and for the bytes 0x20 to 0x7F, with the exception of
 * '&', '<' and ']'. Every other entry, including CR, LF and all bytes
 * >= 0x80, is zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only 0x9 is set, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) is cleared */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) is cleared */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) is cleared */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4374
4375
/**
4376
 * xmlParseCharDataInternal:
4377
 * @ctxt:  an XML parser context
4378
 * @partial:  buffer may contain partial UTF-8 sequences
4379
 *
4380
 * Parse character data. Always makes progress if the first char isn't
4381
 * '<' or '&'.
4382
 *
4383
 * The right angle bracket (>) may be represented using the string "&gt;",
4384
 * and must, for compatibility, be escaped using "&gt;" or a character
4385
 * reference when it appears in the string "]]>" in content, when that
4386
 * string is not marking the end of a CDATA section.
4387
 *
4388
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4389
 */
4390
static void
4391
556k
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4392
556k
    const xmlChar *in;
4393
556k
    int nbchar = 0;
4394
556k
    int line = ctxt->input->line;
4395
556k
    int col = ctxt->input->col;
4396
556k
    int ccol;
4397
4398
556k
    GROW;
4399
    /*
4400
     * Accelerated common case where input doesn't need to be
4401
     * modified before passing it to the handler.
4402
     */
4403
556k
    in = ctxt->input->cur;
4404
641k
    do {
4405
1.03M
get_more_space:
4406
1.48M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4407
1.03M
        if (*in == 0xA) {
4408
417k
            do {
4409
417k
                ctxt->input->line++; ctxt->input->col = 1;
4410
417k
                in++;
4411
417k
            } while (*in == 0xA);
4412
395k
            goto get_more_space;
4413
395k
        }
4414
641k
        if (*in == '<') {
4415
267k
            nbchar = in - ctxt->input->cur;
4416
267k
            if (nbchar > 0) {
4417
267k
                const xmlChar *tmp = ctxt->input->cur;
4418
267k
                ctxt->input->cur = in;
4419
4420
267k
                if ((ctxt->sax != NULL) &&
4421
267k
                    (ctxt->sax->ignorableWhitespace !=
4422
267k
                     ctxt->sax->characters)) {
4423
267k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4424
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4425
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4426
0
                                                   tmp, nbchar);
4427
267k
                    } else {
4428
267k
                        if (ctxt->sax->characters != NULL)
4429
267k
                            ctxt->sax->characters(ctxt->userData,
4430
267k
                                                  tmp, nbchar);
4431
267k
                        if (*ctxt->space == -1)
4432
22.8k
                            *ctxt->space = -2;
4433
267k
                    }
4434
267k
                } else if ((ctxt->sax != NULL) &&
4435
0
                           (ctxt->sax->characters != NULL)) {
4436
0
                    ctxt->sax->characters(ctxt->userData,
4437
0
                                          tmp, nbchar);
4438
0
                }
4439
267k
            }
4440
267k
            return;
4441
267k
        }
4442
4443
499k
get_more:
4444
499k
        ccol = ctxt->input->col;
4445
4.63M
        while (test_char_data[*in]) {
4446
4.13M
            in++;
4447
4.13M
            ccol++;
4448
4.13M
        }
4449
499k
        ctxt->input->col = ccol;
4450
499k
        if (*in == 0xA) {
4451
116k
            do {
4452
116k
                ctxt->input->line++; ctxt->input->col = 1;
4453
116k
                in++;
4454
116k
            } while (*in == 0xA);
4455
112k
            goto get_more;
4456
112k
        }
4457
386k
        if (*in == ']') {
4458
12.1k
            if ((in[1] == ']') && (in[2] == '>')) {
4459
3
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4460
3
                if (ctxt->instate != XML_PARSER_EOF)
4461
3
                    ctxt->input->cur = in + 1;
4462
3
                return;
4463
3
            }
4464
12.1k
            in++;
4465
12.1k
            ctxt->input->col++;
4466
12.1k
            goto get_more;
4467
12.1k
        }
4468
374k
        nbchar = in - ctxt->input->cur;
4469
374k
        if (nbchar > 0) {
4470
361k
            if ((ctxt->sax != NULL) &&
4471
361k
                (ctxt->sax->ignorableWhitespace !=
4472
361k
                 ctxt->sax->characters) &&
4473
361k
                (IS_BLANK_CH(*ctxt->input->cur))) {
4474
177k
                const xmlChar *tmp = ctxt->input->cur;
4475
177k
                ctxt->input->cur = in;
4476
4477
177k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4478
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4479
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4480
0
                                                       tmp, nbchar);
4481
177k
                } else {
4482
177k
                    if (ctxt->sax->characters != NULL)
4483
177k
                        ctxt->sax->characters(ctxt->userData,
4484
177k
                                              tmp, nbchar);
4485
177k
                    if (*ctxt->space == -1)
4486
16.6k
                        *ctxt->space = -2;
4487
177k
                }
4488
177k
                line = ctxt->input->line;
4489
177k
                col = ctxt->input->col;
4490
183k
            } else if (ctxt->sax != NULL) {
4491
183k
                if (ctxt->sax->characters != NULL)
4492
183k
                    ctxt->sax->characters(ctxt->userData,
4493
183k
                                          ctxt->input->cur, nbchar);
4494
183k
                line = ctxt->input->line;
4495
183k
                col = ctxt->input->col;
4496
183k
            }
4497
361k
        }
4498
374k
        ctxt->input->cur = in;
4499
374k
        if (*in == 0xD) {
4500
96.0k
            in++;
4501
96.0k
            if (*in == 0xA) {
4502
89.2k
                ctxt->input->cur = in;
4503
89.2k
                in++;
4504
89.2k
                ctxt->input->line++; ctxt->input->col = 1;
4505
89.2k
                continue; /* while */
4506
89.2k
            }
4507
6.88k
            in--;
4508
6.88k
        }
4509
285k
        if (*in == '<') {
4510
268k
            return;
4511
268k
        }
4512
16.7k
        if (*in == '&') {
4513
5.51k
            return;
4514
5.51k
        }
4515
11.1k
        SHRINK;
4516
11.1k
        GROW;
4517
11.1k
        if (ctxt->instate == XML_PARSER_EOF)
4518
0
            return;
4519
11.1k
        in = ctxt->input->cur;
4520
100k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4521
100k
             (*in == 0x09) || (*in == 0x0a));
4522
14.9k
    ctxt->input->line = line;
4523
14.9k
    ctxt->input->col = col;
4524
14.9k
    xmlParseCharDataComplex(ctxt, partial);
4525
14.9k
}
4526
4527
/**
4528
 * xmlParseCharDataComplex:
4529
 * @ctxt:  an XML parser context
4530
 * @partial:  buffer may contain partial UTF-8 sequences
4531
 *
4532
 * Always makes progress if the first char isn't '<' or '&'.
4533
 *
4534
 * Parse a CharData section. This is the fallback function
4535
 * of xmlParseCharDataInternal() when the parsing requires handling
4536
 * of non-ASCII characters.
4537
 */
4538
static void
4539
14.9k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4540
14.9k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4541
14.9k
    int nbchar = 0;
4542
14.9k
    int cur, l;
4543
4544
14.9k
    cur = CUR_CHAR(l);
4545
4.07M
    while ((cur != '<') && /* checked */
4546
4.07M
           (cur != '&') &&
4547
4.07M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4548
4.05M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4549
311
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
311
  }
4551
4.05M
  COPY_BUF(l,buf,nbchar,cur);
4552
  /* move the current position before possibly calling ctxt->sax->characters */
4553
4.05M
  NEXTL(l);
4554
4.05M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4555
31.4k
      buf[nbchar] = 0;
4556
4557
      /*
4558
       * OK the segment is to be consumed as chars.
4559
       */
4560
31.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4561
31.2k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4562
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4563
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4564
0
                                     buf, nbchar);
4565
31.2k
    } else {
4566
31.2k
        if (ctxt->sax->characters != NULL)
4567
31.2k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4568
31.2k
        if ((ctxt->sax->characters !=
4569
31.2k
             ctxt->sax->ignorableWhitespace) &&
4570
31.2k
      (*ctxt->space == -1))
4571
381
      *ctxt->space = -2;
4572
31.2k
    }
4573
31.2k
      }
4574
31.4k
      nbchar = 0;
4575
            /* something really bad happened in the SAX callback */
4576
31.4k
            if (ctxt->instate != XML_PARSER_CONTENT)
4577
0
                return;
4578
31.4k
            SHRINK;
4579
31.4k
  }
4580
4.05M
  cur = CUR_CHAR(l);
4581
4.05M
    }
4582
14.9k
    if (ctxt->instate == XML_PARSER_EOF)
4583
0
        return;
4584
14.9k
    if (nbchar != 0) {
4585
14.6k
        buf[nbchar] = 0;
4586
  /*
4587
   * OK the segment is to be consumed as chars.
4588
   */
4589
14.6k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4590
14.4k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4591
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4592
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4593
14.4k
      } else {
4594
14.4k
    if (ctxt->sax->characters != NULL)
4595
14.4k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4596
14.4k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4597
14.4k
        (*ctxt->space == -1))
4598
5.17k
        *ctxt->space = -2;
4599
14.4k
      }
4600
14.4k
  }
4601
14.6k
    }
4602
    /*
4603
     * cur == 0 can mean
4604
     *
4605
     * - XML_PARSER_EOF or memory error. This is checked above.
4606
     * - An actual 0 character.
4607
     * - End of buffer.
4608
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4609
     */
4610
14.9k
    if (ctxt->input->cur < ctxt->input->end) {
4611
14.0k
        if ((cur == 0) && (CUR != 0)) {
4612
123
            if (partial == 0) {
4613
13
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4614
13
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4615
13
                NEXTL(1);
4616
13
            }
4617
13.8k
        } else if ((cur != '<') && (cur != '&')) {
4618
            /* Generate the error and skip the offending character */
4619
85
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4620
85
                              "PCDATA invalid Char value %d\n", cur);
4621
85
            NEXTL(l);
4622
85
        }
4623
14.0k
    }
4624
14.9k
}
4625
4626
/**
4627
 * xmlParseCharData:
4628
 * @ctxt:  an XML parser context
4629
 * @cdata:  unused
4630
 *
4631
 * DEPRECATED: Internal function, don't use.
4632
 */
4633
void
4634
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4635
0
    xmlParseCharDataInternal(ctxt, 0);
4636
0
}
4637
4638
/**
4639
 * xmlParseExternalID:
4640
 * @ctxt:  an XML parser context
4641
 * @publicID:  a xmlChar** receiving PubidLiteral
4642
 * @strict: indicates whether we should restrict parsing to only
4643
 *          production [75], see NOTE below
4644
 *
4645
 * DEPRECATED: Internal function, don't use.
4646
 *
4647
 * Parse an External ID or a Public ID
4648
 *
4649
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4650
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4651
 *
4652
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4653
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4654
 *
4655
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4656
 *
4657
 * Returns the SystemLiteral, and in the second
4658
 *                case publicID receives PubidLiteral; if strict is off
4659
 *                it is possible to return NULL and have publicID set.
4660
 */
4661
4662
xmlChar *
4663
8.05k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4664
8.05k
    xmlChar *URI = NULL;
4665
4666
8.05k
    *publicID = NULL;
4667
8.05k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4668
2.32k
        SKIP(6);
4669
2.32k
  if (SKIP_BLANKS == 0) {
4670
734
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4671
734
                     "Space required after 'SYSTEM'\n");
4672
734
  }
4673
2.32k
  URI = xmlParseSystemLiteral(ctxt);
4674
2.32k
  if (URI == NULL) {
4675
350
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4676
350
        }
4677
5.72k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4678
1.63k
        SKIP(6);
4679
1.63k
  if (SKIP_BLANKS == 0) {
4680
106
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4681
106
        "Space required after 'PUBLIC'\n");
4682
106
  }
4683
1.63k
  *publicID = xmlParsePubidLiteral(ctxt);
4684
1.63k
  if (*publicID == NULL) {
4685
302
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4686
302
  }
4687
1.63k
  if (strict) {
4688
      /*
4689
       * We don't handle [83] so "S SystemLiteral" is required.
4690
       */
4691
813
      if (SKIP_BLANKS == 0) {
4692
595
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693
595
      "Space required after the Public Identifier\n");
4694
595
      }
4695
826
  } else {
4696
      /*
4697
       * We handle [83] so we return immediately, if
4698
       * "S SystemLiteral" is not detected. We skip blanks if no
4699
             * system literal was found, but this is harmless since we must
4700
             * be at the end of a NotationDecl.
4701
       */
4702
826
      if (SKIP_BLANKS == 0) return(NULL);
4703
307
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4704
307
  }
4705
951
  URI = xmlParseSystemLiteral(ctxt);
4706
951
  if (URI == NULL) {
4707
585
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4708
585
        }
4709
951
    }
4710
7.36k
    return(URI);
4711
8.05k
}
4712
4713
/**
4714
 * xmlParseCommentComplex:
4715
 * @ctxt:  an XML parser context
4716
 * @buf:  the already parsed part of the buffer
4717
 * @len:  number of bytes in the buffer
4718
 * @size:  allocated size of the buffer
4719
 *
4720
 * Parse an XML (SGML) comment <!-- .... -->
4721
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4722
 *  must not occur within comments. "
4723
 * This is the slow routine in case the accelerator for ascii didn't work
4724
 *
4725
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4726
 */
4727
static void
4728
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4729
46.8k
                       size_t len, size_t size) {
4730
46.8k
    int q, ql;
4731
46.8k
    int r, rl;
4732
46.8k
    int cur, l;
4733
46.8k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4734
46.8k
                       XML_MAX_HUGE_LENGTH :
4735
46.8k
                       XML_MAX_TEXT_LENGTH;
4736
46.8k
    int inputid;
4737
4738
46.8k
    inputid = ctxt->input->id;
4739
4740
46.8k
    if (buf == NULL) {
4741
46.8k
        len = 0;
4742
46.8k
  size = XML_PARSER_BUFFER_SIZE;
4743
46.8k
  buf = (xmlChar *) xmlMallocAtomic(size);
4744
46.8k
  if (buf == NULL) {
4745
0
      xmlErrMemory(ctxt, NULL);
4746
0
      return;
4747
0
  }
4748
46.8k
    }
4749
46.8k
    q = CUR_CHAR(ql);
4750
46.8k
    if (q == 0)
4751
76
        goto not_terminated;
4752
46.7k
    if (!IS_CHAR(q)) {
4753
27
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4754
27
                          "xmlParseComment: invalid xmlChar value %d\n",
4755
27
                    q);
4756
27
  xmlFree (buf);
4757
27
  return;
4758
27
    }
4759
46.7k
    NEXTL(ql);
4760
46.7k
    r = CUR_CHAR(rl);
4761
46.7k
    if (r == 0)
4762
41
        goto not_terminated;
4763
46.7k
    if (!IS_CHAR(r)) {
4764
25
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4765
25
                          "xmlParseComment: invalid xmlChar value %d\n",
4766
25
                    r);
4767
25
  xmlFree (buf);
4768
25
  return;
4769
25
    }
4770
46.6k
    NEXTL(rl);
4771
46.6k
    cur = CUR_CHAR(l);
4772
46.6k
    if (cur == 0)
4773
37
        goto not_terminated;
4774
26.4M
    while (IS_CHAR(cur) && /* checked */
4775
26.4M
           ((cur != '>') ||
4776
26.4M
      (r != '-') || (q != '-'))) {
4777
26.4M
  if ((r == '-') && (q == '-')) {
4778
875
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4779
875
  }
4780
26.4M
  if (len + 5 >= size) {
4781
3.24k
      xmlChar *new_buf;
4782
3.24k
            size_t new_size;
4783
4784
3.24k
      new_size = size * 2;
4785
3.24k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4786
3.24k
      if (new_buf == NULL) {
4787
0
    xmlFree (buf);
4788
0
    xmlErrMemory(ctxt, NULL);
4789
0
    return;
4790
0
      }
4791
3.24k
      buf = new_buf;
4792
3.24k
            size = new_size;
4793
3.24k
  }
4794
26.4M
  COPY_BUF(ql,buf,len,q);
4795
26.4M
        if (len > maxLength) {
4796
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4797
0
                         "Comment too big found", NULL);
4798
0
            xmlFree (buf);
4799
0
            return;
4800
0
        }
4801
4802
26.4M
  q = r;
4803
26.4M
  ql = rl;
4804
26.4M
  r = cur;
4805
26.4M
  rl = l;
4806
4807
26.4M
  NEXTL(l);
4808
26.4M
  cur = CUR_CHAR(l);
4809
4810
26.4M
    }
4811
46.6k
    buf[len] = 0;
4812
46.6k
    if (ctxt->instate == XML_PARSER_EOF) {
4813
0
        xmlFree(buf);
4814
0
        return;
4815
0
    }
4816
46.6k
    if (cur == 0) {
4817
160
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4818
160
                       "Comment not terminated \n<!--%.50s\n", buf);
4819
46.4k
    } else if (!IS_CHAR(cur)) {
4820
49
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4821
49
                          "xmlParseComment: invalid xmlChar value %d\n",
4822
49
                    cur);
4823
46.4k
    } else {
4824
46.4k
  if (inputid != ctxt->input->id) {
4825
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4826
0
               "Comment doesn't start and stop in the same"
4827
0
                           " entity\n");
4828
0
  }
4829
46.4k
        NEXT;
4830
46.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4831
46.4k
      (!ctxt->disableSAX))
4832
0
      ctxt->sax->comment(ctxt->userData, buf);
4833
46.4k
    }
4834
46.6k
    xmlFree(buf);
4835
46.6k
    return;
4836
154
not_terminated:
4837
154
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4838
154
       "Comment not terminated\n", NULL);
4839
154
    xmlFree(buf);
4840
154
    return;
4841
46.6k
}
4842
4843
/**
4844
 * xmlParseComment:
4845
 * @ctxt:  an XML parser context
4846
 *
4847
 * DEPRECATED: Internal function, don't use.
4848
 *
4849
 * Parse an XML (SGML) comment. Always consumes '<!'.
4850
 *
4851
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4852
 *  must not occur within comments. "
4853
 *
4854
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4855
 */
4856
void
4857
73.7k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4858
73.7k
    xmlChar *buf = NULL;
4859
73.7k
    size_t size = XML_PARSER_BUFFER_SIZE;
4860
73.7k
    size_t len = 0;
4861
73.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4862
73.7k
                       XML_MAX_HUGE_LENGTH :
4863
73.7k
                       XML_MAX_TEXT_LENGTH;
4864
73.7k
    xmlParserInputState state;
4865
73.7k
    const xmlChar *in;
4866
73.7k
    size_t nbchar = 0;
4867
73.7k
    int ccol;
4868
73.7k
    int inputid;
4869
4870
    /*
4871
     * Check that there is a comment right here.
4872
     */
4873
73.7k
    if ((RAW != '<') || (NXT(1) != '!'))
4874
0
        return;
4875
73.7k
    SKIP(2);
4876
73.7k
    if ((RAW != '-') || (NXT(1) != '-'))
4877
5
        return;
4878
73.7k
    state = ctxt->instate;
4879
73.7k
    ctxt->instate = XML_PARSER_COMMENT;
4880
73.7k
    inputid = ctxt->input->id;
4881
73.7k
    SKIP(2);
4882
73.7k
    GROW;
4883
4884
    /*
4885
     * Accelerated common case where input doesn't need to be
4886
     * modified before passing it to the handler.
4887
     */
4888
73.7k
    in = ctxt->input->cur;
4889
73.7k
    do {
4890
73.7k
  if (*in == 0xA) {
4891
22.5k
      do {
4892
22.5k
    ctxt->input->line++; ctxt->input->col = 1;
4893
22.5k
    in++;
4894
22.5k
      } while (*in == 0xA);
4895
21.6k
  }
4896
233k
get_more:
4897
233k
        ccol = ctxt->input->col;
4898
5.44M
  while (((*in > '-') && (*in <= 0x7F)) ||
4899
5.44M
         ((*in >= 0x20) && (*in < '-')) ||
4900
5.44M
         (*in == 0x09)) {
4901
5.20M
        in++;
4902
5.20M
        ccol++;
4903
5.20M
  }
4904
233k
  ctxt->input->col = ccol;
4905
233k
  if (*in == 0xA) {
4906
228k
      do {
4907
228k
    ctxt->input->line++; ctxt->input->col = 1;
4908
228k
    in++;
4909
228k
      } while (*in == 0xA);
4910
117k
      goto get_more;
4911
117k
  }
4912
116k
  nbchar = in - ctxt->input->cur;
4913
  /*
4914
   * save current set of data
4915
   */
4916
116k
  if (nbchar > 0) {
4917
107k
      if ((ctxt->sax != NULL) &&
4918
107k
    (ctxt->sax->comment != NULL)) {
4919
0
    if (buf == NULL) {
4920
0
        if ((*in == '-') && (in[1] == '-'))
4921
0
            size = nbchar + 1;
4922
0
        else
4923
0
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4924
0
        buf = (xmlChar *) xmlMallocAtomic(size);
4925
0
        if (buf == NULL) {
4926
0
            xmlErrMemory(ctxt, NULL);
4927
0
      ctxt->instate = state;
4928
0
      return;
4929
0
        }
4930
0
        len = 0;
4931
0
    } else if (len + nbchar + 1 >= size) {
4932
0
        xmlChar *new_buf;
4933
0
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4934
0
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4935
0
        if (new_buf == NULL) {
4936
0
            xmlFree (buf);
4937
0
      xmlErrMemory(ctxt, NULL);
4938
0
      ctxt->instate = state;
4939
0
      return;
4940
0
        }
4941
0
        buf = new_buf;
4942
0
    }
4943
0
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4944
0
    len += nbchar;
4945
0
    buf[len] = 0;
4946
0
      }
4947
107k
  }
4948
116k
        if (len > maxLength) {
4949
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4950
0
                         "Comment too big found", NULL);
4951
0
            xmlFree (buf);
4952
0
            return;
4953
0
        }
4954
116k
  ctxt->input->cur = in;
4955
116k
  if (*in == 0xA) {
4956
0
      in++;
4957
0
      ctxt->input->line++; ctxt->input->col = 1;
4958
0
  }
4959
116k
  if (*in == 0xD) {
4960
44.0k
      in++;
4961
44.0k
      if (*in == 0xA) {
4962
395
    ctxt->input->cur = in;
4963
395
    in++;
4964
395
    ctxt->input->line++; ctxt->input->col = 1;
4965
395
    goto get_more;
4966
395
      }
4967
43.6k
      in--;
4968
43.6k
  }
4969
115k
  SHRINK;
4970
115k
  GROW;
4971
115k
        if (ctxt->instate == XML_PARSER_EOF) {
4972
0
            xmlFree(buf);
4973
0
            return;
4974
0
        }
4975
115k
  in = ctxt->input->cur;
4976
115k
  if (*in == '-') {
4977
68.7k
      if (in[1] == '-') {
4978
27.6k
          if (in[2] == '>') {
4979
26.9k
        if (ctxt->input->id != inputid) {
4980
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4981
0
                     "comment doesn't start and stop in the"
4982
0
                                       " same entity\n");
4983
0
        }
4984
26.9k
        SKIP(3);
4985
26.9k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4986
26.9k
            (!ctxt->disableSAX)) {
4987
0
      if (buf != NULL)
4988
0
          ctxt->sax->comment(ctxt->userData, buf);
4989
0
      else
4990
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4991
0
        }
4992
26.9k
        if (buf != NULL)
4993
0
            xmlFree(buf);
4994
26.9k
        if (ctxt->instate != XML_PARSER_EOF)
4995
26.9k
      ctxt->instate = state;
4996
26.9k
        return;
4997
26.9k
    }
4998
688
    if (buf != NULL) {
4999
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5000
0
                          "Double hyphen within comment: "
5001
0
                                      "<!--%.50s\n",
5002
0
              buf);
5003
0
    } else
5004
688
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5005
688
                          "Double hyphen within comment\n", NULL);
5006
688
                if (ctxt->instate == XML_PARSER_EOF) {
5007
0
                    xmlFree(buf);
5008
0
                    return;
5009
0
                }
5010
688
    in++;
5011
688
    ctxt->input->col++;
5012
688
      }
5013
41.8k
      in++;
5014
41.8k
      ctxt->input->col++;
5015
41.8k
      goto get_more;
5016
68.7k
  }
5017
115k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5018
46.8k
    xmlParseCommentComplex(ctxt, buf, len, size);
5019
46.8k
    ctxt->instate = state;
5020
46.8k
    return;
5021
73.7k
}
5022
5023
5024
/**
5025
 * xmlParsePITarget:
5026
 * @ctxt:  an XML parser context
5027
 *
5028
 * DEPRECATED: Internal function, don't use.
5029
 *
5030
 * parse the name of a PI
5031
 *
5032
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5033
 *
5034
 * Returns the PITarget name or NULL
5035
 */
5036
5037
const xmlChar *
5038
184k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5039
184k
    const xmlChar *name;
5040
5041
184k
    name = xmlParseName(ctxt);
5042
184k
    if ((name != NULL) &&
5043
184k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5044
184k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5045
184k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5046
2.02k
  int i;
5047
2.02k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5048
2.02k
      (name[2] == 'l') && (name[3] == 0)) {
5049
461
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5050
461
     "XML declaration allowed only at the start of the document\n");
5051
461
      return(name);
5052
1.56k
  } else if (name[3] == 0) {
5053
229
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5054
229
      return(name);
5055
229
  }
5056
3.80k
  for (i = 0;;i++) {
5057
3.80k
      if (xmlW3CPIs[i] == NULL) break;
5058
2.66k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5059
190
          return(name);
5060
2.66k
  }
5061
1.14k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5062
1.14k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5063
1.14k
          NULL, NULL);
5064
1.14k
    }
5065
183k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5066
458
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5067
458
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5068
458
    }
5069
183k
    return(name);
5070
184k
}
5071
5072
#ifdef LIBXML_CATALOG_ENABLED
5073
/**
5074
 * xmlParseCatalogPI:
5075
 * @ctxt:  an XML parser context
5076
 * @catalog:  the PI value string
5077
 *
5078
 * parse an XML Catalog Processing Instruction.
5079
 *
5080
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5081
 *
5082
 * Occurs only if allowed by the user and if happening in the Misc
5083
 * part of the document before any doctype information.
5084
 * This will add the given catalog to the parsing context in order
5085
 * to be used if a resolution is needed further down in the document.
5086
 */
5087
5088
static void
5089
1.27k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5090
1.27k
    xmlChar *URL = NULL;
5091
1.27k
    const xmlChar *tmp, *base;
5092
1.27k
    xmlChar marker;
5093
5094
1.27k
    tmp = catalog;
5095
1.27k
    while (IS_BLANK_CH(*tmp)) tmp++;
5096
1.27k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5097
196
  goto error;
5098
1.08k
    tmp += 7;
5099
1.50k
    while (IS_BLANK_CH(*tmp)) tmp++;
5100
1.08k
    if (*tmp != '=') {
5101
432
  return;
5102
432
    }
5103
648
    tmp++;
5104
768
    while (IS_BLANK_CH(*tmp)) tmp++;
5105
648
    marker = *tmp;
5106
648
    if ((marker != '\'') && (marker != '"'))
5107
235
  goto error;
5108
413
    tmp++;
5109
413
    base = tmp;
5110
3.70k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5111
413
    if (*tmp == 0)
5112
49
  goto error;
5113
364
    URL = xmlStrndup(base, tmp - base);
5114
364
    tmp++;
5115
565
    while (IS_BLANK_CH(*tmp)) tmp++;
5116
364
    if (*tmp != 0)
5117
94
  goto error;
5118
5119
270
    if (URL != NULL) {
5120
270
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5121
270
  xmlFree(URL);
5122
270
    }
5123
270
    return;
5124
5125
574
error:
5126
574
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5127
574
            "Catalog PI syntax error: %s\n",
5128
574
      catalog, NULL);
5129
574
    if (URL != NULL)
5130
94
  xmlFree(URL);
5131
574
}
5132
#endif
5133
5134
/**
5135
 * xmlParsePI:
5136
 * @ctxt:  an XML parser context
5137
 *
5138
 * DEPRECATED: Internal function, don't use.
5139
 *
5140
 * parse an XML Processing Instruction.
5141
 *
5142
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5143
 *
5144
 * The processing is transferred to SAX once parsed.
5145
 */
5146
5147
void
5148
184k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5149
184k
    xmlChar *buf = NULL;
5150
184k
    size_t len = 0;
5151
184k
    size_t size = XML_PARSER_BUFFER_SIZE;
5152
184k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5153
184k
                       XML_MAX_HUGE_LENGTH :
5154
184k
                       XML_MAX_TEXT_LENGTH;
5155
184k
    int cur, l;
5156
184k
    const xmlChar *target;
5157
184k
    xmlParserInputState state;
5158
5159
184k
    if ((RAW == '<') && (NXT(1) == '?')) {
5160
184k
  int inputid = ctxt->input->id;
5161
184k
  state = ctxt->instate;
5162
184k
        ctxt->instate = XML_PARSER_PI;
5163
  /*
5164
   * this is a Processing Instruction.
5165
   */
5166
184k
  SKIP(2);
5167
5168
  /*
5169
   * Parse the target name and check for special support like
5170
   * namespace.
5171
   */
5172
184k
        target = xmlParsePITarget(ctxt);
5173
184k
  if (target != NULL) {
5174
183k
      if ((RAW == '?') && (NXT(1) == '>')) {
5175
86.7k
    if (inputid != ctxt->input->id) {
5176
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5177
0
                             "PI declaration doesn't start and stop in"
5178
0
                                   " the same entity\n");
5179
0
    }
5180
86.7k
    SKIP(2);
5181
5182
    /*
5183
     * SAX: PI detected.
5184
     */
5185
86.7k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5186
86.7k
        (ctxt->sax->processingInstruction != NULL))
5187
0
        ctxt->sax->processingInstruction(ctxt->userData,
5188
0
                                         target, NULL);
5189
86.7k
    if (ctxt->instate != XML_PARSER_EOF)
5190
86.7k
        ctxt->instate = state;
5191
86.7k
    return;
5192
86.7k
      }
5193
97.2k
      buf = (xmlChar *) xmlMallocAtomic(size);
5194
97.2k
      if (buf == NULL) {
5195
0
    xmlErrMemory(ctxt, NULL);
5196
0
    ctxt->instate = state;
5197
0
    return;
5198
0
      }
5199
97.2k
      if (SKIP_BLANKS == 0) {
5200
1.30k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5201
1.30k
        "ParsePI: PI %s space expected\n", target);
5202
1.30k
      }
5203
97.2k
      cur = CUR_CHAR(l);
5204
8.26M
      while (IS_CHAR(cur) && /* checked */
5205
8.26M
       ((cur != '?') || (NXT(1) != '>'))) {
5206
8.17M
    if (len + 5 >= size) {
5207
8.12k
        xmlChar *tmp;
5208
8.12k
                    size_t new_size = size * 2;
5209
8.12k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5210
8.12k
        if (tmp == NULL) {
5211
0
      xmlErrMemory(ctxt, NULL);
5212
0
      xmlFree(buf);
5213
0
      ctxt->instate = state;
5214
0
      return;
5215
0
        }
5216
8.12k
        buf = tmp;
5217
8.12k
                    size = new_size;
5218
8.12k
    }
5219
8.17M
    COPY_BUF(l,buf,len,cur);
5220
8.17M
                if (len > maxLength) {
5221
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5222
0
                                      "PI %s too big found", target);
5223
0
                    xmlFree(buf);
5224
0
                    ctxt->instate = state;
5225
0
                    return;
5226
0
                }
5227
8.17M
    NEXTL(l);
5228
8.17M
    cur = CUR_CHAR(l);
5229
8.17M
      }
5230
97.2k
      buf[len] = 0;
5231
97.2k
            if (ctxt->instate == XML_PARSER_EOF) {
5232
0
                xmlFree(buf);
5233
0
                return;
5234
0
            }
5235
97.2k
      if (cur != '?') {
5236
736
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5237
736
          "ParsePI: PI %s never end ...\n", target);
5238
96.4k
      } else {
5239
96.4k
    if (inputid != ctxt->input->id) {
5240
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5241
0
                             "PI declaration doesn't start and stop in"
5242
0
                                   " the same entity\n");
5243
0
    }
5244
96.4k
    SKIP(2);
5245
5246
96.4k
#ifdef LIBXML_CATALOG_ENABLED
5247
96.4k
    if (((state == XML_PARSER_MISC) ||
5248
96.4k
               (state == XML_PARSER_START)) &&
5249
96.4k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5250
1.27k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5251
1.27k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5252
1.27k
      (allow == XML_CATA_ALLOW_ALL))
5253
1.27k
      xmlParseCatalogPI(ctxt, buf);
5254
1.27k
    }
5255
96.4k
#endif
5256
5257
5258
    /*
5259
     * SAX: PI detected.
5260
     */
5261
96.4k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5262
96.4k
        (ctxt->sax->processingInstruction != NULL))
5263
0
        ctxt->sax->processingInstruction(ctxt->userData,
5264
0
                                         target, buf);
5265
96.4k
      }
5266
97.2k
      xmlFree(buf);
5267
97.2k
  } else {
5268
345
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5269
345
  }
5270
97.5k
  if (ctxt->instate != XML_PARSER_EOF)
5271
97.5k
      ctxt->instate = state;
5272
97.5k
    }
5273
184k
}
5274
5275
/**
5276
 * xmlParseNotationDecl:
5277
 * @ctxt:  an XML parser context
5278
 *
5279
 * DEPRECATED: Internal function, don't use.
5280
 *
5281
 * Parse a notation declaration. Always consumes '<!'.
5282
 *
5283
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5284
 *
5285
 * Hence there is actually 3 choices:
5286
 *     'PUBLIC' S PubidLiteral
5287
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5288
 * and 'SYSTEM' S SystemLiteral
5289
 *
5290
 * See the NOTE on xmlParseExternalID().
5291
 */
5292
5293
void
5294
2.19k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5295
2.19k
    const xmlChar *name;
5296
2.19k
    xmlChar *Pubid;
5297
2.19k
    xmlChar *Systemid;
5298
5299
2.19k
    if ((CUR != '<') || (NXT(1) != '!'))
5300
0
        return;
5301
2.19k
    SKIP(2);
5302
5303
2.19k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5304
2.16k
  int inputid = ctxt->input->id;
5305
2.16k
  SKIP(8);
5306
2.16k
  if (SKIP_BLANKS == 0) {
5307
137
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5308
137
         "Space required after '<!NOTATION'\n");
5309
137
      return;
5310
137
  }
5311
5312
2.02k
        name = xmlParseName(ctxt);
5313
2.02k
  if (name == NULL) {
5314
271
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5315
271
      return;
5316
271
  }
5317
1.75k
  if (xmlStrchr(name, ':') != NULL) {
5318
78
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5319
78
         "colons are forbidden from notation names '%s'\n",
5320
78
         name, NULL, NULL);
5321
78
  }
5322
1.75k
  if (SKIP_BLANKS == 0) {
5323
46
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5324
46
         "Space required after the NOTATION name'\n");
5325
46
      return;
5326
46
  }
5327
5328
  /*
5329
   * Parse the IDs.
5330
   */
5331
1.71k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5332
1.71k
  SKIP_BLANKS;
5333
5334
1.71k
  if (RAW == '>') {
5335
487
      if (inputid != ctxt->input->id) {
5336
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5337
0
                         "Notation declaration doesn't start and stop"
5338
0
                               " in the same entity\n");
5339
0
      }
5340
487
      NEXT;
5341
487
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5342
487
    (ctxt->sax->notationDecl != NULL))
5343
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5344
1.22k
  } else {
5345
1.22k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5346
1.22k
  }
5347
1.71k
  if (Systemid != NULL) xmlFree(Systemid);
5348
1.71k
  if (Pubid != NULL) xmlFree(Pubid);
5349
1.71k
    }
5350
2.19k
}
5351
5352
/**
5353
 * xmlParseEntityDecl:
5354
 * @ctxt:  an XML parser context
5355
 *
5356
 * DEPRECATED: Internal function, don't use.
5357
 *
5358
 * Parse an entity declaration. Always consumes '<!'.
5359
 *
5360
 * [70] EntityDecl ::= GEDecl | PEDecl
5361
 *
5362
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5363
 *
5364
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5365
 *
5366
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5367
 *
5368
 * [74] PEDef ::= EntityValue | ExternalID
5369
 *
5370
 * [76] NDataDecl ::= S 'NDATA' S Name
5371
 *
5372
 * [ VC: Notation Declared ]
5373
 * The Name must match the declared name of a notation.
5374
 */
5375
5376
void
5377
13.3k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5378
13.3k
    const xmlChar *name = NULL;
5379
13.3k
    xmlChar *value = NULL;
5380
13.3k
    xmlChar *URI = NULL, *literal = NULL;
5381
13.3k
    const xmlChar *ndata = NULL;
5382
13.3k
    int isParameter = 0;
5383
13.3k
    xmlChar *orig = NULL;
5384
5385
13.3k
    if ((CUR != '<') || (NXT(1) != '!'))
5386
0
        return;
5387
13.3k
    SKIP(2);
5388
5389
    /* GROW; done in the caller */
5390
13.3k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5391
13.3k
  int inputid = ctxt->input->id;
5392
13.3k
  SKIP(6);
5393
13.3k
  if (SKIP_BLANKS == 0) {
5394
8.30k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5395
8.30k
         "Space required after '<!ENTITY'\n");
5396
8.30k
  }
5397
5398
13.3k
  if (RAW == '%') {
5399
2.76k
      NEXT;
5400
2.76k
      if (SKIP_BLANKS == 0) {
5401
2.02k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5402
2.02k
             "Space required after '%%'\n");
5403
2.02k
      }
5404
2.76k
      isParameter = 1;
5405
2.76k
  }
5406
5407
13.3k
        name = xmlParseName(ctxt);
5408
13.3k
  if (name == NULL) {
5409
358
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5410
358
                     "xmlParseEntityDecl: no name\n");
5411
358
            return;
5412
358
  }
5413
12.9k
  if (xmlStrchr(name, ':') != NULL) {
5414
1.65k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5415
1.65k
         "colons are forbidden from entities names '%s'\n",
5416
1.65k
         name, NULL, NULL);
5417
1.65k
  }
5418
12.9k
  if (SKIP_BLANKS == 0) {
5419
8.77k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5420
8.77k
         "Space required after the entity name\n");
5421
8.77k
  }
5422
5423
12.9k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5424
  /*
5425
   * handle the various case of definitions...
5426
   */
5427
12.9k
  if (isParameter) {
5428
2.67k
      if ((RAW == '"') || (RAW == '\'')) {
5429
1.53k
          value = xmlParseEntityValue(ctxt, &orig);
5430
1.53k
    if (value) {
5431
291
        if ((ctxt->sax != NULL) &&
5432
291
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5433
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5434
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5435
0
            NULL, NULL, value);
5436
291
    }
5437
1.53k
      } else {
5438
1.13k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5439
1.13k
    if ((URI == NULL) && (literal == NULL)) {
5440
272
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5441
272
    }
5442
1.13k
    if (URI) {
5443
651
        xmlURIPtr uri;
5444
5445
651
        uri = xmlParseURI((const char *) URI);
5446
651
        if (uri == NULL) {
5447
260
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5448
260
             "Invalid URI: %s\n", URI);
5449
      /*
5450
       * This really ought to be a well formedness error
5451
       * but the XML Core WG decided otherwise c.f. issue
5452
       * E26 of the XML erratas.
5453
       */
5454
391
        } else {
5455
391
      if (uri->fragment != NULL) {
5456
          /*
5457
           * Okay this is foolish to block those but not
5458
           * invalid URIs.
5459
           */
5460
3
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5461
388
      } else {
5462
388
          if ((ctxt->sax != NULL) &&
5463
388
        (!ctxt->disableSAX) &&
5464
388
        (ctxt->sax->entityDecl != NULL))
5465
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5466
0
              XML_EXTERNAL_PARAMETER_ENTITY,
5467
0
              literal, URI, NULL);
5468
388
      }
5469
391
      xmlFreeURI(uri);
5470
391
        }
5471
651
    }
5472
1.13k
      }
5473
10.2k
  } else {
5474
10.2k
      if ((RAW == '"') || (RAW == '\'')) {
5475
8.21k
          value = xmlParseEntityValue(ctxt, &orig);
5476
8.21k
    if ((ctxt->sax != NULL) &&
5477
8.21k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5478
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5479
0
        XML_INTERNAL_GENERAL_ENTITY,
5480
0
        NULL, NULL, value);
5481
    /*
5482
     * For expat compatibility in SAX mode.
5483
     */
5484
8.21k
    if ((ctxt->myDoc == NULL) ||
5485
8.21k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5486
8.21k
        if (ctxt->myDoc == NULL) {
5487
655
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5488
655
      if (ctxt->myDoc == NULL) {
5489
0
          xmlErrMemory(ctxt, "New Doc failed");
5490
0
          goto done;
5491
0
      }
5492
655
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5493
655
        }
5494
8.21k
        if (ctxt->myDoc->intSubset == NULL)
5495
655
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5496
655
              BAD_CAST "fake", NULL, NULL);
5497
5498
8.21k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5499
8.21k
                    NULL, NULL, value);
5500
8.21k
    }
5501
8.21k
      } else {
5502
2.08k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5503
2.08k
    if ((URI == NULL) && (literal == NULL)) {
5504
583
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5505
583
    }
5506
2.08k
    if (URI) {
5507
1.24k
        xmlURIPtr uri;
5508
5509
1.24k
        uri = xmlParseURI((const char *)URI);
5510
1.24k
        if (uri == NULL) {
5511
505
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5512
505
             "Invalid URI: %s\n", URI);
5513
      /*
5514
       * This really ought to be a well formedness error
5515
       * but the XML Core WG decided otherwise c.f. issue
5516
       * E26 of the XML erratas.
5517
       */
5518
736
        } else {
5519
736
      if (uri->fragment != NULL) {
5520
          /*
5521
           * Okay this is foolish to block those but not
5522
           * invalid URIs.
5523
           */
5524
70
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5525
70
      }
5526
736
      xmlFreeURI(uri);
5527
736
        }
5528
1.24k
    }
5529
2.08k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5530
197
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5531
197
           "Space required before 'NDATA'\n");
5532
197
    }
5533
2.08k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5534
317
        SKIP(5);
5535
317
        if (SKIP_BLANKS == 0) {
5536
60
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5537
60
               "Space required after 'NDATA'\n");
5538
60
        }
5539
317
        ndata = xmlParseName(ctxt);
5540
317
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5541
317
            (ctxt->sax->unparsedEntityDecl != NULL))
5542
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5543
0
            literal, URI, ndata);
5544
1.76k
    } else {
5545
1.76k
        if ((ctxt->sax != NULL) &&
5546
1.76k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5547
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5548
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5549
0
            literal, URI, NULL);
5550
        /*
5551
         * For expat compatibility in SAX mode.
5552
         * assuming the entity replacement was asked for
5553
         */
5554
1.76k
        if ((ctxt->replaceEntities != 0) &&
5555
1.76k
      ((ctxt->myDoc == NULL) ||
5556
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5557
0
      if (ctxt->myDoc == NULL) {
5558
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5559
0
          if (ctxt->myDoc == NULL) {
5560
0
              xmlErrMemory(ctxt, "New Doc failed");
5561
0
        goto done;
5562
0
          }
5563
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5564
0
      }
5565
5566
0
      if (ctxt->myDoc->intSubset == NULL)
5567
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5568
0
            BAD_CAST "fake", NULL, NULL);
5569
0
      xmlSAX2EntityDecl(ctxt, name,
5570
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5571
0
                  literal, URI, NULL);
5572
0
        }
5573
1.76k
    }
5574
2.08k
      }
5575
10.2k
  }
5576
12.9k
  if (ctxt->instate == XML_PARSER_EOF)
5577
0
      goto done;
5578
12.9k
  SKIP_BLANKS;
5579
12.9k
  if (RAW != '>') {
5580
728
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5581
728
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5582
728
      xmlHaltParser(ctxt);
5583
12.2k
  } else {
5584
12.2k
      if (inputid != ctxt->input->id) {
5585
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5586
0
                         "Entity declaration doesn't start and stop in"
5587
0
                               " the same entity\n");
5588
0
      }
5589
12.2k
      NEXT;
5590
12.2k
  }
5591
12.9k
  if (orig != NULL) {
5592
      /*
5593
       * Ugly mechanism to save the raw entity value.
5594
       */
5595
5.96k
      xmlEntityPtr cur = NULL;
5596
5597
5.96k
      if (isParameter) {
5598
351
          if ((ctxt->sax != NULL) &&
5599
351
        (ctxt->sax->getParameterEntity != NULL))
5600
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5601
5.61k
      } else {
5602
5.61k
          if ((ctxt->sax != NULL) &&
5603
5.61k
        (ctxt->sax->getEntity != NULL))
5604
5.61k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5605
5.61k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5606
0
        cur = xmlSAX2GetEntity(ctxt, name);
5607
0
    }
5608
5.61k
      }
5609
5.96k
            if ((cur != NULL) && (cur->orig == NULL)) {
5610
1
    cur->orig = orig;
5611
1
                orig = NULL;
5612
1
      }
5613
5.96k
  }
5614
5615
12.9k
done:
5616
12.9k
  if (value != NULL) xmlFree(value);
5617
12.9k
  if (URI != NULL) xmlFree(URI);
5618
12.9k
  if (literal != NULL) xmlFree(literal);
5619
12.9k
        if (orig != NULL) xmlFree(orig);
5620
12.9k
    }
5621
13.3k
}
5622
5623
/**
5624
 * xmlParseDefaultDecl:
5625
 * @ctxt:  an XML parser context
5626
 * @value:  Receive a possible fixed default value for the attribute
5627
 *
5628
 * DEPRECATED: Internal function, don't use.
5629
 *
5630
 * Parse an attribute default declaration
5631
 *
5632
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5633
 *
5634
 * [ VC: Required Attribute ]
5635
 * if the default declaration is the keyword #REQUIRED, then the
5636
 * attribute must be specified for all elements of the type in the
5637
 * attribute-list declaration.
5638
 *
5639
 * [ VC: Attribute Default Legal ]
5640
 * The declared default value must meet the lexical constraints of
5641
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5642
 *
5643
 * [ VC: Fixed Attribute Default ]
5644
 * if an attribute has a default value declared with the #FIXED
5645
 * keyword, instances of that attribute must match the default value.
5646
 *
5647
 * [ WFC: No < in Attribute Values ]
5648
 * handled in xmlParseAttValue()
5649
 *
5650
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5651
 *          or XML_ATTRIBUTE_FIXED.
5652
 */
5653
5654
int
5655
13.7k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5656
13.7k
    int val;
5657
13.7k
    xmlChar *ret;
5658
5659
13.7k
    *value = NULL;
5660
13.7k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5661
108
  SKIP(9);
5662
108
  return(XML_ATTRIBUTE_REQUIRED);
5663
108
    }
5664
13.6k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5665
116
  SKIP(8);
5666
116
  return(XML_ATTRIBUTE_IMPLIED);
5667
116
    }
5668
13.5k
    val = XML_ATTRIBUTE_NONE;
5669
13.5k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5670
75
  SKIP(6);
5671
75
  val = XML_ATTRIBUTE_FIXED;
5672
75
  if (SKIP_BLANKS == 0) {
5673
21
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5674
21
         "Space required after '#FIXED'\n");
5675
21
  }
5676
75
    }
5677
13.5k
    ret = xmlParseAttValue(ctxt);
5678
13.5k
    ctxt->instate = XML_PARSER_DTD;
5679
13.5k
    if (ret == NULL) {
5680
296
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5681
296
           "Attribute default value declaration error\n");
5682
296
    } else
5683
13.2k
        *value = ret;
5684
13.5k
    return(val);
5685
13.6k
}
5686
5687
/**
5688
 * xmlParseNotationType:
5689
 * @ctxt:  an XML parser context
5690
 *
5691
 * DEPRECATED: Internal function, don't use.
5692
 *
5693
 * parse an Notation attribute type.
5694
 *
5695
 * Note: the leading 'NOTATION' S part has already being parsed...
5696
 *
5697
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5698
 *
5699
 * [ VC: Notation Attributes ]
5700
 * Values of this type must match one of the notation names included
5701
 * in the declaration; all notation names in the declaration must be declared.
5702
 *
5703
 * Returns: the notation attribute tree built while parsing
5704
 */
5705
5706
xmlEnumerationPtr
5707
634
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5708
634
    const xmlChar *name;
5709
634
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5710
5711
634
    if (RAW != '(') {
5712
304
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5713
304
  return(NULL);
5714
304
    }
5715
1.58k
    do {
5716
1.58k
        NEXT;
5717
1.58k
  SKIP_BLANKS;
5718
1.58k
        name = xmlParseName(ctxt);
5719
1.58k
  if (name == NULL) {
5720
96
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5721
96
         "Name expected in NOTATION declaration\n");
5722
96
            xmlFreeEnumeration(ret);
5723
96
      return(NULL);
5724
96
  }
5725
1.48k
  tmp = ret;
5726
13.1k
  while (tmp != NULL) {
5727
12.3k
      if (xmlStrEqual(name, tmp->name)) {
5728
728
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5729
728
    "standalone: attribute notation value token %s duplicated\n",
5730
728
         name, NULL);
5731
728
    if (!xmlDictOwns(ctxt->dict, name))
5732
0
        xmlFree((xmlChar *) name);
5733
728
    break;
5734
728
      }
5735
11.6k
      tmp = tmp->next;
5736
11.6k
  }
5737
1.48k
  if (tmp == NULL) {
5738
760
      cur = xmlCreateEnumeration(name);
5739
760
      if (cur == NULL) {
5740
0
                xmlFreeEnumeration(ret);
5741
0
                return(NULL);
5742
0
            }
5743
760
      if (last == NULL) ret = last = cur;
5744
470
      else {
5745
470
    last->next = cur;
5746
470
    last = cur;
5747
470
      }
5748
760
  }
5749
1.48k
  SKIP_BLANKS;
5750
1.48k
    } while (RAW == '|');
5751
234
    if (RAW != ')') {
5752
100
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5753
100
        xmlFreeEnumeration(ret);
5754
100
  return(NULL);
5755
100
    }
5756
134
    NEXT;
5757
134
    return(ret);
5758
234
}
5759
5760
/**
5761
 * xmlParseEnumerationType:
5762
 * @ctxt:  an XML parser context
5763
 *
5764
 * DEPRECATED: Internal function, don't use.
5765
 *
5766
 * parse an Enumeration attribute type.
5767
 *
5768
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5769
 *
5770
 * [ VC: Enumeration ]
5771
 * Values of this type must match one of the Nmtoken tokens in
5772
 * the declaration
5773
 *
5774
 * Returns: the enumeration attribute tree built while parsing
5775
 */
5776
5777
xmlEnumerationPtr
5778
1.37k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5779
1.37k
    xmlChar *name;
5780
1.37k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5781
5782
1.37k
    if (RAW != '(') {
5783
419
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5784
419
  return(NULL);
5785
419
    }
5786
2.09k
    do {
5787
2.09k
        NEXT;
5788
2.09k
  SKIP_BLANKS;
5789
2.09k
        name = xmlParseNmtoken(ctxt);
5790
2.09k
  if (name == NULL) {
5791
62
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5792
62
      return(ret);
5793
62
  }
5794
2.03k
  tmp = ret;
5795
9.62k
  while (tmp != NULL) {
5796
8.17k
      if (xmlStrEqual(name, tmp->name)) {
5797
584
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5798
584
    "standalone: attribute enumeration value token %s duplicated\n",
5799
584
         name, NULL);
5800
584
    if (!xmlDictOwns(ctxt->dict, name))
5801
584
        xmlFree(name);
5802
584
    break;
5803
584
      }
5804
7.58k
      tmp = tmp->next;
5805
7.58k
  }
5806
2.03k
  if (tmp == NULL) {
5807
1.45k
      cur = xmlCreateEnumeration(name);
5808
1.45k
      if (!xmlDictOwns(ctxt->dict, name))
5809
1.45k
    xmlFree(name);
5810
1.45k
      if (cur == NULL) {
5811
0
                xmlFreeEnumeration(ret);
5812
0
                return(NULL);
5813
0
            }
5814
1.45k
      if (last == NULL) ret = last = cur;
5815
547
      else {
5816
547
    last->next = cur;
5817
547
    last = cur;
5818
547
      }
5819
1.45k
  }
5820
2.03k
  SKIP_BLANKS;
5821
2.03k
    } while (RAW == '|');
5822
892
    if (RAW != ')') {
5823
131
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5824
131
  return(ret);
5825
131
    }
5826
761
    NEXT;
5827
761
    return(ret);
5828
892
}
5829
5830
/**
5831
 * xmlParseEnumeratedType:
5832
 * @ctxt:  an XML parser context
5833
 * @tree:  the enumeration tree built while parsing
5834
 *
5835
 * DEPRECATED: Internal function, don't use.
5836
 *
5837
 * parse an Enumerated attribute type.
5838
 *
5839
 * [57] EnumeratedType ::= NotationType | Enumeration
5840
 *
5841
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5842
 *
5843
 *
5844
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5845
 */
5846
5847
int
5848
2.01k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5849
2.01k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5850
640
  SKIP(8);
5851
640
  if (SKIP_BLANKS == 0) {
5852
6
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5853
6
         "Space required after 'NOTATION'\n");
5854
6
      return(0);
5855
6
  }
5856
634
  *tree = xmlParseNotationType(ctxt);
5857
634
  if (*tree == NULL) return(0);
5858
134
  return(XML_ATTRIBUTE_NOTATION);
5859
634
    }
5860
1.37k
    *tree = xmlParseEnumerationType(ctxt);
5861
1.37k
    if (*tree == NULL) return(0);
5862
904
    return(XML_ATTRIBUTE_ENUMERATION);
5863
1.37k
}
5864
5865
/**
5866
 * xmlParseAttributeType:
5867
 * @ctxt:  an XML parser context
5868
 * @tree:  the enumeration tree built while parsing
5869
 *
5870
 * DEPRECATED: Internal function, don't use.
5871
 *
5872
 * parse the Attribute list def for an element
5873
 *
5874
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5875
 *
5876
 * [55] StringType ::= 'CDATA'
5877
 *
5878
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5879
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5880
 *
5881
 * Validity constraints for attribute values syntax are checked in
5882
 * xmlValidateAttributeValue()
5883
 *
5884
 * [ VC: ID ]
5885
 * Values of type ID must match the Name production. A name must not
5886
 * appear more than once in an XML document as a value of this type;
5887
 * i.e., ID values must uniquely identify the elements which bear them.
5888
 *
5889
 * [ VC: One ID per Element Type ]
5890
 * No element type may have more than one ID attribute specified.
5891
 *
5892
 * [ VC: ID Attribute Default ]
5893
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5894
 *
5895
 * [ VC: IDREF ]
5896
 * Values of type IDREF must match the Name production, and values
5897
 * of type IDREFS must match Names; each IDREF Name must match the value
5898
 * of an ID attribute on some element in the XML document; i.e. IDREF
5899
 * values must match the value of some ID attribute.
5900
 *
5901
 * [ VC: Entity Name ]
5902
 * Values of type ENTITY must match the Name production, values
5903
 * of type ENTITIES must match Names; each Entity Name must match the
5904
 * name of an unparsed entity declared in the DTD.
5905
 *
5906
 * [ VC: Name Token ]
5907
 * Values of type NMTOKEN must match the Nmtoken production; values
5908
 * of type NMTOKENS must match Nmtokens.
5909
 *
5910
 * Returns the attribute type
5911
 */
5912
int
5913
15.1k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5914
15.1k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5915
892
  SKIP(5);
5916
892
  return(XML_ATTRIBUTE_CDATA);
5917
14.2k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5918
1.53k
  SKIP(6);
5919
1.53k
  return(XML_ATTRIBUTE_IDREFS);
5920
12.7k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5921
254
  SKIP(5);
5922
254
  return(XML_ATTRIBUTE_IDREF);
5923
12.4k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5924
9.89k
        SKIP(2);
5925
9.89k
  return(XML_ATTRIBUTE_ID);
5926
9.89k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5927
89
  SKIP(6);
5928
89
  return(XML_ATTRIBUTE_ENTITY);
5929
2.49k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5930
163
  SKIP(8);
5931
163
  return(XML_ATTRIBUTE_ENTITIES);
5932
2.33k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5933
117
  SKIP(8);
5934
117
  return(XML_ATTRIBUTE_NMTOKENS);
5935
2.21k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5936
200
  SKIP(7);
5937
200
  return(XML_ATTRIBUTE_NMTOKEN);
5938
200
     }
5939
2.01k
     return(xmlParseEnumeratedType(ctxt, tree));
5940
15.1k
}
5941
5942
/**
5943
 * xmlParseAttributeListDecl:
5944
 * @ctxt:  an XML parser context
5945
 *
5946
 * DEPRECATED: Internal function, don't use.
5947
 *
5948
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5949
 *
5950
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5951
 *
5952
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5953
 *
5954
 */
5955
void
5956
4.35k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5957
4.35k
    const xmlChar *elemName;
5958
4.35k
    const xmlChar *attrName;
5959
4.35k
    xmlEnumerationPtr tree;
5960
5961
4.35k
    if ((CUR != '<') || (NXT(1) != '!'))
5962
0
        return;
5963
4.35k
    SKIP(2);
5964
5965
4.35k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5966
4.30k
  int inputid = ctxt->input->id;
5967
5968
4.30k
  SKIP(7);
5969
4.30k
  if (SKIP_BLANKS == 0) {
5970
1.25k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5971
1.25k
                     "Space required after '<!ATTLIST'\n");
5972
1.25k
  }
5973
4.30k
        elemName = xmlParseName(ctxt);
5974
4.30k
  if (elemName == NULL) {
5975
172
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5976
172
         "ATTLIST: no name for Element\n");
5977
172
      return;
5978
172
  }
5979
4.13k
  SKIP_BLANKS;
5980
4.13k
  GROW;
5981
17.4k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5982
16.1k
      int type;
5983
16.1k
      int def;
5984
16.1k
      xmlChar *defaultValue = NULL;
5985
5986
16.1k
      GROW;
5987
16.1k
            tree = NULL;
5988
16.1k
      attrName = xmlParseName(ctxt);
5989
16.1k
      if (attrName == NULL) {
5990
563
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5991
563
             "ATTLIST: no name for Attribute\n");
5992
563
    break;
5993
563
      }
5994
15.5k
      GROW;
5995
15.5k
      if (SKIP_BLANKS == 0) {
5996
429
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5997
429
            "Space required after the attribute name\n");
5998
429
    break;
5999
429
      }
6000
6001
15.1k
      type = xmlParseAttributeType(ctxt, &tree);
6002
15.1k
      if (type <= 0) {
6003
975
          break;
6004
975
      }
6005
6006
14.1k
      GROW;
6007
14.1k
      if (SKIP_BLANKS == 0) {
6008
384
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6009
384
             "Space required after the attribute type\n");
6010
384
          if (tree != NULL)
6011
147
        xmlFreeEnumeration(tree);
6012
384
    break;
6013
384
      }
6014
6015
13.7k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6016
13.7k
      if (def <= 0) {
6017
0
                if (defaultValue != NULL)
6018
0
        xmlFree(defaultValue);
6019
0
          if (tree != NULL)
6020
0
        xmlFreeEnumeration(tree);
6021
0
          break;
6022
0
      }
6023
13.7k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6024
12.4k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6025
6026
13.7k
      GROW;
6027
13.7k
            if (RAW != '>') {
6028
12.6k
    if (SKIP_BLANKS == 0) {
6029
487
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6030
487
      "Space required after the attribute default value\n");
6031
487
        if (defaultValue != NULL)
6032
205
      xmlFree(defaultValue);
6033
487
        if (tree != NULL)
6034
221
      xmlFreeEnumeration(tree);
6035
487
        break;
6036
487
    }
6037
12.6k
      }
6038
13.3k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6039
13.3k
    (ctxt->sax->attributeDecl != NULL))
6040
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6041
0
                          type, def, defaultValue, tree);
6042
13.3k
      else if (tree != NULL)
6043
670
    xmlFreeEnumeration(tree);
6044
6045
13.3k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6046
13.3k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6047
13.3k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6048
13.0k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6049
13.0k
      }
6050
13.3k
      if (ctxt->sax2) {
6051
13.3k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6052
13.3k
      }
6053
13.3k
      if (defaultValue != NULL)
6054
13.0k
          xmlFree(defaultValue);
6055
13.3k
      GROW;
6056
13.3k
  }
6057
4.13k
  if (RAW == '>') {
6058
1.30k
      if (inputid != ctxt->input->id) {
6059
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6060
0
                               "Attribute list declaration doesn't start and"
6061
0
                               " stop in the same entity\n");
6062
0
      }
6063
1.30k
      NEXT;
6064
1.30k
  }
6065
4.13k
    }
6066
4.35k
}
6067
6068
/**
6069
 * xmlParseElementMixedContentDecl:
6070
 * @ctxt:  an XML parser context
6071
 * @inputchk:  the input used for the current entity, needed for boundary checks
6072
 *
6073
 * DEPRECATED: Internal function, don't use.
6074
 *
6075
 * parse the declaration for a Mixed Element content
6076
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6077
 *
6078
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6079
 *                '(' S? '#PCDATA' S? ')'
6080
 *
6081
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6082
 *
6083
 * [ VC: No Duplicate Types ]
6084
 * The same name must not appear more than once in a single
6085
 * mixed-content declaration.
6086
 *
6087
 * returns: the list of the xmlElementContentPtr describing the element choices
6088
 */
6089
xmlElementContentPtr
6090
752
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6091
752
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6092
752
    const xmlChar *elem = NULL;
6093
6094
752
    GROW;
6095
752
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6096
752
  SKIP(7);
6097
752
  SKIP_BLANKS;
6098
752
  if (RAW == ')') {
6099
257
      if (ctxt->input->id != inputchk) {
6100
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6101
0
                               "Element content declaration doesn't start and"
6102
0
                               " stop in the same entity\n");
6103
0
      }
6104
257
      NEXT;
6105
257
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6106
257
      if (ret == NULL)
6107
0
          return(NULL);
6108
257
      if (RAW == '*') {
6109
188
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6110
188
    NEXT;
6111
188
      }
6112
257
      return(ret);
6113
257
  }
6114
495
  if ((RAW == '(') || (RAW == '|')) {
6115
449
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6116
449
      if (ret == NULL) return(NULL);
6117
449
  }
6118
1.19k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6119
900
      NEXT;
6120
900
      if (elem == NULL) {
6121
448
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6122
448
    if (ret == NULL) {
6123
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6124
0
                    return(NULL);
6125
0
                }
6126
448
    ret->c1 = cur;
6127
448
    if (cur != NULL)
6128
448
        cur->parent = ret;
6129
448
    cur = ret;
6130
452
      } else {
6131
452
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6132
452
    if (n == NULL) {
6133
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6134
0
                    return(NULL);
6135
0
                }
6136
452
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6137
452
    if (n->c1 != NULL)
6138
452
        n->c1->parent = n;
6139
452
          cur->c2 = n;
6140
452
    if (n != NULL)
6141
452
        n->parent = cur;
6142
452
    cur = n;
6143
452
      }
6144
900
      SKIP_BLANKS;
6145
900
      elem = xmlParseName(ctxt);
6146
900
      if (elem == NULL) {
6147
204
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6148
204
      "xmlParseElementMixedContentDecl : Name expected\n");
6149
204
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6150
204
    return(NULL);
6151
204
      }
6152
696
      SKIP_BLANKS;
6153
696
      GROW;
6154
696
  }
6155
291
  if ((RAW == ')') && (NXT(1) == '*')) {
6156
236
      if (elem != NULL) {
6157
236
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6158
236
                                   XML_ELEMENT_CONTENT_ELEMENT);
6159
236
    if (cur->c2 != NULL)
6160
236
        cur->c2->parent = cur;
6161
236
            }
6162
236
            if (ret != NULL)
6163
236
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6164
236
      if (ctxt->input->id != inputchk) {
6165
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6166
0
                               "Element content declaration doesn't start and"
6167
0
                               " stop in the same entity\n");
6168
0
      }
6169
236
      SKIP(2);
6170
236
  } else {
6171
55
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6172
55
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6173
55
      return(NULL);
6174
55
  }
6175
6176
291
    } else {
6177
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6178
0
    }
6179
236
    return(ret);
6180
752
}
6181
6182
/**
6183
 * xmlParseElementChildrenContentDeclPriv:
6184
 * @ctxt:  an XML parser context
6185
 * @inputchk:  the input used for the current entity, needed for boundary checks
6186
 * @depth: the level of recursion
6187
 *
6188
 * parse the declaration for a Mixed Element content
6189
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6190
 *
6191
 *
6192
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6193
 *
6194
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6195
 *
6196
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6197
 *
6198
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6199
 *
6200
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6201
 * TODO Parameter-entity replacement text must be properly nested
6202
 *  with parenthesized groups. That is to say, if either of the
6203
 *  opening or closing parentheses in a choice, seq, or Mixed
6204
 *  construct is contained in the replacement text for a parameter
6205
 *  entity, both must be contained in the same replacement text. For
6206
 *  interoperability, if a parameter-entity reference appears in a
6207
 *  choice, seq, or Mixed construct, its replacement text should not
6208
 *  be empty, and neither the first nor last non-blank character of
6209
 *  the replacement text should be a connector (| or ,).
6210
 *
6211
 * Returns the tree of xmlElementContentPtr describing the element
6212
 *          hierarchy.
6213
 */
6214
static xmlElementContentPtr
6215
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6216
46.9k
                                       int depth) {
6217
46.9k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6218
46.9k
    const xmlChar *elem;
6219
46.9k
    xmlChar type = 0;
6220
6221
46.9k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6222
46.9k
        (depth >  2048)) {
6223
2
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6224
2
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6225
2
                          depth);
6226
2
  return(NULL);
6227
2
    }
6228
46.9k
    SKIP_BLANKS;
6229
46.9k
    GROW;
6230
46.9k
    if (RAW == '(') {
6231
41.4k
  int inputid = ctxt->input->id;
6232
6233
        /* Recurse on first child */
6234
41.4k
  NEXT;
6235
41.4k
  SKIP_BLANKS;
6236
41.4k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6237
41.4k
                                                           depth + 1);
6238
41.4k
        if (cur == NULL)
6239
36.7k
            return(NULL);
6240
4.72k
  SKIP_BLANKS;
6241
4.72k
  GROW;
6242
5.45k
    } else {
6243
5.45k
  elem = xmlParseName(ctxt);
6244
5.45k
  if (elem == NULL) {
6245
302
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6246
302
      return(NULL);
6247
302
  }
6248
5.15k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6249
5.15k
  if (cur == NULL) {
6250
0
      xmlErrMemory(ctxt, NULL);
6251
0
      return(NULL);
6252
0
  }
6253
5.15k
  GROW;
6254
5.15k
  if (RAW == '?') {
6255
919
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6256
919
      NEXT;
6257
4.23k
  } else if (RAW == '*') {
6258
950
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6259
950
      NEXT;
6260
3.28k
  } else if (RAW == '+') {
6261
600
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6262
600
      NEXT;
6263
2.68k
  } else {
6264
2.68k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6265
2.68k
  }
6266
5.15k
  GROW;
6267
5.15k
    }
6268
9.87k
    SKIP_BLANKS;
6269
13.6k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6270
        /*
6271
   * Each loop we parse one separator and one element.
6272
   */
6273
6.75k
        if (RAW == ',') {
6274
1.59k
      if (type == 0) type = CUR;
6275
6276
      /*
6277
       * Detect "Name | Name , Name" error
6278
       */
6279
478
      else if (type != CUR) {
6280
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6281
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6282
1
                      type);
6283
1
    if ((last != NULL) && (last != ret))
6284
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6285
1
    if (ret != NULL)
6286
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6287
1
    return(NULL);
6288
1
      }
6289
1.59k
      NEXT;
6290
6291
1.59k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6292
1.59k
      if (op == NULL) {
6293
0
    if ((last != NULL) && (last != ret))
6294
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6295
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6296
0
    return(NULL);
6297
0
      }
6298
1.59k
      if (last == NULL) {
6299
1.12k
    op->c1 = ret;
6300
1.12k
    if (ret != NULL)
6301
1.12k
        ret->parent = op;
6302
1.12k
    ret = cur = op;
6303
1.12k
      } else {
6304
477
          cur->c2 = op;
6305
477
    if (op != NULL)
6306
477
        op->parent = cur;
6307
477
    op->c1 = last;
6308
477
    if (last != NULL)
6309
477
        last->parent = op;
6310
477
    cur =op;
6311
477
    last = NULL;
6312
477
      }
6313
5.15k
  } else if (RAW == '|') {
6314
4.39k
      if (type == 0) type = CUR;
6315
6316
      /*
6317
       * Detect "Name , Name | Name" error
6318
       */
6319
975
      else if (type != CUR) {
6320
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6321
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6322
1
          type);
6323
1
    if ((last != NULL) && (last != ret))
6324
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6325
1
    if (ret != NULL)
6326
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6327
1
    return(NULL);
6328
1
      }
6329
4.39k
      NEXT;
6330
6331
4.39k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6332
4.39k
      if (op == NULL) {
6333
0
    if ((last != NULL) && (last != ret))
6334
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6335
0
    if (ret != NULL)
6336
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6337
0
    return(NULL);
6338
0
      }
6339
4.39k
      if (last == NULL) {
6340
3.41k
    op->c1 = ret;
6341
3.41k
    if (ret != NULL)
6342
3.41k
        ret->parent = op;
6343
3.41k
    ret = cur = op;
6344
3.41k
      } else {
6345
974
          cur->c2 = op;
6346
974
    if (op != NULL)
6347
974
        op->parent = cur;
6348
974
    op->c1 = last;
6349
974
    if (last != NULL)
6350
974
        last->parent = op;
6351
974
    cur =op;
6352
974
    last = NULL;
6353
974
      }
6354
4.39k
  } else {
6355
760
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6356
760
      if ((last != NULL) && (last != ret))
6357
502
          xmlFreeDocElementContent(ctxt->myDoc, last);
6358
760
      if (ret != NULL)
6359
760
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6360
760
      return(NULL);
6361
760
  }
6362
5.98k
  GROW;
6363
5.98k
  SKIP_BLANKS;
6364
5.98k
  GROW;
6365
5.98k
  if (RAW == '(') {
6366
3.63k
      int inputid = ctxt->input->id;
6367
      /* Recurse on second child */
6368
3.63k
      NEXT;
6369
3.63k
      SKIP_BLANKS;
6370
3.63k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6371
3.63k
                                                          depth + 1);
6372
3.63k
            if (last == NULL) {
6373
1.69k
    if (ret != NULL)
6374
1.69k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6375
1.69k
    return(NULL);
6376
1.69k
            }
6377
1.93k
      SKIP_BLANKS;
6378
2.35k
  } else {
6379
2.35k
      elem = xmlParseName(ctxt);
6380
2.35k
      if (elem == NULL) {
6381
550
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6382
550
    if (ret != NULL)
6383
550
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6384
550
    return(NULL);
6385
550
      }
6386
1.80k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6387
1.80k
      if (last == NULL) {
6388
0
    if (ret != NULL)
6389
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6390
0
    return(NULL);
6391
0
      }
6392
1.80k
      if (RAW == '?') {
6393
188
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6394
188
    NEXT;
6395
1.62k
      } else if (RAW == '*') {
6396
80
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6397
80
    NEXT;
6398
1.54k
      } else if (RAW == '+') {
6399
40
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6400
40
    NEXT;
6401
1.50k
      } else {
6402
1.50k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6403
1.50k
      }
6404
1.80k
  }
6405
3.74k
  SKIP_BLANKS;
6406
3.74k
  GROW;
6407
3.74k
    }
6408
6.86k
    if ((cur != NULL) && (last != NULL)) {
6409
1.78k
        cur->c2 = last;
6410
1.78k
  if (last != NULL)
6411
1.78k
      last->parent = cur;
6412
1.78k
    }
6413
6.86k
    if (ctxt->input->id != inputchk) {
6414
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6415
0
                       "Element content declaration doesn't start and stop in"
6416
0
                       " the same entity\n");
6417
0
    }
6418
6.86k
    NEXT;
6419
6.86k
    if (RAW == '?') {
6420
1.31k
  if (ret != NULL) {
6421
1.31k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6422
1.31k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6423
792
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6424
518
      else
6425
518
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6426
1.31k
  }
6427
1.31k
  NEXT;
6428
5.55k
    } else if (RAW == '*') {
6429
1.00k
  if (ret != NULL) {
6430
1.00k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6431
1.00k
      cur = ret;
6432
      /*
6433
       * Some normalization:
6434
       * (a | b* | c?)* == (a | b | c)*
6435
       */
6436
11.1k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6437
10.1k
    if ((cur->c1 != NULL) &&
6438
10.1k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6439
10.1k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6440
563
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6441
10.1k
    if ((cur->c2 != NULL) &&
6442
10.1k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6443
10.1k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6444
481
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6445
10.1k
    cur = cur->c2;
6446
10.1k
      }
6447
1.00k
  }
6448
1.00k
  NEXT;
6449
4.54k
    } else if (RAW == '+') {
6450
1.45k
  if (ret != NULL) {
6451
1.45k
      int found = 0;
6452
6453
1.45k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6454
1.45k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6455
674
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6456
782
      else
6457
782
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6458
      /*
6459
       * Some normalization:
6460
       * (a | b*)+ == (a | b)*
6461
       * (a | b?)+ == (a | b)*
6462
       */
6463
22.8k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6464
21.4k
    if ((cur->c1 != NULL) &&
6465
21.4k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6466
21.4k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6467
758
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6468
758
        found = 1;
6469
758
    }
6470
21.4k
    if ((cur->c2 != NULL) &&
6471
21.4k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6472
21.4k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6473
644
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6474
644
        found = 1;
6475
644
    }
6476
21.4k
    cur = cur->c2;
6477
21.4k
      }
6478
1.45k
      if (found)
6479
633
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6480
1.45k
  }
6481
1.45k
  NEXT;
6482
1.45k
    }
6483
6.86k
    return(ret);
6484
9.87k
}
6485
6486
/**
6487
 * xmlParseElementChildrenContentDecl:
6488
 * @ctxt:  an XML parser context
6489
 * @inputchk:  the input used for the current entity, needed for boundary checks
6490
 *
6491
 * DEPRECATED: Internal function, don't use.
6492
 *
6493
 * parse the declaration for a Mixed Element content
6494
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6495
 *
6496
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6497
 *
6498
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6499
 *
6500
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6501
 *
6502
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6503
 *
6504
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6505
 * TODO Parameter-entity replacement text must be properly nested
6506
 *  with parenthesized groups. That is to say, if either of the
6507
 *  opening or closing parentheses in a choice, seq, or Mixed
6508
 *  construct is contained in the replacement text for a parameter
6509
 *  entity, both must be contained in the same replacement text. For
6510
 *  interoperability, if a parameter-entity reference appears in a
6511
 *  choice, seq, or Mixed construct, its replacement text should not
6512
 *  be empty, and neither the first nor last non-blank character of
6513
 *  the replacement text should be a connector (| or ,).
6514
 *
6515
 * Returns the tree of xmlElementContentPtr describing the element
6516
 *          hierarchy.
6517
 */
6518
xmlElementContentPtr
6519
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6520
    /* stub left for API/ABI compat */
6521
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6522
0
}
6523
6524
/**
6525
 * xmlParseElementContentDecl:
6526
 * @ctxt:  an XML parser context
6527
 * @name:  the name of the element being defined.
6528
 * @result:  the Element Content pointer will be stored here if any
6529
 *
6530
 * DEPRECATED: Internal function, don't use.
6531
 *
6532
 * parse the declaration for an Element content either Mixed or Children,
6533
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6534
 *
6535
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6536
 *
6537
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6538
 */
6539
6540
int
6541
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6542
2.57k
                           xmlElementContentPtr *result) {
6543
6544
2.57k
    xmlElementContentPtr tree = NULL;
6545
2.57k
    int inputid = ctxt->input->id;
6546
2.57k
    int res;
6547
6548
2.57k
    *result = NULL;
6549
6550
2.57k
    if (RAW != '(') {
6551
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6552
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6553
0
  return(-1);
6554
0
    }
6555
2.57k
    NEXT;
6556
2.57k
    GROW;
6557
2.57k
    if (ctxt->instate == XML_PARSER_EOF)
6558
0
        return(-1);
6559
2.57k
    SKIP_BLANKS;
6560
2.57k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6561
752
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6562
752
  res = XML_ELEMENT_TYPE_MIXED;
6563
1.82k
    } else {
6564
1.82k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6565
1.82k
  res = XML_ELEMENT_TYPE_ELEMENT;
6566
1.82k
    }
6567
2.57k
    SKIP_BLANKS;
6568
2.57k
    *result = tree;
6569
2.57k
    return(res);
6570
2.57k
}
6571
6572
/**
6573
 * xmlParseElementDecl:
6574
 * @ctxt:  an XML parser context
6575
 *
6576
 * DEPRECATED: Internal function, don't use.
6577
 *
6578
 * Parse an element declaration. Always consumes '<!'.
6579
 *
6580
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6581
 *
6582
 * [ VC: Unique Element Type Declaration ]
6583
 * No element type may be declared more than once
6584
 *
6585
 * Returns the type of the element, or -1 in case of error
6586
 */
6587
int
6588
3.54k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6589
3.54k
    const xmlChar *name;
6590
3.54k
    int ret = -1;
6591
3.54k
    xmlElementContentPtr content  = NULL;
6592
6593
3.54k
    if ((CUR != '<') || (NXT(1) != '!'))
6594
0
        return(ret);
6595
3.54k
    SKIP(2);
6596
6597
    /* GROW; done in the caller */
6598
3.54k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6599
3.52k
  int inputid = ctxt->input->id;
6600
6601
3.52k
  SKIP(7);
6602
3.52k
  if (SKIP_BLANKS == 0) {
6603
587
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6604
587
               "Space required after 'ELEMENT'\n");
6605
587
      return(-1);
6606
587
  }
6607
2.93k
        name = xmlParseName(ctxt);
6608
2.93k
  if (name == NULL) {
6609
82
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6610
82
         "xmlParseElementDecl: no name for Element\n");
6611
82
      return(-1);
6612
82
  }
6613
2.85k
  if (SKIP_BLANKS == 0) {
6614
1.78k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6615
1.78k
         "Space required after the element name\n");
6616
1.78k
  }
6617
2.85k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6618
22
      SKIP(5);
6619
      /*
6620
       * Element must always be empty.
6621
       */
6622
22
      ret = XML_ELEMENT_TYPE_EMPTY;
6623
2.83k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6624
2.83k
             (NXT(2) == 'Y')) {
6625
23
      SKIP(3);
6626
      /*
6627
       * Element is a generic container.
6628
       */
6629
23
      ret = XML_ELEMENT_TYPE_ANY;
6630
2.80k
  } else if (RAW == '(') {
6631
2.57k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6632
2.57k
  } else {
6633
      /*
6634
       * [ WFC: PEs in Internal Subset ] error handling.
6635
       */
6636
231
      if ((RAW == '%') && (ctxt->external == 0) &&
6637
231
          (ctxt->inputNr == 1)) {
6638
34
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6639
34
    "PEReference: forbidden within markup decl in internal subset\n");
6640
197
      } else {
6641
197
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6642
197
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6643
197
            }
6644
231
      return(-1);
6645
231
  }
6646
6647
2.62k
  SKIP_BLANKS;
6648
6649
2.62k
  if (RAW != '>') {
6650
1.84k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6651
1.84k
      if (content != NULL) {
6652
168
    xmlFreeDocElementContent(ctxt->myDoc, content);
6653
168
      }
6654
1.84k
  } else {
6655
779
      if (inputid != ctxt->input->id) {
6656
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6657
0
                               "Element declaration doesn't start and stop in"
6658
0
                               " the same entity\n");
6659
0
      }
6660
6661
779
      NEXT;
6662
779
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6663
779
    (ctxt->sax->elementDecl != NULL)) {
6664
0
    if (content != NULL)
6665
0
        content->parent = NULL;
6666
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6667
0
                           content);
6668
0
    if ((content != NULL) && (content->parent == NULL)) {
6669
        /*
6670
         * this is a trick: if xmlAddElementDecl is called,
6671
         * instead of copying the full tree it is plugged directly
6672
         * if called from the parser. Avoid duplicating the
6673
         * interfaces or change the API/ABI
6674
         */
6675
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6676
0
    }
6677
779
      } else if (content != NULL) {
6678
533
    xmlFreeDocElementContent(ctxt->myDoc, content);
6679
533
      }
6680
779
  }
6681
2.62k
    }
6682
2.64k
    return(ret);
6683
3.54k
}
6684
6685
/**
6686
 * xmlParseConditionalSections
6687
 * @ctxt:  an XML parser context
6688
 *
6689
 * Parse a conditional section. Always consumes '<!['.
6690
 *
6691
 * [61] conditionalSect ::= includeSect | ignoreSect
6692
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6693
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6694
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6695
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6696
 */
6697
6698
static void
6699
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6700
0
    int *inputIds = NULL;
6701
0
    size_t inputIdsSize = 0;
6702
0
    size_t depth = 0;
6703
6704
0
    while (ctxt->instate != XML_PARSER_EOF) {
6705
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6706
0
            int id = ctxt->input->id;
6707
6708
0
            SKIP(3);
6709
0
            SKIP_BLANKS;
6710
6711
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6712
0
                SKIP(7);
6713
0
                SKIP_BLANKS;
6714
0
                if (RAW != '[') {
6715
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6716
0
                    xmlHaltParser(ctxt);
6717
0
                    goto error;
6718
0
                }
6719
0
                if (ctxt->input->id != id) {
6720
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6721
0
                                   "All markup of the conditional section is"
6722
0
                                   " not in the same entity\n");
6723
0
                }
6724
0
                NEXT;
6725
6726
0
                if (inputIdsSize <= depth) {
6727
0
                    int *tmp;
6728
6729
0
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6730
0
                    tmp = (int *) xmlRealloc(inputIds,
6731
0
                            inputIdsSize * sizeof(int));
6732
0
                    if (tmp == NULL) {
6733
0
                        xmlErrMemory(ctxt, NULL);
6734
0
                        goto error;
6735
0
                    }
6736
0
                    inputIds = tmp;
6737
0
                }
6738
0
                inputIds[depth] = id;
6739
0
                depth++;
6740
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6741
0
                size_t ignoreDepth = 0;
6742
6743
0
                SKIP(6);
6744
0
                SKIP_BLANKS;
6745
0
                if (RAW != '[') {
6746
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6747
0
                    xmlHaltParser(ctxt);
6748
0
                    goto error;
6749
0
                }
6750
0
                if (ctxt->input->id != id) {
6751
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6752
0
                                   "All markup of the conditional section is"
6753
0
                                   " not in the same entity\n");
6754
0
                }
6755
0
                NEXT;
6756
6757
0
                while (RAW != 0) {
6758
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6759
0
                        SKIP(3);
6760
0
                        ignoreDepth++;
6761
                        /* Check for integer overflow */
6762
0
                        if (ignoreDepth == 0) {
6763
0
                            xmlErrMemory(ctxt, NULL);
6764
0
                            goto error;
6765
0
                        }
6766
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6767
0
                               (NXT(2) == '>')) {
6768
0
                        if (ignoreDepth == 0)
6769
0
                            break;
6770
0
                        SKIP(3);
6771
0
                        ignoreDepth--;
6772
0
                    } else {
6773
0
                        NEXT;
6774
0
                    }
6775
0
                }
6776
6777
0
    if (RAW == 0) {
6778
0
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6779
0
                    goto error;
6780
0
    }
6781
0
                if (ctxt->input->id != id) {
6782
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6783
0
                                   "All markup of the conditional section is"
6784
0
                                   " not in the same entity\n");
6785
0
                }
6786
0
                SKIP(3);
6787
0
            } else {
6788
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6789
0
                xmlHaltParser(ctxt);
6790
0
                goto error;
6791
0
            }
6792
0
        } else if ((depth > 0) &&
6793
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6794
0
            depth--;
6795
0
            if (ctxt->input->id != inputIds[depth]) {
6796
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6797
0
                               "All markup of the conditional section is not"
6798
0
                               " in the same entity\n");
6799
0
            }
6800
0
            SKIP(3);
6801
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6802
0
            xmlParseMarkupDecl(ctxt);
6803
0
        } else {
6804
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6805
0
            xmlHaltParser(ctxt);
6806
0
            goto error;
6807
0
        }
6808
6809
0
        if (depth == 0)
6810
0
            break;
6811
6812
0
        SKIP_BLANKS;
6813
0
        SHRINK;
6814
0
        GROW;
6815
0
    }
6816
6817
0
error:
6818
0
    xmlFree(inputIds);
6819
0
}
6820
6821
/**
6822
 * xmlParseMarkupDecl:
6823
 * @ctxt:  an XML parser context
6824
 *
6825
 * DEPRECATED: Internal function, don't use.
6826
 *
6827
 * Parse markup declarations. Always consumes '<!' or '<?'.
6828
 *
6829
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6830
 *                     NotationDecl | PI | Comment
6831
 *
6832
 * [ VC: Proper Declaration/PE Nesting ]
6833
 * Parameter-entity replacement text must be properly nested with
6834
 * markup declarations. That is to say, if either the first character
6835
 * or the last character of a markup declaration (markupdecl above) is
6836
 * contained in the replacement text for a parameter-entity reference,
6837
 * both must be contained in the same replacement text.
6838
 *
6839
 * [ WFC: PEs in Internal Subset ]
6840
 * In the internal DTD subset, parameter-entity references can occur
6841
 * only where markup declarations can occur, not within markup declarations.
6842
 * (This does not apply to references that occur in external parameter
6843
 * entities or to the external subset.)
6844
 */
6845
void
6846
27.9k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6847
27.9k
    GROW;
6848
27.9k
    if (CUR == '<') {
6849
27.9k
        if (NXT(1) == '!') {
6850
25.7k
      switch (NXT(2)) {
6851
16.9k
          case 'E':
6852
16.9k
        if (NXT(3) == 'L')
6853
3.54k
      xmlParseElementDecl(ctxt);
6854
13.3k
        else if (NXT(3) == 'N')
6855
13.3k
      xmlParseEntityDecl(ctxt);
6856
18
                    else
6857
18
                        SKIP(2);
6858
16.9k
        break;
6859
4.35k
          case 'A':
6860
4.35k
        xmlParseAttributeListDecl(ctxt);
6861
4.35k
        break;
6862
2.19k
          case 'N':
6863
2.19k
        xmlParseNotationDecl(ctxt);
6864
2.19k
        break;
6865
1.70k
          case '-':
6866
1.70k
        xmlParseComment(ctxt);
6867
1.70k
        break;
6868
576
    default:
6869
        /* there is an error but it will be detected later */
6870
576
                    SKIP(2);
6871
576
        break;
6872
25.7k
      }
6873
25.7k
  } else if (NXT(1) == '?') {
6874
2.23k
      xmlParsePI(ctxt);
6875
2.23k
  }
6876
27.9k
    }
6877
6878
    /*
6879
     * detect requirement to exit there and act accordingly
6880
     * and avoid having instate overridden later on
6881
     */
6882
27.9k
    if (ctxt->instate == XML_PARSER_EOF)
6883
728
        return;
6884
6885
27.2k
    ctxt->instate = XML_PARSER_DTD;
6886
27.2k
}
6887
6888
/**
6889
 * xmlParseTextDecl:
6890
 * @ctxt:  an XML parser context
6891
 *
6892
 * DEPRECATED: Internal function, don't use.
6893
 *
6894
 * parse an XML declaration header for external entities
6895
 *
6896
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6897
 */
6898
6899
void
6900
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6901
0
    xmlChar *version;
6902
0
    const xmlChar *encoding;
6903
0
    int oldstate;
6904
6905
    /*
6906
     * We know that '<?xml' is here.
6907
     */
6908
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6909
0
  SKIP(5);
6910
0
    } else {
6911
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6912
0
  return;
6913
0
    }
6914
6915
    /* Avoid expansion of parameter entities when skipping blanks. */
6916
0
    oldstate = ctxt->instate;
6917
0
    ctxt->instate = XML_PARSER_START;
6918
6919
0
    if (SKIP_BLANKS == 0) {
6920
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6921
0
           "Space needed after '<?xml'\n");
6922
0
    }
6923
6924
    /*
6925
     * We may have the VersionInfo here.
6926
     */
6927
0
    version = xmlParseVersionInfo(ctxt);
6928
0
    if (version == NULL)
6929
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6930
0
    else {
6931
0
  if (SKIP_BLANKS == 0) {
6932
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6933
0
               "Space needed here\n");
6934
0
  }
6935
0
    }
6936
0
    ctxt->input->version = version;
6937
6938
    /*
6939
     * We must have the encoding declaration
6940
     */
6941
0
    encoding = xmlParseEncodingDecl(ctxt);
6942
0
    if (ctxt->instate == XML_PARSER_EOF)
6943
0
        return;
6944
0
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6945
  /*
6946
   * The XML REC instructs us to stop parsing right here
6947
   */
6948
0
        ctxt->instate = oldstate;
6949
0
        return;
6950
0
    }
6951
0
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6952
0
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6953
0
           "Missing encoding in text declaration\n");
6954
0
    }
6955
6956
0
    SKIP_BLANKS;
6957
0
    if ((RAW == '?') && (NXT(1) == '>')) {
6958
0
        SKIP(2);
6959
0
    } else if (RAW == '>') {
6960
        /* Deprecated old WD ... */
6961
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6962
0
  NEXT;
6963
0
    } else {
6964
0
        int c;
6965
6966
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6967
0
        while ((c = CUR) != 0) {
6968
0
            NEXT;
6969
0
            if (c == '>')
6970
0
                break;
6971
0
        }
6972
0
    }
6973
6974
0
    ctxt->instate = oldstate;
6975
0
}
6976
6977
/**
6978
 * xmlParseExternalSubset:
6979
 * @ctxt:  an XML parser context
6980
 * @ExternalID: the external identifier
6981
 * @SystemID: the system identifier (or URL)
6982
 *
6983
 * parse Markup declarations from an external subset
6984
 *
6985
 * [30] extSubset ::= textDecl? extSubsetDecl
6986
 *
6987
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6988
 */
6989
void
6990
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6991
0
                       const xmlChar *SystemID) {
6992
0
    xmlDetectSAX2(ctxt);
6993
0
    GROW;
6994
6995
0
    if ((ctxt->encoding == NULL) &&
6996
0
        (ctxt->input->end - ctxt->input->cur >= 4)) {
6997
0
        xmlChar start[4];
6998
0
  xmlCharEncoding enc;
6999
7000
0
  start[0] = RAW;
7001
0
  start[1] = NXT(1);
7002
0
  start[2] = NXT(2);
7003
0
  start[3] = NXT(3);
7004
0
  enc = xmlDetectCharEncoding(start, 4);
7005
0
  if (enc != XML_CHAR_ENCODING_NONE)
7006
0
      xmlSwitchEncoding(ctxt, enc);
7007
0
    }
7008
7009
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7010
0
  xmlParseTextDecl(ctxt);
7011
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7012
      /*
7013
       * The XML REC instructs us to stop parsing right here
7014
       */
7015
0
      xmlHaltParser(ctxt);
7016
0
      return;
7017
0
  }
7018
0
    }
7019
0
    if (ctxt->myDoc == NULL) {
7020
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7021
0
  if (ctxt->myDoc == NULL) {
7022
0
      xmlErrMemory(ctxt, "New Doc failed");
7023
0
      return;
7024
0
  }
7025
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7026
0
    }
7027
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7028
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7029
7030
0
    ctxt->instate = XML_PARSER_DTD;
7031
0
    ctxt->external = 1;
7032
0
    SKIP_BLANKS;
7033
0
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7034
0
  GROW;
7035
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7036
0
            xmlParseConditionalSections(ctxt);
7037
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7038
0
            xmlParseMarkupDecl(ctxt);
7039
0
        } else {
7040
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7041
0
            xmlHaltParser(ctxt);
7042
0
            return;
7043
0
        }
7044
0
        SKIP_BLANKS;
7045
0
        SHRINK;
7046
0
    }
7047
7048
0
    if (RAW != 0) {
7049
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7050
0
    }
7051
7052
0
}
7053
7054
/**
7055
 * xmlParseReference:
7056
 * @ctxt:  an XML parser context
7057
 *
7058
 * DEPRECATED: Internal function, don't use.
7059
 *
7060
 * parse and handle entity references in content, depending on the SAX
7061
 * interface, this may end-up in a call to character() if this is a
7062
 * CharRef, a predefined entity, if there is no reference() callback.
7063
 * or if the parser was asked to switch to that mode.
7064
 *
7065
 * Always consumes '&'.
7066
 *
7067
 * [67] Reference ::= EntityRef | CharRef
7068
 */
7069
void
7070
13.2k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7071
13.2k
    xmlEntityPtr ent;
7072
13.2k
    xmlChar *val;
7073
13.2k
    int was_checked;
7074
13.2k
    xmlNodePtr list = NULL;
7075
13.2k
    xmlParserErrors ret = XML_ERR_OK;
7076
7077
7078
13.2k
    if (RAW != '&')
7079
0
        return;
7080
7081
    /*
7082
     * Simple case of a CharRef
7083
     */
7084
13.2k
    if (NXT(1) == '#') {
7085
521
  int i = 0;
7086
521
  xmlChar out[16];
7087
521
  int hex = NXT(2);
7088
521
  int value = xmlParseCharRef(ctxt);
7089
7090
521
  if (value == 0)
7091
83
      return;
7092
438
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7093
      /*
7094
       * So we are using non-UTF-8 buffers
7095
       * Check that the char fit on 8bits, if not
7096
       * generate a CharRef.
7097
       */
7098
0
      if (value <= 0xFF) {
7099
0
    out[0] = value;
7100
0
    out[1] = 0;
7101
0
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7102
0
        (!ctxt->disableSAX))
7103
0
        ctxt->sax->characters(ctxt->userData, out, 1);
7104
0
      } else {
7105
0
    if ((hex == 'x') || (hex == 'X'))
7106
0
        snprintf((char *)out, sizeof(out), "#x%X", value);
7107
0
    else
7108
0
        snprintf((char *)out, sizeof(out), "#%d", value);
7109
0
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7110
0
        (!ctxt->disableSAX))
7111
0
        ctxt->sax->reference(ctxt->userData, out);
7112
0
      }
7113
438
  } else {
7114
      /*
7115
       * Just encode the value in UTF-8
7116
       */
7117
438
      COPY_BUF(0 ,out, i, value);
7118
438
      out[i] = 0;
7119
438
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7120
438
    (!ctxt->disableSAX))
7121
438
    ctxt->sax->characters(ctxt->userData, out, i);
7122
438
  }
7123
438
  return;
7124
521
    }
7125
7126
    /*
7127
     * We are seeing an entity reference
7128
     */
7129
12.7k
    ent = xmlParseEntityRef(ctxt);
7130
12.7k
    if (ent == NULL) return;
7131
12.6k
    if (!ctxt->wellFormed)
7132
2
  return;
7133
12.6k
    was_checked = ent->flags & XML_ENT_PARSED;
7134
7135
    /* special case of predefined entities */
7136
12.6k
    if ((ent->name == NULL) ||
7137
12.6k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7138
12.6k
  val = ent->content;
7139
12.6k
  if (val == NULL) return;
7140
  /*
7141
   * inline the entity.
7142
   */
7143
11.4k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7144
11.4k
      (!ctxt->disableSAX))
7145
11.4k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7146
11.4k
  return;
7147
12.6k
    }
7148
7149
    /*
7150
     * The first reference to the entity trigger a parsing phase
7151
     * where the ent->children is filled with the result from
7152
     * the parsing.
7153
     * Note: external parsed entities will not be loaded, it is not
7154
     * required for a non-validating parser, unless the parsing option
7155
     * of validating, or substituting entities were given. Doing so is
7156
     * far more secure as the parser will only process data coming from
7157
     * the document entity by default.
7158
     */
7159
0
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7160
0
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7161
0
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7162
0
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7163
7164
  /*
7165
   * This is a bit hackish but this seems the best
7166
   * way to make sure both SAX and DOM entity support
7167
   * behaves okay.
7168
   */
7169
0
  void *user_data;
7170
0
  if (ctxt->userData == ctxt)
7171
0
      user_data = NULL;
7172
0
  else
7173
0
      user_data = ctxt->userData;
7174
7175
        /* Avoid overflow as much as possible */
7176
0
        ctxt->sizeentcopy = 0;
7177
7178
0
        if (ent->flags & XML_ENT_EXPANDING) {
7179
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7180
0
            xmlHaltParser(ctxt);
7181
0
            return;
7182
0
        }
7183
7184
0
        ent->flags |= XML_ENT_EXPANDING;
7185
7186
  /*
7187
   * Check that this entity is well formed
7188
   * 4.3.2: An internal general parsed entity is well-formed
7189
   * if its replacement text matches the production labeled
7190
   * content.
7191
   */
7192
0
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7193
0
      ctxt->depth++;
7194
0
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7195
0
                                                user_data, &list);
7196
0
      ctxt->depth--;
7197
7198
0
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7199
0
      ctxt->depth++;
7200
0
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7201
0
                                     user_data, ctxt->depth, ent->URI,
7202
0
             ent->ExternalID, &list);
7203
0
      ctxt->depth--;
7204
0
  } else {
7205
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7206
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7207
0
       "invalid entity type found\n", NULL);
7208
0
  }
7209
7210
0
        ent->flags &= ~XML_ENT_EXPANDING;
7211
0
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7212
0
        ent->expandedSize = ctxt->sizeentcopy;
7213
0
  if (ret == XML_ERR_ENTITY_LOOP) {
7214
0
            xmlHaltParser(ctxt);
7215
0
      xmlFreeNodeList(list);
7216
0
      return;
7217
0
  }
7218
0
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7219
0
      xmlFreeNodeList(list);
7220
0
      return;
7221
0
  }
7222
7223
0
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7224
0
            ent->children = list;
7225
            /*
7226
             * Prune it directly in the generated document
7227
             * except for single text nodes.
7228
             */
7229
0
            if ((ctxt->replaceEntities == 0) ||
7230
0
                (ctxt->parseMode == XML_PARSE_READER) ||
7231
0
                ((list->type == XML_TEXT_NODE) &&
7232
0
                 (list->next == NULL))) {
7233
0
                ent->owner = 1;
7234
0
                while (list != NULL) {
7235
0
                    list->parent = (xmlNodePtr) ent;
7236
0
                    if (list->doc != ent->doc)
7237
0
                        xmlSetTreeDoc(list, ent->doc);
7238
0
                    if (list->next == NULL)
7239
0
                        ent->last = list;
7240
0
                    list = list->next;
7241
0
                }
7242
0
                list = NULL;
7243
0
            } else {
7244
0
                ent->owner = 0;
7245
0
                while (list != NULL) {
7246
0
                    list->parent = (xmlNodePtr) ctxt->node;
7247
0
                    list->doc = ctxt->myDoc;
7248
0
                    if (list->next == NULL)
7249
0
                        ent->last = list;
7250
0
                    list = list->next;
7251
0
                }
7252
0
                list = ent->children;
7253
#ifdef LIBXML_LEGACY_ENABLED
7254
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7255
                    xmlAddEntityReference(ent, list, NULL);
7256
#endif /* LIBXML_LEGACY_ENABLED */
7257
0
            }
7258
0
  } else if ((ret != XML_ERR_OK) &&
7259
0
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7260
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7261
0
         "Entity '%s' failed to parse\n", ent->name);
7262
0
            if (ent->content != NULL)
7263
0
                ent->content[0] = 0;
7264
0
  } else if (list != NULL) {
7265
0
      xmlFreeNodeList(list);
7266
0
      list = NULL;
7267
0
  }
7268
7269
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7270
0
        was_checked = 0;
7271
0
    }
7272
7273
    /*
7274
     * Now that the entity content has been gathered
7275
     * provide it to the application, this can take different forms based
7276
     * on the parsing modes.
7277
     */
7278
0
    if (ent->children == NULL) {
7279
  /*
7280
   * Probably running in SAX mode and the callbacks don't
7281
   * build the entity content. So unless we already went
7282
   * though parsing for first checking go though the entity
7283
   * content to generate callbacks associated to the entity
7284
   */
7285
0
  if (was_checked != 0) {
7286
0
      void *user_data;
7287
      /*
7288
       * This is a bit hackish but this seems the best
7289
       * way to make sure both SAX and DOM entity support
7290
       * behaves okay.
7291
       */
7292
0
      if (ctxt->userData == ctxt)
7293
0
    user_data = NULL;
7294
0
      else
7295
0
    user_data = ctxt->userData;
7296
7297
0
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7298
0
    ctxt->depth++;
7299
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7300
0
           ent->content, user_data, NULL);
7301
0
    ctxt->depth--;
7302
0
      } else if (ent->etype ==
7303
0
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7304
0
          unsigned long oldsizeentities = ctxt->sizeentities;
7305
7306
0
    ctxt->depth++;
7307
0
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7308
0
         ctxt->sax, user_data, ctxt->depth,
7309
0
         ent->URI, ent->ExternalID, NULL);
7310
0
    ctxt->depth--;
7311
7312
                /* Undo the change to sizeentities */
7313
0
                ctxt->sizeentities = oldsizeentities;
7314
0
      } else {
7315
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7316
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7317
0
           "invalid entity type found\n", NULL);
7318
0
      }
7319
0
      if (ret == XML_ERR_ENTITY_LOOP) {
7320
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7321
0
    return;
7322
0
      }
7323
0
            if (xmlParserEntityCheck(ctxt, 0))
7324
0
                return;
7325
0
  }
7326
0
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7327
0
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7328
      /*
7329
       * Entity reference callback comes second, it's somewhat
7330
       * superfluous but a compatibility to historical behaviour
7331
       */
7332
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7333
0
  }
7334
0
  return;
7335
0
    }
7336
7337
    /*
7338
     * We also check for amplification if entities aren't substituted.
7339
     * They might be expanded later.
7340
     */
7341
0
    if ((was_checked != 0) &&
7342
0
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7343
0
        return;
7344
7345
    /*
7346
     * If we didn't get any children for the entity being built
7347
     */
7348
0
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7349
0
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7350
  /*
7351
   * Create a node.
7352
   */
7353
0
  ctxt->sax->reference(ctxt->userData, ent->name);
7354
0
  return;
7355
0
    }
7356
7357
0
    if (ctxt->replaceEntities)  {
7358
  /*
7359
   * There is a problem on the handling of _private for entities
7360
   * (bug 155816): Should we copy the content of the field from
7361
   * the entity (possibly overwriting some value set by the user
7362
   * when a copy is created), should we leave it alone, or should
7363
   * we try to take care of different situations?  The problem
7364
   * is exacerbated by the usage of this field by the xmlReader.
7365
   * To fix this bug, we look at _private on the created node
7366
   * and, if it's NULL, we copy in whatever was in the entity.
7367
   * If it's not NULL we leave it alone.  This is somewhat of a
7368
   * hack - maybe we should have further tests to determine
7369
   * what to do.
7370
   */
7371
0
  if (ctxt->node != NULL) {
7372
      /*
7373
       * Seems we are generating the DOM content, do
7374
       * a simple tree copy for all references except the first
7375
       * In the first occurrence list contains the replacement.
7376
       */
7377
0
      if (((list == NULL) && (ent->owner == 0)) ||
7378
0
    (ctxt->parseMode == XML_PARSE_READER)) {
7379
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7380
7381
    /*
7382
     * when operating on a reader, the entities definitions
7383
     * are always owning the entities subtree.
7384
    if (ctxt->parseMode == XML_PARSE_READER)
7385
        ent->owner = 1;
7386
     */
7387
7388
0
    cur = ent->children;
7389
0
    while (cur != NULL) {
7390
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7391
0
        if (nw != NULL) {
7392
0
      if (nw->_private == NULL)
7393
0
          nw->_private = cur->_private;
7394
0
      if (firstChild == NULL){
7395
0
          firstChild = nw;
7396
0
      }
7397
0
      nw = xmlAddChild(ctxt->node, nw);
7398
0
        }
7399
0
        if (cur == ent->last) {
7400
      /*
7401
       * needed to detect some strange empty
7402
       * node cases in the reader tests
7403
       */
7404
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7405
0
          (nw != NULL) &&
7406
0
          (nw->type == XML_ELEMENT_NODE) &&
7407
0
          (nw->children == NULL))
7408
0
          nw->extra = 1;
7409
7410
0
      break;
7411
0
        }
7412
0
        cur = cur->next;
7413
0
    }
7414
#ifdef LIBXML_LEGACY_ENABLED
7415
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7416
      xmlAddEntityReference(ent, firstChild, nw);
7417
#endif /* LIBXML_LEGACY_ENABLED */
7418
0
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7419
0
    xmlNodePtr nw = NULL, cur, next, last,
7420
0
         firstChild = NULL;
7421
7422
    /*
7423
     * Copy the entity child list and make it the new
7424
     * entity child list. The goal is to make sure any
7425
     * ID or REF referenced will be the one from the
7426
     * document content and not the entity copy.
7427
     */
7428
0
    cur = ent->children;
7429
0
    ent->children = NULL;
7430
0
    last = ent->last;
7431
0
    ent->last = NULL;
7432
0
    while (cur != NULL) {
7433
0
        next = cur->next;
7434
0
        cur->next = NULL;
7435
0
        cur->parent = NULL;
7436
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7437
0
        if (nw != NULL) {
7438
0
      if (nw->_private == NULL)
7439
0
          nw->_private = cur->_private;
7440
0
      if (firstChild == NULL){
7441
0
          firstChild = cur;
7442
0
      }
7443
0
      xmlAddChild((xmlNodePtr) ent, nw);
7444
0
        }
7445
0
        xmlAddChild(ctxt->node, cur);
7446
0
        if (cur == last)
7447
0
      break;
7448
0
        cur = next;
7449
0
    }
7450
0
    if (ent->owner == 0)
7451
0
        ent->owner = 1;
7452
#ifdef LIBXML_LEGACY_ENABLED
7453
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7454
      xmlAddEntityReference(ent, firstChild, nw);
7455
#endif /* LIBXML_LEGACY_ENABLED */
7456
0
      } else {
7457
0
    const xmlChar *nbktext;
7458
7459
    /*
7460
     * the name change is to avoid coalescing of the
7461
     * node with a possible previous text one which
7462
     * would make ent->children a dangling pointer
7463
     */
7464
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7465
0
          -1);
7466
0
    if (ent->children->type == XML_TEXT_NODE)
7467
0
        ent->children->name = nbktext;
7468
0
    if ((ent->last != ent->children) &&
7469
0
        (ent->last->type == XML_TEXT_NODE))
7470
0
        ent->last->name = nbktext;
7471
0
    xmlAddChildList(ctxt->node, ent->children);
7472
0
      }
7473
7474
      /*
7475
       * This is to avoid a nasty side effect, see
7476
       * characters() in SAX.c
7477
       */
7478
0
      ctxt->nodemem = 0;
7479
0
      ctxt->nodelen = 0;
7480
0
      return;
7481
0
  }
7482
0
    }
7483
0
}
7484
7485
/**
7486
 * xmlParseEntityRef:
7487
 * @ctxt:  an XML parser context
7488
 *
7489
 * DEPRECATED: Internal function, don't use.
7490
 *
7491
 * Parse an entity reference. Always consumes '&'.
7492
 *
7493
 * [68] EntityRef ::= '&' Name ';'
7494
 *
7495
 * [ WFC: Entity Declared ]
7496
 * In a document without any DTD, a document with only an internal DTD
7497
 * subset which contains no parameter entity references, or a document
7498
 * with "standalone='yes'", the Name given in the entity reference
7499
 * must match that in an entity declaration, except that well-formed
7500
 * documents need not declare any of the following entities: amp, lt,
7501
 * gt, apos, quot.  The declaration of a parameter entity must precede
7502
 * any reference to it.  Similarly, the declaration of a general entity
7503
 * must precede any reference to it which appears in a default value in an
7504
 * attribute-list declaration. Note that if entities are declared in the
7505
 * external subset or in external parameter entities, a non-validating
7506
 * processor is not obligated to read and process their declarations;
7507
 * for such documents, the rule that an entity must be declared is a
7508
 * well-formedness constraint only if standalone='yes'.
7509
 *
7510
 * [ WFC: Parsed Entity ]
7511
 * An entity reference must not contain the name of an unparsed entity
7512
 *
7513
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7514
 */
7515
xmlEntityPtr
7516
184k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7517
184k
    const xmlChar *name;
7518
184k
    xmlEntityPtr ent = NULL;
7519
7520
184k
    GROW;
7521
184k
    if (ctxt->instate == XML_PARSER_EOF)
7522
0
        return(NULL);
7523
7524
184k
    if (RAW != '&')
7525
0
        return(NULL);
7526
184k
    NEXT;
7527
184k
    name = xmlParseName(ctxt);
7528
184k
    if (name == NULL) {
7529
74.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7530
74.8k
           "xmlParseEntityRef: no name\n");
7531
74.8k
        return(NULL);
7532
74.8k
    }
7533
109k
    if (RAW != ';') {
7534
53.8k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7535
53.8k
  return(NULL);
7536
53.8k
    }
7537
55.8k
    NEXT;
7538
7539
    /*
7540
     * Predefined entities override any extra definition
7541
     */
7542
55.8k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7543
55.8k
        ent = xmlGetPredefinedEntity(name);
7544
55.8k
        if (ent != NULL)
7545
41.7k
            return(ent);
7546
55.8k
    }
7547
7548
    /*
7549
     * Ask first SAX for entity resolution, otherwise try the
7550
     * entities which may have stored in the parser context.
7551
     */
7552
14.1k
    if (ctxt->sax != NULL) {
7553
14.1k
  if (ctxt->sax->getEntity != NULL)
7554
14.1k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7555
14.1k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7556
14.1k
      (ctxt->options & XML_PARSE_OLDSAX))
7557
0
      ent = xmlGetPredefinedEntity(name);
7558
14.1k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7559
14.1k
      (ctxt->userData==ctxt)) {
7560
0
      ent = xmlSAX2GetEntity(ctxt, name);
7561
0
  }
7562
14.1k
    }
7563
14.1k
    if (ctxt->instate == XML_PARSER_EOF)
7564
0
  return(NULL);
7565
    /*
7566
     * [ WFC: Entity Declared ]
7567
     * In a document without any DTD, a document with only an
7568
     * internal DTD subset which contains no parameter entity
7569
     * references, or a document with "standalone='yes'", the
7570
     * Name given in the entity reference must match that in an
7571
     * entity declaration, except that well-formed documents
7572
     * need not declare any of the following entities: amp, lt,
7573
     * gt, apos, quot.
7574
     * The declaration of a parameter entity must precede any
7575
     * reference to it.
7576
     * Similarly, the declaration of a general entity must
7577
     * precede any reference to it which appears in a default
7578
     * value in an attribute-list declaration. Note that if
7579
     * entities are declared in the external subset or in
7580
     * external parameter entities, a non-validating processor
7581
     * is not obligated to read and process their declarations;
7582
     * for such documents, the rule that an entity must be
7583
     * declared is a well-formedness constraint only if
7584
     * standalone='yes'.
7585
     */
7586
14.1k
    if (ent == NULL) {
7587
0
  if ((ctxt->standalone == 1) ||
7588
0
      ((ctxt->hasExternalSubset == 0) &&
7589
0
       (ctxt->hasPErefs == 0))) {
7590
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7591
0
         "Entity '%s' not defined\n", name);
7592
0
  } else {
7593
0
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7594
0
         "Entity '%s' not defined\n", name);
7595
0
      if ((ctxt->inSubset == 0) &&
7596
0
    (ctxt->sax != NULL) &&
7597
0
    (ctxt->sax->reference != NULL)) {
7598
0
    ctxt->sax->reference(ctxt->userData, name);
7599
0
      }
7600
0
  }
7601
0
  ctxt->valid = 0;
7602
0
    }
7603
7604
    /*
7605
     * [ WFC: Parsed Entity ]
7606
     * An entity reference must not contain the name of an
7607
     * unparsed entity
7608
     */
7609
14.1k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7610
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7611
0
     "Entity reference to unparsed entity %s\n", name);
7612
0
    }
7613
7614
    /*
7615
     * [ WFC: No External Entity References ]
7616
     * Attribute values cannot contain direct or indirect
7617
     * entity references to external entities.
7618
     */
7619
14.1k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7620
14.1k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7621
12.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7622
12.8k
       "Attribute references external entity '%s'\n", name);
7623
12.8k
    }
7624
    /*
7625
     * [ WFC: No < in Attribute Values ]
7626
     * The replacement text of any entity referred to directly or
7627
     * indirectly in an attribute value (other than "&lt;") must
7628
     * not contain a <.
7629
     */
7630
1.22k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7631
1.22k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7632
0
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7633
0
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7634
0
                ent->flags |= XML_ENT_CONTAINS_LT;
7635
0
            ent->flags |= XML_ENT_CHECKED_LT;
7636
0
        }
7637
0
        if (ent->flags & XML_ENT_CONTAINS_LT)
7638
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7639
0
                    "'<' in entity '%s' is not allowed in attributes "
7640
0
                    "values\n", name);
7641
0
    }
7642
7643
    /*
7644
     * Internal check, no parameter entities here ...
7645
     */
7646
1.22k
    else {
7647
1.22k
  switch (ent->etype) {
7648
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7649
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7650
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7651
0
       "Attempt to reference the parameter entity '%s'\n",
7652
0
            name);
7653
0
      break;
7654
1.22k
      default:
7655
1.22k
      break;
7656
1.22k
  }
7657
1.22k
    }
7658
7659
    /*
7660
     * [ WFC: No Recursion ]
7661
     * A parsed entity must not contain a recursive reference
7662
     * to itself, either directly or indirectly.
7663
     * Done somewhere else
7664
     */
7665
14.1k
    return(ent);
7666
14.1k
}
7667
7668
/**
7669
 * xmlParseStringEntityRef:
7670
 * @ctxt:  an XML parser context
7671
 * @str:  a pointer to an index in the string
7672
 *
7673
 * Parse an entity reference declaration, but this version parses it from
7674
 * a string value.
7675
 *
7676
 * [68] EntityRef ::= '&' Name ';'
7677
 *
7678
 * [ WFC: Entity Declared ]
7679
 * In a document without any DTD, a document with only an internal DTD
7680
 * subset which contains no parameter entity references, or a document
7681
 * with "standalone='yes'", the Name given in the entity reference
7682
 * must match that in an entity declaration, except that well-formed
7683
 * documents need not declare any of the following entities: amp, lt,
7684
 * gt, apos, quot.  The declaration of a parameter entity must precede
7685
 * any reference to it.  Similarly, the declaration of a general entity
7686
 * must precede any reference to it which appears in a default value in an
7687
 * attribute-list declaration. Note that if entities are declared in the
7688
 * external subset or in external parameter entities, a non-validating
7689
 * processor is not obligated to read and process their declarations;
7690
 * for such documents, the rule that an entity must be declared is a
7691
 * well-formedness constraint only if standalone='yes'.
7692
 *
7693
 * [ WFC: Parsed Entity ]
7694
 * An entity reference must not contain the name of an unparsed entity
7695
 *
7696
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7697
 * is updated to the current location in the string.
7698
 */
7699
static xmlEntityPtr
7700
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7701
0
    xmlChar *name;
7702
0
    const xmlChar *ptr;
7703
0
    xmlChar cur;
7704
0
    xmlEntityPtr ent = NULL;
7705
7706
0
    if ((str == NULL) || (*str == NULL))
7707
0
        return(NULL);
7708
0
    ptr = *str;
7709
0
    cur = *ptr;
7710
0
    if (cur != '&')
7711
0
  return(NULL);
7712
7713
0
    ptr++;
7714
0
    name = xmlParseStringName(ctxt, &ptr);
7715
0
    if (name == NULL) {
7716
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7717
0
           "xmlParseStringEntityRef: no name\n");
7718
0
  *str = ptr;
7719
0
  return(NULL);
7720
0
    }
7721
0
    if (*ptr != ';') {
7722
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7723
0
        xmlFree(name);
7724
0
  *str = ptr;
7725
0
  return(NULL);
7726
0
    }
7727
0
    ptr++;
7728
7729
7730
    /*
7731
     * Predefined entities override any extra definition
7732
     */
7733
0
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7734
0
        ent = xmlGetPredefinedEntity(name);
7735
0
        if (ent != NULL) {
7736
0
            xmlFree(name);
7737
0
            *str = ptr;
7738
0
            return(ent);
7739
0
        }
7740
0
    }
7741
7742
    /*
7743
     * Ask first SAX for entity resolution, otherwise try the
7744
     * entities which may have stored in the parser context.
7745
     */
7746
0
    if (ctxt->sax != NULL) {
7747
0
  if (ctxt->sax->getEntity != NULL)
7748
0
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7749
0
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7750
0
      ent = xmlGetPredefinedEntity(name);
7751
0
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7752
0
      ent = xmlSAX2GetEntity(ctxt, name);
7753
0
  }
7754
0
    }
7755
0
    if (ctxt->instate == XML_PARSER_EOF) {
7756
0
  xmlFree(name);
7757
0
  return(NULL);
7758
0
    }
7759
7760
    /*
7761
     * [ WFC: Entity Declared ]
7762
     * In a document without any DTD, a document with only an
7763
     * internal DTD subset which contains no parameter entity
7764
     * references, or a document with "standalone='yes'", the
7765
     * Name given in the entity reference must match that in an
7766
     * entity declaration, except that well-formed documents
7767
     * need not declare any of the following entities: amp, lt,
7768
     * gt, apos, quot.
7769
     * The declaration of a parameter entity must precede any
7770
     * reference to it.
7771
     * Similarly, the declaration of a general entity must
7772
     * precede any reference to it which appears in a default
7773
     * value in an attribute-list declaration. Note that if
7774
     * entities are declared in the external subset or in
7775
     * external parameter entities, a non-validating processor
7776
     * is not obligated to read and process their declarations;
7777
     * for such documents, the rule that an entity must be
7778
     * declared is a well-formedness constraint only if
7779
     * standalone='yes'.
7780
     */
7781
0
    if (ent == NULL) {
7782
0
  if ((ctxt->standalone == 1) ||
7783
0
      ((ctxt->hasExternalSubset == 0) &&
7784
0
       (ctxt->hasPErefs == 0))) {
7785
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7786
0
         "Entity '%s' not defined\n", name);
7787
0
  } else {
7788
0
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7789
0
        "Entity '%s' not defined\n",
7790
0
        name);
7791
0
  }
7792
  /* TODO ? check regressions ctxt->valid = 0; */
7793
0
    }
7794
7795
    /*
7796
     * [ WFC: Parsed Entity ]
7797
     * An entity reference must not contain the name of an
7798
     * unparsed entity
7799
     */
7800
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7801
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7802
0
     "Entity reference to unparsed entity %s\n", name);
7803
0
    }
7804
7805
    /*
7806
     * [ WFC: No External Entity References ]
7807
     * Attribute values cannot contain direct or indirect
7808
     * entity references to external entities.
7809
     */
7810
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7811
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7812
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7813
0
   "Attribute references external entity '%s'\n", name);
7814
0
    }
7815
    /*
7816
     * [ WFC: No < in Attribute Values ]
7817
     * The replacement text of any entity referred to directly or
7818
     * indirectly in an attribute value (other than "&lt;") must
7819
     * not contain a <.
7820
     */
7821
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7822
0
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7823
0
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7824
0
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7825
0
                ent->flags |= XML_ENT_CONTAINS_LT;
7826
0
            ent->flags |= XML_ENT_CHECKED_LT;
7827
0
        }
7828
0
        if (ent->flags & XML_ENT_CONTAINS_LT)
7829
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7830
0
                    "'<' in entity '%s' is not allowed in attributes "
7831
0
                    "values\n", name);
7832
0
    }
7833
7834
    /*
7835
     * Internal check, no parameter entities here ...
7836
     */
7837
0
    else {
7838
0
  switch (ent->etype) {
7839
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7840
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7841
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7842
0
       "Attempt to reference the parameter entity '%s'\n",
7843
0
          name);
7844
0
      break;
7845
0
      default:
7846
0
      break;
7847
0
  }
7848
0
    }
7849
7850
    /*
7851
     * [ WFC: No Recursion ]
7852
     * A parsed entity must not contain a recursive reference
7853
     * to itself, either directly or indirectly.
7854
     * Done somewhere else
7855
     */
7856
7857
0
    xmlFree(name);
7858
0
    *str = ptr;
7859
0
    return(ent);
7860
0
}
7861
7862
/**
7863
 * xmlParsePEReference:
7864
 * @ctxt:  an XML parser context
7865
 *
7866
 * DEPRECATED: Internal function, don't use.
7867
 *
7868
 * Parse a parameter entity reference. Always consumes '%'.
7869
 *
7870
 * The entity content is handled directly by pushing its content as
7871
 * a new input stream.
7872
 *
7873
 * [69] PEReference ::= '%' Name ';'
7874
 *
7875
 * [ WFC: No Recursion ]
7876
 * A parsed entity must not contain a recursive
7877
 * reference to itself, either directly or indirectly.
7878
 *
7879
 * [ WFC: Entity Declared ]
7880
 * In a document without any DTD, a document with only an internal DTD
7881
 * subset which contains no parameter entity references, or a document
7882
 * with "standalone='yes'", ...  ... The declaration of a parameter
7883
 * entity must precede any reference to it...
7884
 *
7885
 * [ VC: Entity Declared ]
7886
 * In a document with an external subset or external parameter entities
7887
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7888
 * must precede any reference to it...
7889
 *
7890
 * [ WFC: In DTD ]
7891
 * Parameter-entity references may only appear in the DTD.
7892
 * NOTE: misleading but this is handled.
7893
 */
7894
void
7895
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7896
11.8k
{
7897
11.8k
    const xmlChar *name;
7898
11.8k
    xmlEntityPtr entity = NULL;
7899
11.8k
    xmlParserInputPtr input;
7900
7901
11.8k
    if (RAW != '%')
7902
0
        return;
7903
11.8k
    NEXT;
7904
11.8k
    name = xmlParseName(ctxt);
7905
11.8k
    if (name == NULL) {
7906
9.62k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7907
9.62k
  return;
7908
9.62k
    }
7909
2.27k
    if (xmlParserDebugEntities)
7910
0
  xmlGenericError(xmlGenericErrorContext,
7911
0
    "PEReference: %s\n", name);
7912
2.27k
    if (RAW != ';') {
7913
1.11k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7914
1.11k
        return;
7915
1.11k
    }
7916
7917
1.16k
    NEXT;
7918
7919
    /*
7920
     * Request the entity from SAX
7921
     */
7922
1.16k
    if ((ctxt->sax != NULL) &&
7923
1.16k
  (ctxt->sax->getParameterEntity != NULL))
7924
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7925
1.16k
    if (ctxt->instate == XML_PARSER_EOF)
7926
0
  return;
7927
1.16k
    if (entity == NULL) {
7928
  /*
7929
   * [ WFC: Entity Declared ]
7930
   * In a document without any DTD, a document with only an
7931
   * internal DTD subset which contains no parameter entity
7932
   * references, or a document with "standalone='yes'", ...
7933
   * ... The declaration of a parameter entity must precede
7934
   * any reference to it...
7935
   */
7936
1.16k
  if ((ctxt->standalone == 1) ||
7937
1.16k
      ((ctxt->hasExternalSubset == 0) &&
7938
1.14k
       (ctxt->hasPErefs == 0))) {
7939
49
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7940
49
            "PEReference: %%%s; not found\n",
7941
49
            name);
7942
1.11k
  } else {
7943
      /*
7944
       * [ VC: Entity Declared ]
7945
       * In a document with an external subset or external
7946
       * parameter entities with "standalone='no'", ...
7947
       * ... The declaration of a parameter entity must
7948
       * precede any reference to it...
7949
       */
7950
1.11k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7951
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7952
0
                                 "PEReference: %%%s; not found\n",
7953
0
                                 name, NULL);
7954
0
            } else
7955
1.11k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7956
1.11k
                              "PEReference: %%%s; not found\n",
7957
1.11k
                              name, NULL);
7958
1.11k
            ctxt->valid = 0;
7959
1.11k
  }
7960
1.16k
    } else {
7961
  /*
7962
   * Internal checking in case the entity quest barfed
7963
   */
7964
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7965
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7966
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7967
0
      "Internal: %%%s; is not a parameter entity\n",
7968
0
        name, NULL);
7969
0
  } else {
7970
0
            xmlChar start[4];
7971
0
            xmlCharEncoding enc;
7972
0
            unsigned long parentConsumed;
7973
0
            xmlEntityPtr oldEnt;
7974
7975
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7976
0
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7977
0
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7978
0
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7979
0
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7980
0
    (ctxt->replaceEntities == 0) &&
7981
0
    (ctxt->validate == 0))
7982
0
    return;
7983
7984
0
            if (entity->flags & XML_ENT_EXPANDING) {
7985
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7986
0
                xmlHaltParser(ctxt);
7987
0
                return;
7988
0
            }
7989
7990
            /* Must be computed from old input before pushing new input. */
7991
0
            parentConsumed = ctxt->input->parentConsumed;
7992
0
            oldEnt = ctxt->input->entity;
7993
0
            if ((oldEnt == NULL) ||
7994
0
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7995
0
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
7996
0
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
7997
0
                xmlSaturatedAddSizeT(&parentConsumed,
7998
0
                                     ctxt->input->cur - ctxt->input->base);
7999
0
            }
8000
8001
0
      input = xmlNewEntityInputStream(ctxt, entity);
8002
0
      if (xmlPushInput(ctxt, input) < 0) {
8003
0
                xmlFreeInputStream(input);
8004
0
    return;
8005
0
            }
8006
8007
0
            entity->flags |= XML_ENT_EXPANDING;
8008
8009
0
            input->parentConsumed = parentConsumed;
8010
8011
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8012
                /*
8013
                 * Get the 4 first bytes and decode the charset
8014
                 * if enc != XML_CHAR_ENCODING_NONE
8015
                 * plug some encoding conversion routines.
8016
                 * Note that, since we may have some non-UTF8
8017
                 * encoding (like UTF16, bug 135229), the 'length'
8018
                 * is not known, but we can calculate based upon
8019
                 * the amount of data in the buffer.
8020
                 */
8021
0
                GROW
8022
0
                if (ctxt->instate == XML_PARSER_EOF)
8023
0
                    return;
8024
0
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8025
0
                    start[0] = RAW;
8026
0
                    start[1] = NXT(1);
8027
0
                    start[2] = NXT(2);
8028
0
                    start[3] = NXT(3);
8029
0
                    enc = xmlDetectCharEncoding(start, 4);
8030
0
                    if (enc != XML_CHAR_ENCODING_NONE) {
8031
0
                        xmlSwitchEncoding(ctxt, enc);
8032
0
                    }
8033
0
                }
8034
8035
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8036
0
                    (IS_BLANK_CH(NXT(5)))) {
8037
0
                    xmlParseTextDecl(ctxt);
8038
0
                }
8039
0
            }
8040
0
  }
8041
0
    }
8042
1.16k
    ctxt->hasPErefs = 1;
8043
1.16k
}
8044
8045
/**
8046
 * xmlLoadEntityContent:
8047
 * @ctxt:  an XML parser context
8048
 * @entity: an unloaded system entity
8049
 *
8050
 * Load the original content of the given system entity from the
8051
 * ExternalID/SystemID given. This is to be used for Included in Literal
8052
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8053
 *
8054
 * Returns 0 in case of success and -1 in case of failure
8055
 */
8056
static int
8057
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8058
0
    xmlParserInputPtr input;
8059
0
    xmlBufferPtr buf;
8060
0
    int l, c;
8061
8062
0
    if ((ctxt == NULL) || (entity == NULL) ||
8063
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8064
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8065
0
  (entity->content != NULL)) {
8066
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8067
0
              "xmlLoadEntityContent parameter error");
8068
0
        return(-1);
8069
0
    }
8070
8071
0
    if (xmlParserDebugEntities)
8072
0
  xmlGenericError(xmlGenericErrorContext,
8073
0
    "Reading %s entity content input\n", entity->name);
8074
8075
0
    buf = xmlBufferCreate();
8076
0
    if (buf == NULL) {
8077
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8078
0
              "xmlLoadEntityContent parameter error");
8079
0
        return(-1);
8080
0
    }
8081
0
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8082
8083
0
    input = xmlNewEntityInputStream(ctxt, entity);
8084
0
    if (input == NULL) {
8085
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8086
0
              "xmlLoadEntityContent input error");
8087
0
  xmlBufferFree(buf);
8088
0
        return(-1);
8089
0
    }
8090
8091
    /*
8092
     * Push the entity as the current input, read char by char
8093
     * saving to the buffer until the end of the entity or an error
8094
     */
8095
0
    if (xmlPushInput(ctxt, input) < 0) {
8096
0
        xmlBufferFree(buf);
8097
0
  xmlFreeInputStream(input);
8098
0
  return(-1);
8099
0
    }
8100
8101
0
    GROW;
8102
0
    c = CUR_CHAR(l);
8103
0
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8104
0
           (IS_CHAR(c))) {
8105
0
        xmlBufferAdd(buf, ctxt->input->cur, l);
8106
0
  NEXTL(l);
8107
0
  c = CUR_CHAR(l);
8108
0
    }
8109
0
    if (ctxt->instate == XML_PARSER_EOF) {
8110
0
  xmlBufferFree(buf);
8111
0
  return(-1);
8112
0
    }
8113
8114
0
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8115
0
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8116
0
        xmlPopInput(ctxt);
8117
0
    } else if (!IS_CHAR(c)) {
8118
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8119
0
                          "xmlLoadEntityContent: invalid char value %d\n",
8120
0
                    c);
8121
0
  xmlBufferFree(buf);
8122
0
  return(-1);
8123
0
    }
8124
0
    entity->content = buf->content;
8125
0
    entity->length = buf->use;
8126
0
    buf->content = NULL;
8127
0
    xmlBufferFree(buf);
8128
8129
0
    return(0);
8130
0
}
8131
8132
/**
8133
 * xmlParseStringPEReference:
8134
 * @ctxt:  an XML parser context
8135
 * @str:  a pointer to an index in the string
8136
 *
8137
 * parse PEReference declarations
8138
 *
8139
 * [69] PEReference ::= '%' Name ';'
8140
 *
8141
 * [ WFC: No Recursion ]
8142
 * A parsed entity must not contain a recursive
8143
 * reference to itself, either directly or indirectly.
8144
 *
8145
 * [ WFC: Entity Declared ]
8146
 * In a document without any DTD, a document with only an internal DTD
8147
 * subset which contains no parameter entity references, or a document
8148
 * with "standalone='yes'", ...  ... The declaration of a parameter
8149
 * entity must precede any reference to it...
8150
 *
8151
 * [ VC: Entity Declared ]
8152
 * In a document with an external subset or external parameter entities
8153
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8154
 * must precede any reference to it...
8155
 *
8156
 * [ WFC: In DTD ]
8157
 * Parameter-entity references may only appear in the DTD.
8158
 * NOTE: misleading but this is handled.
8159
 *
8160
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8161
 *         str is updated to the current value of the index
8162
 */
8163
static xmlEntityPtr
8164
0
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8165
0
    const xmlChar *ptr;
8166
0
    xmlChar cur;
8167
0
    xmlChar *name;
8168
0
    xmlEntityPtr entity = NULL;
8169
8170
0
    if ((str == NULL) || (*str == NULL)) return(NULL);
8171
0
    ptr = *str;
8172
0
    cur = *ptr;
8173
0
    if (cur != '%')
8174
0
        return(NULL);
8175
0
    ptr++;
8176
0
    name = xmlParseStringName(ctxt, &ptr);
8177
0
    if (name == NULL) {
8178
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8179
0
           "xmlParseStringPEReference: no name\n");
8180
0
  *str = ptr;
8181
0
  return(NULL);
8182
0
    }
8183
0
    cur = *ptr;
8184
0
    if (cur != ';') {
8185
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8186
0
  xmlFree(name);
8187
0
  *str = ptr;
8188
0
  return(NULL);
8189
0
    }
8190
0
    ptr++;
8191
8192
    /*
8193
     * Request the entity from SAX
8194
     */
8195
0
    if ((ctxt->sax != NULL) &&
8196
0
  (ctxt->sax->getParameterEntity != NULL))
8197
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8198
0
    if (ctxt->instate == XML_PARSER_EOF) {
8199
0
  xmlFree(name);
8200
0
  *str = ptr;
8201
0
  return(NULL);
8202
0
    }
8203
0
    if (entity == NULL) {
8204
  /*
8205
   * [ WFC: Entity Declared ]
8206
   * In a document without any DTD, a document with only an
8207
   * internal DTD subset which contains no parameter entity
8208
   * references, or a document with "standalone='yes'", ...
8209
   * ... The declaration of a parameter entity must precede
8210
   * any reference to it...
8211
   */
8212
0
  if ((ctxt->standalone == 1) ||
8213
0
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8214
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8215
0
     "PEReference: %%%s; not found\n", name);
8216
0
  } else {
8217
      /*
8218
       * [ VC: Entity Declared ]
8219
       * In a document with an external subset or external
8220
       * parameter entities with "standalone='no'", ...
8221
       * ... The declaration of a parameter entity must
8222
       * precede any reference to it...
8223
       */
8224
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8225
0
        "PEReference: %%%s; not found\n",
8226
0
        name, NULL);
8227
0
      ctxt->valid = 0;
8228
0
  }
8229
0
    } else {
8230
  /*
8231
   * Internal checking in case the entity quest barfed
8232
   */
8233
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8234
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8235
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8236
0
        "%%%s; is not a parameter entity\n",
8237
0
        name, NULL);
8238
0
  }
8239
0
    }
8240
0
    ctxt->hasPErefs = 1;
8241
0
    xmlFree(name);
8242
0
    *str = ptr;
8243
0
    return(entity);
8244
0
}
8245
8246
/**
8247
 * xmlParseDocTypeDecl:
8248
 * @ctxt:  an XML parser context
8249
 *
8250
 * DEPRECATED: Internal function, don't use.
8251
 *
8252
 * parse a DOCTYPE declaration
8253
 *
8254
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8255
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8256
 *
8257
 * [ VC: Root Element Type ]
8258
 * The Name in the document type declaration must match the element
8259
 * type of the root element.
8260
 */
8261
8262
void
8263
3.12k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8264
3.12k
    const xmlChar *name = NULL;
8265
3.12k
    xmlChar *ExternalID = NULL;
8266
3.12k
    xmlChar *URI = NULL;
8267
8268
    /*
8269
     * We know that '<!DOCTYPE' has been detected.
8270
     */
8271
3.12k
    SKIP(9);
8272
8273
3.12k
    SKIP_BLANKS;
8274
8275
    /*
8276
     * Parse the DOCTYPE name.
8277
     */
8278
3.12k
    name = xmlParseName(ctxt);
8279
3.12k
    if (name == NULL) {
8280
8
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8281
8
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8282
8
    }
8283
3.12k
    ctxt->intSubName = name;
8284
8285
3.12k
    SKIP_BLANKS;
8286
8287
    /*
8288
     * Check for SystemID and ExternalID
8289
     */
8290
3.12k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8291
8292
3.12k
    if ((URI != NULL) || (ExternalID != NULL)) {
8293
89
        ctxt->hasExternalSubset = 1;
8294
89
    }
8295
3.12k
    ctxt->extSubURI = URI;
8296
3.12k
    ctxt->extSubSystem = ExternalID;
8297
8298
3.12k
    SKIP_BLANKS;
8299
8300
    /*
8301
     * Create and update the internal subset.
8302
     */
8303
3.12k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8304
3.12k
  (!ctxt->disableSAX))
8305
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8306
3.12k
    if (ctxt->instate == XML_PARSER_EOF)
8307
0
  return;
8308
8309
    /*
8310
     * Is there any internal subset declarations ?
8311
     * they are handled separately in xmlParseInternalSubset()
8312
     */
8313
3.12k
    if (RAW == '[')
8314
2.97k
  return;
8315
8316
    /*
8317
     * We should be at the end of the DOCTYPE declaration.
8318
     */
8319
143
    if (RAW != '>') {
8320
117
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8321
117
    }
8322
143
    NEXT;
8323
143
}
8324
8325
/**
8326
 * xmlParseInternalSubset:
8327
 * @ctxt:  an XML parser context
8328
 *
8329
 * parse the internal subset declaration
8330
 *
8331
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8332
 */
8333
8334
static void
8335
2.98k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8336
    /*
8337
     * Is there any DTD definition ?
8338
     */
8339
2.98k
    if (RAW == '[') {
8340
2.98k
        int baseInputNr = ctxt->inputNr;
8341
2.98k
        ctxt->instate = XML_PARSER_DTD;
8342
2.98k
        NEXT;
8343
  /*
8344
   * Parse the succession of Markup declarations and
8345
   * PEReferences.
8346
   * Subsequence (markupdecl | PEReference | S)*
8347
   */
8348
2.98k
  SKIP_BLANKS;
8349
42.8k
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8350
42.8k
               (ctxt->instate != XML_PARSER_EOF)) {
8351
8352
            /*
8353
             * Conditional sections are allowed from external entities included
8354
             * by PE References in the internal subset.
8355
             */
8356
41.4k
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8357
41.4k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8358
0
                xmlParseConditionalSections(ctxt);
8359
41.4k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8360
27.9k
          xmlParseMarkupDecl(ctxt);
8361
27.9k
            } else if (RAW == '%') {
8362
11.8k
          xmlParsePEReference(ctxt);
8363
11.8k
            } else {
8364
1.61k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8365
1.61k
                        "xmlParseInternalSubset: error detected in"
8366
1.61k
                        " Markup declaration\n");
8367
1.61k
                xmlHaltParser(ctxt);
8368
1.61k
                return;
8369
1.61k
            }
8370
39.8k
      SKIP_BLANKS;
8371
39.8k
            SHRINK;
8372
39.8k
            GROW;
8373
39.8k
  }
8374
1.37k
  if (RAW == ']') {
8375
645
      NEXT;
8376
645
      SKIP_BLANKS;
8377
645
  }
8378
1.37k
    }
8379
8380
    /*
8381
     * We should be at the end of the DOCTYPE declaration.
8382
     */
8383
1.37k
    if (RAW != '>') {
8384
786
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8385
786
  return;
8386
786
    }
8387
587
    NEXT;
8388
587
}
8389
8390
#ifdef LIBXML_SAX1_ENABLED
8391
/**
8392
 * xmlParseAttribute:
8393
 * @ctxt:  an XML parser context
8394
 * @value:  a xmlChar ** used to store the value of the attribute
8395
 *
8396
 * DEPRECATED: Internal function, don't use.
8397
 *
8398
 * parse an attribute
8399
 *
8400
 * [41] Attribute ::= Name Eq AttValue
8401
 *
8402
 * [ WFC: No External Entity References ]
8403
 * Attribute values cannot contain direct or indirect entity references
8404
 * to external entities.
8405
 *
8406
 * [ WFC: No < in Attribute Values ]
8407
 * The replacement text of any entity referred to directly or indirectly in
8408
 * an attribute value (other than "&lt;") must not contain a <.
8409
 *
8410
 * [ VC: Attribute Value Type ]
8411
 * The attribute must have been declared; the value must be of the type
8412
 * declared for it.
8413
 *
8414
 * [25] Eq ::= S? '=' S?
8415
 *
8416
 * With namespace:
8417
 *
8418
 * [NS 11] Attribute ::= QName Eq AttValue
8419
 *
8420
 * Also the case QName == xmlns:??? is handled independently as a namespace
8421
 * definition.
8422
 *
8423
 * Returns the attribute name, and the value in *value.
8424
 */
8425
8426
const xmlChar *
8427
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8428
0
    const xmlChar *name;
8429
0
    xmlChar *val;
8430
8431
0
    *value = NULL;
8432
0
    GROW;
8433
0
    name = xmlParseName(ctxt);
8434
0
    if (name == NULL) {
8435
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8436
0
                 "error parsing attribute name\n");
8437
0
        return(NULL);
8438
0
    }
8439
8440
    /*
8441
     * read the value
8442
     */
8443
0
    SKIP_BLANKS;
8444
0
    if (RAW == '=') {
8445
0
        NEXT;
8446
0
  SKIP_BLANKS;
8447
0
  val = xmlParseAttValue(ctxt);
8448
0
  ctxt->instate = XML_PARSER_CONTENT;
8449
0
    } else {
8450
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8451
0
         "Specification mandates value for attribute %s\n", name);
8452
0
  return(name);
8453
0
    }
8454
8455
    /*
8456
     * Check that xml:lang conforms to the specification
8457
     * No more registered as an error, just generate a warning now
8458
     * since this was deprecated in XML second edition
8459
     */
8460
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8461
0
  if (!xmlCheckLanguageID(val)) {
8462
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8463
0
              "Malformed value for xml:lang : %s\n",
8464
0
        val, NULL);
8465
0
  }
8466
0
    }
8467
8468
    /*
8469
     * Check that xml:space conforms to the specification
8470
     */
8471
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8472
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8473
0
      *(ctxt->space) = 0;
8474
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8475
0
      *(ctxt->space) = 1;
8476
0
  else {
8477
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8478
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8479
0
                                 val, NULL);
8480
0
  }
8481
0
    }
8482
8483
0
    *value = val;
8484
0
    return(name);
8485
0
}
8486
8487
/**
8488
 * xmlParseStartTag:
8489
 * @ctxt:  an XML parser context
8490
 *
8491
 * DEPRECATED: Internal function, don't use.
8492
 *
8493
 * Parse a start tag. Always consumes '<'.
8494
 *
8495
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8496
 *
8497
 * [ WFC: Unique Att Spec ]
8498
 * No attribute name may appear more than once in the same start-tag or
8499
 * empty-element tag.
8500
 *
8501
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8502
 *
8503
 * [ WFC: Unique Att Spec ]
8504
 * No attribute name may appear more than once in the same start-tag or
8505
 * empty-element tag.
8506
 *
8507
 * With namespace:
8508
 *
8509
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8510
 *
8511
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8512
 *
8513
 * Returns the element name parsed
8514
 */
8515
8516
const xmlChar *
8517
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8518
0
    const xmlChar *name;
8519
0
    const xmlChar *attname;
8520
0
    xmlChar *attvalue;
8521
0
    const xmlChar **atts = ctxt->atts;
8522
0
    int nbatts = 0;
8523
0
    int maxatts = ctxt->maxatts;
8524
0
    int i;
8525
8526
0
    if (RAW != '<') return(NULL);
8527
0
    NEXT1;
8528
8529
0
    name = xmlParseName(ctxt);
8530
0
    if (name == NULL) {
8531
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8532
0
       "xmlParseStartTag: invalid element name\n");
8533
0
        return(NULL);
8534
0
    }
8535
8536
    /*
8537
     * Now parse the attributes, it ends up with the ending
8538
     *
8539
     * (S Attribute)* S?
8540
     */
8541
0
    SKIP_BLANKS;
8542
0
    GROW;
8543
8544
0
    while (((RAW != '>') &&
8545
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8546
0
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8547
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8548
0
        if (attname == NULL) {
8549
0
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8550
0
         "xmlParseStartTag: problem parsing attributes\n");
8551
0
      break;
8552
0
  }
8553
0
        if (attvalue != NULL) {
8554
      /*
8555
       * [ WFC: Unique Att Spec ]
8556
       * No attribute name may appear more than once in the same
8557
       * start-tag or empty-element tag.
8558
       */
8559
0
      for (i = 0; i < nbatts;i += 2) {
8560
0
          if (xmlStrEqual(atts[i], attname)) {
8561
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8562
0
        xmlFree(attvalue);
8563
0
        goto failed;
8564
0
    }
8565
0
      }
8566
      /*
8567
       * Add the pair to atts
8568
       */
8569
0
      if (atts == NULL) {
8570
0
          maxatts = 22; /* allow for 10 attrs by default */
8571
0
          atts = (const xmlChar **)
8572
0
           xmlMalloc(maxatts * sizeof(xmlChar *));
8573
0
    if (atts == NULL) {
8574
0
        xmlErrMemory(ctxt, NULL);
8575
0
        if (attvalue != NULL)
8576
0
      xmlFree(attvalue);
8577
0
        goto failed;
8578
0
    }
8579
0
    ctxt->atts = atts;
8580
0
    ctxt->maxatts = maxatts;
8581
0
      } else if (nbatts + 4 > maxatts) {
8582
0
          const xmlChar **n;
8583
8584
0
          maxatts *= 2;
8585
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8586
0
               maxatts * sizeof(const xmlChar *));
8587
0
    if (n == NULL) {
8588
0
        xmlErrMemory(ctxt, NULL);
8589
0
        if (attvalue != NULL)
8590
0
      xmlFree(attvalue);
8591
0
        goto failed;
8592
0
    }
8593
0
    atts = n;
8594
0
    ctxt->atts = atts;
8595
0
    ctxt->maxatts = maxatts;
8596
0
      }
8597
0
      atts[nbatts++] = attname;
8598
0
      atts[nbatts++] = attvalue;
8599
0
      atts[nbatts] = NULL;
8600
0
      atts[nbatts + 1] = NULL;
8601
0
  } else {
8602
0
      if (attvalue != NULL)
8603
0
    xmlFree(attvalue);
8604
0
  }
8605
8606
0
failed:
8607
8608
0
  GROW
8609
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8610
0
      break;
8611
0
  if (SKIP_BLANKS == 0) {
8612
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8613
0
         "attributes construct error\n");
8614
0
  }
8615
0
  SHRINK;
8616
0
        GROW;
8617
0
    }
8618
8619
    /*
8620
     * SAX: Start of Element !
8621
     */
8622
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8623
0
  (!ctxt->disableSAX)) {
8624
0
  if (nbatts > 0)
8625
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8626
0
  else
8627
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8628
0
    }
8629
8630
0
    if (atts != NULL) {
8631
        /* Free only the content strings */
8632
0
        for (i = 1;i < nbatts;i+=2)
8633
0
      if (atts[i] != NULL)
8634
0
         xmlFree((xmlChar *) atts[i]);
8635
0
    }
8636
0
    return(name);
8637
0
}
8638
8639
/**
8640
 * xmlParseEndTag1:
8641
 * @ctxt:  an XML parser context
8642
 * @line:  line of the start tag
8643
 * @nsNr:  number of namespaces on the start tag
8644
 *
8645
 * Parse an end tag. Always consumes '</'.
8646
 *
8647
 * [42] ETag ::= '</' Name S? '>'
8648
 *
8649
 * With namespace
8650
 *
8651
 * [NS 9] ETag ::= '</' QName S? '>'
8652
 */
8653
8654
static void
8655
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8656
0
    const xmlChar *name;
8657
8658
0
    GROW;
8659
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8660
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8661
0
           "xmlParseEndTag: '</' not found\n");
8662
0
  return;
8663
0
    }
8664
0
    SKIP(2);
8665
8666
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8667
8668
    /*
8669
     * We should definitely be at the ending "S? '>'" part
8670
     */
8671
0
    GROW;
8672
0
    SKIP_BLANKS;
8673
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8674
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8675
0
    } else
8676
0
  NEXT1;
8677
8678
    /*
8679
     * [ WFC: Element Type Match ]
8680
     * The Name in an element's end-tag must match the element type in the
8681
     * start-tag.
8682
     *
8683
     */
8684
0
    if (name != (xmlChar*)1) {
8685
0
        if (name == NULL) name = BAD_CAST "unparsable";
8686
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8687
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8688
0
                    ctxt->name, line, name);
8689
0
    }
8690
8691
    /*
8692
     * SAX: End of Tag
8693
     */
8694
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8695
0
  (!ctxt->disableSAX))
8696
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8697
8698
0
    namePop(ctxt);
8699
0
    spacePop(ctxt);
8700
0
    return;
8701
0
}
8702
8703
/**
8704
 * xmlParseEndTag:
8705
 * @ctxt:  an XML parser context
8706
 *
8707
 * DEPRECATED: Internal function, don't use.
8708
 *
8709
 * parse an end of tag
8710
 *
8711
 * [42] ETag ::= '</' Name S? '>'
8712
 *
8713
 * With namespace
8714
 *
8715
 * [NS 9] ETag ::= '</' QName S? '>'
8716
 */
8717
8718
void
8719
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8720
0
    xmlParseEndTag1(ctxt, 0);
8721
0
}
8722
#endif /* LIBXML_SAX1_ENABLED */
8723
8724
/************************************************************************
8725
 *                  *
8726
 *          SAX 2 specific operations       *
8727
 *                  *
8728
 ************************************************************************/
8729
8730
/*
8731
 * xmlGetNamespace:
8732
 * @ctxt:  an XML parser context
8733
 * @prefix:  the prefix to lookup
8734
 *
8735
 * Lookup the namespace name for the @prefix (which ca be NULL)
8736
 * The prefix must come from the @ctxt->dict dictionary
8737
 *
8738
 * Returns the namespace name or NULL if not bound
8739
 */
8740
static const xmlChar *
8741
1.84M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8742
1.84M
    int i;
8743
8744
1.84M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8745
2.13M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8746
407k
        if (ctxt->nsTab[i] == prefix) {
8747
110k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8748
968
          return(NULL);
8749
109k
      return(ctxt->nsTab[i + 1]);
8750
110k
  }
8751
1.73M
    return(NULL);
8752
1.84M
}
8753
8754
/**
8755
 * xmlParseQName:
8756
 * @ctxt:  an XML parser context
8757
 * @prefix:  pointer to store the prefix part
8758
 *
8759
 * parse an XML Namespace QName
8760
 *
8761
 * [6]  QName  ::= (Prefix ':')? LocalPart
8762
 * [7]  Prefix  ::= NCName
8763
 * [8]  LocalPart  ::= NCName
8764
 *
8765
 * Returns the Name parsed or NULL
8766
 */
8767
8768
static const xmlChar *
8769
2.16M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8770
2.16M
    const xmlChar *l, *p;
8771
8772
2.16M
    GROW;
8773
2.16M
    if (ctxt->instate == XML_PARSER_EOF)
8774
0
        return(NULL);
8775
8776
2.16M
    l = xmlParseNCName(ctxt);
8777
2.16M
    if (l == NULL) {
8778
5.71k
        if (CUR == ':') {
8779
4.94k
      l = xmlParseName(ctxt);
8780
4.94k
      if (l != NULL) {
8781
4.94k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8782
4.94k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8783
4.94k
    *prefix = NULL;
8784
4.94k
    return(l);
8785
4.94k
      }
8786
4.94k
  }
8787
774
        return(NULL);
8788
5.71k
    }
8789
2.16M
    if (CUR == ':') {
8790
33.8k
        NEXT;
8791
33.8k
  p = l;
8792
33.8k
  l = xmlParseNCName(ctxt);
8793
33.8k
  if (l == NULL) {
8794
2.99k
      xmlChar *tmp;
8795
8796
2.99k
            if (ctxt->instate == XML_PARSER_EOF)
8797
0
                return(NULL);
8798
2.99k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8799
2.99k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8800
2.99k
      l = xmlParseNmtoken(ctxt);
8801
2.99k
      if (l == NULL) {
8802
1.54k
                if (ctxt->instate == XML_PARSER_EOF)
8803
0
                    return(NULL);
8804
1.54k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8805
1.54k
            } else {
8806
1.45k
    tmp = xmlBuildQName(l, p, NULL, 0);
8807
1.45k
    xmlFree((char *)l);
8808
1.45k
      }
8809
2.99k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8810
2.99k
      if (tmp != NULL) xmlFree(tmp);
8811
2.99k
      *prefix = NULL;
8812
2.99k
      return(p);
8813
2.99k
  }
8814
30.8k
  if (CUR == ':') {
8815
2.99k
      xmlChar *tmp;
8816
8817
2.99k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8818
2.99k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8819
2.99k
      NEXT;
8820
2.99k
      tmp = (xmlChar *) xmlParseName(ctxt);
8821
2.99k
      if (tmp != NULL) {
8822
2.47k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8823
2.47k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8824
2.47k
    if (tmp != NULL) xmlFree(tmp);
8825
2.47k
    *prefix = p;
8826
2.47k
    return(l);
8827
2.47k
      }
8828
525
            if (ctxt->instate == XML_PARSER_EOF)
8829
0
                return(NULL);
8830
525
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8831
525
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8832
525
      if (tmp != NULL) xmlFree(tmp);
8833
525
      *prefix = p;
8834
525
      return(l);
8835
525
  }
8836
27.8k
  *prefix = p;
8837
27.8k
    } else
8838
2.12M
        *prefix = NULL;
8839
2.15M
    return(l);
8840
2.16M
}
8841
8842
/**
8843
 * xmlParseQNameAndCompare:
8844
 * @ctxt:  an XML parser context
8845
 * @name:  the localname
8846
 * @prefix:  the prefix, if any.
8847
 *
8848
 * parse an XML name and compares for match
8849
 * (specialized for endtag parsing)
8850
 *
8851
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8852
 * and the name for mismatch
8853
 */
8854
8855
static const xmlChar *
8856
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8857
609
                        xmlChar const *prefix) {
8858
609
    const xmlChar *cmp;
8859
609
    const xmlChar *in;
8860
609
    const xmlChar *ret;
8861
609
    const xmlChar *prefix2;
8862
8863
609
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8864
8865
609
    GROW;
8866
609
    in = ctxt->input->cur;
8867
8868
609
    cmp = prefix;
8869
1.84k
    while (*in != 0 && *in == *cmp) {
8870
1.24k
  ++in;
8871
1.24k
  ++cmp;
8872
1.24k
    }
8873
609
    if ((*cmp == 0) && (*in == ':')) {
8874
585
        in++;
8875
585
  cmp = name;
8876
1.69k
  while (*in != 0 && *in == *cmp) {
8877
1.11k
      ++in;
8878
1.11k
      ++cmp;
8879
1.11k
  }
8880
585
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8881
      /* success */
8882
573
            ctxt->input->col += in - ctxt->input->cur;
8883
573
      ctxt->input->cur = in;
8884
573
      return((const xmlChar*) 1);
8885
573
  }
8886
585
    }
8887
    /*
8888
     * all strings coms from the dictionary, equality can be done directly
8889
     */
8890
36
    ret = xmlParseQName (ctxt, &prefix2);
8891
36
    if ((ret == name) && (prefix == prefix2))
8892
4
  return((const xmlChar*) 1);
8893
32
    return ret;
8894
36
}
8895
8896
/**
8897
 * xmlParseAttValueInternal:
8898
 * @ctxt:  an XML parser context
8899
 * @len:  attribute len result
8900
 * @alloc:  whether the attribute was reallocated as a new string
8901
 * @normalize:  if 1 then further non-CDATA normalization must be done
8902
 *
8903
 * parse a value for an attribute.
8904
 * NOTE: if no normalization is needed, the routine will return pointers
8905
 *       directly from the data buffer.
8906
 *
8907
 * 3.3.3 Attribute-Value Normalization:
8908
 * Before the value of an attribute is passed to the application or
8909
 * checked for validity, the XML processor must normalize it as follows:
8910
 * - a character reference is processed by appending the referenced
8911
 *   character to the attribute value
8912
 * - an entity reference is processed by recursively processing the
8913
 *   replacement text of the entity
8914
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8915
 *   appending #x20 to the normalized value, except that only a single
8916
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8917
 *   parsed entity or the literal entity value of an internal parsed entity
8918
 * - other characters are processed by appending them to the normalized value
8919
 * If the declared value is not CDATA, then the XML processor must further
8920
 * process the normalized attribute value by discarding any leading and
8921
 * trailing space (#x20) characters, and by replacing sequences of space
8922
 * (#x20) characters by a single space (#x20) character.
8923
 * All attributes for which no declaration has been read should be treated
8924
 * by a non-validating parser as if declared CDATA.
8925
 *
8926
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8927
 *     caller if it was copied, this can be detected by val[*len] == 0.
8928
 */
8929
8930
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8931
1.34k
    const xmlChar *oldbase = ctxt->input->base;\
8932
1.34k
    GROW;\
8933
1.34k
    if (ctxt->instate == XML_PARSER_EOF)\
8934
1.34k
        return(NULL);\
8935
1.34k
    if (oldbase != ctxt->input->base) {\
8936
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
8937
0
        start = start + delta;\
8938
0
        in = in + delta;\
8939
0
    }\
8940
1.34k
    end = ctxt->input->end;
8941
8942
static xmlChar *
8943
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8944
                         int normalize)
8945
347k
{
8946
347k
    xmlChar limit = 0;
8947
347k
    const xmlChar *in = NULL, *start, *end, *last;
8948
347k
    xmlChar *ret = NULL;
8949
347k
    int line, col;
8950
347k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8951
347k
                    XML_MAX_HUGE_LENGTH :
8952
347k
                    XML_MAX_TEXT_LENGTH;
8953
8954
347k
    GROW;
8955
347k
    in = (xmlChar *) CUR_PTR;
8956
347k
    line = ctxt->input->line;
8957
347k
    col = ctxt->input->col;
8958
347k
    if (*in != '"' && *in != '\'') {
8959
371
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8960
371
        return (NULL);
8961
371
    }
8962
347k
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8963
8964
    /*
8965
     * try to handle in this routine the most common case where no
8966
     * allocation of a new string is required and where content is
8967
     * pure ASCII.
8968
     */
8969
347k
    limit = *in++;
8970
347k
    col++;
8971
347k
    end = ctxt->input->end;
8972
347k
    start = in;
8973
347k
    if (in >= end) {
8974
91
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8975
91
    }
8976
347k
    if (normalize) {
8977
        /*
8978
   * Skip any leading spaces
8979
   */
8980
7.07k
  while ((in < end) && (*in != limit) &&
8981
7.07k
         ((*in == 0x20) || (*in == 0x9) ||
8982
6.56k
          (*in == 0xA) || (*in == 0xD))) {
8983
2.69k
      if (*in == 0xA) {
8984
620
          line++; col = 1;
8985
2.07k
      } else {
8986
2.07k
          col++;
8987
2.07k
      }
8988
2.69k
      in++;
8989
2.69k
      start = in;
8990
2.69k
      if (in >= end) {
8991
16
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8992
16
                if ((in - start) > maxLength) {
8993
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8994
0
                                   "AttValue length too long\n");
8995
0
                    return(NULL);
8996
0
                }
8997
16
      }
8998
2.69k
  }
8999
34.9k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9000
34.9k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9001
30.9k
      col++;
9002
30.9k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9003
30.6k
      if (in >= end) {
9004
20
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9005
20
                if ((in - start) > maxLength) {
9006
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9007
0
                                   "AttValue length too long\n");
9008
0
                    return(NULL);
9009
0
                }
9010
20
      }
9011
30.6k
  }
9012
4.38k
  last = in;
9013
  /*
9014
   * skip the trailing blanks
9015
   */
9016
5.27k
  while ((last[-1] == 0x20) && (last > start)) last--;
9017
7.45k
  while ((in < end) && (*in != limit) &&
9018
7.45k
         ((*in == 0x20) || (*in == 0x9) ||
9019
5.48k
          (*in == 0xA) || (*in == 0xD))) {
9020
3.06k
      if (*in == 0xA) {
9021
272
          line++, col = 1;
9022
2.79k
      } else {
9023
2.79k
          col++;
9024
2.79k
      }
9025
3.06k
      in++;
9026
3.06k
      if (in >= end) {
9027
27
    const xmlChar *oldbase = ctxt->input->base;
9028
27
    GROW;
9029
27
                if (ctxt->instate == XML_PARSER_EOF)
9030
0
                    return(NULL);
9031
27
    if (oldbase != ctxt->input->base) {
9032
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9033
0
        start = start + delta;
9034
0
        in = in + delta;
9035
0
        last = last + delta;
9036
0
    }
9037
27
    end = ctxt->input->end;
9038
27
                if ((in - start) > maxLength) {
9039
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9040
0
                                   "AttValue length too long\n");
9041
0
                    return(NULL);
9042
0
                }
9043
27
      }
9044
3.06k
  }
9045
4.38k
        if ((in - start) > maxLength) {
9046
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9047
0
                           "AttValue length too long\n");
9048
0
            return(NULL);
9049
0
        }
9050
4.38k
  if (*in != limit) goto need_complex;
9051
342k
    } else {
9052
3.09M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9053
3.09M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9054
2.75M
      in++;
9055
2.75M
      col++;
9056
2.75M
      if (in >= end) {
9057
1.21k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9058
1.21k
                if ((in - start) > maxLength) {
9059
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9060
0
                                   "AttValue length too long\n");
9061
0
                    return(NULL);
9062
0
                }
9063
1.21k
      }
9064
2.75M
  }
9065
342k
  last = in;
9066
342k
        if ((in - start) > maxLength) {
9067
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9068
0
                           "AttValue length too long\n");
9069
0
            return(NULL);
9070
0
        }
9071
342k
  if (*in != limit) goto need_complex;
9072
342k
    }
9073
326k
    in++;
9074
326k
    col++;
9075
326k
    if (len != NULL) {
9076
314k
        if (alloc) *alloc = 0;
9077
314k
        *len = last - start;
9078
314k
        ret = (xmlChar *) start;
9079
314k
    } else {
9080
12.6k
        if (alloc) *alloc = 1;
9081
12.6k
        ret = xmlStrndup(start, last - start);
9082
12.6k
    }
9083
326k
    CUR_PTR = in;
9084
326k
    ctxt->input->line = line;
9085
326k
    ctxt->input->col = col;
9086
326k
    return ret;
9087
20.4k
need_complex:
9088
20.4k
    if (alloc) *alloc = 1;
9089
20.4k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9090
347k
}
9091
9092
/**
9093
 * xmlParseAttribute2:
9094
 * @ctxt:  an XML parser context
9095
 * @pref:  the element prefix
9096
 * @elem:  the element name
9097
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9098
 * @value:  a xmlChar ** used to store the value of the attribute
9099
 * @len:  an int * to save the length of the attribute
9100
 * @alloc:  an int * to indicate if the attribute was allocated
9101
 *
9102
 * parse an attribute in the new SAX2 framework.
9103
 *
9104
 * Returns the attribute name, and the value in *value, .
9105
 */
9106
9107
static const xmlChar *
9108
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9109
                   const xmlChar * pref, const xmlChar * elem,
9110
                   const xmlChar ** prefix, xmlChar ** value,
9111
                   int *len, int *alloc)
9112
335k
{
9113
335k
    const xmlChar *name;
9114
335k
    xmlChar *val, *internal_val = NULL;
9115
335k
    int normalize = 0;
9116
9117
335k
    *value = NULL;
9118
335k
    GROW;
9119
335k
    name = xmlParseQName(ctxt, prefix);
9120
335k
    if (name == NULL) {
9121
550
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9122
550
                       "error parsing attribute name\n");
9123
550
        return (NULL);
9124
550
    }
9125
9126
    /*
9127
     * get the type if needed
9128
     */
9129
334k
    if (ctxt->attsSpecial != NULL) {
9130
9.33k
        int type;
9131
9132
9.33k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9133
9.33k
                                                 pref, elem, *prefix, name);
9134
9.33k
        if (type != 0)
9135
4.40k
            normalize = 1;
9136
9.33k
    }
9137
9138
    /*
9139
     * read the value
9140
     */
9141
334k
    SKIP_BLANKS;
9142
334k
    if (RAW == '=') {
9143
334k
        NEXT;
9144
334k
        SKIP_BLANKS;
9145
334k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9146
334k
        if (val == NULL)
9147
75
            return (NULL);
9148
334k
  if (normalize) {
9149
      /*
9150
       * Sometimes a second normalisation pass for spaces is needed
9151
       * but that only happens if charrefs or entities references
9152
       * have been used in the attribute value, i.e. the attribute
9153
       * value have been extracted in an allocated string already.
9154
       */
9155
4.38k
      if (*alloc) {
9156
2.48k
          const xmlChar *val2;
9157
9158
2.48k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9159
2.48k
    if ((val2 != NULL) && (val2 != val)) {
9160
135
        xmlFree(val);
9161
135
        val = (xmlChar *) val2;
9162
135
    }
9163
2.48k
      }
9164
4.38k
  }
9165
334k
        ctxt->instate = XML_PARSER_CONTENT;
9166
334k
    } else {
9167
506
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9168
506
                          "Specification mandates value for attribute %s\n",
9169
506
                          name);
9170
506
        return (name);
9171
506
    }
9172
9173
334k
    if (*prefix == ctxt->str_xml) {
9174
        /*
9175
         * Check that xml:lang conforms to the specification
9176
         * No more registered as an error, just generate a warning now
9177
         * since this was deprecated in XML second edition
9178
         */
9179
5.47k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9180
0
            internal_val = xmlStrndup(val, *len);
9181
0
            if (!xmlCheckLanguageID(internal_val)) {
9182
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9183
0
                              "Malformed value for xml:lang : %s\n",
9184
0
                              internal_val, NULL);
9185
0
            }
9186
0
        }
9187
9188
        /*
9189
         * Check that xml:space conforms to the specification
9190
         */
9191
5.47k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9192
4.82k
            internal_val = xmlStrndup(val, *len);
9193
4.82k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9194
187
                *(ctxt->space) = 0;
9195
4.64k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9196
4.55k
                *(ctxt->space) = 1;
9197
82
            else {
9198
82
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9199
82
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9200
82
                              internal_val, NULL);
9201
82
            }
9202
4.82k
        }
9203
5.47k
        if (internal_val) {
9204
4.82k
            xmlFree(internal_val);
9205
4.82k
        }
9206
5.47k
    }
9207
9208
334k
    *value = val;
9209
334k
    return (name);
9210
334k
}
9211
/**
9212
 * xmlParseStartTag2:
9213
 * @ctxt:  an XML parser context
9214
 *
9215
 * Parse a start tag. Always consumes '<'.
9216
 *
9217
 * This routine is called when running SAX2 parsing
9218
 *
9219
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9220
 *
9221
 * [ WFC: Unique Att Spec ]
9222
 * No attribute name may appear more than once in the same start-tag or
9223
 * empty-element tag.
9224
 *
9225
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9226
 *
9227
 * [ WFC: Unique Att Spec ]
9228
 * No attribute name may appear more than once in the same start-tag or
9229
 * empty-element tag.
9230
 *
9231
 * With namespace:
9232
 *
9233
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9234
 *
9235
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9236
 *
9237
 * Returns the element name parsed
9238
 */
9239
9240
static const xmlChar *
9241
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9242
1.83M
                  const xmlChar **URI, int *tlen) {
9243
1.83M
    const xmlChar *localname;
9244
1.83M
    const xmlChar *prefix;
9245
1.83M
    const xmlChar *attname;
9246
1.83M
    const xmlChar *aprefix;
9247
1.83M
    const xmlChar *nsname;
9248
1.83M
    xmlChar *attvalue;
9249
1.83M
    const xmlChar **atts = ctxt->atts;
9250
1.83M
    int maxatts = ctxt->maxatts;
9251
1.83M
    int nratts, nbatts, nbdef, inputid;
9252
1.83M
    int i, j, nbNs, attval;
9253
1.83M
    size_t cur;
9254
1.83M
    int nsNr = ctxt->nsNr;
9255
9256
1.83M
    if (RAW != '<') return(NULL);
9257
1.83M
    NEXT1;
9258
9259
1.83M
    cur = ctxt->input->cur - ctxt->input->base;
9260
1.83M
    inputid = ctxt->input->id;
9261
1.83M
    nbatts = 0;
9262
1.83M
    nratts = 0;
9263
1.83M
    nbdef = 0;
9264
1.83M
    nbNs = 0;
9265
1.83M
    attval = 0;
9266
    /* Forget any namespaces added during an earlier parse of this element. */
9267
1.83M
    ctxt->nsNr = nsNr;
9268
9269
1.83M
    localname = xmlParseQName(ctxt, &prefix);
9270
1.83M
    if (localname == NULL) {
9271
216
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9272
216
           "StartTag: invalid element name\n");
9273
216
        return(NULL);
9274
216
    }
9275
1.83M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9276
9277
    /*
9278
     * Now parse the attributes, it ends up with the ending
9279
     *
9280
     * (S Attribute)* S?
9281
     */
9282
1.83M
    SKIP_BLANKS;
9283
1.83M
    GROW;
9284
9285
2.03M
    while (((RAW != '>') &&
9286
2.03M
     ((RAW != '/') || (NXT(1) != '>')) &&
9287
2.03M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9288
335k
  int len = -1, alloc = 0;
9289
9290
335k
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9291
335k
                               &aprefix, &attvalue, &len, &alloc);
9292
335k
        if (attname == NULL) {
9293
625
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9294
625
           "xmlParseStartTag: problem parsing attributes\n");
9295
625
      break;
9296
625
  }
9297
334k
        if (attvalue == NULL)
9298
506
            goto next_attr;
9299
334k
  if (len < 0) len = xmlStrlen(attvalue);
9300
9301
334k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9302
32.5k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9303
32.5k
            xmlURIPtr uri;
9304
9305
32.5k
            if (URL == NULL) {
9306
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9307
0
                if ((attvalue != NULL) && (alloc != 0))
9308
0
                    xmlFree(attvalue);
9309
0
                localname = NULL;
9310
0
                goto done;
9311
0
            }
9312
32.5k
            if (*URL != 0) {
9313
32.2k
                uri = xmlParseURI((const char *) URL);
9314
32.2k
                if (uri == NULL) {
9315
17.4k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9316
17.4k
                             "xmlns: '%s' is not a valid URI\n",
9317
17.4k
                                       URL, NULL, NULL);
9318
17.4k
                } else {
9319
14.8k
                    if (uri->scheme == NULL) {
9320
6.13k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9321
6.13k
                                  "xmlns: URI %s is not absolute\n",
9322
6.13k
                                  URL, NULL, NULL);
9323
6.13k
                    }
9324
14.8k
                    xmlFreeURI(uri);
9325
14.8k
                }
9326
32.2k
                if (URL == ctxt->str_xml_ns) {
9327
194
                    if (attname != ctxt->str_xml) {
9328
194
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9329
194
                     "xml namespace URI cannot be the default namespace\n",
9330
194
                                 NULL, NULL, NULL);
9331
194
                    }
9332
194
                    goto next_attr;
9333
194
                }
9334
32.1k
                if ((len == 29) &&
9335
32.1k
                    (xmlStrEqual(URL,
9336
2.14k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9337
138
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9338
138
                         "reuse of the xmlns namespace name is forbidden\n",
9339
138
                             NULL, NULL, NULL);
9340
138
                    goto next_attr;
9341
138
                }
9342
32.1k
            }
9343
            /*
9344
             * check that it's not a defined namespace
9345
             */
9346
45.0k
            for (j = 1;j <= nbNs;j++)
9347
34.2k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9348
21.3k
                    break;
9349
32.2k
            if (j <= nbNs)
9350
21.3k
                xmlErrAttributeDup(ctxt, NULL, attname);
9351
10.8k
            else
9352
10.8k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9353
9354
301k
        } else if (aprefix == ctxt->str_xmlns) {
9355
19.3k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9356
19.3k
            xmlURIPtr uri;
9357
9358
19.3k
            if (attname == ctxt->str_xml) {
9359
445
                if (URL != ctxt->str_xml_ns) {
9360
274
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9361
274
                             "xml namespace prefix mapped to wrong URI\n",
9362
274
                             NULL, NULL, NULL);
9363
274
                }
9364
                /*
9365
                 * Do not keep a namespace definition node
9366
                 */
9367
445
                goto next_attr;
9368
445
            }
9369
18.9k
            if (URL == ctxt->str_xml_ns) {
9370
112
                if (attname != ctxt->str_xml) {
9371
112
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9372
112
                             "xml namespace URI mapped to wrong prefix\n",
9373
112
                             NULL, NULL, NULL);
9374
112
                }
9375
112
                goto next_attr;
9376
112
            }
9377
18.8k
            if (attname == ctxt->str_xmlns) {
9378
2.37k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9379
2.37k
                         "redefinition of the xmlns prefix is forbidden\n",
9380
2.37k
                         NULL, NULL, NULL);
9381
2.37k
                goto next_attr;
9382
2.37k
            }
9383
16.4k
            if ((len == 29) &&
9384
16.4k
                (xmlStrEqual(URL,
9385
905
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9386
244
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9387
244
                         "reuse of the xmlns namespace name is forbidden\n",
9388
244
                         NULL, NULL, NULL);
9389
244
                goto next_attr;
9390
244
            }
9391
16.1k
            if ((URL == NULL) || (URL[0] == 0)) {
9392
632
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9393
632
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9394
632
                              attname, NULL, NULL);
9395
632
                goto next_attr;
9396
15.5k
            } else {
9397
15.5k
                uri = xmlParseURI((const char *) URL);
9398
15.5k
                if (uri == NULL) {
9399
6.38k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9400
6.38k
                         "xmlns:%s: '%s' is not a valid URI\n",
9401
6.38k
                                       attname, URL, NULL);
9402
9.17k
                } else {
9403
9.17k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9404
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9405
0
                                  "xmlns:%s: URI %s is not absolute\n",
9406
0
                                  attname, URL, NULL);
9407
0
                    }
9408
9.17k
                    xmlFreeURI(uri);
9409
9.17k
                }
9410
15.5k
            }
9411
9412
            /*
9413
             * check that it's not a defined namespace
9414
             */
9415
31.2k
            for (j = 1;j <= nbNs;j++)
9416
25.9k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9417
10.1k
                    break;
9418
15.5k
            if (j <= nbNs)
9419
10.1k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9420
5.36k
            else
9421
5.36k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9422
9423
282k
        } else {
9424
            /*
9425
             * Add the pair to atts
9426
             */
9427
282k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9428
2.97k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9429
0
                    goto next_attr;
9430
0
                }
9431
2.97k
                maxatts = ctxt->maxatts;
9432
2.97k
                atts = ctxt->atts;
9433
2.97k
            }
9434
282k
            ctxt->attallocs[nratts++] = alloc;
9435
282k
            atts[nbatts++] = attname;
9436
282k
            atts[nbatts++] = aprefix;
9437
            /*
9438
             * The namespace URI field is used temporarily to point at the
9439
             * base of the current input buffer for non-alloced attributes.
9440
             * When the input buffer is reallocated, all the pointers become
9441
             * invalid, but they can be reconstructed later.
9442
             */
9443
282k
            if (alloc)
9444
7.10k
                atts[nbatts++] = NULL;
9445
274k
            else
9446
274k
                atts[nbatts++] = ctxt->input->base;
9447
282k
            atts[nbatts++] = attvalue;
9448
282k
            attvalue += len;
9449
282k
            atts[nbatts++] = attvalue;
9450
            /*
9451
             * tag if some deallocation is needed
9452
             */
9453
282k
            if (alloc != 0) attval = 1;
9454
282k
            attvalue = NULL; /* moved into atts */
9455
282k
        }
9456
9457
334k
next_attr:
9458
334k
        if ((attvalue != NULL) && (alloc != 0)) {
9459
12.6k
            xmlFree(attvalue);
9460
12.6k
            attvalue = NULL;
9461
12.6k
        }
9462
9463
334k
  GROW
9464
334k
        if (ctxt->instate == XML_PARSER_EOF)
9465
0
            break;
9466
334k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9467
122k
      break;
9468
211k
  if (SKIP_BLANKS == 0) {
9469
3.34k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9470
3.34k
         "attributes construct error\n");
9471
3.34k
      break;
9472
3.34k
  }
9473
208k
        GROW;
9474
208k
    }
9475
9476
1.83M
    if (ctxt->input->id != inputid) {
9477
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9478
0
                    "Unexpected change of input\n");
9479
0
        localname = NULL;
9480
0
        goto done;
9481
0
    }
9482
9483
    /* Reconstruct attribute value pointers. */
9484
2.11M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9485
282k
        if (atts[i+2] != NULL) {
9486
            /*
9487
             * Arithmetic on dangling pointers is technically undefined
9488
             * behavior, but well...
9489
             */
9490
274k
            const xmlChar *old = atts[i+2];
9491
274k
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9492
274k
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9493
274k
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9494
274k
        }
9495
282k
    }
9496
9497
    /*
9498
     * The attributes defaulting
9499
     */
9500
1.83M
    if (ctxt->attsDefault != NULL) {
9501
6.96k
        xmlDefAttrsPtr defaults;
9502
9503
6.96k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9504
6.96k
  if (defaults != NULL) {
9505
16.5k
      for (i = 0;i < defaults->nbAttrs;i++) {
9506
11.4k
          attname = defaults->values[5 * i];
9507
11.4k
    aprefix = defaults->values[5 * i + 1];
9508
9509
                /*
9510
     * special work for namespaces defaulted defs
9511
     */
9512
11.4k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9513
        /*
9514
         * check that it's not a defined namespace
9515
         */
9516
3.84k
        for (j = 1;j <= nbNs;j++)
9517
2.43k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9518
480
          break;
9519
1.88k
              if (j <= nbNs) continue;
9520
9521
1.40k
        nsname = xmlGetNamespace(ctxt, NULL);
9522
1.40k
        if (nsname != defaults->values[5 * i + 2]) {
9523
1.03k
      if (nsPush(ctxt, NULL,
9524
1.03k
                 defaults->values[5 * i + 2]) > 0)
9525
753
          nbNs++;
9526
1.03k
        }
9527
9.54k
    } else if (aprefix == ctxt->str_xmlns) {
9528
        /*
9529
         * check that it's not a defined namespace
9530
         */
9531
9.52k
        for (j = 1;j <= nbNs;j++)
9532
4.84k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9533
392
          break;
9534
5.07k
              if (j <= nbNs) continue;
9535
9536
4.67k
        nsname = xmlGetNamespace(ctxt, attname);
9537
4.67k
        if (nsname != defaults->values[5 * i + 2]) {
9538
3.04k
      if (nsPush(ctxt, attname,
9539
3.04k
                 defaults->values[5 * i + 2]) > 0)
9540
2.96k
          nbNs++;
9541
3.04k
        }
9542
4.67k
    } else {
9543
        /*
9544
         * check that it's not a defined attribute
9545
         */
9546
10.8k
        for (j = 0;j < nbatts;j+=5) {
9547
7.09k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9548
747
          break;
9549
7.09k
        }
9550
4.46k
        if (j < nbatts) continue;
9551
9552
3.72k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9553
148
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9554
0
                            localname = NULL;
9555
0
                            goto done;
9556
0
      }
9557
148
      maxatts = ctxt->maxatts;
9558
148
      atts = ctxt->atts;
9559
148
        }
9560
3.72k
        atts[nbatts++] = attname;
9561
3.72k
        atts[nbatts++] = aprefix;
9562
3.72k
        if (aprefix == NULL)
9563
2.18k
      atts[nbatts++] = NULL;
9564
1.54k
        else
9565
1.54k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9566
3.72k
        atts[nbatts++] = defaults->values[5 * i + 2];
9567
3.72k
        atts[nbatts++] = defaults->values[5 * i + 3];
9568
3.72k
        if ((ctxt->standalone == 1) &&
9569
3.72k
            (defaults->values[5 * i + 4] != NULL)) {
9570
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9571
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9572
0
                                   attname, localname);
9573
0
        }
9574
3.72k
        nbdef++;
9575
3.72k
    }
9576
11.4k
      }
9577
5.15k
  }
9578
6.96k
    }
9579
9580
    /*
9581
     * The attributes checkings
9582
     */
9583
2.11M
    for (i = 0; i < nbatts;i += 5) {
9584
        /*
9585
  * The default namespace does not apply to attribute names.
9586
  */
9587
285k
  if (atts[i + 1] != NULL) {
9588
9.59k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9589
9.59k
      if (nsname == NULL) {
9590
3.32k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9591
3.32k
        "Namespace prefix %s for %s on %s is not defined\n",
9592
3.32k
        atts[i + 1], atts[i], localname);
9593
3.32k
      }
9594
9.59k
      atts[i + 2] = nsname;
9595
9.59k
  } else
9596
276k
      nsname = NULL;
9597
  /*
9598
   * [ WFC: Unique Att Spec ]
9599
   * No attribute name may appear more than once in the same
9600
   * start-tag or empty-element tag.
9601
   * As extended by the Namespace in XML REC.
9602
   */
9603
815k
        for (j = 0; j < i;j += 5) {
9604
540k
      if (atts[i] == atts[j]) {
9605
12.5k
          if (atts[i+1] == atts[j+1]) {
9606
9.92k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9607
9.92k
        break;
9608
9.92k
    }
9609
2.59k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9610
167
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9611
167
           "Namespaced Attribute %s in '%s' redefined\n",
9612
167
           atts[i], nsname, NULL);
9613
167
        break;
9614
167
    }
9615
2.59k
      }
9616
540k
  }
9617
285k
    }
9618
9619
1.83M
    nsname = xmlGetNamespace(ctxt, prefix);
9620
1.83M
    if ((prefix != NULL) && (nsname == NULL)) {
9621
2.78k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9622
2.78k
           "Namespace prefix %s on %s is not defined\n",
9623
2.78k
     prefix, localname, NULL);
9624
2.78k
    }
9625
1.83M
    *pref = prefix;
9626
1.83M
    *URI = nsname;
9627
9628
    /*
9629
     * SAX: Start of Element !
9630
     */
9631
1.83M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9632
1.83M
  (!ctxt->disableSAX)) {
9633
1.82M
  if (nbNs > 0)
9634
9.16k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9635
9.16k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9636
9.16k
        nbatts / 5, nbdef, atts);
9637
1.81M
  else
9638
1.81M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9639
1.81M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9640
1.82M
    }
9641
9642
1.83M
done:
9643
    /*
9644
     * Free up attribute allocated strings if needed
9645
     */
9646
1.83M
    if (attval != 0) {
9647
18.7k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9648
14.4k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9649
7.10k
          xmlFree((xmlChar *) atts[i]);
9650
4.30k
    }
9651
9652
1.83M
    return(localname);
9653
1.83M
}
9654
9655
/**
9656
 * xmlParseEndTag2:
9657
 * @ctxt:  an XML parser context
9658
 * @line:  line of the start tag
9659
 * @nsNr:  number of namespaces on the start tag
9660
 *
9661
 * Parse an end tag. Always consumes '</'.
9662
 *
9663
 * [42] ETag ::= '</' Name S? '>'
9664
 *
9665
 * With namespace
9666
 *
9667
 * [NS 9] ETag ::= '</' QName S? '>'
9668
 */
9669
9670
static void
9671
74.9k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9672
74.9k
    const xmlChar *name;
9673
9674
74.9k
    GROW;
9675
74.9k
    if ((RAW != '<') || (NXT(1) != '/')) {
9676
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9677
0
  return;
9678
0
    }
9679
74.9k
    SKIP(2);
9680
9681
74.9k
    if (tag->prefix == NULL)
9682
74.3k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9683
609
    else
9684
609
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9685
9686
    /*
9687
     * We should definitely be at the ending "S? '>'" part
9688
     */
9689
74.9k
    GROW;
9690
74.9k
    if (ctxt->instate == XML_PARSER_EOF)
9691
0
        return;
9692
74.9k
    SKIP_BLANKS;
9693
74.9k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9694
159
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9695
159
    } else
9696
74.7k
  NEXT1;
9697
9698
    /*
9699
     * [ WFC: Element Type Match ]
9700
     * The Name in an element's end-tag must match the element type in the
9701
     * start-tag.
9702
     *
9703
     */
9704
74.9k
    if (name != (xmlChar*)1) {
9705
180
        if (name == NULL) name = BAD_CAST "unparsable";
9706
180
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9707
180
         "Opening and ending tag mismatch: %s line %d and %s\n",
9708
180
                    ctxt->name, tag->line, name);
9709
180
    }
9710
9711
    /*
9712
     * SAX: End of Tag
9713
     */
9714
74.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9715
74.9k
  (!ctxt->disableSAX))
9716
74.7k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9717
74.7k
                                tag->URI);
9718
9719
74.9k
    spacePop(ctxt);
9720
74.9k
    if (tag->nsNr != 0)
9721
140
  nsPop(ctxt, tag->nsNr);
9722
74.9k
}
9723
9724
/**
9725
 * xmlParseCDSect:
9726
 * @ctxt:  an XML parser context
9727
 *
9728
 * DEPRECATED: Internal function, don't use.
9729
 *
9730
 * Parse escaped pure raw content. Always consumes '<!['.
9731
 *
9732
 * [18] CDSect ::= CDStart CData CDEnd
9733
 *
9734
 * [19] CDStart ::= '<![CDATA['
9735
 *
9736
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9737
 *
9738
 * [21] CDEnd ::= ']]>'
9739
 */
9740
void
9741
0
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9742
0
    xmlChar *buf = NULL;
9743
0
    int len = 0;
9744
0
    int size = XML_PARSER_BUFFER_SIZE;
9745
0
    int r, rl;
9746
0
    int s, sl;
9747
0
    int cur, l;
9748
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9749
0
                    XML_MAX_HUGE_LENGTH :
9750
0
                    XML_MAX_TEXT_LENGTH;
9751
9752
0
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9753
0
        return;
9754
0
    SKIP(3);
9755
9756
0
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9757
0
        return;
9758
0
    SKIP(6);
9759
9760
0
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9761
0
    r = CUR_CHAR(rl);
9762
0
    if (!IS_CHAR(r)) {
9763
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9764
0
        goto out;
9765
0
    }
9766
0
    NEXTL(rl);
9767
0
    s = CUR_CHAR(sl);
9768
0
    if (!IS_CHAR(s)) {
9769
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9770
0
        goto out;
9771
0
    }
9772
0
    NEXTL(sl);
9773
0
    cur = CUR_CHAR(l);
9774
0
    buf = (xmlChar *) xmlMallocAtomic(size);
9775
0
    if (buf == NULL) {
9776
0
  xmlErrMemory(ctxt, NULL);
9777
0
        goto out;
9778
0
    }
9779
0
    while (IS_CHAR(cur) &&
9780
0
           ((r != ']') || (s != ']') || (cur != '>'))) {
9781
0
  if (len + 5 >= size) {
9782
0
      xmlChar *tmp;
9783
9784
0
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9785
0
      if (tmp == NULL) {
9786
0
    xmlErrMemory(ctxt, NULL);
9787
0
                goto out;
9788
0
      }
9789
0
      buf = tmp;
9790
0
      size *= 2;
9791
0
  }
9792
0
  COPY_BUF(rl,buf,len,r);
9793
0
        if (len > maxLength) {
9794
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9795
0
                           "CData section too big found\n");
9796
0
            goto out;
9797
0
        }
9798
0
  r = s;
9799
0
  rl = sl;
9800
0
  s = cur;
9801
0
  sl = l;
9802
0
  NEXTL(l);
9803
0
  cur = CUR_CHAR(l);
9804
0
    }
9805
0
    buf[len] = 0;
9806
0
    if (ctxt->instate == XML_PARSER_EOF) {
9807
0
        xmlFree(buf);
9808
0
        return;
9809
0
    }
9810
0
    if (cur != '>') {
9811
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9812
0
                       "CData section not finished\n%.50s\n", buf);
9813
0
        goto out;
9814
0
    }
9815
0
    NEXTL(l);
9816
9817
    /*
9818
     * OK the buffer is to be consumed as cdata.
9819
     */
9820
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9821
0
  if (ctxt->sax->cdataBlock != NULL)
9822
0
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9823
0
  else if (ctxt->sax->characters != NULL)
9824
0
      ctxt->sax->characters(ctxt->userData, buf, len);
9825
0
    }
9826
9827
0
out:
9828
0
    if (ctxt->instate != XML_PARSER_EOF)
9829
0
        ctxt->instate = XML_PARSER_CONTENT;
9830
0
    xmlFree(buf);
9831
0
}
9832
9833
/**
9834
 * xmlParseContentInternal:
9835
 * @ctxt:  an XML parser context
9836
 *
9837
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9838
 * unexpected EOF to the caller.
9839
 */
9840
9841
static void
9842
0
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9843
0
    int nameNr = ctxt->nameNr;
9844
9845
0
    GROW;
9846
0
    while ((RAW != 0) &&
9847
0
     (ctxt->instate != XML_PARSER_EOF)) {
9848
0
  const xmlChar *cur = ctxt->input->cur;
9849
9850
  /*
9851
   * First case : a Processing Instruction.
9852
   */
9853
0
  if ((*cur == '<') && (cur[1] == '?')) {
9854
0
      xmlParsePI(ctxt);
9855
0
  }
9856
9857
  /*
9858
   * Second case : a CDSection
9859
   */
9860
  /* 2.6.0 test was *cur not RAW */
9861
0
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9862
0
      xmlParseCDSect(ctxt);
9863
0
  }
9864
9865
  /*
9866
   * Third case :  a comment
9867
   */
9868
0
  else if ((*cur == '<') && (NXT(1) == '!') &&
9869
0
     (NXT(2) == '-') && (NXT(3) == '-')) {
9870
0
      xmlParseComment(ctxt);
9871
0
      ctxt->instate = XML_PARSER_CONTENT;
9872
0
  }
9873
9874
  /*
9875
   * Fourth case :  a sub-element.
9876
   */
9877
0
  else if (*cur == '<') {
9878
0
            if (NXT(1) == '/') {
9879
0
                if (ctxt->nameNr <= nameNr)
9880
0
                    break;
9881
0
          xmlParseElementEnd(ctxt);
9882
0
            } else {
9883
0
          xmlParseElementStart(ctxt);
9884
0
            }
9885
0
  }
9886
9887
  /*
9888
   * Fifth case : a reference. If if has not been resolved,
9889
   *    parsing returns it's Name, create the node
9890
   */
9891
9892
0
  else if (*cur == '&') {
9893
0
      xmlParseReference(ctxt);
9894
0
  }
9895
9896
  /*
9897
   * Last case, text. Note that References are handled directly.
9898
   */
9899
0
  else {
9900
0
      xmlParseCharDataInternal(ctxt, 0);
9901
0
  }
9902
9903
0
  SHRINK;
9904
0
  GROW;
9905
0
    }
9906
0
}
9907
9908
/**
9909
 * xmlParseContent:
9910
 * @ctxt:  an XML parser context
9911
 *
9912
 * Parse a content sequence. Stops at EOF or '</'.
9913
 *
9914
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9915
 */
9916
9917
void
9918
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9919
0
    int nameNr = ctxt->nameNr;
9920
9921
0
    xmlParseContentInternal(ctxt);
9922
9923
0
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9924
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9925
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9926
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9927
0
                "Premature end of data in tag %s line %d\n",
9928
0
    name, line, NULL);
9929
0
    }
9930
0
}
9931
9932
/**
9933
 * xmlParseElement:
9934
 * @ctxt:  an XML parser context
9935
 *
9936
 * DEPRECATED: Internal function, don't use.
9937
 *
9938
 * parse an XML element
9939
 *
9940
 * [39] element ::= EmptyElemTag | STag content ETag
9941
 *
9942
 * [ WFC: Element Type Match ]
9943
 * The Name in an element's end-tag must match the element type in the
9944
 * start-tag.
9945
 *
9946
 */
9947
9948
void
9949
0
xmlParseElement(xmlParserCtxtPtr ctxt) {
9950
0
    if (xmlParseElementStart(ctxt) != 0)
9951
0
        return;
9952
9953
0
    xmlParseContentInternal(ctxt);
9954
0
    if (ctxt->instate == XML_PARSER_EOF)
9955
0
  return;
9956
9957
0
    if (CUR == 0) {
9958
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9959
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9960
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9961
0
                "Premature end of data in tag %s line %d\n",
9962
0
    name, line, NULL);
9963
0
        return;
9964
0
    }
9965
9966
0
    xmlParseElementEnd(ctxt);
9967
0
}
9968
9969
/**
9970
 * xmlParseElementStart:
9971
 * @ctxt:  an XML parser context
9972
 *
9973
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9974
 * opening tag was parsed, 1 if an empty element was parsed.
9975
 *
9976
 * Always consumes '<'.
9977
 */
9978
static int
9979
0
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9980
0
    const xmlChar *name;
9981
0
    const xmlChar *prefix = NULL;
9982
0
    const xmlChar *URI = NULL;
9983
0
    xmlParserNodeInfo node_info;
9984
0
    int line, tlen = 0;
9985
0
    xmlNodePtr cur;
9986
0
    int nsNr = ctxt->nsNr;
9987
9988
0
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9989
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9990
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9991
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9992
0
        xmlParserMaxDepth);
9993
0
  xmlHaltParser(ctxt);
9994
0
  return(-1);
9995
0
    }
9996
9997
    /* Capture start position */
9998
0
    if (ctxt->record_info) {
9999
0
        node_info.begin_pos = ctxt->input->consumed +
10000
0
                          (CUR_PTR - ctxt->input->base);
10001
0
  node_info.begin_line = ctxt->input->line;
10002
0
    }
10003
10004
0
    if (ctxt->spaceNr == 0)
10005
0
  spacePush(ctxt, -1);
10006
0
    else if (*ctxt->space == -2)
10007
0
  spacePush(ctxt, -1);
10008
0
    else
10009
0
  spacePush(ctxt, *ctxt->space);
10010
10011
0
    line = ctxt->input->line;
10012
0
#ifdef LIBXML_SAX1_ENABLED
10013
0
    if (ctxt->sax2)
10014
0
#endif /* LIBXML_SAX1_ENABLED */
10015
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10016
0
#ifdef LIBXML_SAX1_ENABLED
10017
0
    else
10018
0
  name = xmlParseStartTag(ctxt);
10019
0
#endif /* LIBXML_SAX1_ENABLED */
10020
0
    if (ctxt->instate == XML_PARSER_EOF)
10021
0
  return(-1);
10022
0
    if (name == NULL) {
10023
0
  spacePop(ctxt);
10024
0
        return(-1);
10025
0
    }
10026
0
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10027
0
    cur = ctxt->node;
10028
10029
0
#ifdef LIBXML_VALID_ENABLED
10030
    /*
10031
     * [ VC: Root Element Type ]
10032
     * The Name in the document type declaration must match the element
10033
     * type of the root element.
10034
     */
10035
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10036
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10037
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10038
0
#endif /* LIBXML_VALID_ENABLED */
10039
10040
    /*
10041
     * Check for an Empty Element.
10042
     */
10043
0
    if ((RAW == '/') && (NXT(1) == '>')) {
10044
0
        SKIP(2);
10045
0
  if (ctxt->sax2) {
10046
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10047
0
    (!ctxt->disableSAX))
10048
0
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10049
0
#ifdef LIBXML_SAX1_ENABLED
10050
0
  } else {
10051
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10052
0
    (!ctxt->disableSAX))
10053
0
    ctxt->sax->endElement(ctxt->userData, name);
10054
0
#endif /* LIBXML_SAX1_ENABLED */
10055
0
  }
10056
0
  namePop(ctxt);
10057
0
  spacePop(ctxt);
10058
0
  if (nsNr != ctxt->nsNr)
10059
0
      nsPop(ctxt, ctxt->nsNr - nsNr);
10060
0
  if (cur != NULL && ctxt->record_info) {
10061
0
            node_info.node = cur;
10062
0
            node_info.end_pos = ctxt->input->consumed +
10063
0
                                (CUR_PTR - ctxt->input->base);
10064
0
            node_info.end_line = ctxt->input->line;
10065
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10066
0
  }
10067
0
  return(1);
10068
0
    }
10069
0
    if (RAW == '>') {
10070
0
        NEXT1;
10071
0
        if (cur != NULL && ctxt->record_info) {
10072
0
            node_info.node = cur;
10073
0
            node_info.end_pos = 0;
10074
0
            node_info.end_line = 0;
10075
0
            xmlParserAddNodeInfo(ctxt, &node_info);
10076
0
        }
10077
0
    } else {
10078
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10079
0
         "Couldn't find end of Start Tag %s line %d\n",
10080
0
                    name, line, NULL);
10081
10082
  /*
10083
   * end of parsing of this node.
10084
   */
10085
0
  nodePop(ctxt);
10086
0
  namePop(ctxt);
10087
0
  spacePop(ctxt);
10088
0
  if (nsNr != ctxt->nsNr)
10089
0
      nsPop(ctxt, ctxt->nsNr - nsNr);
10090
0
  return(-1);
10091
0
    }
10092
10093
0
    return(0);
10094
0
}
10095
10096
/**
10097
 * xmlParseElementEnd:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the end of an XML element. Always consumes '</'.
10101
 */
10102
static void
10103
0
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10104
0
    xmlNodePtr cur = ctxt->node;
10105
10106
0
    if (ctxt->nameNr <= 0) {
10107
0
        if ((RAW == '<') && (NXT(1) == '/'))
10108
0
            SKIP(2);
10109
0
        return;
10110
0
    }
10111
10112
    /*
10113
     * parse the end of tag: '</' should be here.
10114
     */
10115
0
    if (ctxt->sax2) {
10116
0
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10117
0
  namePop(ctxt);
10118
0
    }
10119
0
#ifdef LIBXML_SAX1_ENABLED
10120
0
    else
10121
0
  xmlParseEndTag1(ctxt, 0);
10122
0
#endif /* LIBXML_SAX1_ENABLED */
10123
10124
    /*
10125
     * Capture end position
10126
     */
10127
0
    if (cur != NULL && ctxt->record_info) {
10128
0
        xmlParserNodeInfoPtr node_info;
10129
10130
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10131
0
        if (node_info != NULL) {
10132
0
            node_info->end_pos = ctxt->input->consumed +
10133
0
                                 (CUR_PTR - ctxt->input->base);
10134
0
            node_info->end_line = ctxt->input->line;
10135
0
        }
10136
0
    }
10137
0
}
10138
10139
/**
10140
 * xmlParseVersionNum:
10141
 * @ctxt:  an XML parser context
10142
 *
10143
 * DEPRECATED: Internal function, don't use.
10144
 *
10145
 * parse the XML version value.
10146
 *
10147
 * [26] VersionNum ::= '1.' [0-9]+
10148
 *
10149
 * In practice allow [0-9].[0-9]+ at that level
10150
 *
10151
 * Returns the string giving the XML version number, or NULL
10152
 */
10153
xmlChar *
10154
1.64k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10155
1.64k
    xmlChar *buf = NULL;
10156
1.64k
    int len = 0;
10157
1.64k
    int size = 10;
10158
1.64k
    xmlChar cur;
10159
10160
1.64k
    buf = (xmlChar *) xmlMallocAtomic(size);
10161
1.64k
    if (buf == NULL) {
10162
0
  xmlErrMemory(ctxt, NULL);
10163
0
  return(NULL);
10164
0
    }
10165
1.64k
    cur = CUR;
10166
1.64k
    if (!((cur >= '0') && (cur <= '9'))) {
10167
13
  xmlFree(buf);
10168
13
  return(NULL);
10169
13
    }
10170
1.63k
    buf[len++] = cur;
10171
1.63k
    NEXT;
10172
1.63k
    cur=CUR;
10173
1.63k
    if (cur != '.') {
10174
5
  xmlFree(buf);
10175
5
  return(NULL);
10176
5
    }
10177
1.62k
    buf[len++] = cur;
10178
1.62k
    NEXT;
10179
1.62k
    cur=CUR;
10180
881k
    while ((cur >= '0') && (cur <= '9')) {
10181
880k
  if (len + 1 >= size) {
10182
324
      xmlChar *tmp;
10183
10184
324
      size *= 2;
10185
324
      tmp = (xmlChar *) xmlRealloc(buf, size);
10186
324
      if (tmp == NULL) {
10187
0
          xmlFree(buf);
10188
0
    xmlErrMemory(ctxt, NULL);
10189
0
    return(NULL);
10190
0
      }
10191
324
      buf = tmp;
10192
324
  }
10193
880k
  buf[len++] = cur;
10194
880k
  NEXT;
10195
880k
  cur=CUR;
10196
880k
    }
10197
1.62k
    buf[len] = 0;
10198
1.62k
    return(buf);
10199
1.62k
}
10200
10201
/**
10202
 * xmlParseVersionInfo:
10203
 * @ctxt:  an XML parser context
10204
 *
10205
 * DEPRECATED: Internal function, don't use.
10206
 *
10207
 * parse the XML version.
10208
 *
10209
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10210
 *
10211
 * [25] Eq ::= S? '=' S?
10212
 *
10213
 * Returns the version string, e.g. "1.0"
10214
 */
10215
10216
xmlChar *
10217
2.23k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10218
2.23k
    xmlChar *version = NULL;
10219
10220
2.23k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10221
1.69k
  SKIP(7);
10222
1.69k
  SKIP_BLANKS;
10223
1.69k
  if (RAW != '=') {
10224
20
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10225
20
      return(NULL);
10226
20
        }
10227
1.67k
  NEXT;
10228
1.67k
  SKIP_BLANKS;
10229
1.67k
  if (RAW == '"') {
10230
1.59k
      NEXT;
10231
1.59k
      version = xmlParseVersionNum(ctxt);
10232
1.59k
      if (RAW != '"') {
10233
31
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10234
31
      } else
10235
1.56k
          NEXT;
10236
1.59k
  } else if (RAW == '\''){
10237
51
      NEXT;
10238
51
      version = xmlParseVersionNum(ctxt);
10239
51
      if (RAW != '\'') {
10240
11
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10241
11
      } else
10242
40
          NEXT;
10243
51
  } else {
10244
33
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10245
33
  }
10246
1.67k
    }
10247
2.21k
    return(version);
10248
2.23k
}
10249
10250
/**
10251
 * xmlParseEncName:
10252
 * @ctxt:  an XML parser context
10253
 *
10254
 * DEPRECATED: Internal function, don't use.
10255
 *
10256
 * parse the XML encoding name
10257
 *
10258
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10259
 *
10260
 * Returns the encoding name value or NULL
10261
 */
10262
xmlChar *
10263
1.95k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10264
1.95k
    xmlChar *buf = NULL;
10265
1.95k
    int len = 0;
10266
1.95k
    int size = 10;
10267
1.95k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10268
1.95k
                    XML_MAX_TEXT_LENGTH :
10269
1.95k
                    XML_MAX_NAME_LENGTH;
10270
1.95k
    xmlChar cur;
10271
10272
1.95k
    cur = CUR;
10273
1.95k
    if (((cur >= 'a') && (cur <= 'z')) ||
10274
1.95k
        ((cur >= 'A') && (cur <= 'Z'))) {
10275
1.94k
  buf = (xmlChar *) xmlMallocAtomic(size);
10276
1.94k
  if (buf == NULL) {
10277
0
      xmlErrMemory(ctxt, NULL);
10278
0
      return(NULL);
10279
0
  }
10280
10281
1.94k
  buf[len++] = cur;
10282
1.94k
  NEXT;
10283
1.94k
  cur = CUR;
10284
1.23M
  while (((cur >= 'a') && (cur <= 'z')) ||
10285
1.23M
         ((cur >= 'A') && (cur <= 'Z')) ||
10286
1.23M
         ((cur >= '0') && (cur <= '9')) ||
10287
1.23M
         (cur == '.') || (cur == '_') ||
10288
1.23M
         (cur == '-')) {
10289
1.23M
      if (len + 1 >= size) {
10290
397
          xmlChar *tmp;
10291
10292
397
    size *= 2;
10293
397
    tmp = (xmlChar *) xmlRealloc(buf, size);
10294
397
    if (tmp == NULL) {
10295
0
        xmlErrMemory(ctxt, NULL);
10296
0
        xmlFree(buf);
10297
0
        return(NULL);
10298
0
    }
10299
397
    buf = tmp;
10300
397
      }
10301
1.23M
      buf[len++] = cur;
10302
1.23M
            if (len > maxLength) {
10303
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10304
0
                xmlFree(buf);
10305
0
                return(NULL);
10306
0
            }
10307
1.23M
      NEXT;
10308
1.23M
      cur = CUR;
10309
1.23M
        }
10310
1.94k
  buf[len] = 0;
10311
1.94k
    } else {
10312
5
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10313
5
    }
10314
1.95k
    return(buf);
10315
1.95k
}
10316
10317
/**
10318
 * xmlParseEncodingDecl:
10319
 * @ctxt:  an XML parser context
10320
 *
10321
 * DEPRECATED: Internal function, don't use.
10322
 *
10323
 * parse the XML encoding declaration
10324
 *
10325
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10326
 *
10327
 * this setups the conversion filters.
10328
 *
10329
 * Returns the encoding value or NULL
10330
 */
10331
10332
const xmlChar *
10333
2.21k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10334
2.21k
    xmlChar *encoding = NULL;
10335
10336
2.21k
    SKIP_BLANKS;
10337
2.21k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10338
1.96k
  SKIP(8);
10339
1.96k
  SKIP_BLANKS;
10340
1.96k
  if (RAW != '=') {
10341
8
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10342
8
      return(NULL);
10343
8
        }
10344
1.95k
  NEXT;
10345
1.95k
  SKIP_BLANKS;
10346
1.95k
  if (RAW == '"') {
10347
1.89k
      NEXT;
10348
1.89k
      encoding = xmlParseEncName(ctxt);
10349
1.89k
      if (RAW != '"') {
10350
65
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10351
65
    xmlFree((xmlChar *) encoding);
10352
65
    return(NULL);
10353
65
      } else
10354
1.82k
          NEXT;
10355
1.89k
  } else if (RAW == '\''){
10356
57
      NEXT;
10357
57
      encoding = xmlParseEncName(ctxt);
10358
57
      if (RAW != '\'') {
10359
4
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10360
4
    xmlFree((xmlChar *) encoding);
10361
4
    return(NULL);
10362
4
      } else
10363
53
          NEXT;
10364
57
  } else {
10365
5
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10366
5
  }
10367
10368
        /*
10369
         * Non standard parsing, allowing the user to ignore encoding
10370
         */
10371
1.88k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10372
0
      xmlFree((xmlChar *) encoding);
10373
0
            return(NULL);
10374
0
  }
10375
10376
  /*
10377
   * UTF-16 encoding switch has already taken place at this stage,
10378
   * more over the little-endian/big-endian selection is already done
10379
   */
10380
1.88k
        if ((encoding != NULL) &&
10381
1.88k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10382
1.88k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10383
      /*
10384
       * If no encoding was passed to the parser, that we are
10385
       * using UTF-16 and no decoder is present i.e. the
10386
       * document is apparently UTF-8 compatible, then raise an
10387
       * encoding mismatch fatal error
10388
       */
10389
1
      if ((ctxt->encoding == NULL) &&
10390
1
          (ctxt->input->buf != NULL) &&
10391
1
          (ctxt->input->buf->encoder == NULL)) {
10392
1
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10393
1
      "Document labelled UTF-16 but has UTF-8 content\n");
10394
1
      }
10395
1
      if (ctxt->encoding != NULL)
10396
0
    xmlFree((xmlChar *) ctxt->encoding);
10397
1
      ctxt->encoding = encoding;
10398
1
  }
10399
  /*
10400
   * UTF-8 encoding is handled natively
10401
   */
10402
1.88k
        else if ((encoding != NULL) &&
10403
1.88k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10404
1.88k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10405
            /* TODO: Check for encoding mismatch. */
10406
391
      if (ctxt->encoding != NULL)
10407
0
    xmlFree((xmlChar *) ctxt->encoding);
10408
391
      ctxt->encoding = encoding;
10409
391
  }
10410
1.49k
  else if (encoding != NULL) {
10411
1.49k
      xmlCharEncodingHandlerPtr handler;
10412
10413
1.49k
      if (ctxt->input->encoding != NULL)
10414
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10415
1.49k
      ctxt->input->encoding = encoding;
10416
10417
1.49k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10418
1.49k
      if (handler != NULL) {
10419
1.21k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10420
        /* failed to convert */
10421
4
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10422
4
        return(NULL);
10423
4
    }
10424
1.21k
      } else {
10425
277
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10426
277
      "Unsupported encoding %s\n", encoding);
10427
277
    return(NULL);
10428
277
      }
10429
1.49k
  }
10430
1.88k
    }
10431
1.85k
    return(encoding);
10432
2.21k
}
10433
10434
/**
10435
 * xmlParseSDDecl:
10436
 * @ctxt:  an XML parser context
10437
 *
10438
 * DEPRECATED: Internal function, don't use.
10439
 *
10440
 * parse the XML standalone declaration
10441
 *
10442
 * [32] SDDecl ::= S 'standalone' Eq
10443
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10444
 *
10445
 * [ VC: Standalone Document Declaration ]
10446
 * TODO The standalone document declaration must have the value "no"
10447
 * if any external markup declarations contain declarations of:
10448
 *  - attributes with default values, if elements to which these
10449
 *    attributes apply appear in the document without specifications
10450
 *    of values for these attributes, or
10451
 *  - entities (other than amp, lt, gt, apos, quot), if references
10452
 *    to those entities appear in the document, or
10453
 *  - attributes with values subject to normalization, where the
10454
 *    attribute appears in the document with a value which will change
10455
 *    as a result of normalization, or
10456
 *  - element types with element content, if white space occurs directly
10457
 *    within any instance of those types.
10458
 *
10459
 * Returns:
10460
 *   1 if standalone="yes"
10461
 *   0 if standalone="no"
10462
 *  -2 if standalone attribute is missing or invalid
10463
 *    (A standalone value of -2 means that the XML declaration was found,
10464
 *     but no value was specified for the standalone attribute).
10465
 */
10466
10467
int
10468
865
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10469
865
    int standalone = -2;
10470
10471
865
    SKIP_BLANKS;
10472
865
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10473
87
  SKIP(10);
10474
87
        SKIP_BLANKS;
10475
87
  if (RAW != '=') {
10476
1
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10477
1
      return(standalone);
10478
1
        }
10479
86
  NEXT;
10480
86
  SKIP_BLANKS;
10481
86
        if (RAW == '\''){
10482
11
      NEXT;
10483
11
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10484
1
          standalone = 0;
10485
1
                SKIP(2);
10486
10
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10487
10
                 (NXT(2) == 's')) {
10488
2
          standalone = 1;
10489
2
    SKIP(3);
10490
8
            } else {
10491
8
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10492
8
      }
10493
11
      if (RAW != '\'') {
10494
10
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10495
10
      } else
10496
1
          NEXT;
10497
75
  } else if (RAW == '"'){
10498
65
      NEXT;
10499
65
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10500
3
          standalone = 0;
10501
3
    SKIP(2);
10502
62
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10503
62
                 (NXT(2) == 's')) {
10504
47
          standalone = 1;
10505
47
                SKIP(3);
10506
47
            } else {
10507
15
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10508
15
      }
10509
65
      if (RAW != '"') {
10510
17
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10511
17
      } else
10512
48
          NEXT;
10513
65
  } else {
10514
10
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10515
10
        }
10516
86
    }
10517
864
    return(standalone);
10518
865
}
10519
10520
/**
10521
 * xmlParseXMLDecl:
10522
 * @ctxt:  an XML parser context
10523
 *
10524
 * DEPRECATED: Internal function, don't use.
10525
 *
10526
 * parse an XML declaration header
10527
 *
10528
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10529
 */
10530
10531
void
10532
2.23k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10533
2.23k
    xmlChar *version;
10534
10535
    /*
10536
     * This value for standalone indicates that the document has an
10537
     * XML declaration but it does not have a standalone attribute.
10538
     * It will be overwritten later if a standalone attribute is found.
10539
     */
10540
2.23k
    ctxt->input->standalone = -2;
10541
10542
    /*
10543
     * We know that '<?xml' is here.
10544
     */
10545
2.23k
    SKIP(5);
10546
10547
2.23k
    if (!IS_BLANK_CH(RAW)) {
10548
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10549
0
                 "Blank needed after '<?xml'\n");
10550
0
    }
10551
2.23k
    SKIP_BLANKS;
10552
10553
    /*
10554
     * We must have the VersionInfo here.
10555
     */
10556
2.23k
    version = xmlParseVersionInfo(ctxt);
10557
2.23k
    if (version == NULL) {
10558
608
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10559
1.62k
    } else {
10560
1.62k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10561
      /*
10562
       * Changed here for XML-1.0 5th edition
10563
       */
10564
392
      if (ctxt->options & XML_PARSE_OLD10) {
10565
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10566
0
                "Unsupported version '%s'\n",
10567
0
                version);
10568
392
      } else {
10569
392
          if ((version[0] == '1') && ((version[1] == '.'))) {
10570
375
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10571
375
                      "Unsupported version '%s'\n",
10572
375
          version, NULL);
10573
375
    } else {
10574
17
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10575
17
              "Unsupported version '%s'\n",
10576
17
              version);
10577
17
    }
10578
392
      }
10579
392
  }
10580
1.62k
  if (ctxt->version != NULL)
10581
0
      xmlFree((void *) ctxt->version);
10582
1.62k
  ctxt->version = version;
10583
1.62k
    }
10584
10585
    /*
10586
     * We may have the encoding declaration
10587
     */
10588
2.23k
    if (!IS_BLANK_CH(RAW)) {
10589
648
        if ((RAW == '?') && (NXT(1) == '>')) {
10590
21
      SKIP(2);
10591
21
      return;
10592
21
  }
10593
627
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10594
627
    }
10595
2.21k
    xmlParseEncodingDecl(ctxt);
10596
2.21k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10597
2.21k
         (ctxt->instate == XML_PARSER_EOF)) {
10598
  /*
10599
   * The XML REC instructs us to stop parsing right here
10600
   */
10601
281
        return;
10602
281
    }
10603
10604
    /*
10605
     * We may have the standalone status.
10606
     */
10607
1.93k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10608
1.16k
        if ((RAW == '?') && (NXT(1) == '>')) {
10609
1.06k
      SKIP(2);
10610
1.06k
      return;
10611
1.06k
  }
10612
101
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10613
101
    }
10614
10615
    /*
10616
     * We can grow the input buffer freely at that point
10617
     */
10618
865
    GROW;
10619
10620
865
    SKIP_BLANKS;
10621
865
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10622
10623
865
    SKIP_BLANKS;
10624
865
    if ((RAW == '?') && (NXT(1) == '>')) {
10625
435
        SKIP(2);
10626
435
    } else if (RAW == '>') {
10627
        /* Deprecated old WD ... */
10628
9
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10629
9
  NEXT;
10630
421
    } else {
10631
421
        int c;
10632
10633
421
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10634
1.36M
        while ((c = CUR) != 0) {
10635
1.36M
            NEXT;
10636
1.36M
            if (c == '>')
10637
30
                break;
10638
1.36M
        }
10639
421
    }
10640
865
}
10641
10642
/**
10643
 * xmlParseMisc:
10644
 * @ctxt:  an XML parser context
10645
 *
10646
 * DEPRECATED: Internal function, don't use.
10647
 *
10648
 * parse an XML Misc* optional field.
10649
 *
10650
 * [27] Misc ::= Comment | PI |  S
10651
 */
10652
10653
void
10654
0
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10655
0
    while (ctxt->instate != XML_PARSER_EOF) {
10656
0
        SKIP_BLANKS;
10657
0
        GROW;
10658
0
        if ((RAW == '<') && (NXT(1) == '?')) {
10659
0
      xmlParsePI(ctxt);
10660
0
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10661
0
      xmlParseComment(ctxt);
10662
0
        } else {
10663
0
            break;
10664
0
        }
10665
0
    }
10666
0
}
10667
10668
/**
10669
 * xmlParseDocument:
10670
 * @ctxt:  an XML parser context
10671
 *
10672
 * parse an XML document (and build a tree if using the standard SAX
10673
 * interface).
10674
 *
10675
 * [1] document ::= prolog element Misc*
10676
 *
10677
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10678
 *
10679
 * Returns 0, -1 in case of error. the parser context is augmented
10680
 *                as a result of the parsing.
10681
 */
10682
10683
int
10684
0
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10685
0
    xmlChar start[4];
10686
0
    xmlCharEncoding enc;
10687
10688
0
    xmlInitParser();
10689
10690
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10691
0
        return(-1);
10692
10693
0
    GROW;
10694
10695
    /*
10696
     * SAX: detecting the level.
10697
     */
10698
0
    xmlDetectSAX2(ctxt);
10699
10700
    /*
10701
     * SAX: beginning of the document processing.
10702
     */
10703
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10704
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10705
0
    if (ctxt->instate == XML_PARSER_EOF)
10706
0
  return(-1);
10707
10708
0
    if ((ctxt->encoding == NULL) &&
10709
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10710
  /*
10711
   * Get the 4 first bytes and decode the charset
10712
   * if enc != XML_CHAR_ENCODING_NONE
10713
   * plug some encoding conversion routines.
10714
   */
10715
0
  start[0] = RAW;
10716
0
  start[1] = NXT(1);
10717
0
  start[2] = NXT(2);
10718
0
  start[3] = NXT(3);
10719
0
  enc = xmlDetectCharEncoding(&start[0], 4);
10720
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10721
0
      xmlSwitchEncoding(ctxt, enc);
10722
0
  }
10723
0
    }
10724
10725
10726
0
    if (CUR == 0) {
10727
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10728
0
  return(-1);
10729
0
    }
10730
10731
0
    GROW;
10732
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10733
10734
  /*
10735
   * Note that we will switch encoding on the fly.
10736
   */
10737
0
  xmlParseXMLDecl(ctxt);
10738
0
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10739
0
      (ctxt->instate == XML_PARSER_EOF)) {
10740
      /*
10741
       * The XML REC instructs us to stop parsing right here
10742
       */
10743
0
      return(-1);
10744
0
  }
10745
0
  ctxt->standalone = ctxt->input->standalone;
10746
0
  SKIP_BLANKS;
10747
0
    } else {
10748
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10749
0
    }
10750
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10751
0
        ctxt->sax->startDocument(ctxt->userData);
10752
0
    if (ctxt->instate == XML_PARSER_EOF)
10753
0
  return(-1);
10754
0
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10755
0
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10756
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10757
0
    }
10758
10759
    /*
10760
     * The Misc part of the Prolog
10761
     */
10762
0
    xmlParseMisc(ctxt);
10763
10764
    /*
10765
     * Then possibly doc type declaration(s) and more Misc
10766
     * (doctypedecl Misc*)?
10767
     */
10768
0
    GROW;
10769
0
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10770
10771
0
  ctxt->inSubset = 1;
10772
0
  xmlParseDocTypeDecl(ctxt);
10773
0
  if (RAW == '[') {
10774
0
      ctxt->instate = XML_PARSER_DTD;
10775
0
      xmlParseInternalSubset(ctxt);
10776
0
      if (ctxt->instate == XML_PARSER_EOF)
10777
0
    return(-1);
10778
0
  }
10779
10780
  /*
10781
   * Create and update the external subset.
10782
   */
10783
0
  ctxt->inSubset = 2;
10784
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10785
0
      (!ctxt->disableSAX))
10786
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10787
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10788
0
  if (ctxt->instate == XML_PARSER_EOF)
10789
0
      return(-1);
10790
0
  ctxt->inSubset = 0;
10791
10792
0
        xmlCleanSpecialAttr(ctxt);
10793
10794
0
  ctxt->instate = XML_PARSER_PROLOG;
10795
0
  xmlParseMisc(ctxt);
10796
0
    }
10797
10798
    /*
10799
     * Time to start parsing the tree itself
10800
     */
10801
0
    GROW;
10802
0
    if (RAW != '<') {
10803
0
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10804
0
           "Start tag expected, '<' not found\n");
10805
0
    } else {
10806
0
  ctxt->instate = XML_PARSER_CONTENT;
10807
0
  xmlParseElement(ctxt);
10808
0
  ctxt->instate = XML_PARSER_EPILOG;
10809
10810
10811
  /*
10812
   * The Misc part at the end
10813
   */
10814
0
  xmlParseMisc(ctxt);
10815
10816
0
  if (RAW != 0) {
10817
0
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10818
0
  }
10819
0
  ctxt->instate = XML_PARSER_EOF;
10820
0
    }
10821
10822
    /*
10823
     * SAX: end of the document processing.
10824
     */
10825
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10826
0
        ctxt->sax->endDocument(ctxt->userData);
10827
10828
    /*
10829
     * Remove locally kept entity definitions if the tree was not built
10830
     */
10831
0
    if ((ctxt->myDoc != NULL) &&
10832
0
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10833
0
  xmlFreeDoc(ctxt->myDoc);
10834
0
  ctxt->myDoc = NULL;
10835
0
    }
10836
10837
0
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10838
0
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10839
0
  if (ctxt->valid)
10840
0
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10841
0
  if (ctxt->nsWellFormed)
10842
0
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10843
0
  if (ctxt->options & XML_PARSE_OLD10)
10844
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10845
0
    }
10846
0
    if (! ctxt->wellFormed) {
10847
0
  ctxt->valid = 0;
10848
0
  return(-1);
10849
0
    }
10850
0
    return(0);
10851
0
}
10852
10853
/**
10854
 * xmlParseExtParsedEnt:
10855
 * @ctxt:  an XML parser context
10856
 *
10857
 * parse a general parsed entity
10858
 * An external general parsed entity is well-formed if it matches the
10859
 * production labeled extParsedEnt.
10860
 *
10861
 * [78] extParsedEnt ::= TextDecl? content
10862
 *
10863
 * Returns 0, -1 in case of error. the parser context is augmented
10864
 *                as a result of the parsing.
10865
 */
10866
10867
int
10868
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10869
0
    xmlChar start[4];
10870
0
    xmlCharEncoding enc;
10871
10872
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10873
0
        return(-1);
10874
10875
0
    xmlDetectSAX2(ctxt);
10876
10877
0
    GROW;
10878
10879
    /*
10880
     * SAX: beginning of the document processing.
10881
     */
10882
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10883
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10884
10885
    /*
10886
     * Get the 4 first bytes and decode the charset
10887
     * if enc != XML_CHAR_ENCODING_NONE
10888
     * plug some encoding conversion routines.
10889
     */
10890
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10891
0
  start[0] = RAW;
10892
0
  start[1] = NXT(1);
10893
0
  start[2] = NXT(2);
10894
0
  start[3] = NXT(3);
10895
0
  enc = xmlDetectCharEncoding(start, 4);
10896
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10897
0
      xmlSwitchEncoding(ctxt, enc);
10898
0
  }
10899
0
    }
10900
10901
10902
0
    if (CUR == 0) {
10903
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10904
0
    }
10905
10906
    /*
10907
     * Check for the XMLDecl in the Prolog.
10908
     */
10909
0
    GROW;
10910
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10911
10912
  /*
10913
   * Note that we will switch encoding on the fly.
10914
   */
10915
0
  xmlParseXMLDecl(ctxt);
10916
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10917
      /*
10918
       * The XML REC instructs us to stop parsing right here
10919
       */
10920
0
      return(-1);
10921
0
  }
10922
0
  SKIP_BLANKS;
10923
0
    } else {
10924
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10925
0
    }
10926
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10927
0
        ctxt->sax->startDocument(ctxt->userData);
10928
0
    if (ctxt->instate == XML_PARSER_EOF)
10929
0
  return(-1);
10930
10931
    /*
10932
     * Doing validity checking on chunk doesn't make sense
10933
     */
10934
0
    ctxt->instate = XML_PARSER_CONTENT;
10935
0
    ctxt->validate = 0;
10936
0
    ctxt->loadsubset = 0;
10937
0
    ctxt->depth = 0;
10938
10939
0
    xmlParseContent(ctxt);
10940
0
    if (ctxt->instate == XML_PARSER_EOF)
10941
0
  return(-1);
10942
10943
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10944
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10945
0
    } else if (RAW != 0) {
10946
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10947
0
    }
10948
10949
    /*
10950
     * SAX: end of the document processing.
10951
     */
10952
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10953
0
        ctxt->sax->endDocument(ctxt->userData);
10954
10955
0
    if (! ctxt->wellFormed) return(-1);
10956
0
    return(0);
10957
0
}
10958
10959
#ifdef LIBXML_PUSH_ENABLED
10960
/************************************************************************
10961
 *                  *
10962
 *    Progressive parsing interfaces        *
10963
 *                  *
10964
 ************************************************************************/
10965
10966
/**
10967
 * xmlParseLookupChar:
10968
 * @ctxt:  an XML parser context
10969
 * @c:  character
10970
 *
10971
 * Check whether the input buffer contains a character.
10972
 */
10973
static int
10974
81.8k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10975
81.8k
    const xmlChar *cur;
10976
10977
81.8k
    if (ctxt->checkIndex == 0) {
10978
79.9k
        cur = ctxt->input->cur + 1;
10979
79.9k
    } else {
10980
1.89k
        cur = ctxt->input->cur + ctxt->checkIndex;
10981
1.89k
    }
10982
10983
81.8k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10984
1.94k
        size_t index = ctxt->input->end - ctxt->input->cur;
10985
10986
1.94k
        if (index > LONG_MAX) {
10987
0
            ctxt->checkIndex = 0;
10988
0
            return(1);
10989
0
        }
10990
1.94k
        ctxt->checkIndex = index;
10991
1.94k
        return(0);
10992
79.9k
    } else {
10993
79.9k
        ctxt->checkIndex = 0;
10994
79.9k
        return(1);
10995
79.9k
    }
10996
81.8k
}
10997
10998
/**
10999
 * xmlParseLookupString:
11000
 * @ctxt:  an XML parser context
11001
 * @startDelta: delta to apply at the start
11002
 * @str:  string
11003
 * @strLen:  length of string
11004
 *
11005
 * Check whether the input buffer contains a string.
11006
 */
11007
static const xmlChar *
11008
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11009
1.04M
                     const char *str, size_t strLen) {
11010
1.04M
    const xmlChar *cur, *term;
11011
11012
1.04M
    if (ctxt->checkIndex == 0) {
11013
1.03M
        cur = ctxt->input->cur + startDelta;
11014
1.03M
    } else {
11015
8.92k
        cur = ctxt->input->cur + ctxt->checkIndex;
11016
8.92k
    }
11017
11018
1.04M
    term = BAD_CAST strstr((const char *) cur, str);
11019
1.04M
    if (term == NULL) {
11020
20.8k
        const xmlChar *end = ctxt->input->end;
11021
20.8k
        size_t index;
11022
11023
        /* Rescan (strLen - 1) characters. */
11024
20.8k
        if ((size_t) (end - cur) < strLen)
11025
373
            end = cur;
11026
20.4k
        else
11027
20.4k
            end -= strLen - 1;
11028
20.8k
        index = end - ctxt->input->cur;
11029
20.8k
        if (index > LONG_MAX) {
11030
0
            ctxt->checkIndex = 0;
11031
0
            return(ctxt->input->end - strLen);
11032
0
        }
11033
20.8k
        ctxt->checkIndex = index;
11034
1.02M
    } else {
11035
1.02M
        ctxt->checkIndex = 0;
11036
1.02M
    }
11037
11038
1.04M
    return(term);
11039
1.04M
}
11040
11041
/**
11042
 * xmlParseLookupCharData:
11043
 * @ctxt:  an XML parser context
11044
 *
11045
 * Check whether the input buffer contains terminated char data.
11046
 */
11047
static int
11048
36.7k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11049
36.7k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11050
36.7k
    const xmlChar *end = ctxt->input->end;
11051
36.7k
    size_t index;
11052
11053
374k
    while (cur < end) {
11054
373k
        if ((*cur == '<') || (*cur == '&')) {
11055
35.5k
            ctxt->checkIndex = 0;
11056
35.5k
            return(1);
11057
35.5k
        }
11058
337k
        cur++;
11059
337k
    }
11060
11061
1.22k
    index = cur - ctxt->input->cur;
11062
1.22k
    if (index > LONG_MAX) {
11063
0
        ctxt->checkIndex = 0;
11064
0
        return(1);
11065
0
    }
11066
1.22k
    ctxt->checkIndex = index;
11067
1.22k
    return(0);
11068
1.22k
}
11069
11070
/**
11071
 * xmlParseLookupGt:
11072
 * @ctxt:  an XML parser context
11073
 *
11074
 * Check whether there's enough data in the input buffer to finish parsing
11075
 * a start tag. This has to take quotes into account.
11076
 */
11077
static int
11078
1.79M
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11079
1.79M
    const xmlChar *cur;
11080
1.79M
    const xmlChar *end = ctxt->input->end;
11081
1.79M
    int state = ctxt->endCheckState;
11082
1.79M
    size_t index;
11083
11084
1.79M
    if (ctxt->checkIndex == 0)
11085
1.77M
        cur = ctxt->input->cur + 1;
11086
20.1k
    else
11087
20.1k
        cur = ctxt->input->cur + ctxt->checkIndex;
11088
11089
780M
    while (cur < end) {
11090
780M
        if (state) {
11091
238M
            if (*cur == state)
11092
299k
                state = 0;
11093
541M
        } else if (*cur == '\'' || *cur == '"') {
11094
299k
            state = *cur;
11095
540M
        } else if (*cur == '>') {
11096
1.77M
            ctxt->checkIndex = 0;
11097
1.77M
            ctxt->endCheckState = 0;
11098
1.77M
            return(1);
11099
1.77M
        }
11100
778M
        cur++;
11101
778M
    }
11102
11103
20.6k
    index = cur - ctxt->input->cur;
11104
20.6k
    if (index > LONG_MAX) {
11105
0
        ctxt->checkIndex = 0;
11106
0
        ctxt->endCheckState = 0;
11107
0
        return(1);
11108
0
    }
11109
20.6k
    ctxt->checkIndex = index;
11110
20.6k
    ctxt->endCheckState = state;
11111
20.6k
    return(0);
11112
20.6k
}
11113
11114
/**
11115
 * xmlParseLookupInternalSubset:
11116
 * @ctxt:  an XML parser context
11117
 *
11118
 * Check whether there's enough data in the input buffer to finish parsing
11119
 * the internal subset.
11120
 */
11121
static int
11122
2.38k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11123
    /*
11124
     * Sorry, but progressive parsing of the internal subset is not
11125
     * supported. We first check that the full content of the internal
11126
     * subset is available and parsing is launched only at that point.
11127
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11128
     * not in a ']]>' sequence which are conditional sections.
11129
     */
11130
2.38k
    const xmlChar *cur, *start;
11131
2.38k
    const xmlChar *end = ctxt->input->end;
11132
2.38k
    int state = ctxt->endCheckState;
11133
2.38k
    size_t index;
11134
11135
2.38k
    if (ctxt->checkIndex == 0) {
11136
222
        cur = ctxt->input->cur + 1;
11137
2.16k
    } else {
11138
2.16k
        cur = ctxt->input->cur + ctxt->checkIndex;
11139
2.16k
    }
11140
2.38k
    start = cur;
11141
11142
81.1M
    while (cur < end) {
11143
81.1M
        if (state == '-') {
11144
1.13M
            if ((*cur == '-') &&
11145
1.13M
                (cur[1] == '-') &&
11146
1.13M
                (cur[2] == '>')) {
11147
10.3k
                state = 0;
11148
10.3k
                cur += 3;
11149
10.3k
                start = cur;
11150
10.3k
                continue;
11151
10.3k
            }
11152
1.13M
        }
11153
79.9M
        else if (state == ']') {
11154
12.9k
            if (*cur == '>') {
11155
53
                ctxt->checkIndex = 0;
11156
53
                ctxt->endCheckState = 0;
11157
53
                return(1);
11158
53
            }
11159
12.8k
            if (IS_BLANK_CH(*cur)) {
11160
1.60k
                state = ' ';
11161
11.2k
            } else if (*cur != ']') {
11162
1.62k
                state = 0;
11163
1.62k
                start = cur;
11164
1.62k
                continue;
11165
1.62k
            }
11166
12.8k
        }
11167
79.9M
        else if (state == ' ') {
11168
23.4k
            if (*cur == '>') {
11169
1
                ctxt->checkIndex = 0;
11170
1
                ctxt->endCheckState = 0;
11171
1
                return(1);
11172
1
            }
11173
23.4k
            if (!IS_BLANK_CH(*cur)) {
11174
1.60k
                state = 0;
11175
1.60k
                start = cur;
11176
1.60k
                continue;
11177
1.60k
            }
11178
23.4k
        }
11179
79.9M
        else if (state != 0) {
11180
51.7M
            if (*cur == state) {
11181
96.7k
                state = 0;
11182
96.7k
                start = cur + 1;
11183
96.7k
            }
11184
51.7M
        }
11185
28.2M
        else if (*cur == '<') {
11186
70.5k
            if ((cur[1] == '!') &&
11187
70.5k
                (cur[2] == '-') &&
11188
70.5k
                (cur[3] == '-')) {
11189
10.3k
                state = '-';
11190
10.3k
                cur += 4;
11191
                /* Don't treat <!--> as comment */
11192
10.3k
                start = cur;
11193
10.3k
                continue;
11194
10.3k
            }
11195
70.5k
        }
11196
28.1M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11197
100k
            state = *cur;
11198
100k
        }
11199
11200
81.0M
        cur++;
11201
81.0M
    }
11202
11203
    /*
11204
     * Rescan the three last characters to detect "<!--" and "-->"
11205
     * split across chunks.
11206
     */
11207
2.33k
    if ((state == 0) || (state == '-')) {
11208
1.08k
        if (cur - start < 3)
11209
104
            cur = start;
11210
983
        else
11211
983
            cur -= 3;
11212
1.08k
    }
11213
2.33k
    index = cur - ctxt->input->cur;
11214
2.33k
    if (index > LONG_MAX) {
11215
0
        ctxt->checkIndex = 0;
11216
0
        ctxt->endCheckState = 0;
11217
0
        return(1);
11218
0
    }
11219
2.33k
    ctxt->checkIndex = index;
11220
2.33k
    ctxt->endCheckState = state;
11221
2.33k
    return(0);
11222
2.33k
}
11223
11224
/**
11225
 * xmlCheckCdataPush:
11226
 * @cur: pointer to the block of characters
11227
 * @len: length of the block in bytes
11228
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11229
 *
11230
 * Check that the block of characters is okay as SCdata content [20]
11231
 *
11232
 * Returns the number of bytes to pass if okay, a negative index where an
11233
 *         UTF-8 error occurred otherwise
11234
 */
11235
static int
11236
790k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11237
790k
    int ix;
11238
790k
    unsigned char c;
11239
790k
    int codepoint;
11240
11241
790k
    if ((utf == NULL) || (len <= 0))
11242
263
        return(0);
11243
11244
4.60M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11245
3.81M
        c = utf[ix];
11246
3.81M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11247
2.83M
      if (c >= 0x20)
11248
1.99M
    ix++;
11249
843k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11250
843k
          ix++;
11251
62
      else
11252
62
          return(-ix);
11253
2.83M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11254
206k
      if (ix + 2 > len) return(complete ? -ix : ix);
11255
205k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11256
7
          return(-ix);
11257
205k
      codepoint = (utf[ix] & 0x1f) << 6;
11258
205k
      codepoint |= utf[ix+1] & 0x3f;
11259
205k
      if (!xmlIsCharQ(codepoint))
11260
2
          return(-ix);
11261
205k
      ix += 2;
11262
769k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11263
743k
      if (ix + 3 > len) return(complete ? -ix : ix);
11264
742k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11265
742k
          ((utf[ix+2] & 0xc0) != 0x80))
11266
6
        return(-ix);
11267
742k
      codepoint = (utf[ix] & 0xf) << 12;
11268
742k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11269
742k
      codepoint |= utf[ix+2] & 0x3f;
11270
742k
      if (!xmlIsCharQ(codepoint))
11271
9
          return(-ix);
11272
741k
      ix += 3;
11273
741k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11274
26.1k
      if (ix + 4 > len) return(complete ? -ix : ix);
11275
25.8k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11276
25.8k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11277
25.8k
    ((utf[ix+3] & 0xc0) != 0x80))
11278
13
        return(-ix);
11279
25.8k
      codepoint = (utf[ix] & 0x7) << 18;
11280
25.8k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11281
25.8k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11282
25.8k
      codepoint |= utf[ix+3] & 0x3f;
11283
25.8k
      if (!xmlIsCharQ(codepoint))
11284
23
          return(-ix);
11285
25.8k
      ix += 4;
11286
25.8k
  } else       /* unknown encoding */
11287
32
      return(-ix);
11288
3.81M
      }
11289
788k
      return(ix);
11290
789k
}
11291
11292
/**
11293
 * xmlParseTryOrFinish:
11294
 * @ctxt:  an XML parser context
11295
 * @terminate:  last chunk indicator
11296
 *
11297
 * Try to progress on parsing
11298
 *
11299
 * Returns zero if no parsing was possible
11300
 */
11301
static int
11302
52.0k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11303
52.0k
    int ret = 0;
11304
52.0k
    int tlen;
11305
52.0k
    size_t avail;
11306
52.0k
    xmlChar cur, next;
11307
11308
52.0k
    if (ctxt->input == NULL)
11309
0
        return(0);
11310
11311
#ifdef DEBUG_PUSH
11312
    switch (ctxt->instate) {
11313
  case XML_PARSER_EOF:
11314
      xmlGenericError(xmlGenericErrorContext,
11315
        "PP: try EOF\n"); break;
11316
  case XML_PARSER_START:
11317
      xmlGenericError(xmlGenericErrorContext,
11318
        "PP: try START\n"); break;
11319
  case XML_PARSER_MISC:
11320
      xmlGenericError(xmlGenericErrorContext,
11321
        "PP: try MISC\n");break;
11322
  case XML_PARSER_COMMENT:
11323
      xmlGenericError(xmlGenericErrorContext,
11324
        "PP: try COMMENT\n");break;
11325
  case XML_PARSER_PROLOG:
11326
      xmlGenericError(xmlGenericErrorContext,
11327
        "PP: try PROLOG\n");break;
11328
  case XML_PARSER_START_TAG:
11329
      xmlGenericError(xmlGenericErrorContext,
11330
        "PP: try START_TAG\n");break;
11331
  case XML_PARSER_CONTENT:
11332
      xmlGenericError(xmlGenericErrorContext,
11333
        "PP: try CONTENT\n");break;
11334
  case XML_PARSER_CDATA_SECTION:
11335
      xmlGenericError(xmlGenericErrorContext,
11336
        "PP: try CDATA_SECTION\n");break;
11337
  case XML_PARSER_END_TAG:
11338
      xmlGenericError(xmlGenericErrorContext,
11339
        "PP: try END_TAG\n");break;
11340
  case XML_PARSER_ENTITY_DECL:
11341
      xmlGenericError(xmlGenericErrorContext,
11342
        "PP: try ENTITY_DECL\n");break;
11343
  case XML_PARSER_ENTITY_VALUE:
11344
      xmlGenericError(xmlGenericErrorContext,
11345
        "PP: try ENTITY_VALUE\n");break;
11346
  case XML_PARSER_ATTRIBUTE_VALUE:
11347
      xmlGenericError(xmlGenericErrorContext,
11348
        "PP: try ATTRIBUTE_VALUE\n");break;
11349
  case XML_PARSER_DTD:
11350
      xmlGenericError(xmlGenericErrorContext,
11351
        "PP: try DTD\n");break;
11352
  case XML_PARSER_EPILOG:
11353
      xmlGenericError(xmlGenericErrorContext,
11354
        "PP: try EPILOG\n");break;
11355
  case XML_PARSER_PI:
11356
      xmlGenericError(xmlGenericErrorContext,
11357
        "PP: try PI\n");break;
11358
        case XML_PARSER_IGNORE:
11359
            xmlGenericError(xmlGenericErrorContext,
11360
        "PP: try IGNORE\n");break;
11361
    }
11362
#endif
11363
11364
52.0k
    if ((ctxt->input != NULL) &&
11365
52.0k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11366
9.19k
        xmlParserShrink(ctxt);
11367
9.19k
    }
11368
11369
6.28M
    while (ctxt->instate != XML_PARSER_EOF) {
11370
6.28M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11371
7.53k
      return(0);
11372
11373
6.27M
  if (ctxt->input == NULL) break;
11374
6.27M
  if (ctxt->input->buf != NULL) {
11375
      /*
11376
       * If we are operating on converted input, try to flush
11377
       * remaining chars to avoid them stalling in the non-converted
11378
       * buffer.
11379
       */
11380
6.27M
      if ((ctxt->input->buf->raw != NULL) &&
11381
6.27M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11382
9.31k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11383
9.31k
                                                 ctxt->input);
11384
9.31k
    size_t current = ctxt->input->cur - ctxt->input->base;
11385
11386
9.31k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11387
9.31k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11388
9.31k
                                      base, current);
11389
9.31k
      }
11390
6.27M
  }
11391
6.27M
        avail = ctxt->input->end - ctxt->input->cur;
11392
6.27M
        if (avail < 1)
11393
2.61k
      goto done;
11394
6.27M
        switch (ctxt->instate) {
11395
0
            case XML_PARSER_EOF:
11396
          /*
11397
     * Document parsing is done !
11398
     */
11399
0
          goto done;
11400
28.1k
            case XML_PARSER_START:
11401
28.1k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11402
12.4k
        xmlChar start[4];
11403
12.4k
        xmlCharEncoding enc;
11404
11405
        /*
11406
         * Very first chars read from the document flow.
11407
         */
11408
12.4k
        if (avail < 4)
11409
4
      goto done;
11410
11411
        /*
11412
         * Get the 4 first bytes and decode the charset
11413
         * if enc != XML_CHAR_ENCODING_NONE
11414
         * plug some encoding conversion routines,
11415
         * else xmlSwitchEncoding will set to (default)
11416
         * UTF8.
11417
         */
11418
12.4k
        start[0] = RAW;
11419
12.4k
        start[1] = NXT(1);
11420
12.4k
        start[2] = NXT(2);
11421
12.4k
        start[3] = NXT(3);
11422
12.4k
        enc = xmlDetectCharEncoding(start, 4);
11423
                    /*
11424
                     * We need more bytes to detect EBCDIC code pages.
11425
                     * See xmlDetectEBCDIC.
11426
                     */
11427
12.4k
                    if ((enc == XML_CHAR_ENCODING_EBCDIC) &&
11428
12.4k
                        (!terminate) && (avail < 200))
11429
0
                        goto done;
11430
12.4k
        xmlSwitchEncoding(ctxt, enc);
11431
12.4k
        break;
11432
12.4k
    }
11433
11434
15.7k
    if (avail < 2)
11435
13
        goto done;
11436
15.7k
    cur = ctxt->input->cur[0];
11437
15.7k
    next = ctxt->input->cur[1];
11438
15.7k
    if (cur == 0) {
11439
29
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11440
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11441
0
                  &xmlDefaultSAXLocator);
11442
29
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11443
29
        xmlHaltParser(ctxt);
11444
#ifdef DEBUG_PUSH
11445
        xmlGenericError(xmlGenericErrorContext,
11446
          "PP: entering EOF\n");
11447
#endif
11448
29
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11449
0
      ctxt->sax->endDocument(ctxt->userData);
11450
29
        goto done;
11451
29
    }
11452
15.6k
          if ((cur == '<') && (next == '?')) {
11453
        /* PI or XML decl */
11454
6.70k
        if (avail < 5) goto done;
11455
6.68k
        if ((!terminate) &&
11456
6.68k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11457
3.30k
      goto done;
11458
3.38k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11459
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11460
0
                  &xmlDefaultSAXLocator);
11461
3.38k
        if ((ctxt->input->cur[2] == 'x') &&
11462
3.38k
      (ctxt->input->cur[3] == 'm') &&
11463
3.38k
      (ctxt->input->cur[4] == 'l') &&
11464
3.38k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11465
2.23k
      ret += 5;
11466
#ifdef DEBUG_PUSH
11467
      xmlGenericError(xmlGenericErrorContext,
11468
        "PP: Parsing XML Decl\n");
11469
#endif
11470
2.23k
      xmlParseXMLDecl(ctxt);
11471
2.23k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11472
          /*
11473
           * The XML REC instructs us to stop parsing right
11474
           * here
11475
           */
11476
281
          xmlHaltParser(ctxt);
11477
281
          return(0);
11478
281
      }
11479
1.95k
      ctxt->standalone = ctxt->input->standalone;
11480
1.95k
      if ((ctxt->encoding == NULL) &&
11481
1.95k
          (ctxt->input->encoding != NULL))
11482
1.20k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11483
1.95k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11484
1.95k
          (!ctxt->disableSAX))
11485
1.51k
          ctxt->sax->startDocument(ctxt->userData);
11486
1.95k
      ctxt->instate = XML_PARSER_MISC;
11487
#ifdef DEBUG_PUSH
11488
      xmlGenericError(xmlGenericErrorContext,
11489
        "PP: entering MISC\n");
11490
#endif
11491
1.95k
        } else {
11492
1.14k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11493
1.14k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11494
1.14k
          (!ctxt->disableSAX))
11495
1.14k
          ctxt->sax->startDocument(ctxt->userData);
11496
1.14k
      ctxt->instate = XML_PARSER_MISC;
11497
#ifdef DEBUG_PUSH
11498
      xmlGenericError(xmlGenericErrorContext,
11499
        "PP: entering MISC\n");
11500
#endif
11501
1.14k
        }
11502
8.99k
    } else {
11503
8.99k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11504
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11505
0
                  &xmlDefaultSAXLocator);
11506
8.99k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11507
8.99k
        if (ctxt->version == NULL) {
11508
0
            xmlErrMemory(ctxt, NULL);
11509
0
      break;
11510
0
        }
11511
8.99k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11512
8.99k
            (!ctxt->disableSAX))
11513
8.99k
      ctxt->sax->startDocument(ctxt->userData);
11514
8.99k
        ctxt->instate = XML_PARSER_MISC;
11515
#ifdef DEBUG_PUSH
11516
        xmlGenericError(xmlGenericErrorContext,
11517
          "PP: entering MISC\n");
11518
#endif
11519
8.99k
    }
11520
12.0k
    break;
11521
1.84M
            case XML_PARSER_START_TAG: {
11522
1.84M
          const xmlChar *name;
11523
1.84M
    const xmlChar *prefix = NULL;
11524
1.84M
    const xmlChar *URI = NULL;
11525
1.84M
                int line = ctxt->input->line;
11526
1.84M
    int nsNr = ctxt->nsNr;
11527
11528
1.84M
    if ((avail < 2) && (ctxt->inputNr == 1))
11529
0
        goto done;
11530
1.84M
    cur = ctxt->input->cur[0];
11531
1.84M
          if (cur != '<') {
11532
192
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11533
192
        xmlHaltParser(ctxt);
11534
192
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11535
0
      ctxt->sax->endDocument(ctxt->userData);
11536
192
        goto done;
11537
192
    }
11538
1.84M
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11539
16.6k
                    goto done;
11540
1.83M
    if (ctxt->spaceNr == 0)
11541
0
        spacePush(ctxt, -1);
11542
1.83M
    else if (*ctxt->space == -2)
11543
127k
        spacePush(ctxt, -1);
11544
1.70M
    else
11545
1.70M
        spacePush(ctxt, *ctxt->space);
11546
1.83M
#ifdef LIBXML_SAX1_ENABLED
11547
1.83M
    if (ctxt->sax2)
11548
1.83M
#endif /* LIBXML_SAX1_ENABLED */
11549
1.83M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11550
0
#ifdef LIBXML_SAX1_ENABLED
11551
0
    else
11552
0
        name = xmlParseStartTag(ctxt);
11553
1.83M
#endif /* LIBXML_SAX1_ENABLED */
11554
1.83M
    if (ctxt->instate == XML_PARSER_EOF)
11555
0
        goto done;
11556
1.83M
    if (name == NULL) {
11557
216
        spacePop(ctxt);
11558
216
        xmlHaltParser(ctxt);
11559
216
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11560
0
      ctxt->sax->endDocument(ctxt->userData);
11561
216
        goto done;
11562
216
    }
11563
1.83M
#ifdef LIBXML_VALID_ENABLED
11564
    /*
11565
     * [ VC: Root Element Type ]
11566
     * The Name in the document type declaration must match
11567
     * the element type of the root element.
11568
     */
11569
1.83M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11570
1.83M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11571
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11572
1.83M
#endif /* LIBXML_VALID_ENABLED */
11573
11574
    /*
11575
     * Check for an Empty Element.
11576
     */
11577
1.83M
    if ((RAW == '/') && (NXT(1) == '>')) {
11578
55.2k
        SKIP(2);
11579
11580
55.2k
        if (ctxt->sax2) {
11581
55.2k
      if ((ctxt->sax != NULL) &&
11582
55.2k
          (ctxt->sax->endElementNs != NULL) &&
11583
55.2k
          (!ctxt->disableSAX))
11584
55.2k
          ctxt->sax->endElementNs(ctxt->userData, name,
11585
55.2k
                                  prefix, URI);
11586
55.2k
      if (ctxt->nsNr - nsNr > 0)
11587
1.51k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11588
55.2k
#ifdef LIBXML_SAX1_ENABLED
11589
55.2k
        } else {
11590
0
      if ((ctxt->sax != NULL) &&
11591
0
          (ctxt->sax->endElement != NULL) &&
11592
0
          (!ctxt->disableSAX))
11593
0
          ctxt->sax->endElement(ctxt->userData, name);
11594
0
#endif /* LIBXML_SAX1_ENABLED */
11595
0
        }
11596
55.2k
        if (ctxt->instate == XML_PARSER_EOF)
11597
0
      goto done;
11598
55.2k
        spacePop(ctxt);
11599
55.2k
        if (ctxt->nameNr == 0) {
11600
24
      ctxt->instate = XML_PARSER_EPILOG;
11601
55.1k
        } else {
11602
55.1k
      ctxt->instate = XML_PARSER_CONTENT;
11603
55.1k
        }
11604
55.2k
        break;
11605
55.2k
    }
11606
1.77M
    if (RAW == '>') {
11607
1.77M
        NEXT;
11608
1.77M
    } else {
11609
4.95k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11610
4.95k
           "Couldn't find end of Start Tag %s\n",
11611
4.95k
           name);
11612
4.95k
        nodePop(ctxt);
11613
4.95k
        spacePop(ctxt);
11614
4.95k
    }
11615
1.77M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11616
11617
1.77M
    ctxt->instate = XML_PARSER_CONTENT;
11618
1.77M
                break;
11619
1.83M
      }
11620
3.46M
            case XML_PARSER_CONTENT: {
11621
3.46M
    if ((avail < 2) && (ctxt->inputNr == 1))
11622
929
        goto done;
11623
3.46M
    cur = ctxt->input->cur[0];
11624
3.46M
    next = ctxt->input->cur[1];
11625
11626
3.46M
    if ((cur == '<') && (next == '/')) {
11627
74.9k
        ctxt->instate = XML_PARSER_END_TAG;
11628
74.9k
        break;
11629
3.39M
          } else if ((cur == '<') && (next == '?')) {
11630
177k
        if ((!terminate) &&
11631
177k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11632
1.15k
      goto done;
11633
176k
        xmlParsePI(ctxt);
11634
176k
        ctxt->instate = XML_PARSER_CONTENT;
11635
3.21M
    } else if ((cur == '<') && (next != '!')) {
11636
1.82M
        ctxt->instate = XML_PARSER_START_TAG;
11637
1.82M
        break;
11638
1.82M
    } else if ((cur == '<') && (next == '!') &&
11639
1.39M
               (ctxt->input->cur[2] == '-') &&
11640
1.39M
         (ctxt->input->cur[3] == '-')) {
11641
38.8k
        if ((!terminate) &&
11642
38.8k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11643
1.66k
      goto done;
11644
37.1k
        xmlParseComment(ctxt);
11645
37.1k
        ctxt->instate = XML_PARSER_CONTENT;
11646
1.35M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11647
1.35M
        (ctxt->input->cur[2] == '[') &&
11648
1.35M
        (ctxt->input->cur[3] == 'C') &&
11649
1.35M
        (ctxt->input->cur[4] == 'D') &&
11650
1.35M
        (ctxt->input->cur[5] == 'A') &&
11651
1.35M
        (ctxt->input->cur[6] == 'T') &&
11652
1.35M
        (ctxt->input->cur[7] == 'A') &&
11653
1.35M
        (ctxt->input->cur[8] == '[')) {
11654
778k
        SKIP(9);
11655
778k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11656
778k
        break;
11657
778k
    } else if ((cur == '<') && (next == '!') &&
11658
573k
               (avail < 9)) {
11659
1.46k
        goto done;
11660
572k
    } else if (cur == '<') {
11661
46
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11662
46
                    "detected an error in element content\n");
11663
46
                    SKIP(1);
11664
572k
    } else if (cur == '&') {
11665
14.6k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11666
1.35k
      goto done;
11667
13.2k
        xmlParseReference(ctxt);
11668
557k
    } else {
11669
        /* TODO Avoid the extra copy, handle directly !!! */
11670
        /*
11671
         * Goal of the following test is:
11672
         *  - minimize calls to the SAX 'character' callback
11673
         *    when they are mergeable
11674
         *  - handle a problem for isBlank when we only parse
11675
         *    a sequence of blank chars and the next one is
11676
         *    not available to check against '<' presence.
11677
         *  - tries to homogenize the differences in SAX
11678
         *    callbacks between the push and pull versions
11679
         *    of the parser.
11680
         */
11681
557k
        if ((ctxt->inputNr == 1) &&
11682
557k
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11683
43.9k
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11684
1.22k
          goto done;
11685
43.9k
                    }
11686
556k
                    ctxt->checkIndex = 0;
11687
556k
        xmlParseCharDataInternal(ctxt, !terminate);
11688
556k
    }
11689
783k
    break;
11690
3.46M
      }
11691
783k
            case XML_PARSER_END_TAG:
11692
75.5k
    if (avail < 2)
11693
0
        goto done;
11694
75.5k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11695
588
        goto done;
11696
74.9k
    if (ctxt->sax2) {
11697
74.9k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11698
74.9k
        nameNsPop(ctxt);
11699
74.9k
    }
11700
0
#ifdef LIBXML_SAX1_ENABLED
11701
0
      else
11702
0
        xmlParseEndTag1(ctxt, 0);
11703
74.9k
#endif /* LIBXML_SAX1_ENABLED */
11704
74.9k
    if (ctxt->instate == XML_PARSER_EOF) {
11705
        /* Nothing */
11706
74.9k
    } else if (ctxt->nameNr == 0) {
11707
514
        ctxt->instate = XML_PARSER_EPILOG;
11708
74.4k
    } else {
11709
74.4k
        ctxt->instate = XML_PARSER_CONTENT;
11710
74.4k
    }
11711
74.9k
    break;
11712
791k
            case XML_PARSER_CDATA_SECTION: {
11713
          /*
11714
     * The Push mode need to have the SAX callback for
11715
     * cdataBlock merge back contiguous callbacks.
11716
     */
11717
791k
    const xmlChar *term;
11718
11719
791k
                if (terminate) {
11720
                    /*
11721
                     * Don't call xmlParseLookupString. If 'terminate'
11722
                     * is set, checkIndex is invalid.
11723
                     */
11724
1.03k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11725
1.03k
                                           "]]>");
11726
790k
                } else {
11727
790k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11728
790k
                }
11729
11730
791k
    if (term == NULL) {
11731
12.7k
        int tmp, size;
11732
11733
12.7k
                    if (terminate) {
11734
                        /* Unfinished CDATA section */
11735
293
                        size = ctxt->input->end - ctxt->input->cur;
11736
12.4k
                    } else {
11737
12.4k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11738
971
                            goto done;
11739
11.4k
                        ctxt->checkIndex = 0;
11740
                        /* XXX: Why don't we pass the full buffer? */
11741
11.4k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11742
11.4k
                    }
11743
11.7k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11744
11.7k
                    if (tmp <= 0) {
11745
138
                        tmp = -tmp;
11746
138
                        ctxt->input->cur += tmp;
11747
138
                        goto encoding_error;
11748
138
                    }
11749
11.6k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11750
11.6k
                        if (ctxt->sax->cdataBlock != NULL)
11751
0
                            ctxt->sax->cdataBlock(ctxt->userData,
11752
0
                                                  ctxt->input->cur, tmp);
11753
11.6k
                        else if (ctxt->sax->characters != NULL)
11754
11.6k
                            ctxt->sax->characters(ctxt->userData,
11755
11.6k
                                                  ctxt->input->cur, tmp);
11756
11.6k
                    }
11757
11.6k
                    if (ctxt->instate == XML_PARSER_EOF)
11758
0
                        goto done;
11759
11.6k
                    SKIPL(tmp);
11760
778k
    } else {
11761
778k
                    int base = term - CUR_PTR;
11762
778k
        int tmp;
11763
11764
778k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11765
778k
        if ((tmp < 0) || (tmp != base)) {
11766
35
      tmp = -tmp;
11767
35
      ctxt->input->cur += tmp;
11768
35
      goto encoding_error;
11769
35
        }
11770
778k
        if ((ctxt->sax != NULL) && (base == 0) &&
11771
778k
            (ctxt->sax->cdataBlock != NULL) &&
11772
778k
            (!ctxt->disableSAX)) {
11773
      /*
11774
       * Special case to provide identical behaviour
11775
       * between pull and push parsers on empty CDATA
11776
       * sections
11777
       */
11778
0
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11779
0
           (!strncmp((const char *)&ctxt->input->cur[-9],
11780
0
                     "<![CDATA[", 9)))
11781
0
           ctxt->sax->cdataBlock(ctxt->userData,
11782
0
                                 BAD_CAST "", 0);
11783
778k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11784
778k
      (!ctxt->disableSAX)) {
11785
778k
      if (ctxt->sax->cdataBlock != NULL)
11786
0
          ctxt->sax->cdataBlock(ctxt->userData,
11787
0
              ctxt->input->cur, base);
11788
778k
      else if (ctxt->sax->characters != NULL)
11789
778k
          ctxt->sax->characters(ctxt->userData,
11790
778k
              ctxt->input->cur, base);
11791
778k
        }
11792
778k
        if (ctxt->instate == XML_PARSER_EOF)
11793
0
      goto done;
11794
778k
        SKIPL(base + 3);
11795
778k
        ctxt->instate = XML_PARSER_CONTENT;
11796
#ifdef DEBUG_PUSH
11797
        xmlGenericError(xmlGenericErrorContext,
11798
          "PP: entering CONTENT\n");
11799
#endif
11800
778k
    }
11801
790k
    break;
11802
791k
      }
11803
790k
            case XML_PARSER_MISC:
11804
57.8k
            case XML_PARSER_PROLOG:
11805
58.3k
            case XML_PARSER_EPILOG:
11806
58.3k
    SKIP_BLANKS;
11807
58.3k
                avail = ctxt->input->end - ctxt->input->cur;
11808
58.3k
    if (avail < 2)
11809
634
        goto done;
11810
57.6k
    cur = ctxt->input->cur[0];
11811
57.6k
    next = ctxt->input->cur[1];
11812
57.6k
          if ((cur == '<') && (next == '?')) {
11813
5.87k
        if ((!terminate) &&
11814
5.87k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11815
396
      goto done;
11816
#ifdef DEBUG_PUSH
11817
        xmlGenericError(xmlGenericErrorContext,
11818
          "PP: Parsing PI\n");
11819
#endif
11820
5.47k
        xmlParsePI(ctxt);
11821
5.47k
        if (ctxt->instate == XML_PARSER_EOF)
11822
0
      goto done;
11823
51.8k
    } else if ((cur == '<') && (next == '!') &&
11824
51.8k
        (ctxt->input->cur[2] == '-') &&
11825
51.8k
        (ctxt->input->cur[3] == '-')) {
11826
36.7k
        if ((!terminate) &&
11827
36.7k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11828
1.85k
      goto done;
11829
#ifdef DEBUG_PUSH
11830
        xmlGenericError(xmlGenericErrorContext,
11831
          "PP: Parsing Comment\n");
11832
#endif
11833
34.9k
        xmlParseComment(ctxt);
11834
34.9k
        if (ctxt->instate == XML_PARSER_EOF)
11835
0
      goto done;
11836
34.9k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11837
15.0k
                    (cur == '<') && (next == '!') &&
11838
15.0k
        (ctxt->input->cur[2] == 'D') &&
11839
15.0k
        (ctxt->input->cur[3] == 'O') &&
11840
15.0k
        (ctxt->input->cur[4] == 'C') &&
11841
15.0k
        (ctxt->input->cur[5] == 'T') &&
11842
15.0k
        (ctxt->input->cur[6] == 'Y') &&
11843
15.0k
        (ctxt->input->cur[7] == 'P') &&
11844
15.0k
        (ctxt->input->cur[8] == 'E')) {
11845
7.11k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11846
3.99k
                        goto done;
11847
#ifdef DEBUG_PUSH
11848
        xmlGenericError(xmlGenericErrorContext,
11849
          "PP: Parsing internal subset\n");
11850
#endif
11851
3.12k
        ctxt->inSubset = 1;
11852
3.12k
        xmlParseDocTypeDecl(ctxt);
11853
3.12k
        if (ctxt->instate == XML_PARSER_EOF)
11854
0
      goto done;
11855
3.12k
        if (RAW == '[') {
11856
2.98k
      ctxt->instate = XML_PARSER_DTD;
11857
#ifdef DEBUG_PUSH
11858
      xmlGenericError(xmlGenericErrorContext,
11859
        "PP: entering DTD\n");
11860
#endif
11861
2.98k
        } else {
11862
      /*
11863
       * Create and update the external subset.
11864
       */
11865
133
      ctxt->inSubset = 2;
11866
133
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11867
133
          (ctxt->sax->externalSubset != NULL))
11868
0
          ctxt->sax->externalSubset(ctxt->userData,
11869
0
            ctxt->intSubName, ctxt->extSubSystem,
11870
0
            ctxt->extSubURI);
11871
133
      ctxt->inSubset = 0;
11872
133
      xmlCleanSpecialAttr(ctxt);
11873
133
      ctxt->instate = XML_PARSER_PROLOG;
11874
#ifdef DEBUG_PUSH
11875
      xmlGenericError(xmlGenericErrorContext,
11876
        "PP: entering PROLOG\n");
11877
#endif
11878
133
        }
11879
7.92k
    } else if ((cur == '<') && (next == '!') &&
11880
7.92k
               (avail <
11881
124
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11882
87
        goto done;
11883
7.83k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11884
8
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11885
8
        xmlHaltParser(ctxt);
11886
#ifdef DEBUG_PUSH
11887
        xmlGenericError(xmlGenericErrorContext,
11888
          "PP: entering EOF\n");
11889
#endif
11890
8
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11891
0
      ctxt->sax->endDocument(ctxt->userData);
11892
8
        goto done;
11893
7.82k
                } else {
11894
7.82k
        ctxt->instate = XML_PARSER_START_TAG;
11895
#ifdef DEBUG_PUSH
11896
        xmlGenericError(xmlGenericErrorContext,
11897
          "PP: entering START_TAG\n");
11898
#endif
11899
7.82k
    }
11900
51.3k
    break;
11901
51.3k
            case XML_PARSER_DTD: {
11902
5.31k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11903
2.33k
                    goto done;
11904
2.98k
    xmlParseInternalSubset(ctxt);
11905
2.98k
    if (ctxt->instate == XML_PARSER_EOF)
11906
2.34k
        goto done;
11907
645
    ctxt->inSubset = 2;
11908
645
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11909
645
        (ctxt->sax->externalSubset != NULL))
11910
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11911
0
          ctxt->extSubSystem, ctxt->extSubURI);
11912
645
    ctxt->inSubset = 0;
11913
645
    xmlCleanSpecialAttr(ctxt);
11914
645
    if (ctxt->instate == XML_PARSER_EOF)
11915
0
        goto done;
11916
645
    ctxt->instate = XML_PARSER_PROLOG;
11917
#ifdef DEBUG_PUSH
11918
    xmlGenericError(xmlGenericErrorContext,
11919
      "PP: entering PROLOG\n");
11920
#endif
11921
645
                break;
11922
645
      }
11923
0
            case XML_PARSER_COMMENT:
11924
0
    xmlGenericError(xmlGenericErrorContext,
11925
0
      "PP: internal error, state == COMMENT\n");
11926
0
    ctxt->instate = XML_PARSER_CONTENT;
11927
#ifdef DEBUG_PUSH
11928
    xmlGenericError(xmlGenericErrorContext,
11929
      "PP: entering CONTENT\n");
11930
#endif
11931
0
    break;
11932
0
            case XML_PARSER_IGNORE:
11933
0
    xmlGenericError(xmlGenericErrorContext,
11934
0
      "PP: internal error, state == IGNORE");
11935
0
          ctxt->instate = XML_PARSER_DTD;
11936
#ifdef DEBUG_PUSH
11937
    xmlGenericError(xmlGenericErrorContext,
11938
      "PP: entering DTD\n");
11939
#endif
11940
0
          break;
11941
0
            case XML_PARSER_PI:
11942
0
    xmlGenericError(xmlGenericErrorContext,
11943
0
      "PP: internal error, state == PI\n");
11944
0
    ctxt->instate = XML_PARSER_CONTENT;
11945
#ifdef DEBUG_PUSH
11946
    xmlGenericError(xmlGenericErrorContext,
11947
      "PP: entering CONTENT\n");
11948
#endif
11949
0
    break;
11950
0
            case XML_PARSER_ENTITY_DECL:
11951
0
    xmlGenericError(xmlGenericErrorContext,
11952
0
      "PP: internal error, state == ENTITY_DECL\n");
11953
0
    ctxt->instate = XML_PARSER_DTD;
11954
#ifdef DEBUG_PUSH
11955
    xmlGenericError(xmlGenericErrorContext,
11956
      "PP: entering DTD\n");
11957
#endif
11958
0
    break;
11959
0
            case XML_PARSER_ENTITY_VALUE:
11960
0
    xmlGenericError(xmlGenericErrorContext,
11961
0
      "PP: internal error, state == ENTITY_VALUE\n");
11962
0
    ctxt->instate = XML_PARSER_CONTENT;
11963
#ifdef DEBUG_PUSH
11964
    xmlGenericError(xmlGenericErrorContext,
11965
      "PP: entering DTD\n");
11966
#endif
11967
0
    break;
11968
0
            case XML_PARSER_ATTRIBUTE_VALUE:
11969
0
    xmlGenericError(xmlGenericErrorContext,
11970
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
11971
0
    ctxt->instate = XML_PARSER_START_TAG;
11972
#ifdef DEBUG_PUSH
11973
    xmlGenericError(xmlGenericErrorContext,
11974
      "PP: entering START_TAG\n");
11975
#endif
11976
0
    break;
11977
0
            case XML_PARSER_SYSTEM_LITERAL:
11978
0
    xmlGenericError(xmlGenericErrorContext,
11979
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
11980
0
    ctxt->instate = XML_PARSER_START_TAG;
11981
#ifdef DEBUG_PUSH
11982
    xmlGenericError(xmlGenericErrorContext,
11983
      "PP: entering START_TAG\n");
11984
#endif
11985
0
    break;
11986
0
            case XML_PARSER_PUBLIC_LITERAL:
11987
0
    xmlGenericError(xmlGenericErrorContext,
11988
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
11989
0
    ctxt->instate = XML_PARSER_START_TAG;
11990
#ifdef DEBUG_PUSH
11991
    xmlGenericError(xmlGenericErrorContext,
11992
      "PP: entering START_TAG\n");
11993
#endif
11994
0
    break;
11995
6.27M
  }
11996
6.27M
    }
11997
44.0k
done:
11998
#ifdef DEBUG_PUSH
11999
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12000
#endif
12001
44.0k
    return(ret);
12002
173
encoding_error:
12003
173
    if (ctxt->input->end - ctxt->input->cur < 4) {
12004
43
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12005
43
         "Input is not proper UTF-8, indicate encoding !\n",
12006
43
         NULL, NULL);
12007
130
    } else {
12008
130
        char buffer[150];
12009
12010
130
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12011
130
      ctxt->input->cur[0], ctxt->input->cur[1],
12012
130
      ctxt->input->cur[2], ctxt->input->cur[3]);
12013
130
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12014
130
         "Input is not proper UTF-8, indicate encoding !\n%s",
12015
130
         BAD_CAST buffer, NULL);
12016
130
    }
12017
173
    return(0);
12018
52.0k
}
12019
12020
/**
12021
 * xmlParseChunk:
12022
 * @ctxt:  an XML parser context
12023
 * @chunk:  an char array
12024
 * @size:  the size in byte of the chunk
12025
 * @terminate:  last chunk indicator
12026
 *
12027
 * Parse a Chunk of memory
12028
 *
12029
 * Returns zero if no error, the xmlParserErrors otherwise.
12030
 */
12031
int
12032
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12033
52.5k
              int terminate) {
12034
52.5k
    int end_in_lf = 0;
12035
12036
52.5k
    if (ctxt == NULL)
12037
0
        return(XML_ERR_INTERNAL_ERROR);
12038
52.5k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12039
484
        return(ctxt->errNo);
12040
52.0k
    if (ctxt->instate == XML_PARSER_EOF)
12041
1
        return(-1);
12042
52.0k
    if (ctxt->input == NULL)
12043
0
        return(-1);
12044
12045
52.0k
    ctxt->progressive = 1;
12046
52.0k
    if (ctxt->instate == XML_PARSER_START)
12047
15.7k
        xmlDetectSAX2(ctxt);
12048
52.0k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12049
52.0k
        (chunk[size - 1] == '\r')) {
12050
2.71k
  end_in_lf = 1;
12051
2.71k
  size--;
12052
2.71k
    }
12053
12054
52.0k
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12055
52.0k
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12056
40.0k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12057
40.0k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12058
40.0k
  int res;
12059
12060
40.0k
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12061
40.0k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12062
40.0k
  if (res < 0) {
12063
49
      ctxt->errNo = XML_PARSER_EOF;
12064
49
      xmlHaltParser(ctxt);
12065
49
      return (XML_PARSER_EOF);
12066
49
  }
12067
#ifdef DEBUG_PUSH
12068
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12069
#endif
12070
12071
40.0k
    } else if (ctxt->instate != XML_PARSER_EOF) {
12072
11.9k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12073
11.9k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12074
11.9k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12075
11.9k
        (in->raw != NULL)) {
12076
368
    int nbchars;
12077
368
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12078
368
    size_t current = ctxt->input->cur - ctxt->input->base;
12079
12080
368
    nbchars = xmlCharEncInput(in, terminate);
12081
368
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12082
368
    if (nbchars < 0) {
12083
        /* TODO 2.6.0 */
12084
4
        xmlGenericError(xmlGenericErrorContext,
12085
4
            "xmlParseChunk: encoder error\n");
12086
4
                    xmlHaltParser(ctxt);
12087
4
        return(XML_ERR_INVALID_ENCODING);
12088
4
    }
12089
368
      }
12090
11.9k
  }
12091
11.9k
    }
12092
12093
52.0k
    xmlParseTryOrFinish(ctxt, terminate);
12094
52.0k
    if (ctxt->instate == XML_PARSER_EOF)
12095
3.08k
        return(ctxt->errNo);
12096
12097
48.9k
    if ((ctxt->input != NULL) &&
12098
48.9k
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12099
48.9k
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12100
48.9k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12101
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12102
0
        xmlHaltParser(ctxt);
12103
0
    }
12104
48.9k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12105
7.71k
        return(ctxt->errNo);
12106
12107
41.2k
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12108
41.2k
        (ctxt->input->buf != NULL)) {
12109
2.70k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12110
2.70k
           ctxt->input);
12111
2.70k
  size_t current = ctxt->input->cur - ctxt->input->base;
12112
12113
2.70k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12114
12115
2.70k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12116
2.70k
            base, current);
12117
2.70k
    }
12118
41.2k
    if (terminate) {
12119
  /*
12120
   * Check for termination
12121
   */
12122
1.60k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12123
1.60k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12124
1.17k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12125
1.17k
  }
12126
1.60k
  if ((ctxt->instate == XML_PARSER_EPILOG) &&
12127
1.60k
            (ctxt->input->cur < ctxt->input->end)) {
12128
8
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12129
8
  }
12130
1.60k
  if (ctxt->instate != XML_PARSER_EOF) {
12131
1.60k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12132
0
    ctxt->sax->endDocument(ctxt->userData);
12133
1.60k
  }
12134
1.60k
  ctxt->instate = XML_PARSER_EOF;
12135
1.60k
    }
12136
41.2k
    if (ctxt->wellFormed == 0)
12137
1.17k
  return((xmlParserErrors) ctxt->errNo);
12138
40.0k
    else
12139
40.0k
        return(0);
12140
41.2k
}
12141
12142
/************************************************************************
12143
 *                  *
12144
 *    I/O front end functions to the parser     *
12145
 *                  *
12146
 ************************************************************************/
12147
12148
/**
12149
 * xmlCreatePushParserCtxt:
12150
 * @sax:  a SAX handler
12151
 * @user_data:  The user data returned on SAX callbacks
12152
 * @chunk:  a pointer to an array of chars
12153
 * @size:  number of chars in the array
12154
 * @filename:  an optional file name or URI
12155
 *
12156
 * Create a parser context for using the XML parser in push mode.
12157
 * If @buffer and @size are non-NULL, the data is used to detect
12158
 * the encoding.  The remaining characters will be parsed so they
12159
 * don't need to be fed in again through xmlParseChunk.
12160
 * To allow content encoding detection, @size should be >= 4
12161
 * The value of @filename is used for fetching external entities
12162
 * and error/warning reports.
12163
 *
12164
 * Returns the new parser context or NULL
12165
 */
12166
12167
xmlParserCtxtPtr
12168
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12169
12.4k
                        const char *chunk, int size, const char *filename) {
12170
12.4k
    xmlParserCtxtPtr ctxt;
12171
12.4k
    xmlParserInputPtr inputStream;
12172
12.4k
    xmlParserInputBufferPtr buf;
12173
12174
12.4k
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
12175
12.4k
    if (buf == NULL) return(NULL);
12176
12177
12.4k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12178
12.4k
    if (ctxt == NULL) {
12179
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12180
0
  xmlFreeParserInputBuffer(buf);
12181
0
  return(NULL);
12182
0
    }
12183
12.4k
    ctxt->dictNames = 1;
12184
12.4k
    if (filename == NULL) {
12185
12.4k
  ctxt->directory = NULL;
12186
12.4k
    } else {
12187
0
        ctxt->directory = xmlParserGetDirectory(filename);
12188
0
    }
12189
12190
12.4k
    inputStream = xmlNewInputStream(ctxt);
12191
12.4k
    if (inputStream == NULL) {
12192
0
  xmlFreeParserCtxt(ctxt);
12193
0
  xmlFreeParserInputBuffer(buf);
12194
0
  return(NULL);
12195
0
    }
12196
12197
12.4k
    if (filename == NULL)
12198
12.4k
  inputStream->filename = NULL;
12199
0
    else {
12200
0
  inputStream->filename = (char *)
12201
0
      xmlCanonicPath((const xmlChar *) filename);
12202
0
  if (inputStream->filename == NULL) {
12203
0
            xmlFreeInputStream(inputStream);
12204
0
      xmlFreeParserCtxt(ctxt);
12205
0
      xmlFreeParserInputBuffer(buf);
12206
0
      return(NULL);
12207
0
  }
12208
0
    }
12209
12.4k
    inputStream->buf = buf;
12210
12.4k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12211
12.4k
    inputPush(ctxt, inputStream);
12212
12213
    /*
12214
     * If the caller didn't provide an initial 'chunk' for determining
12215
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12216
     * that it can be automatically determined later
12217
     */
12218
12.4k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12219
12220
12.4k
    if ((size != 0) && (chunk != NULL) &&
12221
12.4k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12222
12.4k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12223
12.4k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12224
12225
12.4k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12226
12227
12.4k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12228
#ifdef DEBUG_PUSH
12229
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12230
#endif
12231
12.4k
    }
12232
12233
12.4k
    return(ctxt);
12234
12.4k
}
12235
#endif /* LIBXML_PUSH_ENABLED */
12236
12237
/**
12238
 * xmlStopParser:
12239
 * @ctxt:  an XML parser context
12240
 *
12241
 * Blocks further parser processing
12242
 */
12243
void
12244
17
xmlStopParser(xmlParserCtxtPtr ctxt) {
12245
17
    if (ctxt == NULL)
12246
0
        return;
12247
17
    xmlHaltParser(ctxt);
12248
17
    ctxt->errNo = XML_ERR_USER_STOP;
12249
17
}
12250
12251
/**
12252
 * xmlCreateIOParserCtxt:
12253
 * @sax:  a SAX handler
12254
 * @user_data:  The user data returned on SAX callbacks
12255
 * @ioread:  an I/O read function
12256
 * @ioclose:  an I/O close function
12257
 * @ioctx:  an I/O handler
12258
 * @enc:  the charset encoding if known
12259
 *
12260
 * Create a parser context for using the XML parser with an existing
12261
 * I/O stream
12262
 *
12263
 * Returns the new parser context or NULL
12264
 */
12265
xmlParserCtxtPtr
12266
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12267
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12268
0
  void *ioctx, xmlCharEncoding enc) {
12269
0
    xmlParserCtxtPtr ctxt;
12270
0
    xmlParserInputPtr inputStream;
12271
0
    xmlParserInputBufferPtr buf;
12272
12273
0
    if (ioread == NULL) return(NULL);
12274
12275
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12276
0
    if (buf == NULL) {
12277
0
        if (ioclose != NULL)
12278
0
            ioclose(ioctx);
12279
0
        return (NULL);
12280
0
    }
12281
12282
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12283
0
    if (ctxt == NULL) {
12284
0
  xmlFreeParserInputBuffer(buf);
12285
0
  return(NULL);
12286
0
    }
12287
12288
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12289
0
    if (inputStream == NULL) {
12290
0
  xmlFreeParserCtxt(ctxt);
12291
0
  return(NULL);
12292
0
    }
12293
0
    inputPush(ctxt, inputStream);
12294
12295
0
    return(ctxt);
12296
0
}
12297
12298
#ifdef LIBXML_VALID_ENABLED
12299
/************************************************************************
12300
 *                  *
12301
 *    Front ends when parsing a DTD       *
12302
 *                  *
12303
 ************************************************************************/
12304
12305
/**
12306
 * xmlIOParseDTD:
12307
 * @sax:  the SAX handler block or NULL
12308
 * @input:  an Input Buffer
12309
 * @enc:  the charset encoding if known
12310
 *
12311
 * Load and parse a DTD
12312
 *
12313
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12314
 * @input will be freed by the function in any case.
12315
 */
12316
12317
xmlDtdPtr
12318
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12319
0
        xmlCharEncoding enc) {
12320
0
    xmlDtdPtr ret = NULL;
12321
0
    xmlParserCtxtPtr ctxt;
12322
0
    xmlParserInputPtr pinput = NULL;
12323
0
    xmlChar start[4];
12324
12325
0
    if (input == NULL)
12326
0
  return(NULL);
12327
12328
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12329
0
    if (ctxt == NULL) {
12330
0
        xmlFreeParserInputBuffer(input);
12331
0
  return(NULL);
12332
0
    }
12333
12334
    /* We are loading a DTD */
12335
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12336
12337
0
    xmlDetectSAX2(ctxt);
12338
12339
    /*
12340
     * generate a parser input from the I/O handler
12341
     */
12342
12343
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12344
0
    if (pinput == NULL) {
12345
0
        xmlFreeParserInputBuffer(input);
12346
0
  xmlFreeParserCtxt(ctxt);
12347
0
  return(NULL);
12348
0
    }
12349
12350
    /*
12351
     * plug some encoding conversion routines here.
12352
     */
12353
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12354
0
  xmlFreeParserCtxt(ctxt);
12355
0
  return(NULL);
12356
0
    }
12357
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12358
0
        xmlSwitchEncoding(ctxt, enc);
12359
0
    }
12360
12361
0
    pinput->filename = NULL;
12362
0
    pinput->line = 1;
12363
0
    pinput->col = 1;
12364
0
    pinput->base = ctxt->input->cur;
12365
0
    pinput->cur = ctxt->input->cur;
12366
0
    pinput->free = NULL;
12367
12368
    /*
12369
     * let's parse that entity knowing it's an external subset.
12370
     */
12371
0
    ctxt->inSubset = 2;
12372
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12373
0
    if (ctxt->myDoc == NULL) {
12374
0
  xmlErrMemory(ctxt, "New Doc failed");
12375
0
  return(NULL);
12376
0
    }
12377
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12378
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12379
0
                                 BAD_CAST "none", BAD_CAST "none");
12380
12381
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12382
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12383
  /*
12384
   * Get the 4 first bytes and decode the charset
12385
   * if enc != XML_CHAR_ENCODING_NONE
12386
   * plug some encoding conversion routines.
12387
   */
12388
0
  start[0] = RAW;
12389
0
  start[1] = NXT(1);
12390
0
  start[2] = NXT(2);
12391
0
  start[3] = NXT(3);
12392
0
  enc = xmlDetectCharEncoding(start, 4);
12393
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12394
0
      xmlSwitchEncoding(ctxt, enc);
12395
0
  }
12396
0
    }
12397
12398
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12399
12400
0
    if (ctxt->myDoc != NULL) {
12401
0
  if (ctxt->wellFormed) {
12402
0
      ret = ctxt->myDoc->extSubset;
12403
0
      ctxt->myDoc->extSubset = NULL;
12404
0
      if (ret != NULL) {
12405
0
    xmlNodePtr tmp;
12406
12407
0
    ret->doc = NULL;
12408
0
    tmp = ret->children;
12409
0
    while (tmp != NULL) {
12410
0
        tmp->doc = NULL;
12411
0
        tmp = tmp->next;
12412
0
    }
12413
0
      }
12414
0
  } else {
12415
0
      ret = NULL;
12416
0
  }
12417
0
        xmlFreeDoc(ctxt->myDoc);
12418
0
        ctxt->myDoc = NULL;
12419
0
    }
12420
0
    xmlFreeParserCtxt(ctxt);
12421
12422
0
    return(ret);
12423
0
}
12424
12425
/**
12426
 * xmlSAXParseDTD:
12427
 * @sax:  the SAX handler block
12428
 * @ExternalID:  a NAME* containing the External ID of the DTD
12429
 * @SystemID:  a NAME* containing the URL to the DTD
12430
 *
12431
 * DEPRECATED: Don't use.
12432
 *
12433
 * Load and parse an external subset.
12434
 *
12435
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12436
 */
12437
12438
xmlDtdPtr
12439
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12440
0
                          const xmlChar *SystemID) {
12441
0
    xmlDtdPtr ret = NULL;
12442
0
    xmlParserCtxtPtr ctxt;
12443
0
    xmlParserInputPtr input = NULL;
12444
0
    xmlCharEncoding enc;
12445
0
    xmlChar* systemIdCanonic;
12446
12447
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12448
12449
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12450
0
    if (ctxt == NULL) {
12451
0
  return(NULL);
12452
0
    }
12453
12454
    /* We are loading a DTD */
12455
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12456
12457
    /*
12458
     * Canonicalise the system ID
12459
     */
12460
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12461
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12462
0
  xmlFreeParserCtxt(ctxt);
12463
0
  return(NULL);
12464
0
    }
12465
12466
    /*
12467
     * Ask the Entity resolver to load the damn thing
12468
     */
12469
12470
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12471
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12472
0
                                   systemIdCanonic);
12473
0
    if (input == NULL) {
12474
0
  xmlFreeParserCtxt(ctxt);
12475
0
  if (systemIdCanonic != NULL)
12476
0
      xmlFree(systemIdCanonic);
12477
0
  return(NULL);
12478
0
    }
12479
12480
    /*
12481
     * plug some encoding conversion routines here.
12482
     */
12483
0
    if (xmlPushInput(ctxt, input) < 0) {
12484
0
  xmlFreeParserCtxt(ctxt);
12485
0
  if (systemIdCanonic != NULL)
12486
0
      xmlFree(systemIdCanonic);
12487
0
  return(NULL);
12488
0
    }
12489
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12490
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12491
0
  xmlSwitchEncoding(ctxt, enc);
12492
0
    }
12493
12494
0
    if (input->filename == NULL)
12495
0
  input->filename = (char *) systemIdCanonic;
12496
0
    else
12497
0
  xmlFree(systemIdCanonic);
12498
0
    input->line = 1;
12499
0
    input->col = 1;
12500
0
    input->base = ctxt->input->cur;
12501
0
    input->cur = ctxt->input->cur;
12502
0
    input->free = NULL;
12503
12504
    /*
12505
     * let's parse that entity knowing it's an external subset.
12506
     */
12507
0
    ctxt->inSubset = 2;
12508
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12509
0
    if (ctxt->myDoc == NULL) {
12510
0
  xmlErrMemory(ctxt, "New Doc failed");
12511
0
  xmlFreeParserCtxt(ctxt);
12512
0
  return(NULL);
12513
0
    }
12514
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12515
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12516
0
                                 ExternalID, SystemID);
12517
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12518
12519
0
    if (ctxt->myDoc != NULL) {
12520
0
  if (ctxt->wellFormed) {
12521
0
      ret = ctxt->myDoc->extSubset;
12522
0
      ctxt->myDoc->extSubset = NULL;
12523
0
      if (ret != NULL) {
12524
0
    xmlNodePtr tmp;
12525
12526
0
    ret->doc = NULL;
12527
0
    tmp = ret->children;
12528
0
    while (tmp != NULL) {
12529
0
        tmp->doc = NULL;
12530
0
        tmp = tmp->next;
12531
0
    }
12532
0
      }
12533
0
  } else {
12534
0
      ret = NULL;
12535
0
  }
12536
0
        xmlFreeDoc(ctxt->myDoc);
12537
0
        ctxt->myDoc = NULL;
12538
0
    }
12539
0
    xmlFreeParserCtxt(ctxt);
12540
12541
0
    return(ret);
12542
0
}
12543
12544
12545
/**
12546
 * xmlParseDTD:
12547
 * @ExternalID:  a NAME* containing the External ID of the DTD
12548
 * @SystemID:  a NAME* containing the URL to the DTD
12549
 *
12550
 * Load and parse an external subset.
12551
 *
12552
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12553
 */
12554
12555
xmlDtdPtr
12556
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12557
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12558
0
}
12559
#endif /* LIBXML_VALID_ENABLED */
12560
12561
/************************************************************************
12562
 *                  *
12563
 *    Front ends when parsing an Entity     *
12564
 *                  *
12565
 ************************************************************************/
12566
12567
/**
12568
 * xmlParseCtxtExternalEntity:
12569
 * @ctx:  the existing parsing context
12570
 * @URL:  the URL for the entity to load
12571
 * @ID:  the System ID for the entity to load
12572
 * @lst:  the return value for the set of parsed nodes
12573
 *
12574
 * Parse an external general entity within an existing parsing context
12575
 * An external general parsed entity is well-formed if it matches the
12576
 * production labeled extParsedEnt.
12577
 *
12578
 * [78] extParsedEnt ::= TextDecl? content
12579
 *
12580
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12581
 *    the parser error code otherwise
12582
 */
12583
12584
int
12585
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12586
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12587
0
    void *userData;
12588
12589
0
    if (ctx == NULL) return(-1);
12590
    /*
12591
     * If the user provided their own SAX callbacks, then reuse the
12592
     * userData callback field, otherwise the expected setup in a
12593
     * DOM builder is to have userData == ctxt
12594
     */
12595
0
    if (ctx->userData == ctx)
12596
0
        userData = NULL;
12597
0
    else
12598
0
        userData = ctx->userData;
12599
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12600
0
                                         userData, ctx->depth + 1,
12601
0
                                         URL, ID, lst);
12602
0
}
12603
12604
/**
12605
 * xmlParseExternalEntityPrivate:
12606
 * @doc:  the document the chunk pertains to
12607
 * @oldctxt:  the previous parser context if available
12608
 * @sax:  the SAX handler block (possibly NULL)
12609
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12610
 * @depth:  Used for loop detection, use 0
12611
 * @URL:  the URL for the entity to load
12612
 * @ID:  the System ID for the entity to load
12613
 * @list:  the return value for the set of parsed nodes
12614
 *
12615
 * Private version of xmlParseExternalEntity()
12616
 *
12617
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12618
 *    the parser error code otherwise
12619
 */
12620
12621
static xmlParserErrors
12622
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12623
                xmlSAXHandlerPtr sax,
12624
          void *user_data, int depth, const xmlChar *URL,
12625
0
          const xmlChar *ID, xmlNodePtr *list) {
12626
0
    xmlParserCtxtPtr ctxt;
12627
0
    xmlDocPtr newDoc;
12628
0
    xmlNodePtr newRoot;
12629
0
    xmlParserErrors ret = XML_ERR_OK;
12630
0
    xmlChar start[4];
12631
0
    xmlCharEncoding enc;
12632
12633
0
    if (((depth > 40) &&
12634
0
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12635
0
  (depth > 100)) {
12636
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12637
0
                       "Maximum entity nesting depth exceeded");
12638
0
        return(XML_ERR_ENTITY_LOOP);
12639
0
    }
12640
12641
0
    if (list != NULL)
12642
0
        *list = NULL;
12643
0
    if ((URL == NULL) && (ID == NULL))
12644
0
  return(XML_ERR_INTERNAL_ERROR);
12645
0
    if (doc == NULL)
12646
0
  return(XML_ERR_INTERNAL_ERROR);
12647
12648
0
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12649
0
                                             oldctxt);
12650
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12651
0
    if (oldctxt != NULL) {
12652
0
        ctxt->nbErrors = oldctxt->nbErrors;
12653
0
        ctxt->nbWarnings = oldctxt->nbWarnings;
12654
0
    }
12655
0
    xmlDetectSAX2(ctxt);
12656
12657
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12658
0
    if (newDoc == NULL) {
12659
0
  xmlFreeParserCtxt(ctxt);
12660
0
  return(XML_ERR_INTERNAL_ERROR);
12661
0
    }
12662
0
    newDoc->properties = XML_DOC_INTERNAL;
12663
0
    if (doc) {
12664
0
        newDoc->intSubset = doc->intSubset;
12665
0
        newDoc->extSubset = doc->extSubset;
12666
0
        if (doc->dict) {
12667
0
            newDoc->dict = doc->dict;
12668
0
            xmlDictReference(newDoc->dict);
12669
0
        }
12670
0
        if (doc->URL != NULL) {
12671
0
            newDoc->URL = xmlStrdup(doc->URL);
12672
0
        }
12673
0
    }
12674
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12675
0
    if (newRoot == NULL) {
12676
0
  if (sax != NULL)
12677
0
  xmlFreeParserCtxt(ctxt);
12678
0
  newDoc->intSubset = NULL;
12679
0
  newDoc->extSubset = NULL;
12680
0
        xmlFreeDoc(newDoc);
12681
0
  return(XML_ERR_INTERNAL_ERROR);
12682
0
    }
12683
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12684
0
    nodePush(ctxt, newDoc->children);
12685
0
    if (doc == NULL) {
12686
0
        ctxt->myDoc = newDoc;
12687
0
    } else {
12688
0
        ctxt->myDoc = doc;
12689
0
        newRoot->doc = doc;
12690
0
    }
12691
12692
    /*
12693
     * Get the 4 first bytes and decode the charset
12694
     * if enc != XML_CHAR_ENCODING_NONE
12695
     * plug some encoding conversion routines.
12696
     */
12697
0
    GROW;
12698
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12699
0
  start[0] = RAW;
12700
0
  start[1] = NXT(1);
12701
0
  start[2] = NXT(2);
12702
0
  start[3] = NXT(3);
12703
0
  enc = xmlDetectCharEncoding(start, 4);
12704
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12705
0
      xmlSwitchEncoding(ctxt, enc);
12706
0
  }
12707
0
    }
12708
12709
    /*
12710
     * Parse a possible text declaration first
12711
     */
12712
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12713
0
  xmlParseTextDecl(ctxt);
12714
        /*
12715
         * An XML-1.0 document can't reference an entity not XML-1.0
12716
         */
12717
0
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12718
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12719
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12720
0
                           "Version mismatch between document and entity\n");
12721
0
        }
12722
0
    }
12723
12724
0
    ctxt->instate = XML_PARSER_CONTENT;
12725
0
    ctxt->depth = depth;
12726
0
    if (oldctxt != NULL) {
12727
0
  ctxt->_private = oldctxt->_private;
12728
0
  ctxt->loadsubset = oldctxt->loadsubset;
12729
0
  ctxt->validate = oldctxt->validate;
12730
0
  ctxt->valid = oldctxt->valid;
12731
0
  ctxt->replaceEntities = oldctxt->replaceEntities;
12732
0
        if (oldctxt->validate) {
12733
0
            ctxt->vctxt.error = oldctxt->vctxt.error;
12734
0
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12735
0
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12736
0
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12737
0
        }
12738
0
  ctxt->external = oldctxt->external;
12739
0
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12740
0
        ctxt->dict = oldctxt->dict;
12741
0
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12742
0
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12743
0
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12744
0
        ctxt->dictNames = oldctxt->dictNames;
12745
0
        ctxt->attsDefault = oldctxt->attsDefault;
12746
0
        ctxt->attsSpecial = oldctxt->attsSpecial;
12747
0
        ctxt->linenumbers = oldctxt->linenumbers;
12748
0
  ctxt->record_info = oldctxt->record_info;
12749
0
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12750
0
  ctxt->node_seq.length = oldctxt->node_seq.length;
12751
0
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12752
0
    } else {
12753
  /*
12754
   * Doing validity checking on chunk without context
12755
   * doesn't make sense
12756
   */
12757
0
  ctxt->_private = NULL;
12758
0
  ctxt->validate = 0;
12759
0
  ctxt->external = 2;
12760
0
  ctxt->loadsubset = 0;
12761
0
    }
12762
12763
0
    xmlParseContent(ctxt);
12764
12765
0
    if ((RAW == '<') && (NXT(1) == '/')) {
12766
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12767
0
    } else if (RAW != 0) {
12768
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12769
0
    }
12770
0
    if (ctxt->node != newDoc->children) {
12771
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12772
0
    }
12773
12774
0
    if (!ctxt->wellFormed) {
12775
0
  ret = (xmlParserErrors)ctxt->errNo;
12776
0
        if (oldctxt != NULL) {
12777
0
            oldctxt->errNo = ctxt->errNo;
12778
0
            oldctxt->wellFormed = 0;
12779
0
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12780
0
        }
12781
0
    } else {
12782
0
  if (list != NULL) {
12783
0
      xmlNodePtr cur;
12784
12785
      /*
12786
       * Return the newly created nodeset after unlinking it from
12787
       * they pseudo parent.
12788
       */
12789
0
      cur = newDoc->children->children;
12790
0
      *list = cur;
12791
0
      while (cur != NULL) {
12792
0
    cur->parent = NULL;
12793
0
    cur = cur->next;
12794
0
      }
12795
0
            newDoc->children->children = NULL;
12796
0
  }
12797
0
  ret = XML_ERR_OK;
12798
0
    }
12799
12800
    /*
12801
     * Also record the size of the entity parsed
12802
     */
12803
0
    if (ctxt->input != NULL && oldctxt != NULL) {
12804
0
        unsigned long consumed = ctxt->input->consumed;
12805
12806
0
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12807
12808
0
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12809
0
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12810
12811
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12812
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12813
0
    }
12814
12815
0
    if (oldctxt != NULL) {
12816
0
        ctxt->dict = NULL;
12817
0
        ctxt->attsDefault = NULL;
12818
0
        ctxt->attsSpecial = NULL;
12819
0
        oldctxt->nbErrors = ctxt->nbErrors;
12820
0
        oldctxt->nbWarnings = ctxt->nbWarnings;
12821
0
        oldctxt->validate = ctxt->validate;
12822
0
        oldctxt->valid = ctxt->valid;
12823
0
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12824
0
        oldctxt->node_seq.length = ctxt->node_seq.length;
12825
0
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12826
0
    }
12827
0
    ctxt->node_seq.maximum = 0;
12828
0
    ctxt->node_seq.length = 0;
12829
0
    ctxt->node_seq.buffer = NULL;
12830
0
    xmlFreeParserCtxt(ctxt);
12831
0
    newDoc->intSubset = NULL;
12832
0
    newDoc->extSubset = NULL;
12833
0
    xmlFreeDoc(newDoc);
12834
12835
0
    return(ret);
12836
0
}
12837
12838
#ifdef LIBXML_SAX1_ENABLED
12839
/**
12840
 * xmlParseExternalEntity:
12841
 * @doc:  the document the chunk pertains to
12842
 * @sax:  the SAX handler block (possibly NULL)
12843
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12844
 * @depth:  Used for loop detection, use 0
12845
 * @URL:  the URL for the entity to load
12846
 * @ID:  the System ID for the entity to load
12847
 * @lst:  the return value for the set of parsed nodes
12848
 *
12849
 * Parse an external general entity
12850
 * An external general parsed entity is well-formed if it matches the
12851
 * production labeled extParsedEnt.
12852
 *
12853
 * [78] extParsedEnt ::= TextDecl? content
12854
 *
12855
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12856
 *    the parser error code otherwise
12857
 */
12858
12859
int
12860
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12861
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12862
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12863
0
                           ID, lst));
12864
0
}
12865
12866
/**
12867
 * xmlParseBalancedChunkMemory:
12868
 * @doc:  the document the chunk pertains to (must not be NULL)
12869
 * @sax:  the SAX handler block (possibly NULL)
12870
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12871
 * @depth:  Used for loop detection, use 0
12872
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12873
 * @lst:  the return value for the set of parsed nodes
12874
 *
12875
 * Parse a well-balanced chunk of an XML document
12876
 * called by the parser
12877
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12878
 * the content production in the XML grammar:
12879
 *
12880
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12881
 *
12882
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12883
 *    the parser error code otherwise
12884
 */
12885
12886
int
12887
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12888
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12889
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12890
0
                                                depth, string, lst, 0 );
12891
0
}
12892
#endif /* LIBXML_SAX1_ENABLED */
12893
12894
/**
12895
 * xmlParseBalancedChunkMemoryInternal:
12896
 * @oldctxt:  the existing parsing context
12897
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12898
 * @user_data:  the user data field for the parser context
12899
 * @lst:  the return value for the set of parsed nodes
12900
 *
12901
 *
12902
 * Parse a well-balanced chunk of an XML document
12903
 * called by the parser
12904
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12905
 * the content production in the XML grammar:
12906
 *
12907
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12908
 *
12909
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12910
 * error code otherwise
12911
 *
12912
 * In case recover is set to 1, the nodelist will not be empty even if
12913
 * the parsed chunk is not well balanced.
12914
 */
12915
static xmlParserErrors
12916
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12917
0
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12918
0
    xmlParserCtxtPtr ctxt;
12919
0
    xmlDocPtr newDoc = NULL;
12920
0
    xmlNodePtr newRoot;
12921
0
    xmlSAXHandlerPtr oldsax = NULL;
12922
0
    xmlNodePtr content = NULL;
12923
0
    xmlNodePtr last = NULL;
12924
0
    int size;
12925
0
    xmlParserErrors ret = XML_ERR_OK;
12926
0
#ifdef SAX2
12927
0
    int i;
12928
0
#endif
12929
12930
0
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12931
0
        (oldctxt->depth >  100)) {
12932
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12933
0
                       "Maximum entity nesting depth exceeded");
12934
0
  return(XML_ERR_ENTITY_LOOP);
12935
0
    }
12936
12937
12938
0
    if (lst != NULL)
12939
0
        *lst = NULL;
12940
0
    if (string == NULL)
12941
0
        return(XML_ERR_INTERNAL_ERROR);
12942
12943
0
    size = xmlStrlen(string);
12944
12945
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12946
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12947
0
    ctxt->nbErrors = oldctxt->nbErrors;
12948
0
    ctxt->nbWarnings = oldctxt->nbWarnings;
12949
0
    if (user_data != NULL)
12950
0
  ctxt->userData = user_data;
12951
0
    else
12952
0
  ctxt->userData = ctxt;
12953
0
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12954
0
    ctxt->dict = oldctxt->dict;
12955
0
    ctxt->input_id = oldctxt->input_id;
12956
0
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12957
0
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12958
0
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12959
12960
0
#ifdef SAX2
12961
    /* propagate namespaces down the entity */
12962
0
    for (i = 0;i < oldctxt->nsNr;i += 2) {
12963
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12964
0
    }
12965
0
#endif
12966
12967
0
    oldsax = ctxt->sax;
12968
0
    ctxt->sax = oldctxt->sax;
12969
0
    xmlDetectSAX2(ctxt);
12970
0
    ctxt->replaceEntities = oldctxt->replaceEntities;
12971
0
    ctxt->options = oldctxt->options;
12972
12973
0
    ctxt->_private = oldctxt->_private;
12974
0
    if (oldctxt->myDoc == NULL) {
12975
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
12976
0
  if (newDoc == NULL) {
12977
0
      ctxt->sax = oldsax;
12978
0
      ctxt->dict = NULL;
12979
0
      xmlFreeParserCtxt(ctxt);
12980
0
      return(XML_ERR_INTERNAL_ERROR);
12981
0
  }
12982
0
  newDoc->properties = XML_DOC_INTERNAL;
12983
0
  newDoc->dict = ctxt->dict;
12984
0
  xmlDictReference(newDoc->dict);
12985
0
  ctxt->myDoc = newDoc;
12986
0
    } else {
12987
0
  ctxt->myDoc = oldctxt->myDoc;
12988
0
        content = ctxt->myDoc->children;
12989
0
  last = ctxt->myDoc->last;
12990
0
    }
12991
0
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12992
0
    if (newRoot == NULL) {
12993
0
  ctxt->sax = oldsax;
12994
0
  ctxt->dict = NULL;
12995
0
  xmlFreeParserCtxt(ctxt);
12996
0
  if (newDoc != NULL) {
12997
0
      xmlFreeDoc(newDoc);
12998
0
  }
12999
0
  return(XML_ERR_INTERNAL_ERROR);
13000
0
    }
13001
0
    ctxt->myDoc->children = NULL;
13002
0
    ctxt->myDoc->last = NULL;
13003
0
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13004
0
    nodePush(ctxt, ctxt->myDoc->children);
13005
0
    ctxt->instate = XML_PARSER_CONTENT;
13006
0
    ctxt->depth = oldctxt->depth;
13007
13008
0
    ctxt->validate = 0;
13009
0
    ctxt->loadsubset = oldctxt->loadsubset;
13010
0
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13011
  /*
13012
   * ID/IDREF registration will be done in xmlValidateElement below
13013
   */
13014
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13015
0
    }
13016
0
    ctxt->dictNames = oldctxt->dictNames;
13017
0
    ctxt->attsDefault = oldctxt->attsDefault;
13018
0
    ctxt->attsSpecial = oldctxt->attsSpecial;
13019
13020
0
    xmlParseContent(ctxt);
13021
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13022
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13023
0
    } else if (RAW != 0) {
13024
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13025
0
    }
13026
0
    if (ctxt->node != ctxt->myDoc->children) {
13027
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13028
0
    }
13029
13030
0
    if (!ctxt->wellFormed) {
13031
0
  ret = (xmlParserErrors)ctxt->errNo;
13032
0
        oldctxt->errNo = ctxt->errNo;
13033
0
        oldctxt->wellFormed = 0;
13034
0
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13035
0
    } else {
13036
0
        ret = XML_ERR_OK;
13037
0
    }
13038
13039
0
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13040
0
  xmlNodePtr cur;
13041
13042
  /*
13043
   * Return the newly created nodeset after unlinking it from
13044
   * they pseudo parent.
13045
   */
13046
0
  cur = ctxt->myDoc->children->children;
13047
0
  *lst = cur;
13048
0
  while (cur != NULL) {
13049
0
#ifdef LIBXML_VALID_ENABLED
13050
0
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13051
0
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13052
0
    (cur->type == XML_ELEMENT_NODE)) {
13053
0
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13054
0
      oldctxt->myDoc, cur);
13055
0
      }
13056
0
#endif /* LIBXML_VALID_ENABLED */
13057
0
      cur->parent = NULL;
13058
0
      cur = cur->next;
13059
0
  }
13060
0
  ctxt->myDoc->children->children = NULL;
13061
0
    }
13062
0
    if (ctxt->myDoc != NULL) {
13063
0
  xmlFreeNode(ctxt->myDoc->children);
13064
0
        ctxt->myDoc->children = content;
13065
0
        ctxt->myDoc->last = last;
13066
0
    }
13067
13068
    /*
13069
     * Also record the size of the entity parsed
13070
     */
13071
0
    if (ctxt->input != NULL && oldctxt != NULL) {
13072
0
        unsigned long consumed = ctxt->input->consumed;
13073
13074
0
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13075
13076
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13077
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13078
0
    }
13079
13080
0
    oldctxt->nbErrors = ctxt->nbErrors;
13081
0
    oldctxt->nbWarnings = ctxt->nbWarnings;
13082
0
    ctxt->sax = oldsax;
13083
0
    ctxt->dict = NULL;
13084
0
    ctxt->attsDefault = NULL;
13085
0
    ctxt->attsSpecial = NULL;
13086
0
    xmlFreeParserCtxt(ctxt);
13087
0
    if (newDoc != NULL) {
13088
0
  xmlFreeDoc(newDoc);
13089
0
    }
13090
13091
0
    return(ret);
13092
0
}
13093
13094
/**
13095
 * xmlParseInNodeContext:
13096
 * @node:  the context node
13097
 * @data:  the input string
13098
 * @datalen:  the input string length in bytes
13099
 * @options:  a combination of xmlParserOption
13100
 * @lst:  the return value for the set of parsed nodes
13101
 *
13102
 * Parse a well-balanced chunk of an XML document
13103
 * within the context (DTD, namespaces, etc ...) of the given node.
13104
 *
13105
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13106
 * the content production in the XML grammar:
13107
 *
13108
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13109
 *
13110
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13111
 * error code otherwise
13112
 */
13113
xmlParserErrors
13114
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13115
0
                      int options, xmlNodePtr *lst) {
13116
0
#ifdef SAX2
13117
0
    xmlParserCtxtPtr ctxt;
13118
0
    xmlDocPtr doc = NULL;
13119
0
    xmlNodePtr fake, cur;
13120
0
    int nsnr = 0;
13121
13122
0
    xmlParserErrors ret = XML_ERR_OK;
13123
13124
    /*
13125
     * check all input parameters, grab the document
13126
     */
13127
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13128
0
        return(XML_ERR_INTERNAL_ERROR);
13129
0
    switch (node->type) {
13130
0
        case XML_ELEMENT_NODE:
13131
0
        case XML_ATTRIBUTE_NODE:
13132
0
        case XML_TEXT_NODE:
13133
0
        case XML_CDATA_SECTION_NODE:
13134
0
        case XML_ENTITY_REF_NODE:
13135
0
        case XML_PI_NODE:
13136
0
        case XML_COMMENT_NODE:
13137
0
        case XML_DOCUMENT_NODE:
13138
0
        case XML_HTML_DOCUMENT_NODE:
13139
0
      break;
13140
0
  default:
13141
0
      return(XML_ERR_INTERNAL_ERROR);
13142
13143
0
    }
13144
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13145
0
           (node->type != XML_DOCUMENT_NODE) &&
13146
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13147
0
  node = node->parent;
13148
0
    if (node == NULL)
13149
0
  return(XML_ERR_INTERNAL_ERROR);
13150
0
    if (node->type == XML_ELEMENT_NODE)
13151
0
  doc = node->doc;
13152
0
    else
13153
0
        doc = (xmlDocPtr) node;
13154
0
    if (doc == NULL)
13155
0
  return(XML_ERR_INTERNAL_ERROR);
13156
13157
    /*
13158
     * allocate a context and set-up everything not related to the
13159
     * node position in the tree
13160
     */
13161
0
    if (doc->type == XML_DOCUMENT_NODE)
13162
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13163
0
#ifdef LIBXML_HTML_ENABLED
13164
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13165
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13166
        /*
13167
         * When parsing in context, it makes no sense to add implied
13168
         * elements like html/body/etc...
13169
         */
13170
0
        options |= HTML_PARSE_NOIMPLIED;
13171
0
    }
13172
0
#endif
13173
0
    else
13174
0
        return(XML_ERR_INTERNAL_ERROR);
13175
13176
0
    if (ctxt == NULL)
13177
0
        return(XML_ERR_NO_MEMORY);
13178
13179
    /*
13180
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13181
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13182
     * we must wait until the last moment to free the original one.
13183
     */
13184
0
    if (doc->dict != NULL) {
13185
0
        if (ctxt->dict != NULL)
13186
0
      xmlDictFree(ctxt->dict);
13187
0
  ctxt->dict = doc->dict;
13188
0
    } else
13189
0
        options |= XML_PARSE_NODICT;
13190
13191
0
    if (doc->encoding != NULL) {
13192
0
        xmlCharEncodingHandlerPtr hdlr;
13193
13194
0
        if (ctxt->encoding != NULL)
13195
0
      xmlFree((xmlChar *) ctxt->encoding);
13196
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13197
13198
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13199
0
        if (hdlr != NULL) {
13200
0
            xmlSwitchToEncoding(ctxt, hdlr);
13201
0
  } else {
13202
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13203
0
        }
13204
0
    }
13205
13206
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13207
0
    xmlDetectSAX2(ctxt);
13208
0
    ctxt->myDoc = doc;
13209
    /* parsing in context, i.e. as within existing content */
13210
0
    ctxt->input_id = 2;
13211
0
    ctxt->instate = XML_PARSER_CONTENT;
13212
13213
0
    fake = xmlNewDocComment(node->doc, NULL);
13214
0
    if (fake == NULL) {
13215
0
        xmlFreeParserCtxt(ctxt);
13216
0
  return(XML_ERR_NO_MEMORY);
13217
0
    }
13218
0
    xmlAddChild(node, fake);
13219
13220
0
    if (node->type == XML_ELEMENT_NODE) {
13221
0
  nodePush(ctxt, node);
13222
  /*
13223
   * initialize the SAX2 namespaces stack
13224
   */
13225
0
  cur = node;
13226
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13227
0
      xmlNsPtr ns = cur->nsDef;
13228
0
      const xmlChar *iprefix, *ihref;
13229
13230
0
      while (ns != NULL) {
13231
0
    if (ctxt->dict) {
13232
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13233
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13234
0
    } else {
13235
0
        iprefix = ns->prefix;
13236
0
        ihref = ns->href;
13237
0
    }
13238
13239
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13240
0
        nsPush(ctxt, iprefix, ihref);
13241
0
        nsnr++;
13242
0
    }
13243
0
    ns = ns->next;
13244
0
      }
13245
0
      cur = cur->parent;
13246
0
  }
13247
0
    }
13248
13249
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13250
  /*
13251
   * ID/IDREF registration will be done in xmlValidateElement below
13252
   */
13253
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13254
0
    }
13255
13256
0
#ifdef LIBXML_HTML_ENABLED
13257
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13258
0
        __htmlParseContent(ctxt);
13259
0
    else
13260
0
#endif
13261
0
  xmlParseContent(ctxt);
13262
13263
0
    nsPop(ctxt, nsnr);
13264
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13265
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13266
0
    } else if (RAW != 0) {
13267
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13268
0
    }
13269
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13270
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13271
0
  ctxt->wellFormed = 0;
13272
0
    }
13273
13274
0
    if (!ctxt->wellFormed) {
13275
0
        if (ctxt->errNo == 0)
13276
0
      ret = XML_ERR_INTERNAL_ERROR;
13277
0
  else
13278
0
      ret = (xmlParserErrors)ctxt->errNo;
13279
0
    } else {
13280
0
        ret = XML_ERR_OK;
13281
0
    }
13282
13283
    /*
13284
     * Return the newly created nodeset after unlinking it from
13285
     * the pseudo sibling.
13286
     */
13287
13288
0
    cur = fake->next;
13289
0
    fake->next = NULL;
13290
0
    node->last = fake;
13291
13292
0
    if (cur != NULL) {
13293
0
  cur->prev = NULL;
13294
0
    }
13295
13296
0
    *lst = cur;
13297
13298
0
    while (cur != NULL) {
13299
0
  cur->parent = NULL;
13300
0
  cur = cur->next;
13301
0
    }
13302
13303
0
    xmlUnlinkNode(fake);
13304
0
    xmlFreeNode(fake);
13305
13306
13307
0
    if (ret != XML_ERR_OK) {
13308
0
        xmlFreeNodeList(*lst);
13309
0
  *lst = NULL;
13310
0
    }
13311
13312
0
    if (doc->dict != NULL)
13313
0
        ctxt->dict = NULL;
13314
0
    xmlFreeParserCtxt(ctxt);
13315
13316
0
    return(ret);
13317
#else /* !SAX2 */
13318
    return(XML_ERR_INTERNAL_ERROR);
13319
#endif
13320
0
}
13321
13322
#ifdef LIBXML_SAX1_ENABLED
13323
/**
13324
 * xmlParseBalancedChunkMemoryRecover:
13325
 * @doc:  the document the chunk pertains to (must not be NULL)
13326
 * @sax:  the SAX handler block (possibly NULL)
13327
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13328
 * @depth:  Used for loop detection, use 0
13329
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13330
 * @lst:  the return value for the set of parsed nodes
13331
 * @recover: return nodes even if the data is broken (use 0)
13332
 *
13333
 *
13334
 * Parse a well-balanced chunk of an XML document
13335
 * called by the parser
13336
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13337
 * the content production in the XML grammar:
13338
 *
13339
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13340
 *
13341
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13342
 *    the parser error code otherwise
13343
 *
13344
 * In case recover is set to 1, the nodelist will not be empty even if
13345
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13346
 * some extent.
13347
 */
13348
int
13349
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13350
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13351
0
     int recover) {
13352
0
    xmlParserCtxtPtr ctxt;
13353
0
    xmlDocPtr newDoc;
13354
0
    xmlSAXHandlerPtr oldsax = NULL;
13355
0
    xmlNodePtr content, newRoot;
13356
0
    int size;
13357
0
    int ret = 0;
13358
13359
0
    if (depth > 40) {
13360
0
  return(XML_ERR_ENTITY_LOOP);
13361
0
    }
13362
13363
13364
0
    if (lst != NULL)
13365
0
        *lst = NULL;
13366
0
    if (string == NULL)
13367
0
        return(-1);
13368
13369
0
    size = xmlStrlen(string);
13370
13371
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13372
0
    if (ctxt == NULL) return(-1);
13373
0
    ctxt->userData = ctxt;
13374
0
    if (sax != NULL) {
13375
0
  oldsax = ctxt->sax;
13376
0
        ctxt->sax = sax;
13377
0
  if (user_data != NULL)
13378
0
      ctxt->userData = user_data;
13379
0
    }
13380
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13381
0
    if (newDoc == NULL) {
13382
0
  xmlFreeParserCtxt(ctxt);
13383
0
  return(-1);
13384
0
    }
13385
0
    newDoc->properties = XML_DOC_INTERNAL;
13386
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13387
0
        xmlDictFree(ctxt->dict);
13388
0
  ctxt->dict = doc->dict;
13389
0
  xmlDictReference(ctxt->dict);
13390
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13391
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13392
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13393
0
  ctxt->dictNames = 1;
13394
0
    } else {
13395
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13396
0
    }
13397
    /* doc == NULL is only supported for historic reasons */
13398
0
    if (doc != NULL) {
13399
0
  newDoc->intSubset = doc->intSubset;
13400
0
  newDoc->extSubset = doc->extSubset;
13401
0
    }
13402
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13403
0
    if (newRoot == NULL) {
13404
0
  if (sax != NULL)
13405
0
      ctxt->sax = oldsax;
13406
0
  xmlFreeParserCtxt(ctxt);
13407
0
  newDoc->intSubset = NULL;
13408
0
  newDoc->extSubset = NULL;
13409
0
        xmlFreeDoc(newDoc);
13410
0
  return(-1);
13411
0
    }
13412
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13413
0
    nodePush(ctxt, newRoot);
13414
    /* doc == NULL is only supported for historic reasons */
13415
0
    if (doc == NULL) {
13416
0
  ctxt->myDoc = newDoc;
13417
0
    } else {
13418
0
  ctxt->myDoc = newDoc;
13419
0
  newDoc->children->doc = doc;
13420
  /* Ensure that doc has XML spec namespace */
13421
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13422
0
  newDoc->oldNs = doc->oldNs;
13423
0
    }
13424
0
    ctxt->instate = XML_PARSER_CONTENT;
13425
0
    ctxt->input_id = 2;
13426
0
    ctxt->depth = depth;
13427
13428
    /*
13429
     * Doing validity checking on chunk doesn't make sense
13430
     */
13431
0
    ctxt->validate = 0;
13432
0
    ctxt->loadsubset = 0;
13433
0
    xmlDetectSAX2(ctxt);
13434
13435
0
    if ( doc != NULL ){
13436
0
        content = doc->children;
13437
0
        doc->children = NULL;
13438
0
        xmlParseContent(ctxt);
13439
0
        doc->children = content;
13440
0
    }
13441
0
    else {
13442
0
        xmlParseContent(ctxt);
13443
0
    }
13444
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13445
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13446
0
    } else if (RAW != 0) {
13447
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13448
0
    }
13449
0
    if (ctxt->node != newDoc->children) {
13450
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13451
0
    }
13452
13453
0
    if (!ctxt->wellFormed) {
13454
0
        if (ctxt->errNo == 0)
13455
0
      ret = 1;
13456
0
  else
13457
0
      ret = ctxt->errNo;
13458
0
    } else {
13459
0
      ret = 0;
13460
0
    }
13461
13462
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13463
0
  xmlNodePtr cur;
13464
13465
  /*
13466
   * Return the newly created nodeset after unlinking it from
13467
   * they pseudo parent.
13468
   */
13469
0
  cur = newDoc->children->children;
13470
0
  *lst = cur;
13471
0
  while (cur != NULL) {
13472
0
      xmlSetTreeDoc(cur, doc);
13473
0
      cur->parent = NULL;
13474
0
      cur = cur->next;
13475
0
  }
13476
0
  newDoc->children->children = NULL;
13477
0
    }
13478
13479
0
    if (sax != NULL)
13480
0
  ctxt->sax = oldsax;
13481
0
    xmlFreeParserCtxt(ctxt);
13482
0
    newDoc->intSubset = NULL;
13483
0
    newDoc->extSubset = NULL;
13484
    /* This leaks the namespace list if doc == NULL */
13485
0
    newDoc->oldNs = NULL;
13486
0
    xmlFreeDoc(newDoc);
13487
13488
0
    return(ret);
13489
0
}
13490
13491
/**
13492
 * xmlSAXParseEntity:
13493
 * @sax:  the SAX handler block
13494
 * @filename:  the filename
13495
 *
13496
 * DEPRECATED: Don't use.
13497
 *
13498
 * parse an XML external entity out of context and build a tree.
13499
 * It use the given SAX function block to handle the parsing callback.
13500
 * If sax is NULL, fallback to the default DOM tree building routines.
13501
 *
13502
 * [78] extParsedEnt ::= TextDecl? content
13503
 *
13504
 * This correspond to a "Well Balanced" chunk
13505
 *
13506
 * Returns the resulting document tree
13507
 */
13508
13509
xmlDocPtr
13510
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13511
0
    xmlDocPtr ret;
13512
0
    xmlParserCtxtPtr ctxt;
13513
13514
0
    ctxt = xmlCreateFileParserCtxt(filename);
13515
0
    if (ctxt == NULL) {
13516
0
  return(NULL);
13517
0
    }
13518
0
    if (sax != NULL) {
13519
0
  if (ctxt->sax != NULL)
13520
0
      xmlFree(ctxt->sax);
13521
0
        ctxt->sax = sax;
13522
0
        ctxt->userData = NULL;
13523
0
    }
13524
13525
0
    xmlParseExtParsedEnt(ctxt);
13526
13527
0
    if (ctxt->wellFormed)
13528
0
  ret = ctxt->myDoc;
13529
0
    else {
13530
0
        ret = NULL;
13531
0
        xmlFreeDoc(ctxt->myDoc);
13532
0
        ctxt->myDoc = NULL;
13533
0
    }
13534
0
    if (sax != NULL)
13535
0
        ctxt->sax = NULL;
13536
0
    xmlFreeParserCtxt(ctxt);
13537
13538
0
    return(ret);
13539
0
}
13540
13541
/**
13542
 * xmlParseEntity:
13543
 * @filename:  the filename
13544
 *
13545
 * parse an XML external entity out of context and build a tree.
13546
 *
13547
 * [78] extParsedEnt ::= TextDecl? content
13548
 *
13549
 * This correspond to a "Well Balanced" chunk
13550
 *
13551
 * Returns the resulting document tree
13552
 */
13553
13554
xmlDocPtr
13555
0
xmlParseEntity(const char *filename) {
13556
0
    return(xmlSAXParseEntity(NULL, filename));
13557
0
}
13558
#endif /* LIBXML_SAX1_ENABLED */
13559
13560
/**
13561
 * xmlCreateEntityParserCtxtInternal:
13562
 * @URL:  the entity URL
13563
 * @ID:  the entity PUBLIC ID
13564
 * @base:  a possible base for the target URI
13565
 * @pctx:  parser context used to set options on new context
13566
 *
13567
 * Create a parser context for an external entity
13568
 * Automatic support for ZLIB/Compress compressed document is provided
13569
 * by default if found at compile-time.
13570
 *
13571
 * Returns the new parser context or NULL
13572
 */
13573
static xmlParserCtxtPtr
13574
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13575
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13576
0
        xmlParserCtxtPtr pctx) {
13577
0
    xmlParserCtxtPtr ctxt;
13578
0
    xmlParserInputPtr inputStream;
13579
0
    char *directory = NULL;
13580
0
    xmlChar *uri;
13581
13582
0
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13583
0
    if (ctxt == NULL) {
13584
0
  return(NULL);
13585
0
    }
13586
13587
0
    if (pctx != NULL) {
13588
0
        ctxt->options = pctx->options;
13589
0
        ctxt->_private = pctx->_private;
13590
0
  ctxt->input_id = pctx->input_id;
13591
0
    }
13592
13593
    /* Don't read from stdin. */
13594
0
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13595
0
        URL = BAD_CAST "./-";
13596
13597
0
    uri = xmlBuildURI(URL, base);
13598
13599
0
    if (uri == NULL) {
13600
0
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13601
0
  if (inputStream == NULL) {
13602
0
      xmlFreeParserCtxt(ctxt);
13603
0
      return(NULL);
13604
0
  }
13605
13606
0
  inputPush(ctxt, inputStream);
13607
13608
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13609
0
      directory = xmlParserGetDirectory((char *)URL);
13610
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13611
0
      ctxt->directory = directory;
13612
0
    } else {
13613
0
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13614
0
  if (inputStream == NULL) {
13615
0
      xmlFree(uri);
13616
0
      xmlFreeParserCtxt(ctxt);
13617
0
      return(NULL);
13618
0
  }
13619
13620
0
  inputPush(ctxt, inputStream);
13621
13622
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13623
0
      directory = xmlParserGetDirectory((char *)uri);
13624
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13625
0
      ctxt->directory = directory;
13626
0
  xmlFree(uri);
13627
0
    }
13628
0
    return(ctxt);
13629
0
}
13630
13631
/**
13632
 * xmlCreateEntityParserCtxt:
13633
 * @URL:  the entity URL
13634
 * @ID:  the entity PUBLIC ID
13635
 * @base:  a possible base for the target URI
13636
 *
13637
 * Create a parser context for an external entity
13638
 * Automatic support for ZLIB/Compress compressed document is provided
13639
 * by default if found at compile-time.
13640
 *
13641
 * Returns the new parser context or NULL
13642
 */
13643
xmlParserCtxtPtr
13644
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13645
0
                    const xmlChar *base) {
13646
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13647
13648
0
}
13649
13650
/************************************************************************
13651
 *                  *
13652
 *    Front ends when parsing from a file     *
13653
 *                  *
13654
 ************************************************************************/
13655
13656
/**
13657
 * xmlCreateURLParserCtxt:
13658
 * @filename:  the filename or URL
13659
 * @options:  a combination of xmlParserOption
13660
 *
13661
 * Create a parser context for a file or URL content.
13662
 * Automatic support for ZLIB/Compress compressed document is provided
13663
 * by default if found at compile-time and for file accesses
13664
 *
13665
 * Returns the new parser context or NULL
13666
 */
13667
xmlParserCtxtPtr
13668
xmlCreateURLParserCtxt(const char *filename, int options)
13669
0
{
13670
0
    xmlParserCtxtPtr ctxt;
13671
0
    xmlParserInputPtr inputStream;
13672
0
    char *directory = NULL;
13673
13674
0
    ctxt = xmlNewParserCtxt();
13675
0
    if (ctxt == NULL) {
13676
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13677
0
  return(NULL);
13678
0
    }
13679
13680
0
    if (options)
13681
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13682
0
    ctxt->linenumbers = 1;
13683
13684
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13685
0
    if (inputStream == NULL) {
13686
0
  xmlFreeParserCtxt(ctxt);
13687
0
  return(NULL);
13688
0
    }
13689
13690
0
    inputPush(ctxt, inputStream);
13691
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13692
0
        directory = xmlParserGetDirectory(filename);
13693
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13694
0
        ctxt->directory = directory;
13695
13696
0
    return(ctxt);
13697
0
}
13698
13699
/**
13700
 * xmlCreateFileParserCtxt:
13701
 * @filename:  the filename
13702
 *
13703
 * Create a parser context for a file content.
13704
 * Automatic support for ZLIB/Compress compressed document is provided
13705
 * by default if found at compile-time.
13706
 *
13707
 * Returns the new parser context or NULL
13708
 */
13709
xmlParserCtxtPtr
13710
xmlCreateFileParserCtxt(const char *filename)
13711
0
{
13712
0
    return(xmlCreateURLParserCtxt(filename, 0));
13713
0
}
13714
13715
#ifdef LIBXML_SAX1_ENABLED
13716
/**
13717
 * xmlSAXParseFileWithData:
13718
 * @sax:  the SAX handler block
13719
 * @filename:  the filename
13720
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13721
 *             documents
13722
 * @data:  the userdata
13723
 *
13724
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13725
 *
13726
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13727
 * compressed document is provided by default if found at compile-time.
13728
 * It use the given SAX function block to handle the parsing callback.
13729
 * If sax is NULL, fallback to the default DOM tree building routines.
13730
 *
13731
 * User data (void *) is stored within the parser context in the
13732
 * context's _private member, so it is available nearly everywhere in libxml
13733
 *
13734
 * Returns the resulting document tree
13735
 */
13736
13737
xmlDocPtr
13738
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13739
0
                        int recovery, void *data) {
13740
0
    xmlDocPtr ret;
13741
0
    xmlParserCtxtPtr ctxt;
13742
13743
0
    xmlInitParser();
13744
13745
0
    ctxt = xmlCreateFileParserCtxt(filename);
13746
0
    if (ctxt == NULL) {
13747
0
  return(NULL);
13748
0
    }
13749
0
    if (sax != NULL) {
13750
0
  if (ctxt->sax != NULL)
13751
0
      xmlFree(ctxt->sax);
13752
0
        ctxt->sax = sax;
13753
0
    }
13754
0
    xmlDetectSAX2(ctxt);
13755
0
    if (data!=NULL) {
13756
0
  ctxt->_private = data;
13757
0
    }
13758
13759
0
    if (ctxt->directory == NULL)
13760
0
        ctxt->directory = xmlParserGetDirectory(filename);
13761
13762
0
    ctxt->recovery = recovery;
13763
13764
0
    xmlParseDocument(ctxt);
13765
13766
0
    if ((ctxt->wellFormed) || recovery) {
13767
0
        ret = ctxt->myDoc;
13768
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13769
0
      if (ctxt->input->buf->compressed > 0)
13770
0
    ret->compression = 9;
13771
0
      else
13772
0
    ret->compression = ctxt->input->buf->compressed;
13773
0
  }
13774
0
    }
13775
0
    else {
13776
0
       ret = NULL;
13777
0
       xmlFreeDoc(ctxt->myDoc);
13778
0
       ctxt->myDoc = NULL;
13779
0
    }
13780
0
    if (sax != NULL)
13781
0
        ctxt->sax = NULL;
13782
0
    xmlFreeParserCtxt(ctxt);
13783
13784
0
    return(ret);
13785
0
}
13786
13787
/**
13788
 * xmlSAXParseFile:
13789
 * @sax:  the SAX handler block
13790
 * @filename:  the filename
13791
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13792
 *             documents
13793
 *
13794
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13795
 *
13796
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13797
 * compressed document is provided by default if found at compile-time.
13798
 * It use the given SAX function block to handle the parsing callback.
13799
 * If sax is NULL, fallback to the default DOM tree building routines.
13800
 *
13801
 * Returns the resulting document tree
13802
 */
13803
13804
xmlDocPtr
13805
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13806
0
                          int recovery) {
13807
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13808
0
}
13809
13810
/**
13811
 * xmlRecoverDoc:
13812
 * @cur:  a pointer to an array of xmlChar
13813
 *
13814
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13815
 *
13816
 * parse an XML in-memory document and build a tree.
13817
 * In the case the document is not Well Formed, a attempt to build a
13818
 * tree is tried anyway
13819
 *
13820
 * Returns the resulting document tree or NULL in case of failure
13821
 */
13822
13823
xmlDocPtr
13824
0
xmlRecoverDoc(const xmlChar *cur) {
13825
0
    return(xmlSAXParseDoc(NULL, cur, 1));
13826
0
}
13827
13828
/**
13829
 * xmlParseFile:
13830
 * @filename:  the filename
13831
 *
13832
 * DEPRECATED: Use xmlReadFile.
13833
 *
13834
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13835
 * compressed document is provided by default if found at compile-time.
13836
 *
13837
 * Returns the resulting document tree if the file was wellformed,
13838
 * NULL otherwise.
13839
 */
13840
13841
xmlDocPtr
13842
0
xmlParseFile(const char *filename) {
13843
0
    return(xmlSAXParseFile(NULL, filename, 0));
13844
0
}
13845
13846
/**
13847
 * xmlRecoverFile:
13848
 * @filename:  the filename
13849
 *
13850
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13851
 *
13852
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13853
 * compressed document is provided by default if found at compile-time.
13854
 * In the case the document is not Well Formed, it attempts to build
13855
 * a tree anyway
13856
 *
13857
 * Returns the resulting document tree or NULL in case of failure
13858
 */
13859
13860
xmlDocPtr
13861
0
xmlRecoverFile(const char *filename) {
13862
0
    return(xmlSAXParseFile(NULL, filename, 1));
13863
0
}
13864
13865
13866
/**
13867
 * xmlSetupParserForBuffer:
13868
 * @ctxt:  an XML parser context
13869
 * @buffer:  a xmlChar * buffer
13870
 * @filename:  a file name
13871
 *
13872
 * DEPRECATED: Don't use.
13873
 *
13874
 * Setup the parser context to parse a new buffer; Clears any prior
13875
 * contents from the parser context. The buffer parameter must not be
13876
 * NULL, but the filename parameter can be
13877
 */
13878
void
13879
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13880
                             const char* filename)
13881
0
{
13882
0
    xmlParserInputPtr input;
13883
13884
0
    if ((ctxt == NULL) || (buffer == NULL))
13885
0
        return;
13886
13887
0
    input = xmlNewInputStream(ctxt);
13888
0
    if (input == NULL) {
13889
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13890
0
        xmlClearParserCtxt(ctxt);
13891
0
        return;
13892
0
    }
13893
13894
0
    xmlClearParserCtxt(ctxt);
13895
0
    if (filename != NULL)
13896
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13897
0
    input->base = buffer;
13898
0
    input->cur = buffer;
13899
0
    input->end = &buffer[xmlStrlen(buffer)];
13900
0
    inputPush(ctxt, input);
13901
0
}
13902
13903
/**
13904
 * xmlSAXUserParseFile:
13905
 * @sax:  a SAX handler
13906
 * @user_data:  The user data returned on SAX callbacks
13907
 * @filename:  a file name
13908
 *
13909
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13910
 *
13911
 * parse an XML file and call the given SAX handler routines.
13912
 * Automatic support for ZLIB/Compress compressed document is provided
13913
 *
13914
 * Returns 0 in case of success or a error number otherwise
13915
 */
13916
int
13917
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13918
0
                    const char *filename) {
13919
0
    int ret = 0;
13920
0
    xmlParserCtxtPtr ctxt;
13921
13922
0
    ctxt = xmlCreateFileParserCtxt(filename);
13923
0
    if (ctxt == NULL) return -1;
13924
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13925
0
  xmlFree(ctxt->sax);
13926
0
    ctxt->sax = sax;
13927
0
    xmlDetectSAX2(ctxt);
13928
13929
0
    if (user_data != NULL)
13930
0
  ctxt->userData = user_data;
13931
13932
0
    xmlParseDocument(ctxt);
13933
13934
0
    if (ctxt->wellFormed)
13935
0
  ret = 0;
13936
0
    else {
13937
0
        if (ctxt->errNo != 0)
13938
0
      ret = ctxt->errNo;
13939
0
  else
13940
0
      ret = -1;
13941
0
    }
13942
0
    if (sax != NULL)
13943
0
  ctxt->sax = NULL;
13944
0
    if (ctxt->myDoc != NULL) {
13945
0
        xmlFreeDoc(ctxt->myDoc);
13946
0
  ctxt->myDoc = NULL;
13947
0
    }
13948
0
    xmlFreeParserCtxt(ctxt);
13949
13950
0
    return ret;
13951
0
}
13952
#endif /* LIBXML_SAX1_ENABLED */
13953
13954
/************************************************************************
13955
 *                  *
13956
 *    Front ends when parsing from memory     *
13957
 *                  *
13958
 ************************************************************************/
13959
13960
/**
13961
 * xmlCreateMemoryParserCtxt:
13962
 * @buffer:  a pointer to a char array
13963
 * @size:  the size of the array
13964
 *
13965
 * Create a parser context for an XML in-memory document.
13966
 *
13967
 * Returns the new parser context or NULL
13968
 */
13969
xmlParserCtxtPtr
13970
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13971
0
    xmlParserCtxtPtr ctxt;
13972
0
    xmlParserInputPtr input;
13973
0
    xmlParserInputBufferPtr buf;
13974
13975
0
    if (buffer == NULL)
13976
0
  return(NULL);
13977
0
    if (size <= 0)
13978
0
  return(NULL);
13979
13980
0
    ctxt = xmlNewParserCtxt();
13981
0
    if (ctxt == NULL)
13982
0
  return(NULL);
13983
13984
0
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13985
0
    if (buf == NULL) {
13986
0
  xmlFreeParserCtxt(ctxt);
13987
0
  return(NULL);
13988
0
    }
13989
13990
0
    input = xmlNewInputStream(ctxt);
13991
0
    if (input == NULL) {
13992
0
  xmlFreeParserInputBuffer(buf);
13993
0
  xmlFreeParserCtxt(ctxt);
13994
0
  return(NULL);
13995
0
    }
13996
13997
0
    input->filename = NULL;
13998
0
    input->buf = buf;
13999
0
    xmlBufResetInput(input->buf->buffer, input);
14000
14001
0
    inputPush(ctxt, input);
14002
0
    return(ctxt);
14003
0
}
14004
14005
#ifdef LIBXML_SAX1_ENABLED
14006
/**
14007
 * xmlSAXParseMemoryWithData:
14008
 * @sax:  the SAX handler block
14009
 * @buffer:  an pointer to a char array
14010
 * @size:  the size of the array
14011
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14012
 *             documents
14013
 * @data:  the userdata
14014
 *
14015
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14016
 *
14017
 * parse an XML in-memory block and use the given SAX function block
14018
 * to handle the parsing callback. If sax is NULL, fallback to the default
14019
 * DOM tree building routines.
14020
 *
14021
 * User data (void *) is stored within the parser context in the
14022
 * context's _private member, so it is available nearly everywhere in libxml
14023
 *
14024
 * Returns the resulting document tree
14025
 */
14026
14027
xmlDocPtr
14028
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14029
0
            int size, int recovery, void *data) {
14030
0
    xmlDocPtr ret;
14031
0
    xmlParserCtxtPtr ctxt;
14032
14033
0
    xmlInitParser();
14034
14035
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14036
0
    if (ctxt == NULL) return(NULL);
14037
0
    if (sax != NULL) {
14038
0
  if (ctxt->sax != NULL)
14039
0
      xmlFree(ctxt->sax);
14040
0
        ctxt->sax = sax;
14041
0
    }
14042
0
    xmlDetectSAX2(ctxt);
14043
0
    if (data!=NULL) {
14044
0
  ctxt->_private=data;
14045
0
    }
14046
14047
0
    ctxt->recovery = recovery;
14048
14049
0
    xmlParseDocument(ctxt);
14050
14051
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14052
0
    else {
14053
0
       ret = NULL;
14054
0
       xmlFreeDoc(ctxt->myDoc);
14055
0
       ctxt->myDoc = NULL;
14056
0
    }
14057
0
    if (sax != NULL)
14058
0
  ctxt->sax = NULL;
14059
0
    xmlFreeParserCtxt(ctxt);
14060
14061
0
    return(ret);
14062
0
}
14063
14064
/**
14065
 * xmlSAXParseMemory:
14066
 * @sax:  the SAX handler block
14067
 * @buffer:  an pointer to a char array
14068
 * @size:  the size of the array
14069
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14070
 *             documents
14071
 *
14072
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14073
 *
14074
 * parse an XML in-memory block and use the given SAX function block
14075
 * to handle the parsing callback. If sax is NULL, fallback to the default
14076
 * DOM tree building routines.
14077
 *
14078
 * Returns the resulting document tree
14079
 */
14080
xmlDocPtr
14081
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14082
0
            int size, int recovery) {
14083
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14084
0
}
14085
14086
/**
14087
 * xmlParseMemory:
14088
 * @buffer:  an pointer to a char array
14089
 * @size:  the size of the array
14090
 *
14091
 * DEPRECATED: Use xmlReadMemory.
14092
 *
14093
 * parse an XML in-memory block and build a tree.
14094
 *
14095
 * Returns the resulting document tree
14096
 */
14097
14098
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14099
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14100
0
}
14101
14102
/**
14103
 * xmlRecoverMemory:
14104
 * @buffer:  an pointer to a char array
14105
 * @size:  the size of the array
14106
 *
14107
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14108
 *
14109
 * parse an XML in-memory block and build a tree.
14110
 * In the case the document is not Well Formed, an attempt to
14111
 * build a tree is tried anyway
14112
 *
14113
 * Returns the resulting document tree or NULL in case of error
14114
 */
14115
14116
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14117
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14118
0
}
14119
14120
/**
14121
 * xmlSAXUserParseMemory:
14122
 * @sax:  a SAX handler
14123
 * @user_data:  The user data returned on SAX callbacks
14124
 * @buffer:  an in-memory XML document input
14125
 * @size:  the length of the XML document in bytes
14126
 *
14127
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14128
 *
14129
 * parse an XML in-memory buffer and call the given SAX handler routines.
14130
 *
14131
 * Returns 0 in case of success or a error number otherwise
14132
 */
14133
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14134
0
        const char *buffer, int size) {
14135
0
    int ret = 0;
14136
0
    xmlParserCtxtPtr ctxt;
14137
14138
0
    xmlInitParser();
14139
14140
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14141
0
    if (ctxt == NULL) return -1;
14142
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14143
0
        xmlFree(ctxt->sax);
14144
0
    ctxt->sax = sax;
14145
0
    xmlDetectSAX2(ctxt);
14146
14147
0
    if (user_data != NULL)
14148
0
  ctxt->userData = user_data;
14149
14150
0
    xmlParseDocument(ctxt);
14151
14152
0
    if (ctxt->wellFormed)
14153
0
  ret = 0;
14154
0
    else {
14155
0
        if (ctxt->errNo != 0)
14156
0
      ret = ctxt->errNo;
14157
0
  else
14158
0
      ret = -1;
14159
0
    }
14160
0
    if (sax != NULL)
14161
0
        ctxt->sax = NULL;
14162
0
    if (ctxt->myDoc != NULL) {
14163
0
        xmlFreeDoc(ctxt->myDoc);
14164
0
  ctxt->myDoc = NULL;
14165
0
    }
14166
0
    xmlFreeParserCtxt(ctxt);
14167
14168
0
    return ret;
14169
0
}
14170
#endif /* LIBXML_SAX1_ENABLED */
14171
14172
/**
14173
 * xmlCreateDocParserCtxt:
14174
 * @cur:  a pointer to an array of xmlChar
14175
 *
14176
 * Creates a parser context for an XML in-memory document.
14177
 *
14178
 * Returns the new parser context or NULL
14179
 */
14180
xmlParserCtxtPtr
14181
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14182
0
    int len;
14183
14184
0
    if (cur == NULL)
14185
0
  return(NULL);
14186
0
    len = xmlStrlen(cur);
14187
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14188
0
}
14189
14190
#ifdef LIBXML_SAX1_ENABLED
14191
/**
14192
 * xmlSAXParseDoc:
14193
 * @sax:  the SAX handler block
14194
 * @cur:  a pointer to an array of xmlChar
14195
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14196
 *             documents
14197
 *
14198
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14199
 *
14200
 * parse an XML in-memory document and build a tree.
14201
 * It use the given SAX function block to handle the parsing callback.
14202
 * If sax is NULL, fallback to the default DOM tree building routines.
14203
 *
14204
 * Returns the resulting document tree
14205
 */
14206
14207
xmlDocPtr
14208
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14209
0
    xmlDocPtr ret;
14210
0
    xmlParserCtxtPtr ctxt;
14211
0
    xmlSAXHandlerPtr oldsax = NULL;
14212
14213
0
    if (cur == NULL) return(NULL);
14214
14215
14216
0
    ctxt = xmlCreateDocParserCtxt(cur);
14217
0
    if (ctxt == NULL) return(NULL);
14218
0
    if (sax != NULL) {
14219
0
        oldsax = ctxt->sax;
14220
0
        ctxt->sax = sax;
14221
0
        ctxt->userData = NULL;
14222
0
    }
14223
0
    xmlDetectSAX2(ctxt);
14224
14225
0
    xmlParseDocument(ctxt);
14226
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14227
0
    else {
14228
0
       ret = NULL;
14229
0
       xmlFreeDoc(ctxt->myDoc);
14230
0
       ctxt->myDoc = NULL;
14231
0
    }
14232
0
    if (sax != NULL)
14233
0
  ctxt->sax = oldsax;
14234
0
    xmlFreeParserCtxt(ctxt);
14235
14236
0
    return(ret);
14237
0
}
14238
14239
/**
14240
 * xmlParseDoc:
14241
 * @cur:  a pointer to an array of xmlChar
14242
 *
14243
 * DEPRECATED: Use xmlReadDoc.
14244
 *
14245
 * parse an XML in-memory document and build a tree.
14246
 *
14247
 * Returns the resulting document tree
14248
 */
14249
14250
xmlDocPtr
14251
0
xmlParseDoc(const xmlChar *cur) {
14252
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14253
0
}
14254
#endif /* LIBXML_SAX1_ENABLED */
14255
14256
#ifdef LIBXML_LEGACY_ENABLED
14257
/************************************************************************
14258
 *                  *
14259
 *  Specific function to keep track of entities references    *
14260
 *  and used by the XSLT debugger         *
14261
 *                  *
14262
 ************************************************************************/
14263
14264
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14265
14266
/**
14267
 * xmlAddEntityReference:
14268
 * @ent : A valid entity
14269
 * @firstNode : A valid first node for children of entity
14270
 * @lastNode : A valid last node of children entity
14271
 *
14272
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14273
 */
14274
static void
14275
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14276
                      xmlNodePtr lastNode)
14277
{
14278
    if (xmlEntityRefFunc != NULL) {
14279
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14280
    }
14281
}
14282
14283
14284
/**
14285
 * xmlSetEntityReferenceFunc:
14286
 * @func: A valid function
14287
 *
14288
 * Set the function to call call back when a xml reference has been made
14289
 */
14290
void
14291
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14292
{
14293
    xmlEntityRefFunc = func;
14294
}
14295
#endif /* LIBXML_LEGACY_ENABLED */
14296
14297
/************************************************************************
14298
 *                  *
14299
 *        Miscellaneous       *
14300
 *                  *
14301
 ************************************************************************/
14302
14303
static int xmlParserInitialized = 0;
14304
14305
/**
14306
 * xmlInitParser:
14307
 *
14308
 * Initialization function for the XML parser.
14309
 * This is not reentrant. Call once before processing in case of
14310
 * use in multithreaded programs.
14311
 */
14312
14313
void
14314
1.51M
xmlInitParser(void) {
14315
    /*
14316
     * Note that the initialization code must not make memory allocations.
14317
     */
14318
1.51M
    if (xmlParserInitialized != 0)
14319
1.51M
  return;
14320
14321
1
#ifdef LIBXML_THREAD_ENABLED
14322
1
    __xmlGlobalInitMutexLock();
14323
1
    if (xmlParserInitialized == 0) {
14324
1
#endif
14325
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14326
        if (xmlFree == free)
14327
            atexit(xmlCleanupParser);
14328
#endif
14329
14330
1
  xmlInitThreadsInternal();
14331
1
  xmlInitGlobalsInternal();
14332
1
  xmlInitMemoryInternal();
14333
1
        __xmlInitializeDict();
14334
1
  xmlInitEncodingInternal();
14335
1
  xmlRegisterDefaultInputCallbacks();
14336
1
#ifdef LIBXML_OUTPUT_ENABLED
14337
1
  xmlRegisterDefaultOutputCallbacks();
14338
1
#endif /* LIBXML_OUTPUT_ENABLED */
14339
1
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14340
1
  xmlInitXPathInternal();
14341
1
#endif
14342
1
  xmlParserInitialized = 1;
14343
1
#ifdef LIBXML_THREAD_ENABLED
14344
1
    }
14345
1
    __xmlGlobalInitMutexUnlock();
14346
1
#endif
14347
1
}
14348
14349
/**
14350
 * xmlCleanupParser:
14351
 *
14352
 * This function name is somewhat misleading. It does not clean up
14353
 * parser state, it cleans up memory allocated by the library itself.
14354
 * It is a cleanup function for the XML library. It tries to reclaim all
14355
 * related global memory allocated for the library processing.
14356
 * It doesn't deallocate any document related memory. One should
14357
 * call xmlCleanupParser() only when the process has finished using
14358
 * the library and all XML/HTML documents built with it.
14359
 * See also xmlInitParser() which has the opposite function of preparing
14360
 * the library for operations.
14361
 *
14362
 * WARNING: if your application is multithreaded or has plugin support
14363
 *          calling this may crash the application if another thread or
14364
 *          a plugin is still using libxml2. It's sometimes very hard to
14365
 *          guess if libxml2 is in use in the application, some libraries
14366
 *          or plugins may use it without notice. In case of doubt abstain
14367
 *          from calling this function or do it just before calling exit()
14368
 *          to avoid leak reports from valgrind !
14369
 */
14370
14371
void
14372
0
xmlCleanupParser(void) {
14373
0
    if (!xmlParserInitialized)
14374
0
  return;
14375
14376
0
    xmlCleanupCharEncodingHandlers();
14377
0
#ifdef LIBXML_CATALOG_ENABLED
14378
0
    xmlCatalogCleanup();
14379
0
#endif
14380
0
    xmlCleanupDictInternal();
14381
0
    xmlCleanupInputCallbacks();
14382
0
#ifdef LIBXML_OUTPUT_ENABLED
14383
0
    xmlCleanupOutputCallbacks();
14384
0
#endif
14385
0
#ifdef LIBXML_SCHEMAS_ENABLED
14386
0
    xmlSchemaCleanupTypes();
14387
0
    xmlRelaxNGCleanupTypes();
14388
0
#endif
14389
0
    xmlCleanupGlobalsInternal();
14390
0
    xmlCleanupThreadsInternal();
14391
0
    xmlCleanupMemoryInternal();
14392
0
    xmlParserInitialized = 0;
14393
0
}
14394
14395
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14396
    !defined(_WIN32)
14397
static void
14398
ATTRIBUTE_DESTRUCTOR
14399
xmlDestructor(void) {
14400
    /*
14401
     * Calling custom deallocation functions in a destructor can cause
14402
     * problems, for example with Nokogiri.
14403
     */
14404
    if (xmlFree == free)
14405
        xmlCleanupParser();
14406
}
14407
#endif
14408
14409
/************************************************************************
14410
 *                  *
14411
 *  New set (2.6.0) of simpler and more flexible APIs   *
14412
 *                  *
14413
 ************************************************************************/
14414
14415
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the
 * macro is used; a NULL dict means the string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it forms a
 * single statement: the invocation must be followed by a semicolon
 * and is safe inside unbraced if/else bodies.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14426
14427
/**
14428
 * xmlCtxtReset:
14429
 * @ctxt: an XML parser context
14430
 *
14431
 * Reset a parser context
14432
 */
14433
void
14434
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14435
0
{
14436
0
    xmlParserInputPtr input;
14437
0
    xmlDictPtr dict;
14438
14439
0
    if (ctxt == NULL)
14440
0
        return;
14441
14442
0
    dict = ctxt->dict;
14443
14444
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14445
0
        xmlFreeInputStream(input);
14446
0
    }
14447
0
    ctxt->inputNr = 0;
14448
0
    ctxt->input = NULL;
14449
14450
0
    ctxt->spaceNr = 0;
14451
0
    if (ctxt->spaceTab != NULL) {
14452
0
  ctxt->spaceTab[0] = -1;
14453
0
  ctxt->space = &ctxt->spaceTab[0];
14454
0
    } else {
14455
0
        ctxt->space = NULL;
14456
0
    }
14457
14458
14459
0
    ctxt->nodeNr = 0;
14460
0
    ctxt->node = NULL;
14461
14462
0
    ctxt->nameNr = 0;
14463
0
    ctxt->name = NULL;
14464
14465
0
    ctxt->nsNr = 0;
14466
14467
0
    DICT_FREE(ctxt->version);
14468
0
    ctxt->version = NULL;
14469
0
    DICT_FREE(ctxt->encoding);
14470
0
    ctxt->encoding = NULL;
14471
0
    DICT_FREE(ctxt->directory);
14472
0
    ctxt->directory = NULL;
14473
0
    DICT_FREE(ctxt->extSubURI);
14474
0
    ctxt->extSubURI = NULL;
14475
0
    DICT_FREE(ctxt->extSubSystem);
14476
0
    ctxt->extSubSystem = NULL;
14477
0
    if (ctxt->myDoc != NULL)
14478
0
        xmlFreeDoc(ctxt->myDoc);
14479
0
    ctxt->myDoc = NULL;
14480
14481
0
    ctxt->standalone = -1;
14482
0
    ctxt->hasExternalSubset = 0;
14483
0
    ctxt->hasPErefs = 0;
14484
0
    ctxt->html = 0;
14485
0
    ctxt->external = 0;
14486
0
    ctxt->instate = XML_PARSER_START;
14487
0
    ctxt->token = 0;
14488
14489
0
    ctxt->wellFormed = 1;
14490
0
    ctxt->nsWellFormed = 1;
14491
0
    ctxt->disableSAX = 0;
14492
0
    ctxt->valid = 1;
14493
#if 0
14494
    ctxt->vctxt.userData = ctxt;
14495
    ctxt->vctxt.error = xmlParserValidityError;
14496
    ctxt->vctxt.warning = xmlParserValidityWarning;
14497
#endif
14498
0
    ctxt->record_info = 0;
14499
0
    ctxt->checkIndex = 0;
14500
0
    ctxt->endCheckState = 0;
14501
0
    ctxt->inSubset = 0;
14502
0
    ctxt->errNo = XML_ERR_OK;
14503
0
    ctxt->depth = 0;
14504
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14505
0
    ctxt->catalogs = NULL;
14506
0
    ctxt->sizeentities = 0;
14507
0
    ctxt->sizeentcopy = 0;
14508
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14509
14510
0
    if (ctxt->attsDefault != NULL) {
14511
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14512
0
        ctxt->attsDefault = NULL;
14513
0
    }
14514
0
    if (ctxt->attsSpecial != NULL) {
14515
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14516
0
        ctxt->attsSpecial = NULL;
14517
0
    }
14518
14519
0
#ifdef LIBXML_CATALOG_ENABLED
14520
0
    if (ctxt->catalogs != NULL)
14521
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14522
0
#endif
14523
0
    ctxt->nbErrors = 0;
14524
0
    ctxt->nbWarnings = 0;
14525
0
    if (ctxt->lastError.code != XML_ERR_OK)
14526
0
        xmlResetError(&ctxt->lastError);
14527
0
}
14528
14529
/**
14530
 * xmlCtxtResetPush:
14531
 * @ctxt: an XML parser context
14532
 * @chunk:  a pointer to an array of chars
14533
 * @size:  number of chars in the array
14534
 * @filename:  an optional file name or URI
14535
 * @encoding:  the document encoding, or NULL
14536
 *
14537
 * Reset a push parser context
14538
 *
14539
 * Returns 0 in case of success and 1 in case of error
14540
 */
14541
int
14542
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14543
                 int size, const char *filename, const char *encoding)
14544
0
{
14545
0
    xmlParserInputPtr inputStream;
14546
0
    xmlParserInputBufferPtr buf;
14547
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14548
14549
0
    if (ctxt == NULL)
14550
0
        return(1);
14551
14552
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14553
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14554
14555
0
    buf = xmlAllocParserInputBuffer(enc);
14556
0
    if (buf == NULL)
14557
0
        return(1);
14558
14559
0
    if (ctxt == NULL) {
14560
0
        xmlFreeParserInputBuffer(buf);
14561
0
        return(1);
14562
0
    }
14563
14564
0
    xmlCtxtReset(ctxt);
14565
14566
0
    if (filename == NULL) {
14567
0
        ctxt->directory = NULL;
14568
0
    } else {
14569
0
        ctxt->directory = xmlParserGetDirectory(filename);
14570
0
    }
14571
14572
0
    inputStream = xmlNewInputStream(ctxt);
14573
0
    if (inputStream == NULL) {
14574
0
        xmlFreeParserInputBuffer(buf);
14575
0
        return(1);
14576
0
    }
14577
14578
0
    if (filename == NULL)
14579
0
        inputStream->filename = NULL;
14580
0
    else
14581
0
        inputStream->filename = (char *)
14582
0
            xmlCanonicPath((const xmlChar *) filename);
14583
0
    inputStream->buf = buf;
14584
0
    xmlBufResetInput(buf->buffer, inputStream);
14585
14586
0
    inputPush(ctxt, inputStream);
14587
14588
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14589
0
        (ctxt->input->buf != NULL)) {
14590
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14591
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14592
14593
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14594
14595
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14596
#ifdef DEBUG_PUSH
14597
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14598
#endif
14599
0
    }
14600
14601
0
    if (encoding != NULL) {
14602
0
        xmlCharEncodingHandlerPtr hdlr;
14603
14604
0
        if (ctxt->encoding != NULL)
14605
0
      xmlFree((xmlChar *) ctxt->encoding);
14606
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14607
14608
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14609
0
        if (hdlr != NULL) {
14610
0
            xmlSwitchToEncoding(ctxt, hdlr);
14611
0
  } else {
14612
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14613
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14614
0
        }
14615
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14616
0
        xmlSwitchEncoding(ctxt, enc);
14617
0
    }
14618
14619
0
    return(0);
14620
0
}
14621
14622
14623
/**
14624
 * xmlCtxtUseOptionsInternal:
14625
 * @ctxt: an XML parser context
14626
 * @options:  a combination of xmlParserOption
14627
 * @encoding:  the user provided encoding to use
14628
 *
14629
 * Applies the options to the parser context
14630
 *
14631
 * Returns 0 in case of success, the set of unknown or unimplemented options
14632
 *         in case of error.
14633
 */
14634
static int
14635
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14636
12.4k
{
14637
12.4k
    if (ctxt == NULL)
14638
0
        return(-1);
14639
12.4k
    if (encoding != NULL) {
14640
0
        if (ctxt->encoding != NULL)
14641
0
      xmlFree((xmlChar *) ctxt->encoding);
14642
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14643
0
    }
14644
12.4k
    if (options & XML_PARSE_RECOVER) {
14645
0
        ctxt->recovery = 1;
14646
0
        options -= XML_PARSE_RECOVER;
14647
0
  ctxt->options |= XML_PARSE_RECOVER;
14648
0
    } else
14649
12.4k
        ctxt->recovery = 0;
14650
12.4k
    if (options & XML_PARSE_DTDLOAD) {
14651
0
        ctxt->loadsubset = XML_DETECT_IDS;
14652
0
        options -= XML_PARSE_DTDLOAD;
14653
0
  ctxt->options |= XML_PARSE_DTDLOAD;
14654
0
    } else
14655
12.4k
        ctxt->loadsubset = 0;
14656
12.4k
    if (options & XML_PARSE_DTDATTR) {
14657
0
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14658
0
        options -= XML_PARSE_DTDATTR;
14659
0
  ctxt->options |= XML_PARSE_DTDATTR;
14660
0
    }
14661
12.4k
    if (options & XML_PARSE_NOENT) {
14662
0
        ctxt->replaceEntities = 1;
14663
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14664
0
        options -= XML_PARSE_NOENT;
14665
0
  ctxt->options |= XML_PARSE_NOENT;
14666
0
    } else
14667
12.4k
        ctxt->replaceEntities = 0;
14668
12.4k
    if (options & XML_PARSE_PEDANTIC) {
14669
0
        ctxt->pedantic = 1;
14670
0
        options -= XML_PARSE_PEDANTIC;
14671
0
  ctxt->options |= XML_PARSE_PEDANTIC;
14672
0
    } else
14673
12.4k
        ctxt->pedantic = 0;
14674
12.4k
    if (options & XML_PARSE_NOBLANKS) {
14675
12.4k
        ctxt->keepBlanks = 0;
14676
12.4k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14677
12.4k
        options -= XML_PARSE_NOBLANKS;
14678
12.4k
  ctxt->options |= XML_PARSE_NOBLANKS;
14679
12.4k
    } else
14680
0
        ctxt->keepBlanks = 1;
14681
12.4k
    if (options & XML_PARSE_DTDVALID) {
14682
0
        ctxt->validate = 1;
14683
0
        if (options & XML_PARSE_NOWARNING)
14684
0
            ctxt->vctxt.warning = NULL;
14685
0
        if (options & XML_PARSE_NOERROR)
14686
0
            ctxt->vctxt.error = NULL;
14687
0
        options -= XML_PARSE_DTDVALID;
14688
0
  ctxt->options |= XML_PARSE_DTDVALID;
14689
0
    } else
14690
12.4k
        ctxt->validate = 0;
14691
12.4k
    if (options & XML_PARSE_NOWARNING) {
14692
0
        ctxt->sax->warning = NULL;
14693
0
        options -= XML_PARSE_NOWARNING;
14694
0
    }
14695
12.4k
    if (options & XML_PARSE_NOERROR) {
14696
0
        ctxt->sax->error = NULL;
14697
0
        ctxt->sax->fatalError = NULL;
14698
0
        options -= XML_PARSE_NOERROR;
14699
0
    }
14700
12.4k
#ifdef LIBXML_SAX1_ENABLED
14701
12.4k
    if (options & XML_PARSE_SAX1) {
14702
0
        ctxt->sax->startElement = xmlSAX2StartElement;
14703
0
        ctxt->sax->endElement = xmlSAX2EndElement;
14704
0
        ctxt->sax->startElementNs = NULL;
14705
0
        ctxt->sax->endElementNs = NULL;
14706
0
        ctxt->sax->initialized = 1;
14707
0
        options -= XML_PARSE_SAX1;
14708
0
  ctxt->options |= XML_PARSE_SAX1;
14709
0
    }
14710
12.4k
#endif /* LIBXML_SAX1_ENABLED */
14711
12.4k
    if (options & XML_PARSE_NODICT) {
14712
0
        ctxt->dictNames = 0;
14713
0
        options -= XML_PARSE_NODICT;
14714
0
  ctxt->options |= XML_PARSE_NODICT;
14715
12.4k
    } else {
14716
12.4k
        ctxt->dictNames = 1;
14717
12.4k
    }
14718
12.4k
    if (options & XML_PARSE_NOCDATA) {
14719
12.4k
        ctxt->sax->cdataBlock = NULL;
14720
12.4k
        options -= XML_PARSE_NOCDATA;
14721
12.4k
  ctxt->options |= XML_PARSE_NOCDATA;
14722
12.4k
    }
14723
12.4k
    if (options & XML_PARSE_NSCLEAN) {
14724
12.4k
  ctxt->options |= XML_PARSE_NSCLEAN;
14725
12.4k
        options -= XML_PARSE_NSCLEAN;
14726
12.4k
    }
14727
12.4k
    if (options & XML_PARSE_NONET) {
14728
12.4k
  ctxt->options |= XML_PARSE_NONET;
14729
12.4k
        options -= XML_PARSE_NONET;
14730
12.4k
    }
14731
12.4k
    if (options & XML_PARSE_COMPACT) {
14732
0
  ctxt->options |= XML_PARSE_COMPACT;
14733
0
        options -= XML_PARSE_COMPACT;
14734
0
    }
14735
12.4k
    if (options & XML_PARSE_OLD10) {
14736
0
  ctxt->options |= XML_PARSE_OLD10;
14737
0
        options -= XML_PARSE_OLD10;
14738
0
    }
14739
12.4k
    if (options & XML_PARSE_NOBASEFIX) {
14740
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
14741
0
        options -= XML_PARSE_NOBASEFIX;
14742
0
    }
14743
12.4k
    if (options & XML_PARSE_HUGE) {
14744
12.4k
  ctxt->options |= XML_PARSE_HUGE;
14745
12.4k
        options -= XML_PARSE_HUGE;
14746
12.4k
        if (ctxt->dict != NULL)
14747
12.4k
            xmlDictSetLimit(ctxt->dict, 0);
14748
12.4k
    }
14749
12.4k
    if (options & XML_PARSE_OLDSAX) {
14750
0
  ctxt->options |= XML_PARSE_OLDSAX;
14751
0
        options -= XML_PARSE_OLDSAX;
14752
0
    }
14753
12.4k
    if (options & XML_PARSE_IGNORE_ENC) {
14754
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14755
0
        options -= XML_PARSE_IGNORE_ENC;
14756
0
    }
14757
12.4k
    if (options & XML_PARSE_BIG_LINES) {
14758
0
  ctxt->options |= XML_PARSE_BIG_LINES;
14759
0
        options -= XML_PARSE_BIG_LINES;
14760
0
    }
14761
12.4k
    ctxt->linenumbers = 1;
14762
12.4k
    return (options);
14763
12.4k
}
14764
14765
/**
14766
 * xmlCtxtUseOptions:
14767
 * @ctxt: an XML parser context
14768
 * @options:  a combination of xmlParserOption
14769
 *
14770
 * Applies the options to the parser context
14771
 *
14772
 * Returns 0 in case of success, the set of unknown or unimplemented options
14773
 *         in case of error.
14774
 */
14775
int
14776
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14777
12.4k
{
14778
12.4k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14779
12.4k
}
14780
14781
/**
14782
 * xmlDoRead:
14783
 * @ctxt:  an XML parser context
14784
 * @URL:  the base URL to use for the document
14785
 * @encoding:  the document encoding, or NULL
14786
 * @options:  a combination of xmlParserOption
14787
 * @reuse:  keep the context for reuse
14788
 *
14789
 * Common front-end for the xmlRead functions
14790
 *
14791
 * Returns the resulting document tree or NULL
14792
 */
14793
static xmlDocPtr
14794
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14795
          int options, int reuse)
14796
0
{
14797
0
    xmlDocPtr ret;
14798
14799
0
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14800
0
    if (encoding != NULL) {
14801
0
        xmlCharEncodingHandlerPtr hdlr;
14802
14803
        /*
14804
         * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14805
         * caller provided an encoding. Otherwise, we might switch to
14806
         * the encoding from the XML declaration which is likely to
14807
         * break things. Also see xmlSwitchInputEncoding.
14808
         */
14809
0
  hdlr = xmlFindCharEncodingHandler(encoding);
14810
0
  if (hdlr != NULL)
14811
0
      xmlSwitchToEncoding(ctxt, hdlr);
14812
0
    }
14813
0
    if ((URL != NULL) && (ctxt->input != NULL) &&
14814
0
        (ctxt->input->filename == NULL))
14815
0
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14816
0
    xmlParseDocument(ctxt);
14817
0
    if ((ctxt->wellFormed) || ctxt->recovery)
14818
0
        ret = ctxt->myDoc;
14819
0
    else {
14820
0
        ret = NULL;
14821
0
  if (ctxt->myDoc != NULL) {
14822
0
      xmlFreeDoc(ctxt->myDoc);
14823
0
  }
14824
0
    }
14825
0
    ctxt->myDoc = NULL;
14826
0
    if (!reuse) {
14827
0
  xmlFreeParserCtxt(ctxt);
14828
0
    }
14829
14830
0
    return (ret);
14831
0
}
14832
14833
/**
14834
 * xmlReadDoc:
14835
 * @cur:  a pointer to a zero terminated string
14836
 * @URL:  the base URL to use for the document
14837
 * @encoding:  the document encoding, or NULL
14838
 * @options:  a combination of xmlParserOption
14839
 *
14840
 * parse an XML in-memory document and build a tree.
14841
 *
14842
 * Returns the resulting document tree
14843
 */
14844
xmlDocPtr
14845
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14846
0
{
14847
0
    xmlParserCtxtPtr ctxt;
14848
14849
0
    if (cur == NULL)
14850
0
        return (NULL);
14851
0
    xmlInitParser();
14852
14853
0
    ctxt = xmlCreateDocParserCtxt(cur);
14854
0
    if (ctxt == NULL)
14855
0
        return (NULL);
14856
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14857
0
}
14858
14859
/**
14860
 * xmlReadFile:
14861
 * @filename:  a file or URL
14862
 * @encoding:  the document encoding, or NULL
14863
 * @options:  a combination of xmlParserOption
14864
 *
14865
 * parse an XML file from the filesystem or the network.
14866
 *
14867
 * Returns the resulting document tree
14868
 */
14869
xmlDocPtr
14870
xmlReadFile(const char *filename, const char *encoding, int options)
14871
0
{
14872
0
    xmlParserCtxtPtr ctxt;
14873
14874
0
    xmlInitParser();
14875
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
14876
0
    if (ctxt == NULL)
14877
0
        return (NULL);
14878
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14879
0
}
14880
14881
/**
14882
 * xmlReadMemory:
14883
 * @buffer:  a pointer to a char array
14884
 * @size:  the size of the array
14885
 * @URL:  the base URL to use for the document
14886
 * @encoding:  the document encoding, or NULL
14887
 * @options:  a combination of xmlParserOption
14888
 *
14889
 * parse an XML in-memory document and build a tree.
14890
 *
14891
 * Returns the resulting document tree
14892
 */
14893
xmlDocPtr
14894
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14895
0
{
14896
0
    xmlParserCtxtPtr ctxt;
14897
14898
0
    xmlInitParser();
14899
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14900
0
    if (ctxt == NULL)
14901
0
        return (NULL);
14902
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14903
0
}
14904
14905
/**
14906
 * xmlReadFd:
14907
 * @fd:  an open file descriptor
14908
 * @URL:  the base URL to use for the document
14909
 * @encoding:  the document encoding, or NULL
14910
 * @options:  a combination of xmlParserOption
14911
 *
14912
 * parse an XML from a file descriptor and build a tree.
14913
 * NOTE that the file descriptor will not be closed when the
14914
 *      reader is closed or reset.
14915
 *
14916
 * Returns the resulting document tree
14917
 */
14918
xmlDocPtr
14919
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14920
0
{
14921
0
    xmlParserCtxtPtr ctxt;
14922
0
    xmlParserInputBufferPtr input;
14923
0
    xmlParserInputPtr stream;
14924
14925
0
    if (fd < 0)
14926
0
        return (NULL);
14927
0
    xmlInitParser();
14928
14929
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14930
0
    if (input == NULL)
14931
0
        return (NULL);
14932
0
    input->closecallback = NULL;
14933
0
    ctxt = xmlNewParserCtxt();
14934
0
    if (ctxt == NULL) {
14935
0
        xmlFreeParserInputBuffer(input);
14936
0
        return (NULL);
14937
0
    }
14938
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14939
0
    if (stream == NULL) {
14940
0
        xmlFreeParserInputBuffer(input);
14941
0
  xmlFreeParserCtxt(ctxt);
14942
0
        return (NULL);
14943
0
    }
14944
0
    inputPush(ctxt, stream);
14945
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14946
0
}
14947
14948
/**
14949
 * xmlReadIO:
14950
 * @ioread:  an I/O read function
14951
 * @ioclose:  an I/O close function
14952
 * @ioctx:  an I/O handler
14953
 * @URL:  the base URL to use for the document
14954
 * @encoding:  the document encoding, or NULL
14955
 * @options:  a combination of xmlParserOption
14956
 *
14957
 * parse an XML document from I/O functions and source and build a tree.
14958
 *
14959
 * Returns the resulting document tree
14960
 */
14961
xmlDocPtr
14962
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14963
          void *ioctx, const char *URL, const char *encoding, int options)
14964
0
{
14965
0
    xmlParserCtxtPtr ctxt;
14966
0
    xmlParserInputBufferPtr input;
14967
0
    xmlParserInputPtr stream;
14968
14969
0
    if (ioread == NULL)
14970
0
        return (NULL);
14971
0
    xmlInitParser();
14972
14973
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14974
0
                                         XML_CHAR_ENCODING_NONE);
14975
0
    if (input == NULL) {
14976
0
        if (ioclose != NULL)
14977
0
            ioclose(ioctx);
14978
0
        return (NULL);
14979
0
    }
14980
0
    ctxt = xmlNewParserCtxt();
14981
0
    if (ctxt == NULL) {
14982
0
        xmlFreeParserInputBuffer(input);
14983
0
        return (NULL);
14984
0
    }
14985
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14986
0
    if (stream == NULL) {
14987
0
        xmlFreeParserInputBuffer(input);
14988
0
  xmlFreeParserCtxt(ctxt);
14989
0
        return (NULL);
14990
0
    }
14991
0
    inputPush(ctxt, stream);
14992
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14993
0
}
14994
14995
/**
14996
 * xmlCtxtReadDoc:
14997
 * @ctxt:  an XML parser context
14998
 * @cur:  a pointer to a zero terminated string
14999
 * @URL:  the base URL to use for the document
15000
 * @encoding:  the document encoding, or NULL
15001
 * @options:  a combination of xmlParserOption
15002
 *
15003
 * parse an XML in-memory document and build a tree.
15004
 * This reuses the existing @ctxt parser context
15005
 *
15006
 * Returns the resulting document tree
15007
 */
15008
xmlDocPtr
15009
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15010
               const char *URL, const char *encoding, int options)
15011
0
{
15012
0
    if (cur == NULL)
15013
0
        return (NULL);
15014
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15015
0
                              encoding, options));
15016
0
}
15017
15018
/**
15019
 * xmlCtxtReadFile:
15020
 * @ctxt:  an XML parser context
15021
 * @filename:  a file or URL
15022
 * @encoding:  the document encoding, or NULL
15023
 * @options:  a combination of xmlParserOption
15024
 *
15025
 * parse an XML file from the filesystem or the network.
15026
 * This reuses the existing @ctxt parser context
15027
 *
15028
 * Returns the resulting document tree
15029
 */
15030
xmlDocPtr
15031
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15032
                const char *encoding, int options)
15033
0
{
15034
0
    xmlParserInputPtr stream;
15035
15036
0
    if (filename == NULL)
15037
0
        return (NULL);
15038
0
    if (ctxt == NULL)
15039
0
        return (NULL);
15040
0
    xmlInitParser();
15041
15042
0
    xmlCtxtReset(ctxt);
15043
15044
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15045
0
    if (stream == NULL) {
15046
0
        return (NULL);
15047
0
    }
15048
0
    inputPush(ctxt, stream);
15049
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15050
0
}
15051
15052
/**
15053
 * xmlCtxtReadMemory:
15054
 * @ctxt:  an XML parser context
15055
 * @buffer:  a pointer to a char array
15056
 * @size:  the size of the array
15057
 * @URL:  the base URL to use for the document
15058
 * @encoding:  the document encoding, or NULL
15059
 * @options:  a combination of xmlParserOption
15060
 *
15061
 * parse an XML in-memory document and build a tree.
15062
 * This reuses the existing @ctxt parser context
15063
 *
15064
 * Returns the resulting document tree
15065
 */
15066
xmlDocPtr
15067
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15068
                  const char *URL, const char *encoding, int options)
15069
0
{
15070
0
    xmlParserInputBufferPtr input;
15071
0
    xmlParserInputPtr stream;
15072
15073
0
    if (ctxt == NULL)
15074
0
        return (NULL);
15075
0
    if (buffer == NULL)
15076
0
        return (NULL);
15077
0
    xmlInitParser();
15078
15079
0
    xmlCtxtReset(ctxt);
15080
15081
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15082
0
    if (input == NULL) {
15083
0
  return(NULL);
15084
0
    }
15085
15086
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15087
0
    if (stream == NULL) {
15088
0
  xmlFreeParserInputBuffer(input);
15089
0
  return(NULL);
15090
0
    }
15091
15092
0
    inputPush(ctxt, stream);
15093
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15094
0
}
15095
15096
/**
15097
 * xmlCtxtReadFd:
15098
 * @ctxt:  an XML parser context
15099
 * @fd:  an open file descriptor
15100
 * @URL:  the base URL to use for the document
15101
 * @encoding:  the document encoding, or NULL
15102
 * @options:  a combination of xmlParserOption
15103
 *
15104
 * parse an XML from a file descriptor and build a tree.
15105
 * This reuses the existing @ctxt parser context
15106
 * NOTE that the file descriptor will not be closed when the
15107
 *      reader is closed or reset.
15108
 *
15109
 * Returns the resulting document tree
15110
 */
15111
xmlDocPtr
15112
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15113
              const char *URL, const char *encoding, int options)
15114
0
{
15115
0
    xmlParserInputBufferPtr input;
15116
0
    xmlParserInputPtr stream;
15117
15118
0
    if (fd < 0)
15119
0
        return (NULL);
15120
0
    if (ctxt == NULL)
15121
0
        return (NULL);
15122
0
    xmlInitParser();
15123
15124
0
    xmlCtxtReset(ctxt);
15125
15126
15127
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15128
0
    if (input == NULL)
15129
0
        return (NULL);
15130
0
    input->closecallback = NULL;
15131
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15132
0
    if (stream == NULL) {
15133
0
        xmlFreeParserInputBuffer(input);
15134
0
        return (NULL);
15135
0
    }
15136
0
    inputPush(ctxt, stream);
15137
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15138
0
}
15139
15140
/**
15141
 * xmlCtxtReadIO:
15142
 * @ctxt:  an XML parser context
15143
 * @ioread:  an I/O read function
15144
 * @ioclose:  an I/O close function
15145
 * @ioctx:  an I/O handler
15146
 * @URL:  the base URL to use for the document
15147
 * @encoding:  the document encoding, or NULL
15148
 * @options:  a combination of xmlParserOption
15149
 *
15150
 * parse an XML document from I/O functions and source and build a tree.
15151
 * This reuses the existing @ctxt parser context
15152
 *
15153
 * Returns the resulting document tree
15154
 */
15155
xmlDocPtr
15156
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15157
              xmlInputCloseCallback ioclose, void *ioctx,
15158
        const char *URL,
15159
              const char *encoding, int options)
15160
0
{
15161
0
    xmlParserInputBufferPtr input;
15162
0
    xmlParserInputPtr stream;
15163
15164
0
    if (ioread == NULL)
15165
0
        return (NULL);
15166
0
    if (ctxt == NULL)
15167
0
        return (NULL);
15168
0
    xmlInitParser();
15169
15170
0
    xmlCtxtReset(ctxt);
15171
15172
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15173
0
                                         XML_CHAR_ENCODING_NONE);
15174
0
    if (input == NULL) {
15175
0
        if (ioclose != NULL)
15176
0
            ioclose(ioctx);
15177
0
        return (NULL);
15178
0
    }
15179
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15180
0
    if (stream == NULL) {
15181
0
        xmlFreeParserInputBuffer(input);
15182
0
        return (NULL);
15183
0
    }
15184
0
    inputPush(ctxt, stream);
15185
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15186
0
}
15187