Coverage Report

Created: 2023-03-26 06:14

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
#define XML_PARSER_BIG_ENTITY 1000
116
#define XML_PARSER_LOT_ENTITY 5000
117
118
/*
119
 * Constants for protection against abusive entity expansion
120
 * ("billion laughs").
121
 */
122
123
/*
124
 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
125
 * of serialized output after entity expansion.
126
 */
127
0
#define XML_PARSER_NON_LINEAR 5
128
129
/*
130
 * A certain amount is always allowed.
131
 */
132
0
#define XML_PARSER_ALLOWED_EXPANSION 1000000
133
134
/*
135
 * Fixed cost for each entity reference. This crudely models processing time
136
 * as well to protect, for example, against exponential expansion of empty
137
 * or very short entities.
138
 */
139
0
#define XML_ENT_FIXED_COST 20
140
141
/**
142
 * xmlParserMaxDepth:
143
 *
144
 * arbitrary depth limit for the XML documents that we allow to
145
 * process. This is not a limitation of the parser but a safety
146
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
147
 * parser option.
148
 */
149
unsigned int xmlParserMaxDepth = 256;
150
151
152
153
#define SAX2 1
154
10.6M
#define XML_PARSER_BIG_BUFFER_SIZE 300
155
8.60k
#define XML_PARSER_BUFFER_SIZE 100
156
152
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
157
158
/**
159
 * XML_PARSER_CHUNK_SIZE
160
 *
161
 * When calling GROW that's the minimal amount of data
162
 * the parser expects to have received. It is not a hard
163
 * limit but an optimization when reading strings like Names
164
 * It is not strictly needed as long as inputs available characters
165
 * are followed by 0, which should be provided by the I/O level
166
 */
167
#define XML_PARSER_CHUNK_SIZE 100
168
169
/*
170
 * List of XML prefixed PI allowed by W3C specs
171
 */
172
173
static const char* const xmlW3CPIs[] = {
174
    "xml-stylesheet",
175
    "xml-model",
176
    NULL
177
};
178
179
180
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
182
                                              const xmlChar **str);
183
184
static xmlParserErrors
185
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
186
                xmlSAXHandlerPtr sax,
187
          void *user_data, int depth, const xmlChar *URL,
188
          const xmlChar *ID, xmlNodePtr *list);
189
190
static int
191
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
192
                          const char *encoding);
193
#ifdef LIBXML_LEGACY_ENABLED
194
static void
195
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
196
                      xmlNodePtr lastNode);
197
#endif /* LIBXML_LEGACY_ENABLED */
198
199
static xmlParserErrors
200
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
201
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
202
203
static int
204
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
205
206
/************************************************************************
207
 *                  *
208
 *    Some factorized error routines        *
209
 *                  *
210
 ************************************************************************/
211
212
/**
213
 * xmlErrAttributeDup:
214
 * @ctxt:  an XML parser context
215
 * @prefix:  the attribute prefix
216
 * @localname:  the attribute localname
217
 *
218
 * Handle a redefinition of attribute error
219
 */
220
static void
221
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
222
                   const xmlChar * localname)
223
1.27k
{
224
1.27k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
225
1.27k
        (ctxt->instate == XML_PARSER_EOF))
226
0
  return;
227
1.27k
    if (ctxt != NULL)
228
1.27k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
229
230
1.27k
    if (prefix == NULL)
231
936
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
232
936
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
233
936
                        (const char *) localname, NULL, NULL, 0, 0,
234
936
                        "Attribute %s redefined\n", localname);
235
335
    else
236
335
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
237
335
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
238
335
                        (const char *) prefix, (const char *) localname,
239
335
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
240
335
                        localname);
241
1.27k
    if (ctxt != NULL) {
242
1.27k
  ctxt->wellFormed = 0;
243
1.27k
  if (ctxt->recovery == 0)
244
0
      ctxt->disableSAX = 1;
245
1.27k
    }
246
1.27k
}
247
248
/**
249
 * xmlFatalErr:
250
 * @ctxt:  an XML parser context
251
 * @error:  the error number
252
 * @extra:  extra information string
253
 *
254
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
255
 */
256
static void
257
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
258
76.2k
{
259
76.2k
    const char *errmsg;
260
261
76.2k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
262
76.2k
        (ctxt->instate == XML_PARSER_EOF))
263
0
  return;
264
76.2k
    switch (error) {
265
423
        case XML_ERR_INVALID_HEX_CHARREF:
266
423
            errmsg = "CharRef: invalid hexadecimal value";
267
423
            break;
268
4
        case XML_ERR_INVALID_DEC_CHARREF:
269
4
            errmsg = "CharRef: invalid decimal value";
270
4
            break;
271
0
        case XML_ERR_INVALID_CHARREF:
272
0
            errmsg = "CharRef: invalid value";
273
0
            break;
274
9.92k
        case XML_ERR_INTERNAL_ERROR:
275
9.92k
            errmsg = "internal error";
276
9.92k
            break;
277
0
        case XML_ERR_PEREF_AT_EOF:
278
0
            errmsg = "PEReference at end of document";
279
0
            break;
280
0
        case XML_ERR_PEREF_IN_PROLOG:
281
0
            errmsg = "PEReference in prolog";
282
0
            break;
283
0
        case XML_ERR_PEREF_IN_EPILOG:
284
0
            errmsg = "PEReference in epilog";
285
0
            break;
286
0
        case XML_ERR_PEREF_NO_NAME:
287
0
            errmsg = "PEReference: no name";
288
0
            break;
289
0
        case XML_ERR_PEREF_SEMICOL_MISSING:
290
0
            errmsg = "PEReference: expecting ';'";
291
0
            break;
292
0
        case XML_ERR_ENTITY_LOOP:
293
0
            errmsg = "Detected an entity reference loop";
294
0
            break;
295
0
        case XML_ERR_ENTITY_NOT_STARTED:
296
0
            errmsg = "EntityValue: \" or ' expected";
297
0
            break;
298
0
        case XML_ERR_ENTITY_PE_INTERNAL:
299
0
            errmsg = "PEReferences forbidden in internal subset";
300
0
            break;
301
0
        case XML_ERR_ENTITY_NOT_FINISHED:
302
0
            errmsg = "EntityValue: \" or ' expected";
303
0
            break;
304
1.26k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
305
1.26k
            errmsg = "AttValue: \" or ' expected";
306
1.26k
            break;
307
3.76k
        case XML_ERR_LT_IN_ATTRIBUTE:
308
3.76k
            errmsg = "Unescaped '<' not allowed in attributes values";
309
3.76k
            break;
310
0
        case XML_ERR_LITERAL_NOT_STARTED:
311
0
            errmsg = "SystemLiteral \" or ' expected";
312
0
            break;
313
0
        case XML_ERR_LITERAL_NOT_FINISHED:
314
0
            errmsg = "Unfinished System or Public ID \" or ' expected";
315
0
            break;
316
4
        case XML_ERR_MISPLACED_CDATA_END:
317
4
            errmsg = "Sequence ']]>' not allowed in content";
318
4
            break;
319
0
        case XML_ERR_URI_REQUIRED:
320
0
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
321
0
            break;
322
0
        case XML_ERR_PUBID_REQUIRED:
323
0
            errmsg = "PUBLIC, the Public Identifier is missing";
324
0
            break;
325
50.0k
        case XML_ERR_HYPHEN_IN_COMMENT:
326
50.0k
            errmsg = "Comment must not contain '--' (double-hyphen)";
327
50.0k
            break;
328
114
        case XML_ERR_PI_NOT_STARTED:
329
114
            errmsg = "xmlParsePI : no target name";
330
114
            break;
331
0
        case XML_ERR_RESERVED_XML_NAME:
332
0
            errmsg = "Invalid PI name";
333
0
            break;
334
0
        case XML_ERR_NOTATION_NOT_STARTED:
335
0
            errmsg = "NOTATION: Name expected here";
336
0
            break;
337
0
        case XML_ERR_NOTATION_NOT_FINISHED:
338
0
            errmsg = "'>' required to close NOTATION declaration";
339
0
            break;
340
0
        case XML_ERR_VALUE_REQUIRED:
341
0
            errmsg = "Entity value required";
342
0
            break;
343
0
        case XML_ERR_URI_FRAGMENT:
344
0
            errmsg = "Fragment not allowed";
345
0
            break;
346
0
        case XML_ERR_ATTLIST_NOT_STARTED:
347
0
            errmsg = "'(' required to start ATTLIST enumeration";
348
0
            break;
349
0
        case XML_ERR_NMTOKEN_REQUIRED:
350
0
            errmsg = "NmToken expected in ATTLIST enumeration";
351
0
            break;
352
0
        case XML_ERR_ATTLIST_NOT_FINISHED:
353
0
            errmsg = "')' required to finish ATTLIST enumeration";
354
0
            break;
355
0
        case XML_ERR_MIXED_NOT_STARTED:
356
0
            errmsg = "MixedContentDecl : '|' or ')*' expected";
357
0
            break;
358
0
        case XML_ERR_PCDATA_REQUIRED:
359
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
360
0
            break;
361
0
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
362
0
            errmsg = "ContentDecl : Name or '(' expected";
363
0
            break;
364
0
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
365
0
            errmsg = "ContentDecl : ',' '|' or ')' expected";
366
0
            break;
367
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
368
0
            errmsg =
369
0
                "PEReference: forbidden within markup decl in internal subset";
370
0
            break;
371
130
        case XML_ERR_GT_REQUIRED:
372
130
            errmsg = "expected '>'";
373
130
            break;
374
0
        case XML_ERR_CONDSEC_INVALID:
375
0
            errmsg = "XML conditional section '[' expected";
376
0
            break;
377
0
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
378
0
            errmsg = "Content error in the external subset";
379
0
            break;
380
0
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
381
0
            errmsg =
382
0
                "conditional section INCLUDE or IGNORE keyword expected";
383
0
            break;
384
0
        case XML_ERR_CONDSEC_NOT_FINISHED:
385
0
            errmsg = "XML conditional section not closed";
386
0
            break;
387
0
        case XML_ERR_XMLDECL_NOT_STARTED:
388
0
            errmsg = "Text declaration '<?xml' required";
389
0
            break;
390
0
        case XML_ERR_XMLDECL_NOT_FINISHED:
391
0
            errmsg = "parsing XML declaration: '?>' expected";
392
0
            break;
393
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
394
0
            errmsg = "external parsed entities cannot be standalone";
395
0
            break;
396
10.4k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
397
10.4k
            errmsg = "EntityRef: expecting ';'";
398
10.4k
            break;
399
0
        case XML_ERR_DOCTYPE_NOT_FINISHED:
400
0
            errmsg = "DOCTYPE improperly terminated";
401
0
            break;
402
0
        case XML_ERR_LTSLASH_REQUIRED:
403
0
            errmsg = "EndTag: '</' not found";
404
0
            break;
405
0
        case XML_ERR_EQUAL_REQUIRED:
406
0
            errmsg = "expected '='";
407
0
            break;
408
0
        case XML_ERR_STRING_NOT_CLOSED:
409
0
            errmsg = "String not closed expecting \" or '";
410
0
            break;
411
0
        case XML_ERR_STRING_NOT_STARTED:
412
0
            errmsg = "String not started expecting ' or \"";
413
0
            break;
414
0
        case XML_ERR_ENCODING_NAME:
415
0
            errmsg = "Invalid XML encoding name";
416
0
            break;
417
0
        case XML_ERR_STANDALONE_VALUE:
418
0
            errmsg = "standalone accepts only 'yes' or 'no'";
419
0
            break;
420
0
        case XML_ERR_DOCUMENT_EMPTY:
421
0
            errmsg = "Document is empty";
422
0
            break;
423
4
        case XML_ERR_DOCUMENT_END:
424
4
            errmsg = "Extra content at the end of the document";
425
4
            break;
426
0
        case XML_ERR_NOT_WELL_BALANCED:
427
0
            errmsg = "chunk is not well balanced";
428
0
            break;
429
0
        case XML_ERR_EXTRA_CONTENT:
430
0
            errmsg = "extra content at the end of well balanced chunk";
431
0
            break;
432
0
        case XML_ERR_VERSION_MISSING:
433
0
            errmsg = "Malformed declaration expecting version";
434
0
            break;
435
65
        case XML_ERR_NAME_TOO_LONG:
436
65
            errmsg = "Name too long";
437
65
            break;
438
#if 0
439
        case:
440
            errmsg = "";
441
            break;
442
#endif
443
0
        default:
444
0
            errmsg = "Unregistered error message";
445
76.2k
    }
446
76.2k
    if (ctxt != NULL)
447
76.2k
  ctxt->errNo = error;
448
76.2k
    if (info == NULL) {
449
66.2k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
450
66.2k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
451
66.2k
                        errmsg);
452
66.2k
    } else {
453
9.99k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
454
9.99k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
455
9.99k
                        errmsg, info);
456
9.99k
    }
457
76.2k
    if (ctxt != NULL) {
458
76.2k
  ctxt->wellFormed = 0;
459
76.2k
  if (ctxt->recovery == 0)
460
0
      ctxt->disableSAX = 1;
461
76.2k
    }
462
76.2k
}
463
464
/**
465
 * xmlFatalErrMsg:
466
 * @ctxt:  an XML parser context
467
 * @error:  the error number
468
 * @msg:  the error message
469
 *
470
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
471
 */
472
static void LIBXML_ATTR_FORMAT(3,0)
473
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
474
               const char *msg)
475
34.4k
{
476
34.4k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
477
34.4k
        (ctxt->instate == XML_PARSER_EOF))
478
2
  return;
479
34.4k
    if (ctxt != NULL)
480
34.4k
  ctxt->errNo = error;
481
34.4k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
482
34.4k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
483
34.4k
    if (ctxt != NULL) {
484
34.4k
  ctxt->wellFormed = 0;
485
34.4k
  if (ctxt->recovery == 0)
486
0
      ctxt->disableSAX = 1;
487
34.4k
    }
488
34.4k
}
489
490
/**
491
 * xmlWarningMsg:
492
 * @ctxt:  an XML parser context
493
 * @error:  the error number
494
 * @msg:  the error message
495
 * @str1:  extra data
496
 * @str2:  extra data
497
 *
498
 * Handle a warning.
499
 */
500
static void LIBXML_ATTR_FORMAT(3,0)
501
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
502
              const char *msg, const xmlChar *str1, const xmlChar *str2)
503
0
{
504
0
    xmlStructuredErrorFunc schannel = NULL;
505
506
0
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
507
0
        (ctxt->instate == XML_PARSER_EOF))
508
0
  return;
509
0
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
510
0
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
511
0
        schannel = ctxt->sax->serror;
512
0
    if (ctxt != NULL) {
513
0
        __xmlRaiseError(schannel,
514
0
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
515
0
                    ctxt->userData,
516
0
                    ctxt, NULL, XML_FROM_PARSER, error,
517
0
                    XML_ERR_WARNING, NULL, 0,
518
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
519
0
        msg, (const char *) str1, (const char *) str2);
520
0
    } else {
521
0
        __xmlRaiseError(schannel, NULL, NULL,
522
0
                    ctxt, NULL, XML_FROM_PARSER, error,
523
0
                    XML_ERR_WARNING, NULL, 0,
524
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
525
0
        msg, (const char *) str1, (const char *) str2);
526
0
    }
527
0
}
528
529
/**
530
 * xmlValidityError:
531
 * @ctxt:  an XML parser context
532
 * @error:  the error number
533
 * @msg:  the error message
534
 * @str1:  extra data
535
 *
536
 * Handle a validity error.
537
 */
538
static void LIBXML_ATTR_FORMAT(3,0)
539
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
540
              const char *msg, const xmlChar *str1, const xmlChar *str2)
541
0
{
542
0
    xmlStructuredErrorFunc schannel = NULL;
543
544
0
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
545
0
        (ctxt->instate == XML_PARSER_EOF))
546
0
  return;
547
0
    if (ctxt != NULL) {
548
0
  ctxt->errNo = error;
549
0
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
550
0
      schannel = ctxt->sax->serror;
551
0
    }
552
0
    if (ctxt != NULL) {
553
0
        __xmlRaiseError(schannel,
554
0
                    ctxt->vctxt.error, ctxt->vctxt.userData,
555
0
                    ctxt, NULL, XML_FROM_DTD, error,
556
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
557
0
        (const char *) str2, NULL, 0, 0,
558
0
        msg, (const char *) str1, (const char *) str2);
559
0
  ctxt->valid = 0;
560
0
    } else {
561
0
        __xmlRaiseError(schannel, NULL, NULL,
562
0
                    ctxt, NULL, XML_FROM_DTD, error,
563
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
564
0
        (const char *) str2, NULL, 0, 0,
565
0
        msg, (const char *) str1, (const char *) str2);
566
0
    }
567
0
}
568
569
/**
570
 * xmlFatalErrMsgInt:
571
 * @ctxt:  an XML parser context
572
 * @error:  the error number
573
 * @msg:  the error message
574
 * @val:  an integer value
575
 *
576
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577
 */
578
static void LIBXML_ATTR_FORMAT(3,0)
579
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
580
                  const char *msg, int val)
581
137k
{
582
137k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583
137k
        (ctxt->instate == XML_PARSER_EOF))
584
0
  return;
585
137k
    if (ctxt != NULL)
586
137k
  ctxt->errNo = error;
587
137k
    __xmlRaiseError(NULL, NULL, NULL,
588
137k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589
137k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
590
137k
    if (ctxt != NULL) {
591
137k
  ctxt->wellFormed = 0;
592
137k
  if (ctxt->recovery == 0)
593
0
      ctxt->disableSAX = 1;
594
137k
    }
595
137k
}
596
597
/**
598
 * xmlFatalErrMsgStrIntStr:
599
 * @ctxt:  an XML parser context
600
 * @error:  the error number
601
 * @msg:  the error message
602
 * @str1:  an string info
603
 * @val:  an integer value
604
 * @str2:  an string info
605
 *
606
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607
 */
608
static void LIBXML_ATTR_FORMAT(3,0)
609
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610
                  const char *msg, const xmlChar *str1, int val,
611
      const xmlChar *str2)
612
22.5k
{
613
22.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614
22.5k
        (ctxt->instate == XML_PARSER_EOF))
615
0
  return;
616
22.5k
    if (ctxt != NULL)
617
22.5k
  ctxt->errNo = error;
618
22.5k
    __xmlRaiseError(NULL, NULL, NULL,
619
22.5k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620
22.5k
                    NULL, 0, (const char *) str1, (const char *) str2,
621
22.5k
        NULL, val, 0, msg, str1, val, str2);
622
22.5k
    if (ctxt != NULL) {
623
22.5k
  ctxt->wellFormed = 0;
624
22.5k
  if (ctxt->recovery == 0)
625
0
      ctxt->disableSAX = 1;
626
22.5k
    }
627
22.5k
}
628
629
/**
630
 * xmlFatalErrMsgStr:
631
 * @ctxt:  an XML parser context
632
 * @error:  the error number
633
 * @msg:  the error message
634
 * @val:  a string value
635
 *
636
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637
 */
638
static void LIBXML_ATTR_FORMAT(3,0)
639
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
640
                  const char *msg, const xmlChar * val)
641
8.88k
{
642
8.88k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643
8.88k
        (ctxt->instate == XML_PARSER_EOF))
644
0
  return;
645
8.88k
    if (ctxt != NULL)
646
8.88k
  ctxt->errNo = error;
647
8.88k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
648
8.88k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
649
8.88k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650
8.88k
                    val);
651
8.88k
    if (ctxt != NULL) {
652
8.88k
  ctxt->wellFormed = 0;
653
8.88k
  if (ctxt->recovery == 0)
654
0
      ctxt->disableSAX = 1;
655
8.88k
    }
656
8.88k
}
657
658
/**
659
 * xmlErrMsgStr:
660
 * @ctxt:  an XML parser context
661
 * @error:  the error number
662
 * @msg:  the error message
663
 * @val:  a string value
664
 *
665
 * Handle a non fatal parser error
666
 */
667
static void LIBXML_ATTR_FORMAT(3,0)
668
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669
                  const char *msg, const xmlChar * val)
670
0
{
671
0
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672
0
        (ctxt->instate == XML_PARSER_EOF))
673
0
  return;
674
0
    if (ctxt != NULL)
675
0
  ctxt->errNo = error;
676
0
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
677
0
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
678
0
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679
0
                    val);
680
0
}
681
682
/**
683
 * xmlNsErr:
684
 * @ctxt:  an XML parser context
685
 * @error:  the error number
686
 * @msg:  the message
687
 * @info1:  extra information string
688
 * @info2:  extra information string
689
 *
690
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691
 */
692
static void LIBXML_ATTR_FORMAT(3,0)
693
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694
         const char *msg,
695
         const xmlChar * info1, const xmlChar * info2,
696
         const xmlChar * info3)
697
16.5k
{
698
16.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699
16.5k
        (ctxt->instate == XML_PARSER_EOF))
700
0
  return;
701
16.5k
    if (ctxt != NULL)
702
16.5k
  ctxt->errNo = error;
703
16.5k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
704
16.5k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
705
16.5k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
706
16.5k
                    info1, info2, info3);
707
16.5k
    if (ctxt != NULL)
708
16.5k
  ctxt->nsWellFormed = 0;
709
16.5k
}
710
711
/**
712
 * xmlNsWarn
713
 * @ctxt:  an XML parser context
714
 * @error:  the error number
715
 * @msg:  the message
716
 * @info1:  extra information string
717
 * @info2:  extra information string
718
 *
719
 * Handle a namespace warning error
720
 */
721
static void LIBXML_ATTR_FORMAT(3,0)
722
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723
         const char *msg,
724
         const xmlChar * info1, const xmlChar * info2,
725
         const xmlChar * info3)
726
1.36k
{
727
1.36k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728
1.36k
        (ctxt->instate == XML_PARSER_EOF))
729
0
  return;
730
1.36k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731
1.36k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
732
1.36k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
733
1.36k
                    info1, info2, info3);
734
1.36k
}
735
736
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest increment that still fits without wrapping. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
743
744
/*
 * Add the size_t quantity @val to *@dst, clamping the result to ULONG_MAX
 * instead of letting the unsigned addition wrap around.
 *
 * @val is taken as size_t rather than unsigned long so that, on platforms
 * where size_t is wider than unsigned long, a large value is not truncated
 * by the implicit argument conversion before the saturation check runs.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /*
     * The comparison is performed in the wider of the two types, so an
     * oversized @val always takes the saturating branch.
     */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
751
752
/**
753
 * xmlParserEntityCheck:
754
 * @ctxt:  parser context
755
 * @extra:  sum of unexpanded entity sizes
756
 *
757
 * Check for non-linear entity expansion behaviour.
758
 *
759
 * In some cases like xmlStringDecodeEntities, this function is called
760
 * for each, possibly nested entity and its unexpanded content length.
761
 *
762
 * In other cases like xmlParseReference, it's only called for each
763
 * top-level entity with its unexpanded content length plus the sum of
764
 * the unexpanded content lengths (plus fixed cost) of all nested
765
 * entities.
766
 *
767
 * Summing the unexpanded lengths also adds the length of the reference.
768
 * This is by design. Taking the length of the entity name into account
769
 * discourages attacks that try to waste CPU time with abusively long
770
 * entity names. See test/recurse/lol6.xml for example. Each call also
771
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
772
 * short entities.
773
 *
774
 * Returns 1 on error, 0 on success.
775
 */
776
static int
777
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
778
0
{
779
0
    unsigned long consumed;
780
0
    xmlParserInputPtr input = ctxt->input;
781
0
    xmlEntityPtr entity = input->entity;
782
783
    /*
784
     * Compute total consumed bytes so far, including input streams of
785
     * external entities.
786
     */
787
0
    consumed = input->parentConsumed;
788
0
    if ((entity == NULL) ||
789
0
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
790
0
         ((entity->flags & XML_ENT_PARSED) == 0))) {
791
0
        xmlSaturatedAdd(&consumed, input->consumed);
792
0
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
793
0
    }
794
0
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
795
796
    /*
797
     * Add extra cost and some fixed cost.
798
     */
799
0
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
800
0
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
801
802
    /*
803
     * It's important to always use saturation arithmetic when tracking
804
     * entity sizes to make the size checks reliable. If "sizeentcopy"
805
     * overflows, we have to abort.
806
     */
807
0
    if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
808
0
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
809
0
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
810
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
811
0
                       "Maximum entity amplification factor exceeded");
812
0
        xmlHaltParser(ctxt);
813
0
        return(1);
814
0
    }
815
816
0
    return(0);
817
0
}
818
819
/************************************************************************
820
 *                  *
821
 *    Library wide options          *
822
 *                  *
823
 ************************************************************************/
824
825
/**
826
  * xmlHasFeature:
827
  * @feature: the feature to be examined
828
  *
829
  * Examines if the library has been compiled with a given feature.
830
  *
831
  * Returns a non-zero value if the feature exist, otherwise zero.
832
  * Returns zero (0) if the feature does not exist or an unknown
833
  * feature is requested, non-zero otherwise.
834
  */
835
int
836
xmlHasFeature(xmlFeature feature)
837
0
{
838
0
    switch (feature) {
839
0
  case XML_WITH_THREAD:
840
0
#ifdef LIBXML_THREAD_ENABLED
841
0
      return(1);
842
#else
843
      return(0);
844
#endif
845
0
        case XML_WITH_TREE:
846
0
#ifdef LIBXML_TREE_ENABLED
847
0
            return(1);
848
#else
849
            return(0);
850
#endif
851
0
        case XML_WITH_OUTPUT:
852
0
#ifdef LIBXML_OUTPUT_ENABLED
853
0
            return(1);
854
#else
855
            return(0);
856
#endif
857
0
        case XML_WITH_PUSH:
858
#ifdef LIBXML_PUSH_ENABLED
859
            return(1);
860
#else
861
0
            return(0);
862
0
#endif
863
0
        case XML_WITH_READER:
864
#ifdef LIBXML_READER_ENABLED
865
            return(1);
866
#else
867
0
            return(0);
868
0
#endif
869
0
        case XML_WITH_PATTERN:
870
0
#ifdef LIBXML_PATTERN_ENABLED
871
0
            return(1);
872
#else
873
            return(0);
874
#endif
875
0
        case XML_WITH_WRITER:
876
#ifdef LIBXML_WRITER_ENABLED
877
            return(1);
878
#else
879
0
            return(0);
880
0
#endif
881
0
        case XML_WITH_SAX1:
882
#ifdef LIBXML_SAX1_ENABLED
883
            return(1);
884
#else
885
0
            return(0);
886
0
#endif
887
0
        case XML_WITH_FTP:
888
#ifdef LIBXML_FTP_ENABLED
889
            return(1);
890
#else
891
0
            return(0);
892
0
#endif
893
0
        case XML_WITH_HTTP:
894
0
#ifdef LIBXML_HTTP_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_VALID:
900
#ifdef LIBXML_VALID_ENABLED
901
            return(1);
902
#else
903
0
            return(0);
904
0
#endif
905
0
        case XML_WITH_HTML:
906
0
#ifdef LIBXML_HTML_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_LEGACY:
912
#ifdef LIBXML_LEGACY_ENABLED
913
            return(1);
914
#else
915
0
            return(0);
916
0
#endif
917
0
        case XML_WITH_C14N:
918
#ifdef LIBXML_C14N_ENABLED
919
            return(1);
920
#else
921
0
            return(0);
922
0
#endif
923
0
        case XML_WITH_CATALOG:
924
0
#ifdef LIBXML_CATALOG_ENABLED
925
0
            return(1);
926
#else
927
            return(0);
928
#endif
929
0
        case XML_WITH_XPATH:
930
0
#ifdef LIBXML_XPATH_ENABLED
931
0
            return(1);
932
#else
933
            return(0);
934
#endif
935
0
        case XML_WITH_XPTR:
936
0
#ifdef LIBXML_XPTR_ENABLED
937
0
            return(1);
938
#else
939
            return(0);
940
#endif
941
0
        case XML_WITH_XINCLUDE:
942
0
#ifdef LIBXML_XINCLUDE_ENABLED
943
0
            return(1);
944
#else
945
            return(0);
946
#endif
947
0
        case XML_WITH_ICONV:
948
0
#ifdef LIBXML_ICONV_ENABLED
949
0
            return(1);
950
#else
951
            return(0);
952
#endif
953
0
        case XML_WITH_ISO8859X:
954
0
#ifdef LIBXML_ISO8859X_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_UNICODE:
960
#ifdef LIBXML_UNICODE_ENABLED
961
            return(1);
962
#else
963
0
            return(0);
964
0
#endif
965
0
        case XML_WITH_REGEXP:
966
#ifdef LIBXML_REGEXP_ENABLED
967
            return(1);
968
#else
969
0
            return(0);
970
0
#endif
971
0
        case XML_WITH_AUTOMATA:
972
#ifdef LIBXML_AUTOMATA_ENABLED
973
            return(1);
974
#else
975
0
            return(0);
976
0
#endif
977
0
        case XML_WITH_EXPR:
978
#ifdef LIBXML_EXPR_ENABLED
979
            return(1);
980
#else
981
0
            return(0);
982
0
#endif
983
0
        case XML_WITH_SCHEMAS:
984
#ifdef LIBXML_SCHEMAS_ENABLED
985
            return(1);
986
#else
987
0
            return(0);
988
0
#endif
989
0
        case XML_WITH_SCHEMATRON:
990
#ifdef LIBXML_SCHEMATRON_ENABLED
991
            return(1);
992
#else
993
0
            return(0);
994
0
#endif
995
0
        case XML_WITH_MODULES:
996
0
#ifdef LIBXML_MODULES_ENABLED
997
0
            return(1);
998
#else
999
            return(0);
1000
#endif
1001
0
        case XML_WITH_DEBUG:
1002
0
#ifdef LIBXML_DEBUG_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_DEBUG_MEM:
1008
#ifdef DEBUG_MEMORY_LOCATION
1009
            return(1);
1010
#else
1011
0
            return(0);
1012
0
#endif
1013
0
        case XML_WITH_DEBUG_RUN:
1014
0
            return(0);
1015
0
        case XML_WITH_ZLIB:
1016
#ifdef LIBXML_ZLIB_ENABLED
1017
            return(1);
1018
#else
1019
0
            return(0);
1020
0
#endif
1021
0
        case XML_WITH_LZMA:
1022
#ifdef LIBXML_LZMA_ENABLED
1023
            return(1);
1024
#else
1025
0
            return(0);
1026
0
#endif
1027
0
        case XML_WITH_ICU:
1028
#ifdef LIBXML_ICU_ENABLED
1029
            return(1);
1030
#else
1031
0
            return(0);
1032
0
#endif
1033
0
        default:
1034
0
      break;
1035
0
     }
1036
0
     return(0);
1037
0
}
1038
1039
/************************************************************************
1040
 *                  *
1041
 *    SAX2 defaulted attributes handling      *
1042
 *                  *
1043
 ************************************************************************/
1044
1045
/**
1046
 * xmlDetectSAX2:
1047
 * @ctxt:  an XML parser context
1048
 *
1049
 * Do the SAX2 detection and specific initialization
1050
 */
1051
static void
1052
157
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1053
157
    xmlSAXHandlerPtr sax;
1054
1055
    /* Avoid unused variable warning if features are disabled. */
1056
157
    (void) sax;
1057
1058
157
    if (ctxt == NULL) return;
1059
157
    sax = ctxt->sax;
1060
#ifdef LIBXML_SAX1_ENABLED
1061
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1062
        ((sax->startElementNs != NULL) ||
1063
         (sax->endElementNs != NULL) ||
1064
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1065
        ctxt->sax2 = 1;
1066
#else
1067
157
    ctxt->sax2 = 1;
1068
157
#endif /* LIBXML_SAX1_ENABLED */
1069
1070
157
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1071
157
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1072
157
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1073
157
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1074
157
    (ctxt->str_xml_ns == NULL)) {
1075
0
        xmlErrMemory(ctxt, NULL);
1076
0
    }
1077
157
}
1078
1079
typedef struct _xmlDefAttrs xmlDefAttrs;
1080
typedef xmlDefAttrs *xmlDefAttrsPtr;
1081
struct _xmlDefAttrs {
1082
    int nbAttrs;  /* number of defaulted attributes on that element */
1083
    int maxAttrs;       /* the size of the array */
1084
#if __STDC_VERSION__ >= 199901L
1085
    /* Using a C99 flexible array member avoids UBSan errors. */
1086
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1087
#else
1088
    const xmlChar *values[5];
1089
#endif
1090
};
1091
1092
/**
1093
 * xmlAttrNormalizeSpace:
1094
 * @src: the source string
1095
 * @dst: the target string
1096
 *
1097
 * Normalize the space in non CDATA attribute values:
1098
 * If the attribute type is not CDATA, then the XML processor MUST further
1099
 * process the normalized attribute value by discarding any leading and
1100
 * trailing space (#x20) characters, and by replacing sequences of space
1101
 * (#x20) characters by a single space (#x20) character.
1102
 * Note that the size of dst need to be at least src, and if one doesn't need
1103
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1104
 * passing src as dst is just fine.
1105
 *
1106
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1107
 *         is needed.
1108
 */
1109
static xmlChar *
1110
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1111
0
{
1112
0
    if ((src == NULL) || (dst == NULL))
1113
0
        return(NULL);
1114
1115
0
    while (*src == 0x20) src++;
1116
0
    while (*src != 0) {
1117
0
  if (*src == 0x20) {
1118
0
      while (*src == 0x20) src++;
1119
0
      if (*src != 0)
1120
0
    *dst++ = 0x20;
1121
0
  } else {
1122
0
      *dst++ = *src++;
1123
0
  }
1124
0
    }
1125
0
    *dst = 0;
1126
0
    if (dst == src)
1127
0
       return(NULL);
1128
0
    return(dst);
1129
0
}
1130
1131
/**
1132
 * xmlAttrNormalizeSpace2:
1133
 * @src: the source string
1134
 *
1135
 * Normalize the space in non CDATA attribute values, a slightly more complex
1136
 * front end to avoid allocation problems when running on attribute values
1137
 * coming from the input.
1138
 *
1139
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1140
 *         is needed.
1141
 */
1142
static const xmlChar *
1143
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1144
0
{
1145
0
    int i;
1146
0
    int remove_head = 0;
1147
0
    int need_realloc = 0;
1148
0
    const xmlChar *cur;
1149
1150
0
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1151
0
        return(NULL);
1152
0
    i = *len;
1153
0
    if (i <= 0)
1154
0
        return(NULL);
1155
1156
0
    cur = src;
1157
0
    while (*cur == 0x20) {
1158
0
        cur++;
1159
0
  remove_head++;
1160
0
    }
1161
0
    while (*cur != 0) {
1162
0
  if (*cur == 0x20) {
1163
0
      cur++;
1164
0
      if ((*cur == 0x20) || (*cur == 0)) {
1165
0
          need_realloc = 1;
1166
0
    break;
1167
0
      }
1168
0
  } else
1169
0
      cur++;
1170
0
    }
1171
0
    if (need_realloc) {
1172
0
        xmlChar *ret;
1173
1174
0
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1175
0
  if (ret == NULL) {
1176
0
      xmlErrMemory(ctxt, NULL);
1177
0
      return(NULL);
1178
0
  }
1179
0
  xmlAttrNormalizeSpace(ret, ret);
1180
0
  *len = strlen((const char *)ret);
1181
0
        return(ret);
1182
0
    } else if (remove_head) {
1183
0
        *len -= remove_head;
1184
0
        memmove(src, src + remove_head, 1 + *len);
1185
0
  return(src);
1186
0
    }
1187
0
    return(NULL);
1188
0
}
1189
1190
/**
1191
 * xmlAddDefAttrs:
1192
 * @ctxt:  an XML parser context
1193
 * @fullname:  the element fullname
1194
 * @fullattr:  the attribute fullname
1195
 * @value:  the attribute value
1196
 *
1197
 * Add a defaulted attribute for an element
1198
 */
1199
static void
1200
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1201
               const xmlChar *fullname,
1202
               const xmlChar *fullattr,
1203
0
               const xmlChar *value) {
1204
0
    xmlDefAttrsPtr defaults;
1205
0
    int len;
1206
0
    const xmlChar *name;
1207
0
    const xmlChar *prefix;
1208
1209
    /*
1210
     * Allows to detect attribute redefinitions
1211
     */
1212
0
    if (ctxt->attsSpecial != NULL) {
1213
0
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1214
0
      return;
1215
0
    }
1216
1217
0
    if (ctxt->attsDefault == NULL) {
1218
0
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1219
0
  if (ctxt->attsDefault == NULL)
1220
0
      goto mem_error;
1221
0
    }
1222
1223
    /*
1224
     * split the element name into prefix:localname , the string found
1225
     * are within the DTD and then not associated to namespace names.
1226
     */
1227
0
    name = xmlSplitQName3(fullname, &len);
1228
0
    if (name == NULL) {
1229
0
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1230
0
  prefix = NULL;
1231
0
    } else {
1232
0
        name = xmlDictLookup(ctxt->dict, name, -1);
1233
0
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1234
0
    }
1235
1236
    /*
1237
     * make sure there is some storage
1238
     */
1239
0
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1240
0
    if (defaults == NULL) {
1241
0
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1242
0
                     (4 * 5) * sizeof(const xmlChar *));
1243
0
  if (defaults == NULL)
1244
0
      goto mem_error;
1245
0
  defaults->nbAttrs = 0;
1246
0
  defaults->maxAttrs = 4;
1247
0
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1248
0
                          defaults, NULL) < 0) {
1249
0
      xmlFree(defaults);
1250
0
      goto mem_error;
1251
0
  }
1252
0
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1253
0
        xmlDefAttrsPtr temp;
1254
1255
0
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1256
0
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1257
0
  if (temp == NULL)
1258
0
      goto mem_error;
1259
0
  defaults = temp;
1260
0
  defaults->maxAttrs *= 2;
1261
0
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1262
0
                          defaults, NULL) < 0) {
1263
0
      xmlFree(defaults);
1264
0
      goto mem_error;
1265
0
  }
1266
0
    }
1267
1268
    /*
1269
     * Split the element name into prefix:localname , the string found
1270
     * are within the DTD and hen not associated to namespace names.
1271
     */
1272
0
    name = xmlSplitQName3(fullattr, &len);
1273
0
    if (name == NULL) {
1274
0
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1275
0
  prefix = NULL;
1276
0
    } else {
1277
0
        name = xmlDictLookup(ctxt->dict, name, -1);
1278
0
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1279
0
    }
1280
1281
0
    defaults->values[5 * defaults->nbAttrs] = name;
1282
0
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1283
    /* intern the string and precompute the end */
1284
0
    len = xmlStrlen(value);
1285
0
    value = xmlDictLookup(ctxt->dict, value, len);
1286
0
    if (value == NULL)
1287
0
        goto mem_error;
1288
0
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1289
0
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1290
0
    if (ctxt->external)
1291
0
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1292
0
    else
1293
0
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1294
0
    defaults->nbAttrs++;
1295
1296
0
    return;
1297
1298
0
mem_error:
1299
0
    xmlErrMemory(ctxt, NULL);
1300
0
    return;
1301
0
}
1302
1303
/**
1304
 * xmlAddSpecialAttr:
1305
 * @ctxt:  an XML parser context
1306
 * @fullname:  the element fullname
1307
 * @fullattr:  the attribute fullname
1308
 * @type:  the attribute type
1309
 *
1310
 * Register this attribute type
1311
 */
1312
static void
1313
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1314
      const xmlChar *fullname,
1315
      const xmlChar *fullattr,
1316
      int type)
1317
0
{
1318
0
    if (ctxt->attsSpecial == NULL) {
1319
0
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1320
0
  if (ctxt->attsSpecial == NULL)
1321
0
      goto mem_error;
1322
0
    }
1323
1324
0
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1325
0
        return;
1326
1327
0
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1328
0
                     (void *) (ptrdiff_t) type);
1329
0
    return;
1330
1331
0
mem_error:
1332
0
    xmlErrMemory(ctxt, NULL);
1333
0
    return;
1334
0
}
1335
1336
/**
1337
 * xmlCleanSpecialAttrCallback:
1338
 *
1339
 * Removes CDATA attributes from the special attribute table
1340
 */
1341
static void
1342
xmlCleanSpecialAttrCallback(void *payload, void *data,
1343
                            const xmlChar *fullname, const xmlChar *fullattr,
1344
0
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1345
0
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1346
1347
0
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1348
0
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1349
0
    }
1350
0
}
1351
1352
/**
1353
 * xmlCleanSpecialAttr:
1354
 * @ctxt:  an XML parser context
1355
 *
1356
 * Trim the list of attributes defined to remove all those of type
1357
 * CDATA as they are not special. This call should be done when finishing
1358
 * to parse the DTD and before starting to parse the document root.
1359
 */
1360
static void
1361
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1362
0
{
1363
0
    if (ctxt->attsSpecial == NULL)
1364
0
        return;
1365
1366
0
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1367
1368
0
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1369
0
        xmlHashFree(ctxt->attsSpecial, NULL);
1370
0
        ctxt->attsSpecial = NULL;
1371
0
    }
1372
0
    return;
1373
0
}
1374
1375
/**
1376
 * xmlCheckLanguageID:
1377
 * @lang:  pointer to the string value
1378
 *
1379
 * Checks that the value conforms to the LanguageID production:
1380
 *
1381
 * NOTE: this is somewhat deprecated, those productions were removed from
1382
 *       the XML Second edition.
1383
 *
1384
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1385
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1386
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1387
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1388
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1389
 * [38] Subcode ::= ([a-z] | [A-Z])+
1390
 *
1391
 * The current REC reference the successors of RFC 1766, currently 5646
1392
 *
1393
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1394
 * langtag       = language
1395
 *                 ["-" script]
1396
 *                 ["-" region]
1397
 *                 *("-" variant)
1398
 *                 *("-" extension)
1399
 *                 ["-" privateuse]
1400
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1401
 *                 ["-" extlang]       ; sometimes followed by
1402
 *                                     ; extended language subtags
1403
 *               / 4ALPHA              ; or reserved for future use
1404
 *               / 5*8ALPHA            ; or registered language subtag
1405
 *
1406
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1407
 *                 *2("-" 3ALPHA)      ; permanently reserved
1408
 *
1409
 * script        = 4ALPHA              ; ISO 15924 code
1410
 *
1411
 * region        = 2ALPHA              ; ISO 3166-1 code
1412
 *               / 3DIGIT              ; UN M.49 code
1413
 *
1414
 * variant       = 5*8alphanum         ; registered variants
1415
 *               / (DIGIT 3alphanum)
1416
 *
1417
 * extension     = singleton 1*("-" (2*8alphanum))
1418
 *
1419
 *                                     ; Single alphanumerics
1420
 *                                     ; "x" reserved for private use
1421
 * singleton     = DIGIT               ; 0 - 9
1422
 *               / %x41-57             ; A - W
1423
 *               / %x59-5A             ; Y - Z
1424
 *               / %x61-77             ; a - w
1425
 *               / %x79-7A             ; y - z
1426
 *
1427
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1428
 * The parser below doesn't try to cope with extension or privateuse
1429
 * that could be added but that's not interoperable anyway
1430
 *
1431
 * Returns 1 if correct 0 otherwise
1432
 **/
1433
int
1434
xmlCheckLanguageID(const xmlChar * lang)
1435
0
{
1436
0
    const xmlChar *cur = lang, *nxt;
1437
1438
0
    if (cur == NULL)
1439
0
        return (0);
1440
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1441
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1442
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1443
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1444
        /*
1445
         * Still allow IANA code and user code which were coming
1446
         * from the previous version of the XML-1.0 specification
1447
         * it's deprecated but we should not fail
1448
         */
1449
0
        cur += 2;
1450
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1451
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1452
0
            cur++;
1453
0
        return(cur[0] == 0);
1454
0
    }
1455
0
    nxt = cur;
1456
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1457
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1458
0
           nxt++;
1459
0
    if (nxt - cur >= 4) {
1460
        /*
1461
         * Reserved
1462
         */
1463
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1464
0
            return(0);
1465
0
        return(1);
1466
0
    }
1467
0
    if (nxt - cur < 2)
1468
0
        return(0);
1469
    /* we got an ISO 639 code */
1470
0
    if (nxt[0] == 0)
1471
0
        return(1);
1472
0
    if (nxt[0] != '-')
1473
0
        return(0);
1474
1475
0
    nxt++;
1476
0
    cur = nxt;
1477
    /* now we can have extlang or script or region or variant */
1478
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1479
0
        goto region_m49;
1480
1481
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1482
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1483
0
           nxt++;
1484
0
    if (nxt - cur == 4)
1485
0
        goto script;
1486
0
    if (nxt - cur == 2)
1487
0
        goto region;
1488
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1489
0
        goto variant;
1490
0
    if (nxt - cur != 3)
1491
0
        return(0);
1492
    /* we parsed an extlang */
1493
0
    if (nxt[0] == 0)
1494
0
        return(1);
1495
0
    if (nxt[0] != '-')
1496
0
        return(0);
1497
1498
0
    nxt++;
1499
0
    cur = nxt;
1500
    /* now we can have script or region or variant */
1501
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1502
0
        goto region_m49;
1503
1504
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1505
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1506
0
           nxt++;
1507
0
    if (nxt - cur == 2)
1508
0
        goto region;
1509
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1510
0
        goto variant;
1511
0
    if (nxt - cur != 4)
1512
0
        return(0);
1513
    /* we parsed a script */
1514
0
script:
1515
0
    if (nxt[0] == 0)
1516
0
        return(1);
1517
0
    if (nxt[0] != '-')
1518
0
        return(0);
1519
1520
0
    nxt++;
1521
0
    cur = nxt;
1522
    /* now we can have region or variant */
1523
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1524
0
        goto region_m49;
1525
1526
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1527
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1528
0
           nxt++;
1529
1530
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1531
0
        goto variant;
1532
0
    if (nxt - cur != 2)
1533
0
        return(0);
1534
    /* we parsed a region */
1535
0
region:
1536
0
    if (nxt[0] == 0)
1537
0
        return(1);
1538
0
    if (nxt[0] != '-')
1539
0
        return(0);
1540
1541
0
    nxt++;
1542
0
    cur = nxt;
1543
    /* now we can just have a variant */
1544
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1545
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1546
0
           nxt++;
1547
1548
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1549
0
        return(0);
1550
1551
    /* we parsed a variant */
1552
0
variant:
1553
0
    if (nxt[0] == 0)
1554
0
        return(1);
1555
0
    if (nxt[0] != '-')
1556
0
        return(0);
1557
    /* extensions and private use subtags not checked */
1558
0
    return (1);
1559
1560
0
region_m49:
1561
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1562
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1563
0
        nxt += 3;
1564
0
        goto region;
1565
0
    }
1566
0
    return(0);
1567
0
}
1568
1569
/************************************************************************
1570
 *                  *
1571
 *    Parser stacks related functions and macros    *
1572
 *                  *
1573
 ************************************************************************/
1574
1575
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1576
                                            const xmlChar ** str);
1577
1578
#ifdef SAX2
1579
/**
1580
 * nsPush:
1581
 * @ctxt:  an XML parser context
1582
 * @prefix:  the namespace prefix or NULL
1583
 * @URL:  the namespace name
1584
 *
1585
 * Pushes a new parser namespace on top of the ns stack
1586
 *
1587
 * Returns -1 in case of error, -2 if the namespace should be discarded
1588
 *     and the index in the stack otherwise.
1589
 */
1590
static int
1591
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1592
9.96k
{
1593
9.96k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1594
0
        int i;
1595
0
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1596
0
      if (ctxt->nsTab[i] == prefix) {
1597
    /* in scope */
1598
0
          if (ctxt->nsTab[i + 1] == URL)
1599
0
        return(-2);
1600
    /* out of scope keep it */
1601
0
    break;
1602
0
      }
1603
0
  }
1604
0
    }
1605
9.96k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1606
28
  ctxt->nsMax = 10;
1607
28
  ctxt->nsNr = 0;
1608
28
  ctxt->nsTab = (const xmlChar **)
1609
28
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1610
28
  if (ctxt->nsTab == NULL) {
1611
0
      xmlErrMemory(ctxt, NULL);
1612
0
      ctxt->nsMax = 0;
1613
0
            return (-1);
1614
0
  }
1615
9.94k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1616
8
        const xmlChar ** tmp;
1617
8
        ctxt->nsMax *= 2;
1618
8
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1619
8
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1620
8
        if (tmp == NULL) {
1621
0
            xmlErrMemory(ctxt, NULL);
1622
0
      ctxt->nsMax /= 2;
1623
0
            return (-1);
1624
0
        }
1625
8
  ctxt->nsTab = tmp;
1626
8
    }
1627
9.96k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1628
9.96k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1629
9.96k
    return (ctxt->nsNr);
1630
9.96k
}
1631
/**
1632
 * nsPop:
1633
 * @ctxt: an XML parser context
1634
 * @nr:  the number to pop
1635
 *
1636
 * Pops the top @nr parser prefix/namespace from the ns stack
1637
 *
1638
 * Returns the number of namespaces removed
1639
 */
1640
static int
1641
nsPop(xmlParserCtxtPtr ctxt, int nr)
1642
8.41k
{
1643
8.41k
    int i;
1644
1645
8.41k
    if (ctxt->nsTab == NULL) return(0);
1646
8.41k
    if (ctxt->nsNr < nr) {
1647
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1648
0
        nr = ctxt->nsNr;
1649
0
    }
1650
8.41k
    if (ctxt->nsNr <= 0)
1651
0
        return (0);
1652
1653
28.1k
    for (i = 0;i < nr;i++) {
1654
19.6k
         ctxt->nsNr--;
1655
19.6k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1656
19.6k
    }
1657
8.41k
    return(nr);
1658
8.41k
}
1659
#endif
1660
1661
static int
1662
29
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1663
29
    const xmlChar **atts;
1664
29
    int *attallocs;
1665
29
    int maxatts;
1666
1667
29
    if (nr + 5 > ctxt->maxatts) {
1668
29
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1669
29
  atts = (const xmlChar **) xmlMalloc(
1670
29
             maxatts * sizeof(const xmlChar *));
1671
29
  if (atts == NULL) goto mem_error;
1672
29
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1673
29
                               (maxatts / 5) * sizeof(int));
1674
29
  if (attallocs == NULL) {
1675
0
            xmlFree(atts);
1676
0
            goto mem_error;
1677
0
        }
1678
29
        if (ctxt->maxatts > 0)
1679
4
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1680
29
        xmlFree(ctxt->atts);
1681
29
  ctxt->atts = atts;
1682
29
  ctxt->attallocs = attallocs;
1683
29
  ctxt->maxatts = maxatts;
1684
29
    }
1685
29
    return(ctxt->maxatts);
1686
0
mem_error:
1687
0
    xmlErrMemory(ctxt, NULL);
1688
0
    return(-1);
1689
29
}
1690
1691
/**
1692
 * inputPush:
1693
 * @ctxt:  an XML parser context
1694
 * @value:  the parser input
1695
 *
1696
 * Pushes a new parser input on top of the input stack
1697
 *
1698
 * Returns -1 in case of error, the index in the stack otherwise
1699
 */
1700
int
1701
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1702
157
{
1703
157
    if ((ctxt == NULL) || (value == NULL))
1704
0
        return(-1);
1705
157
    if (ctxt->inputNr >= ctxt->inputMax) {
1706
0
        size_t newSize = ctxt->inputMax * 2;
1707
0
        xmlParserInputPtr *tmp;
1708
1709
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1710
0
                                               newSize * sizeof(*tmp));
1711
0
        if (tmp == NULL) {
1712
0
            xmlErrMemory(ctxt, NULL);
1713
0
            return (-1);
1714
0
        }
1715
0
        ctxt->inputTab = tmp;
1716
0
        ctxt->inputMax = newSize;
1717
0
    }
1718
157
    ctxt->inputTab[ctxt->inputNr] = value;
1719
157
    ctxt->input = value;
1720
157
    return (ctxt->inputNr++);
1721
157
}
1722
/**
1723
 * inputPop:
1724
 * @ctxt: an XML parser context
1725
 *
1726
 * Pops the top parser input from the input stack
1727
 *
1728
 * Returns the input just removed
1729
 */
1730
xmlParserInputPtr
1731
inputPop(xmlParserCtxtPtr ctxt)
1732
471
{
1733
471
    xmlParserInputPtr ret;
1734
1735
471
    if (ctxt == NULL)
1736
0
        return(NULL);
1737
471
    if (ctxt->inputNr <= 0)
1738
314
        return (NULL);
1739
157
    ctxt->inputNr--;
1740
157
    if (ctxt->inputNr > 0)
1741
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1742
157
    else
1743
157
        ctxt->input = NULL;
1744
157
    ret = ctxt->inputTab[ctxt->inputNr];
1745
157
    ctxt->inputTab[ctxt->inputNr] = NULL;
1746
157
    return (ret);
1747
471
}
1748
/**
1749
 * nodePush:
1750
 * @ctxt:  an XML parser context
1751
 * @value:  the element node
1752
 *
1753
 * Pushes a new element node on top of the node stack
1754
 *
1755
 * Returns -1 in case of error, the index in the stack otherwise
1756
 */
1757
int
1758
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1759
27.8k
{
1760
27.8k
    if (ctxt == NULL) return(0);
1761
27.8k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1762
108
        xmlNodePtr *tmp;
1763
1764
108
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1765
108
                                      ctxt->nodeMax * 2 *
1766
108
                                      sizeof(ctxt->nodeTab[0]));
1767
108
        if (tmp == NULL) {
1768
0
            xmlErrMemory(ctxt, NULL);
1769
0
            return (-1);
1770
0
        }
1771
108
        ctxt->nodeTab = tmp;
1772
108
  ctxt->nodeMax *= 2;
1773
108
    }
1774
27.8k
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1775
27.8k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1776
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1777
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1778
0
        xmlParserMaxDepth);
1779
0
  xmlHaltParser(ctxt);
1780
0
  return(-1);
1781
0
    }
1782
27.8k
    ctxt->nodeTab[ctxt->nodeNr] = value;
1783
27.8k
    ctxt->node = value;
1784
27.8k
    return (ctxt->nodeNr++);
1785
27.8k
}
1786
1787
/**
1788
 * nodePop:
1789
 * @ctxt: an XML parser context
1790
 *
1791
 * Pops the top element node from the node stack
1792
 *
1793
 * Returns the node just removed
1794
 */
1795
xmlNodePtr
1796
nodePop(xmlParserCtxtPtr ctxt)
1797
23.6k
{
1798
23.6k
    xmlNodePtr ret;
1799
1800
23.6k
    if (ctxt == NULL) return(NULL);
1801
23.6k
    if (ctxt->nodeNr <= 0)
1802
0
        return (NULL);
1803
23.6k
    ctxt->nodeNr--;
1804
23.6k
    if (ctxt->nodeNr > 0)
1805
23.6k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1806
6
    else
1807
6
        ctxt->node = NULL;
1808
23.6k
    ret = ctxt->nodeTab[ctxt->nodeNr];
1809
23.6k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1810
23.6k
    return (ret);
1811
23.6k
}
1812
1813
/**
1814
 * nameNsPush:
1815
 * @ctxt:  an XML parser context
1816
 * @value:  the element name
1817
 * @prefix:  the element prefix
1818
 * @URI:  the element namespace name
1819
 * @line:  the current line number for error messages
1820
 * @nsNr:  the number of namespaces pushed on the namespace table
1821
 *
1822
 * Pushes a new element name/prefix/URL on top of the name stack
1823
 *
1824
 * Returns -1 in case of error, the index in the stack otherwise
1825
 */
1826
static int
1827
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1828
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1829
27.8k
{
1830
27.8k
    xmlStartTag *tag;
1831
1832
27.8k
    if (ctxt->nameNr >= ctxt->nameMax) {
1833
108
        const xmlChar * *tmp;
1834
108
        xmlStartTag *tmp2;
1835
108
        ctxt->nameMax *= 2;
1836
108
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1837
108
                                    ctxt->nameMax *
1838
108
                                    sizeof(ctxt->nameTab[0]));
1839
108
        if (tmp == NULL) {
1840
0
      ctxt->nameMax /= 2;
1841
0
      goto mem_error;
1842
0
        }
1843
108
  ctxt->nameTab = tmp;
1844
108
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1845
108
                                    ctxt->nameMax *
1846
108
                                    sizeof(ctxt->pushTab[0]));
1847
108
        if (tmp2 == NULL) {
1848
0
      ctxt->nameMax /= 2;
1849
0
      goto mem_error;
1850
0
        }
1851
108
  ctxt->pushTab = tmp2;
1852
27.7k
    } else if (ctxt->pushTab == NULL) {
1853
119
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1854
119
                                            sizeof(ctxt->pushTab[0]));
1855
119
        if (ctxt->pushTab == NULL)
1856
0
            goto mem_error;
1857
119
    }
1858
27.8k
    ctxt->nameTab[ctxt->nameNr] = value;
1859
27.8k
    ctxt->name = value;
1860
27.8k
    tag = &ctxt->pushTab[ctxt->nameNr];
1861
27.8k
    tag->prefix = prefix;
1862
27.8k
    tag->URI = URI;
1863
27.8k
    tag->line = line;
1864
27.8k
    tag->nsNr = nsNr;
1865
27.8k
    return (ctxt->nameNr++);
1866
0
mem_error:
1867
0
    xmlErrMemory(ctxt, NULL);
1868
0
    return (-1);
1869
27.8k
}
1870
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the top entry from the name stack. The entry below it, if any,
 * becomes the current name (ctxt->name); otherwise the current name is
 * reset to NULL. The vacated stack slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    /* Index of the entry being removed, which is also the new depth. */
    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1896
1897
/**
1898
 * namePush:
1899
 * @ctxt:  an XML parser context
1900
 * @value:  the element name
1901
 *
1902
 * Pushes a new element name on top of the name stack
1903
 *
1904
 * Returns -1 in case of error, the index in the stack otherwise
1905
 */
1906
int
1907
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1908
0
{
1909
0
    if (ctxt == NULL) return (-1);
1910
1911
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1912
0
        const xmlChar * *tmp;
1913
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1914
0
                                    ctxt->nameMax * 2 *
1915
0
                                    sizeof(ctxt->nameTab[0]));
1916
0
        if (tmp == NULL) {
1917
0
      goto mem_error;
1918
0
        }
1919
0
  ctxt->nameTab = tmp;
1920
0
        ctxt->nameMax *= 2;
1921
0
    }
1922
0
    ctxt->nameTab[ctxt->nameNr] = value;
1923
0
    ctxt->name = value;
1924
0
    return (ctxt->nameNr++);
1925
0
mem_error:
1926
0
    xmlErrMemory(ctxt, NULL);
1927
0
    return (-1);
1928
0
}
1929
/**
1930
 * namePop:
1931
 * @ctxt: an XML parser context
1932
 *
1933
 * Pops the top element name from the name stack
1934
 *
1935
 * Returns the name just removed
1936
 */
1937
const xmlChar *
1938
namePop(xmlParserCtxtPtr ctxt)
1939
23.6k
{
1940
23.6k
    const xmlChar *ret;
1941
1942
23.6k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1943
0
        return (NULL);
1944
23.6k
    ctxt->nameNr--;
1945
23.6k
    if (ctxt->nameNr > 0)
1946
23.6k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1947
6
    else
1948
6
        ctxt->name = NULL;
1949
23.6k
    ret = ctxt->nameTab[ctxt->nameNr];
1950
23.6k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1951
23.6k
    return (ret);
1952
23.6k
}
1953
1954
31.5k
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store in the new top slot
 *
 * Pushes @val on top of the space stack (ctxt->spaceTab) and points
 * ctxt->space at the new top slot. The stack is doubled when it is full.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax *
                                   sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the doubling: the old table is still in place. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1972
1973
27.4k
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top value from the space stack. ctxt->space is pointed at
 * the new top slot, or at slot 0 when the stack became empty, and the
 * vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    /* Index of the entry being removed, which is also the new depth. */
    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1985
1986
/*
1987
 * Macros for accessing the content. Those should be used only by the parser,
1988
 * and not exported.
1989
 *
1990
 * Dirty macros, i.e. one often need to make assumption on the context to
1991
 * use them
1992
 *
1993
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1994
 *           To be used with extreme caution since operations consuming
1995
 *           characters may move the input buffer to a different location !
1996
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1997
 *           This should be used internally by the parser
1998
 *           only to compare to ASCII values otherwise it would break when
1999
 *           running with UTF-8 encoding.
2000
 *   RAW     same as CUR but in the input buffer, bypass any token
2001
 *           extraction that may have been done
2002
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2003
 *           to compare on ASCII based substring.
2004
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2005
 *           strings without newlines within the parser.
2006
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2007
 *           defined char within the parser.
2008
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
2009
 *
2010
 *   NEXT    Skip to the next character, this does the proper decoding
2011
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2012
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2013
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2014
 *           to the number of xmlChars used for the encoding [0-5].
2015
 *   CUR_SCHAR  same but operate on a string instead of the context
2016
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2017
 *            the index
2018
 *   GROW, SHRINK  handling of input buffers
2019
 */
2020
2021
502k
/*
 * Raw accessors on the current input of the parser context. All of them
 * expect a variable named ctxt to be in scope at the point of use.
 */

/* Byte at the current input position; RAW and CUR expand identically. */
#define RAW (ctxt->input->cur[0])
#define CUR (ctxt->input->cur[0])
/* Byte located val positions after the current input position. */
#define NXT(val) ctxt->input->cur[(val)]
/* Pointer to the current input position. */
#define CUR_PTR ctxt->input->cur
/* Pointer to the start of the current input buffer. */
#define BASE_PTR ctxt->input->base
2026
2027
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char and the comparison stops at the
 * first mismatch, so no byte after a mismatch is read.
 *
 * The pointer argument is parenthesized before the cast so that a
 * compound expression (e.g. "a ? b : c") is cast as a whole instead of
 * having the cast bind to its first operand only. Note that s is still
 * evaluated once per compared byte, so it must be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == c1 && \
    ((const unsigned char *) (s))[ 1 ] == c2 && \
    ((const unsigned char *) (s))[ 2 ] == c3 && \
    ((const unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == c10 )
2044
2045
5.20k
/*
 * Cursor movement macros. All of them expect a variable named ctxt to be
 * in scope at the point of use.
 */

/*
 * SKIP(val): advance the cursor and the column counter by val bytes, then
 * refill the input if the cursor landed on a 0 byte. The line counter is
 * not touched. val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SKIPL(val): advance the cursor by val bytes one at a time, keeping the
 * line/column counters in sync with any '\n' crossed, then refill the
 * input if the cursor landed on a 0 byte. val is parenthesized so that a
 * compound argument is compared as a whole in the loop condition.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK / GROW: outside of progressive mode, shrink the input buffer
 * when much has been consumed and little remains, respectively grow it
 * when fewer than INPUT_CHUNK bytes remain.
 * Both expand to a complete "if" statement including its terminating
 * semicolon; their shape is kept as is because call sites rely on it.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW if ((ctxt->progressive == 0) &&				\
		 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))	\
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor and the column counter by one byte, then
 * refill the input if the cursor landed on a 0 byte. Expands to a bare
 * compound statement; its shape is kept as is for existing call sites.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

/*
 * NEXTL(l): account for one character in the line/column counters (a
 * '\n' starts a new line) and advance the cursor by l bytes.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A one byte character (l == 1) is stored directly, anything
 * else goes through xmlCopyCharMultiByte(). Wrapped in do/while(0) so
 * the internal if/else cannot capture an "else" at the call site.
 */
#define COPY_BUF(l,b,i,v)						\
    do {								\
        if ((l) == 1) (b)[(i)++] = (v);					\
        else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));		\
    } while (0)
2096
2097
/**
2098
 * xmlSkipBlankChars:
2099
 * @ctxt:  the XML parser context
2100
 *
2101
 * skip all blanks character found at that point in the input streams.
2102
 * It pops up finished entities in the process if allowable at that point.
2103
 *
2104
 * Returns the number of space chars skipped
2105
 */
2106
2107
int
2108
77.1k
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2109
77.1k
    int res = 0;
2110
2111
    /*
2112
     * It's Okay to use CUR/NEXT here since all the blanks are on
2113
     * the ASCII range.
2114
     */
2115
77.1k
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2116
77.1k
        (ctxt->instate == XML_PARSER_START)) {
2117
77.1k
  const xmlChar *cur;
2118
  /*
2119
   * if we are in the document content, go really fast
2120
   */
2121
77.1k
  cur = ctxt->input->cur;
2122
77.1k
  while (IS_BLANK_CH(*cur)) {
2123
32.9k
      if (*cur == '\n') {
2124
14.1k
    ctxt->input->line++; ctxt->input->col = 1;
2125
18.7k
      } else {
2126
18.7k
    ctxt->input->col++;
2127
18.7k
      }
2128
32.9k
      cur++;
2129
32.9k
      if (res < INT_MAX)
2130
32.9k
    res++;
2131
32.9k
      if (*cur == 0) {
2132
1
    ctxt->input->cur = cur;
2133
1
    xmlParserGrow(ctxt);
2134
1
    cur = ctxt->input->cur;
2135
1
      }
2136
32.9k
  }
2137
77.1k
  ctxt->input->cur = cur;
2138
77.1k
    } else {
2139
0
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2140
2141
0
  while (ctxt->instate != XML_PARSER_EOF) {
2142
0
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2143
0
    NEXT;
2144
0
      } else if (CUR == '%') {
2145
                /*
2146
                 * Need to handle support of entities branching here
2147
                 */
2148
0
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2149
0
                    break;
2150
0
          xmlParsePEReference(ctxt);
2151
0
            } else if (CUR == 0) {
2152
0
                unsigned long consumed;
2153
0
                xmlEntityPtr ent;
2154
2155
0
                if (ctxt->inputNr <= 1)
2156
0
                    break;
2157
2158
0
                consumed = ctxt->input->consumed;
2159
0
                xmlSaturatedAddSizeT(&consumed,
2160
0
                                     ctxt->input->cur - ctxt->input->base);
2161
2162
                /*
2163
                 * Add to sizeentities when parsing an external entity
2164
                 * for the first time.
2165
                 */
2166
0
                ent = ctxt->input->entity;
2167
0
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2168
0
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2169
0
                    ent->flags |= XML_ENT_PARSED;
2170
2171
0
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2172
0
                }
2173
2174
0
                xmlParserEntityCheck(ctxt, consumed);
2175
2176
0
                xmlPopInput(ctxt);
2177
0
            } else {
2178
0
                break;
2179
0
            }
2180
2181
            /*
2182
             * Also increase the counter when entering or exiting a PERef.
2183
             * The spec says: "When a parameter-entity reference is recognized
2184
             * in the DTD and included, its replacement text MUST be enlarged
2185
             * by the attachment of one leading and one following space (#x20)
2186
             * character."
2187
             */
2188
0
      if (res < INT_MAX)
2189
0
    res++;
2190
0
        }
2191
0
    }
2192
77.1k
    return(res);
2193
77.1k
}
2194
2195
/************************************************************************
2196
 *                  *
2197
 *    Commodity functions to handle entities      *
2198
 *                  *
2199
 ************************************************************************/
2200
2201
/**
2202
 * xmlPopInput:
2203
 * @ctxt:  an XML parser context
2204
 *
2205
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2206
 *          pop it and return the next char.
2207
 *
2208
 * Returns the current xmlChar in the parser context
2209
 */
2210
xmlChar
2211
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2212
0
    xmlParserInputPtr input;
2213
2214
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2215
0
    if (xmlParserDebugEntities)
2216
0
  xmlGenericError(xmlGenericErrorContext,
2217
0
    "Popping input %d\n", ctxt->inputNr);
2218
0
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2219
0
        (ctxt->instate != XML_PARSER_EOF))
2220
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2221
0
                    "Unfinished entity outside the DTD");
2222
0
    input = inputPop(ctxt);
2223
0
    if (input->entity != NULL)
2224
0
        input->entity->flags &= ~XML_ENT_EXPANDING;
2225
0
    xmlFreeInputStream(input);
2226
0
    if (*ctxt->input->cur == 0)
2227
0
        xmlParserGrow(ctxt);
2228
0
    return(CUR);
2229
0
}
2230
2231
/**
2232
 * xmlPushInput:
2233
 * @ctxt:  an XML parser context
2234
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2235
 *
2236
 * xmlPushInput: switch to a new input stream which is stacked on top
2237
 *               of the previous one(s).
2238
 * Returns -1 in case of error or the index in the input stack
2239
 */
2240
int
2241
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2242
0
    int ret;
2243
0
    if (input == NULL) return(-1);
2244
2245
0
    if (xmlParserDebugEntities) {
2246
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2247
0
      xmlGenericError(xmlGenericErrorContext,
2248
0
        "%s(%d): ", ctxt->input->filename,
2249
0
        ctxt->input->line);
2250
0
  xmlGenericError(xmlGenericErrorContext,
2251
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2252
0
    }
2253
0
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2254
0
        (ctxt->inputNr > 100)) {
2255
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2256
0
        while (ctxt->inputNr > 1)
2257
0
            xmlFreeInputStream(inputPop(ctxt));
2258
0
  return(-1);
2259
0
    }
2260
0
    ret = inputPush(ctxt, input);
2261
0
    if (ctxt->instate == XML_PARSER_EOF)
2262
0
        return(-1);
2263
0
    GROW;
2264
0
    return(ret);
2265
0
}
2266
2267
/**
2268
 * xmlParseCharRef:
2269
 * @ctxt:  an XML parser context
2270
 *
2271
 * DEPRECATED: Internal function, don't use.
2272
 *
2273
 * Parse a numeric character reference. Always consumes '&'.
2274
 *
2275
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2276
 *                  '&#x' [0-9a-fA-F]+ ';'
2277
 *
2278
 * [ WFC: Legal Character ]
2279
 * Characters referred to using character references must match the
2280
 * production for Char.
2281
 *
2282
 * Returns the value parsed (as an int), 0 in case of error
2283
 */
2284
int
2285
440
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2286
440
    int val = 0;
2287
440
    int count = 0;
2288
2289
    /*
2290
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2291
     */
2292
440
    if ((RAW == '&') && (NXT(1) == '#') &&
2293
440
        (NXT(2) == 'x')) {
2294
423
  SKIP(3);
2295
423
  GROW;
2296
2.81k
  while (RAW != ';') { /* loop blocked by count */
2297
2.81k
      if (count++ > 20) {
2298
31
    count = 0;
2299
31
    GROW;
2300
31
                if (ctxt->instate == XML_PARSER_EOF)
2301
0
                    return(0);
2302
31
      }
2303
2.81k
      if ((RAW >= '0') && (RAW <= '9'))
2304
570
          val = val * 16 + (CUR - '0');
2305
2.24k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2306
281
          val = val * 16 + (CUR - 'a') + 10;
2307
1.96k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2308
1.53k
          val = val * 16 + (CUR - 'A') + 10;
2309
423
      else {
2310
423
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2311
423
    val = 0;
2312
423
    break;
2313
423
      }
2314
2.38k
      if (val > 0x110000)
2315
1.17k
          val = 0x110000;
2316
2317
2.38k
      NEXT;
2318
2.38k
      count++;
2319
2.38k
  }
2320
423
  if (RAW == ';') {
2321
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2322
0
      ctxt->input->col++;
2323
0
      ctxt->input->cur++;
2324
0
  }
2325
423
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2326
17
  SKIP(2);
2327
17
  GROW;
2328
64
  while (RAW != ';') { /* loop blocked by count */
2329
51
      if (count++ > 20) {
2330
0
    count = 0;
2331
0
    GROW;
2332
0
                if (ctxt->instate == XML_PARSER_EOF)
2333
0
                    return(0);
2334
0
      }
2335
51
      if ((RAW >= '0') && (RAW <= '9'))
2336
47
          val = val * 10 + (CUR - '0');
2337
4
      else {
2338
4
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2339
4
    val = 0;
2340
4
    break;
2341
4
      }
2342
47
      if (val > 0x110000)
2343
0
          val = 0x110000;
2344
2345
47
      NEXT;
2346
47
      count++;
2347
47
  }
2348
17
  if (RAW == ';') {
2349
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2350
13
      ctxt->input->col++;
2351
13
      ctxt->input->cur++;
2352
13
  }
2353
17
    } else {
2354
0
        if (RAW == '&')
2355
0
            SKIP(1);
2356
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2357
0
    }
2358
2359
    /*
2360
     * [ WFC: Legal Character ]
2361
     * Characters referred to using character references must match the
2362
     * production for Char.
2363
     */
2364
440
    if (val >= 0x110000) {
2365
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2366
0
                "xmlParseCharRef: character reference out of bounds\n",
2367
0
          val);
2368
440
    } else if (IS_CHAR(val)) {
2369
12
        return(val);
2370
428
    } else {
2371
428
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2372
428
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2373
428
                    val);
2374
428
    }
2375
428
    return(0);
2376
440
}
2377
2378
/**
2379
 * xmlParseStringCharRef:
2380
 * @ctxt:  an XML parser context
2381
 * @str:  a pointer to an index in the string
2382
 *
2383
 * parse Reference declarations, variant parsing from a string rather
2384
 * than an an input flow.
2385
 *
2386
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2387
 *                  '&#x' [0-9a-fA-F]+ ';'
2388
 *
2389
 * [ WFC: Legal Character ]
2390
 * Characters referred to using character references must match the
2391
 * production for Char.
2392
 *
2393
 * Returns the value parsed (as an int), 0 in case of error, str will be
2394
 *         updated to the current value of the index
2395
 */
2396
static int
2397
0
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2398
0
    const xmlChar *ptr;
2399
0
    xmlChar cur;
2400
0
    int val = 0;
2401
2402
0
    if ((str == NULL) || (*str == NULL)) return(0);
2403
0
    ptr = *str;
2404
0
    cur = *ptr;
2405
0
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2406
0
  ptr += 3;
2407
0
  cur = *ptr;
2408
0
  while (cur != ';') { /* Non input consuming loop */
2409
0
      if ((cur >= '0') && (cur <= '9'))
2410
0
          val = val * 16 + (cur - '0');
2411
0
      else if ((cur >= 'a') && (cur <= 'f'))
2412
0
          val = val * 16 + (cur - 'a') + 10;
2413
0
      else if ((cur >= 'A') && (cur <= 'F'))
2414
0
          val = val * 16 + (cur - 'A') + 10;
2415
0
      else {
2416
0
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2417
0
    val = 0;
2418
0
    break;
2419
0
      }
2420
0
      if (val > 0x110000)
2421
0
          val = 0x110000;
2422
2423
0
      ptr++;
2424
0
      cur = *ptr;
2425
0
  }
2426
0
  if (cur == ';')
2427
0
      ptr++;
2428
0
    } else if  ((cur == '&') && (ptr[1] == '#')){
2429
0
  ptr += 2;
2430
0
  cur = *ptr;
2431
0
  while (cur != ';') { /* Non input consuming loops */
2432
0
      if ((cur >= '0') && (cur <= '9'))
2433
0
          val = val * 10 + (cur - '0');
2434
0
      else {
2435
0
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2436
0
    val = 0;
2437
0
    break;
2438
0
      }
2439
0
      if (val > 0x110000)
2440
0
          val = 0x110000;
2441
2442
0
      ptr++;
2443
0
      cur = *ptr;
2444
0
  }
2445
0
  if (cur == ';')
2446
0
      ptr++;
2447
0
    } else {
2448
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2449
0
  return(0);
2450
0
    }
2451
0
    *str = ptr;
2452
2453
    /*
2454
     * [ WFC: Legal Character ]
2455
     * Characters referred to using character references must match the
2456
     * production for Char.
2457
     */
2458
0
    if (val >= 0x110000) {
2459
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2460
0
                "xmlParseStringCharRef: character reference out of bounds\n",
2461
0
                val);
2462
0
    } else if (IS_CHAR(val)) {
2463
0
        return(val);
2464
0
    } else {
2465
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2466
0
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2467
0
        val);
2468
0
    }
2469
0
    return(0);
2470
0
}
2471
2472
/**
2473
 * xmlParserHandlePEReference:
2474
 * @ctxt:  the parser context
2475
 *
2476
 * [69] PEReference ::= '%' Name ';'
2477
 *
2478
 * [ WFC: No Recursion ]
2479
 * A parsed entity must not contain a recursive
2480
 * reference to itself, either directly or indirectly.
2481
 *
2482
 * [ WFC: Entity Declared ]
2483
 * In a document without any DTD, a document with only an internal DTD
2484
 * subset which contains no parameter entity references, or a document
2485
 * with "standalone='yes'", ...  ... The declaration of a parameter
2486
 * entity must precede any reference to it...
2487
 *
2488
 * [ VC: Entity Declared ]
2489
 * In a document with an external subset or external parameter entities
2490
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2491
 * must precede any reference to it...
2492
 *
2493
 * [ WFC: In DTD ]
2494
 * Parameter-entity references may only appear in the DTD.
2495
 * NOTE: misleading but this is handled.
2496
 *
2497
 * A PEReference may have been detected in the current input stream
2498
 * the handling is done accordingly to
2499
 *      http://www.w3.org/TR/REC-xml#entproc
2500
 * i.e.
2501
 *   - Included in literal in entity values
2502
 *   - Included as Parameter Entity reference within DTDs
2503
 */
2504
void
2505
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2506
0
    switch(ctxt->instate) {
2507
0
  case XML_PARSER_CDATA_SECTION:
2508
0
      return;
2509
0
        case XML_PARSER_COMMENT:
2510
0
      return;
2511
0
  case XML_PARSER_START_TAG:
2512
0
      return;
2513
0
  case XML_PARSER_END_TAG:
2514
0
      return;
2515
0
        case XML_PARSER_EOF:
2516
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2517
0
      return;
2518
0
        case XML_PARSER_PROLOG:
2519
0
  case XML_PARSER_START:
2520
0
  case XML_PARSER_MISC:
2521
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2522
0
      return;
2523
0
  case XML_PARSER_ENTITY_DECL:
2524
0
        case XML_PARSER_CONTENT:
2525
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2526
0
        case XML_PARSER_PI:
2527
0
  case XML_PARSER_SYSTEM_LITERAL:
2528
0
  case XML_PARSER_PUBLIC_LITERAL:
2529
      /* we just ignore it there */
2530
0
      return;
2531
0
        case XML_PARSER_EPILOG:
2532
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2533
0
      return;
2534
0
  case XML_PARSER_ENTITY_VALUE:
2535
      /*
2536
       * NOTE: in the case of entity values, we don't do the
2537
       *       substitution here since we need the literal
2538
       *       entity value to be able to save the internal
2539
       *       subset of the document.
2540
       *       This will be handled by xmlStringDecodeEntities
2541
       */
2542
0
      return;
2543
0
        case XML_PARSER_DTD:
2544
      /*
2545
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2546
       * In the internal DTD subset, parameter-entity references
2547
       * can occur only where markup declarations can occur, not
2548
       * within markup declarations.
2549
       * In that case this is handled in xmlParseMarkupDecl
2550
       */
2551
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2552
0
    return;
2553
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2554
0
    return;
2555
0
            break;
2556
0
        case XML_PARSER_IGNORE:
2557
0
            return;
2558
0
    }
2559
2560
0
    xmlParsePEReference(ctxt);
2561
0
}
2562
2563
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the new size wraps around or the reallocation fails,
 * in which case buffer and buffer##_size are left untouched.
 * n is parenthesized so that a compound expression is added as a whole.
 * The macro declares locals named tmp and new_size, so the buffer passed
 * in must not use either of those names.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2577
2578
/**
2579
 * xmlStringDecodeEntitiesInt:
2580
 * @ctxt:  the parser context
2581
 * @str:  the input string
2582
 * @len: the string length
2583
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2584
 * @end:  an end marker xmlChar, 0 if none
2585
 * @end2:  an end marker xmlChar, 0 if none
2586
 * @end3:  an end marker xmlChar, 0 if none
2587
 * @check:  whether to perform entity checks
2588
 */
2589
static xmlChar *
2590
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2591
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2592
0
                           int check) {
2593
0
    xmlChar *buffer = NULL;
2594
0
    size_t buffer_size = 0;
2595
0
    size_t nbchars = 0;
2596
2597
0
    xmlChar *current = NULL;
2598
0
    xmlChar *rep = NULL;
2599
0
    const xmlChar *last;
2600
0
    xmlEntityPtr ent;
2601
0
    int c,l;
2602
2603
0
    if (str == NULL)
2604
0
        return(NULL);
2605
0
    last = str + len;
2606
2607
0
    if (((ctxt->depth > 40) &&
2608
0
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2609
0
  (ctxt->depth > 100)) {
2610
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2611
0
                       "Maximum entity nesting depth exceeded");
2612
0
  return(NULL);
2613
0
    }
2614
2615
    /*
2616
     * allocate a translation buffer.
2617
     */
2618
0
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2619
0
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2620
0
    if (buffer == NULL) goto mem_error;
2621
2622
    /*
2623
     * OK, loop until we reach one of the ending chars or a size limit.
2624
     * we are operating on already parsed values.
2625
     */
2626
0
    if (str < last)
2627
0
  c = CUR_SCHAR(str, l);
2628
0
    else
2629
0
        c = 0;
2630
0
    while ((c != 0) && (c != end) && /* non input consuming loop */
2631
0
           (c != end2) && (c != end3) &&
2632
0
           (ctxt->instate != XML_PARSER_EOF)) {
2633
2634
0
  if (c == 0) break;
2635
0
        if ((c == '&') && (str[1] == '#')) {
2636
0
      int val = xmlParseStringCharRef(ctxt, &str);
2637
0
      if (val == 0)
2638
0
                goto int_error;
2639
0
      COPY_BUF(0,buffer,nbchars,val);
2640
0
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2641
0
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2642
0
      }
2643
0
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2644
0
      if (xmlParserDebugEntities)
2645
0
    xmlGenericError(xmlGenericErrorContext,
2646
0
      "String decoding Entity Reference: %.30s\n",
2647
0
      str);
2648
0
      ent = xmlParseStringEntityRef(ctxt, &str);
2649
0
      if ((ent != NULL) &&
2650
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2651
0
    if (ent->content != NULL) {
2652
0
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2653
0
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2654
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2655
0
        }
2656
0
    } else {
2657
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2658
0
          "predefined entity has no content\n");
2659
0
                    goto int_error;
2660
0
    }
2661
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
2662
0
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2663
0
                    goto int_error;
2664
2665
0
                if (ent->flags & XML_ENT_EXPANDING) {
2666
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2667
0
                    xmlHaltParser(ctxt);
2668
0
                    ent->content[0] = 0;
2669
0
                    goto int_error;
2670
0
                }
2671
2672
0
                ent->flags |= XML_ENT_EXPANDING;
2673
0
    ctxt->depth++;
2674
0
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2675
0
                        ent->length, what, 0, 0, 0, check);
2676
0
    ctxt->depth--;
2677
0
                ent->flags &= ~XML_ENT_EXPANDING;
2678
2679
0
    if (rep == NULL) {
2680
0
                    ent->content[0] = 0;
2681
0
                    goto int_error;
2682
0
                }
2683
2684
0
                current = rep;
2685
0
                while (*current != 0) { /* non input consuming loop */
2686
0
                    buffer[nbchars++] = *current++;
2687
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2688
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2689
0
                    }
2690
0
                }
2691
0
                xmlFree(rep);
2692
0
                rep = NULL;
2693
0
      } else if (ent != NULL) {
2694
0
    int i = xmlStrlen(ent->name);
2695
0
    const xmlChar *cur = ent->name;
2696
2697
0
    buffer[nbchars++] = '&';
2698
0
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2699
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2700
0
    }
2701
0
    for (;i > 0;i--)
2702
0
        buffer[nbchars++] = *cur++;
2703
0
    buffer[nbchars++] = ';';
2704
0
      }
2705
0
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2706
0
      if (xmlParserDebugEntities)
2707
0
    xmlGenericError(xmlGenericErrorContext,
2708
0
      "String decoding PE Reference: %.30s\n", str);
2709
0
      ent = xmlParseStringPEReference(ctxt, &str);
2710
0
      if (ent != NULL) {
2711
0
                if (ent->content == NULL) {
2712
        /*
2713
         * Note: external parsed entities will not be loaded,
2714
         * it is not required for a non-validating parser to
2715
         * complete external PEReferences coming from the
2716
         * internal subset
2717
         */
2718
0
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2719
0
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2720
0
      (ctxt->validate != 0)) {
2721
0
      xmlLoadEntityContent(ctxt, ent);
2722
0
        } else {
2723
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2724
0
      "not validating will not read content for PE entity %s\n",
2725
0
                          ent->name, NULL);
2726
0
        }
2727
0
    }
2728
2729
0
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2730
0
                    goto int_error;
2731
2732
0
                if (ent->flags & XML_ENT_EXPANDING) {
2733
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2734
0
                    xmlHaltParser(ctxt);
2735
0
                    if (ent->content != NULL)
2736
0
                        ent->content[0] = 0;
2737
0
                    goto int_error;
2738
0
                }
2739
2740
0
                ent->flags |= XML_ENT_EXPANDING;
2741
0
    ctxt->depth++;
2742
0
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2743
0
                        ent->length, what, 0, 0, 0, check);
2744
0
    ctxt->depth--;
2745
0
                ent->flags &= ~XML_ENT_EXPANDING;
2746
2747
0
    if (rep == NULL) {
2748
0
                    if (ent->content != NULL)
2749
0
                        ent->content[0] = 0;
2750
0
                    goto int_error;
2751
0
                }
2752
0
                current = rep;
2753
0
                while (*current != 0) { /* non input consuming loop */
2754
0
                    buffer[nbchars++] = *current++;
2755
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2756
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2757
0
                    }
2758
0
                }
2759
0
                xmlFree(rep);
2760
0
                rep = NULL;
2761
0
      }
2762
0
  } else {
2763
0
      COPY_BUF(l,buffer,nbchars,c);
2764
0
      str += l;
2765
0
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2766
0
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2767
0
      }
2768
0
  }
2769
0
  if (str < last)
2770
0
      c = CUR_SCHAR(str, l);
2771
0
  else
2772
0
      c = 0;
2773
0
    }
2774
0
    buffer[nbchars] = 0;
2775
0
    return(buffer);
2776
2777
0
mem_error:
2778
0
    xmlErrMemory(ctxt, NULL);
2779
0
int_error:
2780
0
    if (rep != NULL)
2781
0
        xmlFree(rep);
2782
0
    if (buffer != NULL)
2783
0
        xmlFree(buffer);
2784
0
    return(NULL);
2785
0
}
2786
2787
/**
2788
 * xmlStringLenDecodeEntities:
2789
 * @ctxt:  the parser context
2790
 * @str:  the input string
2791
 * @len: the string length
2792
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2793
 * @end:  an end marker xmlChar, 0 if none
2794
 * @end2:  an end marker xmlChar, 0 if none
2795
 * @end3:  an end marker xmlChar, 0 if none
2796
 *
2797
 * DEPRECATED: Internal function, don't use.
2798
 *
2799
 * Takes an entity string content and processes it to do the adequate substitutions.
2800
 *
2801
 * [67] Reference ::= EntityRef | CharRef
2802
 *
2803
 * [69] PEReference ::= '%' Name ';'
2804
 *
2805
 * Returns A newly allocated string with the substitution done. The caller
2806
 *      must deallocate it !
2807
 */
2808
xmlChar *
2809
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2810
                           int what, xmlChar end, xmlChar  end2,
2811
0
                           xmlChar end3) {
2812
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2813
0
        return(NULL);
2814
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2815
0
                                      end, end2, end3, 0));
2816
0
}
2817
2818
/**
2819
 * xmlStringDecodeEntities:
2820
 * @ctxt:  the parser context
2821
 * @str:  the input string
2822
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2823
 * @end:  an end marker xmlChar, 0 if none
2824
 * @end2:  an end marker xmlChar, 0 if none
2825
 * @end3:  an end marker xmlChar, 0 if none
2826
 *
2827
 * DEPRECATED: Internal function, don't use.
2828
 *
2829
 * Takes an entity string content and processes it to do the adequate substitutions.
2830
 *
2831
 * [67] Reference ::= EntityRef | CharRef
2832
 *
2833
 * [69] PEReference ::= '%' Name ';'
2834
 *
2835
 * Returns A newly allocated string with the substitution done. The caller
2836
 *      must deallocate it !
2837
 */
2838
xmlChar *
2839
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2840
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2841
0
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2842
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2843
0
                                      end, end2, end3, 0));
2844
0
}
2845
2846
/************************************************************************
2847
 *                  *
2848
 *    Commodity functions, cleanup needed ?     *
2849
 *                  *
2850
 ************************************************************************/
2851
2852
/**
2853
 * areBlanks:
2854
 * @ctxt:  an XML parser context
2855
 * @str:  a xmlChar *
2856
 * @len:  the size of @str
2857
 * @blank_chars: we know the chars are blanks
2858
 *
2859
 * Is this a sequence of blank chars that one can ignore ?
2860
 *
2861
 * Returns 1 if ignorable 0 otherwise.
2862
 */
2863
2864
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2865
115k
                     int blank_chars) {
2866
115k
    int i, ret;
2867
115k
    xmlNodePtr lastChild;
2868
2869
    /*
2870
     * Don't spend time trying to differentiate them, the same callback is
2871
     * used !
2872
     */
2873
115k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2874
115k
  return(0);
2875
2876
    /*
2877
     * Check for xml:space value.
2878
     */
2879
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2880
0
        (*(ctxt->space) == -2))
2881
0
  return(0);
2882
2883
    /*
2884
     * Check that the string is made of blanks
2885
     */
2886
0
    if (blank_chars == 0) {
2887
0
  for (i = 0;i < len;i++)
2888
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2889
0
    }
2890
2891
    /*
2892
     * Look if the element is mixed content in the DTD if available
2893
     */
2894
0
    if (ctxt->node == NULL) return(0);
2895
0
    if (ctxt->myDoc != NULL) {
2896
0
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2897
0
        if (ret == 0) return(1);
2898
0
        if (ret == 1) return(0);
2899
0
    }
2900
2901
    /*
2902
     * Otherwise, heuristic :-\
2903
     */
2904
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2905
0
    if ((ctxt->node->children == NULL) &&
2906
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2907
2908
0
    lastChild = xmlGetLastChild(ctxt->node);
2909
0
    if (lastChild == NULL) {
2910
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2911
0
            (ctxt->node->content != NULL)) return(0);
2912
0
    } else if (xmlNodeIsText(lastChild))
2913
0
        return(0);
2914
0
    else if ((ctxt->node->children != NULL) &&
2915
0
             (xmlNodeIsText(ctxt->node->children)))
2916
0
        return(0);
2917
0
    return(1);
2918
0
}
2919
2920
/************************************************************************
2921
 *                  *
2922
 *    Extra stuff for namespace support     *
2923
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2924
 *                  *
2925
 ************************************************************************/
2926
2927
/**
2928
 * xmlSplitQName:
2929
 * @ctxt:  an XML parser context
2930
 * @name:  the qualified name string to split
2931
 * @prefix:  a xmlChar **
2932
 *
2933
 * parse a UTF-8 encoded XML qualified name string
2934
 *
2935
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2936
 *
2937
 * [NS 6] Prefix ::= NCName
2938
 *
2939
 * [NS 7] LocalPart ::= NCName
2940
 *
2941
 * Returns the local part, and prefix is updated
2942
 *   to get the Prefix if any.
2943
 */
2944
2945
xmlChar *
2946
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2947
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2948
0
    xmlChar *buffer = NULL;
2949
0
    int len = 0;
2950
0
    int max = XML_MAX_NAMELEN;
2951
0
    xmlChar *ret = NULL;
2952
0
    const xmlChar *cur = name;
2953
0
    int c;
2954
2955
0
    if (prefix == NULL) return(NULL);
2956
0
    *prefix = NULL;
2957
2958
0
    if (cur == NULL) return(NULL);
2959
2960
#ifndef XML_XML_NAMESPACE
2961
    /* xml: prefix is not really a namespace */
2962
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2963
        (cur[2] == 'l') && (cur[3] == ':'))
2964
  return(xmlStrdup(name));
2965
#endif
2966
2967
    /* nasty but well-formed */
2968
0
    if (cur[0] == ':')
2969
0
  return(xmlStrdup(name));
2970
2971
0
    c = *cur++;
2972
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2973
0
  buf[len++] = c;
2974
0
  c = *cur++;
2975
0
    }
2976
0
    if (len >= max) {
2977
  /*
2978
   * Okay someone managed to make a huge name, so he's ready to pay
2979
   * for the processing speed.
2980
   */
2981
0
  max = len * 2;
2982
2983
0
  buffer = (xmlChar *) xmlMallocAtomic(max);
2984
0
  if (buffer == NULL) {
2985
0
      xmlErrMemory(ctxt, NULL);
2986
0
      return(NULL);
2987
0
  }
2988
0
  memcpy(buffer, buf, len);
2989
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2990
0
      if (len + 10 > max) {
2991
0
          xmlChar *tmp;
2992
2993
0
    max *= 2;
2994
0
    tmp = (xmlChar *) xmlRealloc(buffer, max);
2995
0
    if (tmp == NULL) {
2996
0
        xmlFree(buffer);
2997
0
        xmlErrMemory(ctxt, NULL);
2998
0
        return(NULL);
2999
0
    }
3000
0
    buffer = tmp;
3001
0
      }
3002
0
      buffer[len++] = c;
3003
0
      c = *cur++;
3004
0
  }
3005
0
  buffer[len] = 0;
3006
0
    }
3007
3008
0
    if ((c == ':') && (*cur == 0)) {
3009
0
        if (buffer != NULL)
3010
0
      xmlFree(buffer);
3011
0
  *prefix = NULL;
3012
0
  return(xmlStrdup(name));
3013
0
    }
3014
3015
0
    if (buffer == NULL)
3016
0
  ret = xmlStrndup(buf, len);
3017
0
    else {
3018
0
  ret = buffer;
3019
0
  buffer = NULL;
3020
0
  max = XML_MAX_NAMELEN;
3021
0
    }
3022
3023
3024
0
    if (c == ':') {
3025
0
  c = *cur;
3026
0
        *prefix = ret;
3027
0
  if (c == 0) {
3028
0
      return(xmlStrndup(BAD_CAST "", 0));
3029
0
  }
3030
0
  len = 0;
3031
3032
  /*
3033
   * Check that the first character is proper to start
3034
   * a new name
3035
   */
3036
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3037
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3038
0
        (c == '_') || (c == ':'))) {
3039
0
      int l;
3040
0
      int first = CUR_SCHAR(cur, l);
3041
3042
0
      if (!IS_LETTER(first) && (first != '_')) {
3043
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3044
0
          "Name %s is not XML Namespace compliant\n",
3045
0
          name);
3046
0
      }
3047
0
  }
3048
0
  cur++;
3049
3050
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3051
0
      buf[len++] = c;
3052
0
      c = *cur++;
3053
0
  }
3054
0
  if (len >= max) {
3055
      /*
3056
       * Okay someone managed to make a huge name, so he's ready to pay
3057
       * for the processing speed.
3058
       */
3059
0
      max = len * 2;
3060
3061
0
      buffer = (xmlChar *) xmlMallocAtomic(max);
3062
0
      if (buffer == NULL) {
3063
0
          xmlErrMemory(ctxt, NULL);
3064
0
    return(NULL);
3065
0
      }
3066
0
      memcpy(buffer, buf, len);
3067
0
      while (c != 0) { /* tested bigname2.xml */
3068
0
    if (len + 10 > max) {
3069
0
        xmlChar *tmp;
3070
3071
0
        max *= 2;
3072
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3073
0
        if (tmp == NULL) {
3074
0
      xmlErrMemory(ctxt, NULL);
3075
0
      xmlFree(buffer);
3076
0
      return(NULL);
3077
0
        }
3078
0
        buffer = tmp;
3079
0
    }
3080
0
    buffer[len++] = c;
3081
0
    c = *cur++;
3082
0
      }
3083
0
      buffer[len] = 0;
3084
0
  }
3085
3086
0
  if (buffer == NULL)
3087
0
      ret = xmlStrndup(buf, len);
3088
0
  else {
3089
0
      ret = buffer;
3090
0
  }
3091
0
    }
3092
3093
0
    return(ret);
3094
0
}
3095
3096
/************************************************************************
3097
 *                  *
3098
 *      The parser itself       *
3099
 *  Relates to http://www.w3.org/TR/REC-xml       *
3100
 *                  *
3101
 ************************************************************************/
3102
3103
/************************************************************************
3104
 *                  *
3105
 *  Routines to parse Name, NCName and NmToken      *
3106
 *                  *
3107
 ************************************************************************/
3108
#ifdef DEBUG
3109
static unsigned long nbParseName = 0;
3110
static unsigned long nbParseNmToken = 0;
3111
static unsigned long nbParseNCName = 0;
3112
static unsigned long nbParseNCNameComplex = 0;
3113
static unsigned long nbParseNameComplex = 0;
3114
static unsigned long nbParseStringName = 0;
3115
#endif
3116
3117
/*
3118
 * The two following functions are related to the change of accepted
3119
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3120
 * They correspond to the modified production [4] and the new production [4a]
3121
 * changes in that revision. Also note that the macros used for the
3122
 * productions Letter, Digit, CombiningChar and Extender are not needed
3123
 * anymore.
3124
 * We still keep compatibility with pre-revision-5 parsing semantics if the
3125
 * new XML_PARSE_OLD10 option is given to the parser.
3126
 */
3127
static int
3128
21.3k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3129
21.3k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3130
        /*
3131
   * Use the new checks of productions [4], [4a] and [5] of the
3132
   * Update 5 of XML-1.0
3133
   */
3134
21.3k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3135
21.3k
      (((c >= 'a') && (c <= 'z')) ||
3136
21.3k
       ((c >= 'A') && (c <= 'Z')) ||
3137
21.3k
       (c == '_') || (c == ':') ||
3138
21.3k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3139
21.3k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3140
21.3k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3141
21.3k
       ((c >= 0x370) && (c <= 0x37D)) ||
3142
21.3k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3143
21.3k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3144
21.3k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3145
21.3k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3146
21.3k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3147
21.3k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3148
21.3k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3149
21.3k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3150
10.8k
      return(1);
3151
21.3k
    } else {
3152
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3153
0
      return(1);
3154
0
    }
3155
10.4k
    return(0);
3156
21.3k
}
3157
3158
static int
3159
3.14M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3160
3.14M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3161
        /*
3162
   * Use the new checks of productions [4], [4a] and [5] of the
3163
   * Update 5 of XML-1.0
3164
   */
3165
3.14M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3166
3.14M
      (((c >= 'a') && (c <= 'z')) ||
3167
3.14M
       ((c >= 'A') && (c <= 'Z')) ||
3168
3.14M
       ((c >= '0') && (c <= '9')) || /* !start */
3169
3.14M
       (c == '_') || (c == ':') ||
3170
3.14M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3171
3.14M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3172
3.14M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3173
3.14M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3174
3.14M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3175
3.14M
       ((c >= 0x370) && (c <= 0x37D)) ||
3176
3.14M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3177
3.14M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3178
3.14M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3179
3.14M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3180
3.14M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3181
3.14M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3182
3.14M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3183
3.14M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3184
3.14M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3185
3.14M
       return(1);
3186
3.14M
    } else {
3187
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3188
0
            (c == '.') || (c == '-') ||
3189
0
      (c == '_') || (c == ':') ||
3190
0
      (IS_COMBINING(c)) ||
3191
0
      (IS_EXTENDER(c)))
3192
0
      return(1);
3193
0
    }
3194
8.09k
    return(0);
3195
3.14M
}
3196
3197
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3198
                                          int *len, int *alloc, int normalize);
3199
3200
static const xmlChar *
3201
21.3k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3202
21.3k
    int len = 0, l;
3203
21.3k
    int c;
3204
21.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3205
0
                    XML_MAX_TEXT_LENGTH :
3206
21.3k
                    XML_MAX_NAME_LENGTH;
3207
3208
#ifdef DEBUG
3209
    nbParseNameComplex++;
3210
#endif
3211
3212
    /*
3213
     * Handler for more complex cases
3214
     */
3215
21.3k
    c = CUR_CHAR(l);
3216
21.3k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3217
        /*
3218
   * Use the new checks of productions [4], [4a] and [5] of the
3219
   * Update 5 of XML-1.0
3220
   */
3221
21.3k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3222
21.3k
      (!(((c >= 'a') && (c <= 'z')) ||
3223
21.3k
         ((c >= 'A') && (c <= 'Z')) ||
3224
21.3k
         (c == '_') || (c == ':') ||
3225
21.3k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3226
21.3k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3227
21.3k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3228
21.3k
         ((c >= 0x370) && (c <= 0x37D)) ||
3229
21.3k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3230
21.3k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3231
21.3k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3232
21.3k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3233
21.3k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3234
21.3k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3235
21.3k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3236
21.3k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3237
9.49k
      return(NULL);
3238
9.49k
  }
3239
11.8k
  len += l;
3240
11.8k
  NEXTL(l);
3241
11.8k
  c = CUR_CHAR(l);
3242
1.92M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3243
1.92M
         (((c >= 'a') && (c <= 'z')) ||
3244
1.92M
          ((c >= 'A') && (c <= 'Z')) ||
3245
1.92M
          ((c >= '0') && (c <= '9')) || /* !start */
3246
1.92M
          (c == '_') || (c == ':') ||
3247
1.92M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3248
1.92M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3249
1.92M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3250
1.92M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3251
1.92M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3252
1.92M
          ((c >= 0x370) && (c <= 0x37D)) ||
3253
1.92M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3254
1.92M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3255
1.92M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3256
1.92M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3257
1.92M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3258
1.92M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3259
1.92M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3260
1.92M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3261
1.92M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3262
1.92M
    )) {
3263
1.91M
            if (len <= INT_MAX - l)
3264
1.91M
          len += l;
3265
1.91M
      NEXTL(l);
3266
1.91M
      c = CUR_CHAR(l);
3267
1.91M
  }
3268
11.8k
    } else {
3269
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3270
0
      (!IS_LETTER(c) && (c != '_') &&
3271
0
       (c != ':'))) {
3272
0
      return(NULL);
3273
0
  }
3274
0
  len += l;
3275
0
  NEXTL(l);
3276
0
  c = CUR_CHAR(l);
3277
3278
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3279
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3280
0
    (c == '.') || (c == '-') ||
3281
0
    (c == '_') || (c == ':') ||
3282
0
    (IS_COMBINING(c)) ||
3283
0
    (IS_EXTENDER(c)))) {
3284
0
            if (len <= INT_MAX - l)
3285
0
          len += l;
3286
0
      NEXTL(l);
3287
0
      c = CUR_CHAR(l);
3288
0
  }
3289
0
    }
3290
11.8k
    if (ctxt->instate == XML_PARSER_EOF)
3291
0
        return(NULL);
3292
11.8k
    if (len > maxLength) {
3293
19
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3294
19
        return(NULL);
3295
19
    }
3296
11.8k
    if (ctxt->input->cur - ctxt->input->base < len) {
3297
        /*
3298
         * There were a couple of bugs where PERefs led to a change
3299
         * of the buffer. Check the buffer size to avoid passing an invalid
3300
         * pointer to xmlDictLookup.
3301
         */
3302
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3303
0
                    "unexpected change of input buffer");
3304
0
        return (NULL);
3305
0
    }
3306
11.8k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3307
0
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3308
11.8k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3309
11.8k
}
3310
3311
/**
3312
 * xmlParseName:
3313
 * @ctxt:  an XML parser context
3314
 *
3315
 * DEPRECATED: Internal function, don't use.
3316
 *
3317
 * parse an XML name.
3318
 *
3319
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3320
 *                  CombiningChar | Extender
3321
 *
3322
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3323
 *
3324
 * [6] Names ::= Name (#x20 Name)*
3325
 *
3326
 * Returns the Name parsed or NULL
3327
 */
3328
3329
const xmlChar *
3330
23.7k
xmlParseName(xmlParserCtxtPtr ctxt) {
3331
23.7k
    const xmlChar *in;
3332
23.7k
    const xmlChar *ret;
3333
23.7k
    size_t count = 0;
3334
23.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3335
0
                       XML_MAX_TEXT_LENGTH :
3336
23.7k
                       XML_MAX_NAME_LENGTH;
3337
3338
23.7k
    GROW;
3339
23.7k
    if (ctxt->instate == XML_PARSER_EOF)
3340
0
        return(NULL);
3341
3342
#ifdef DEBUG
3343
    nbParseName++;
3344
#endif
3345
3346
    /*
3347
     * Accelerator for simple ASCII names
3348
     */
3349
23.7k
    in = ctxt->input->cur;
3350
23.7k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3351
23.7k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3352
23.7k
  (*in == '_') || (*in == ':')) {
3353
2.55k
  in++;
3354
3.99M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3355
3.99M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3356
3.99M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3357
3.99M
         (*in == '_') || (*in == '-') ||
3358
3.99M
         (*in == ':') || (*in == '.'))
3359
3.99M
      in++;
3360
2.55k
  if ((*in > 0) && (*in < 0x80)) {
3361
2.37k
      count = in - ctxt->input->cur;
3362
2.37k
            if (count > maxLength) {
3363
28
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3364
28
                return(NULL);
3365
28
            }
3366
2.34k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3367
2.34k
      ctxt->input->cur = in;
3368
2.34k
      ctxt->input->col += count;
3369
2.34k
      if (ret == NULL)
3370
0
          xmlErrMemory(ctxt, NULL);
3371
2.34k
      return(ret);
3372
2.37k
  }
3373
2.55k
    }
3374
    /* accelerator for special cases */
3375
21.3k
    return(xmlParseNameComplex(ctxt));
3376
23.7k
}
3377
3378
static const xmlChar *
3379
24.9k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3380
24.9k
    int len = 0, l;
3381
24.9k
    int c;
3382
24.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3383
0
                    XML_MAX_TEXT_LENGTH :
3384
24.9k
                    XML_MAX_NAME_LENGTH;
3385
24.9k
    size_t startPosition = 0;
3386
3387
#ifdef DEBUG
3388
    nbParseNCNameComplex++;
3389
#endif
3390
3391
    /*
3392
     * Handler for more complex cases
3393
     */
3394
24.9k
    startPosition = CUR_PTR - BASE_PTR;
3395
24.9k
    c = CUR_CHAR(l);
3396
24.9k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3397
24.9k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3398
14.7k
  return(NULL);
3399
14.7k
    }
3400
3401
2.90M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3402
2.90M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3403
2.89M
        if (len <= INT_MAX - l)
3404
2.89M
      len += l;
3405
2.89M
  NEXTL(l);
3406
2.89M
  c = CUR_CHAR(l);
3407
2.89M
    }
3408
10.1k
    if (ctxt->instate == XML_PARSER_EOF)
3409
0
        return(NULL);
3410
10.1k
    if (len > maxLength) {
3411
9
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3412
9
        return(NULL);
3413
9
    }
3414
10.1k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3415
10.1k
}
3416
3417
/**
3418
 * xmlParseNCName:
3419
 * @ctxt:  an XML parser context
3420
 * @len:  length of the string parsed
3421
 *
3422
 * parse an XML non-colonized name (NCName).
3423
 *
3424
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3425
 *                      CombiningChar | Extender
3426
 *
3427
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3428
 *
3429
 * Returns the Name parsed or NULL
3430
 */
3431
3432
static const xmlChar *
3433
71.7k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3434
71.7k
    const xmlChar *in, *e;
3435
71.7k
    const xmlChar *ret;
3436
71.7k
    size_t count = 0;
3437
71.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3438
0
                       XML_MAX_TEXT_LENGTH :
3439
71.7k
                       XML_MAX_NAME_LENGTH;
3440
3441
#ifdef DEBUG
3442
    nbParseNCName++;
3443
#endif
3444
3445
    /*
3446
     * Accelerator for simple ASCII names
3447
     */
3448
71.7k
    in = ctxt->input->cur;
3449
71.7k
    e = ctxt->input->end;
3450
71.7k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3451
71.7k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3452
71.7k
   (*in == '_')) && (in < e)) {
3453
48.8k
  in++;
3454
2.80M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3455
2.80M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3456
2.80M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3457
2.80M
          (*in == '_') || (*in == '-') ||
3458
2.80M
          (*in == '.')) && (in < e))
3459
2.75M
      in++;
3460
48.8k
  if (in >= e)
3461
2
      goto complex;
3462
48.8k
  if ((*in > 0) && (*in < 0x80)) {
3463
46.8k
      count = in - ctxt->input->cur;
3464
46.8k
            if (count > maxLength) {
3465
7
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3466
7
                return(NULL);
3467
7
            }
3468
46.8k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3469
46.8k
      ctxt->input->cur = in;
3470
46.8k
      ctxt->input->col += count;
3471
46.8k
      if (ret == NULL) {
3472
0
          xmlErrMemory(ctxt, NULL);
3473
0
      }
3474
46.8k
      return(ret);
3475
46.8k
  }
3476
48.8k
    }
3477
24.9k
complex:
3478
24.9k
    return(xmlParseNCNameComplex(ctxt));
3479
71.7k
}
3480
3481
/**
3482
 * xmlParseNameAndCompare:
3483
 * @ctxt:  an XML parser context
3484
 *
3485
 * parse an XML name and compares for match
3486
 * (specialized for endtag parsing)
3487
 *
3488
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3489
 * and the name for mismatch
3490
 */
3491
3492
static const xmlChar *
3493
168
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3494
168
    register const xmlChar *cmp = other;
3495
168
    register const xmlChar *in;
3496
168
    const xmlChar *ret;
3497
3498
168
    GROW;
3499
168
    if (ctxt->instate == XML_PARSER_EOF)
3500
0
        return(NULL);
3501
3502
168
    in = ctxt->input->cur;
3503
262
    while (*in != 0 && *in == *cmp) {
3504
94
  ++in;
3505
94
  ++cmp;
3506
94
    }
3507
168
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3508
  /* success */
3509
53
  ctxt->input->col += in - ctxt->input->cur;
3510
53
  ctxt->input->cur = in;
3511
53
  return (const xmlChar*) 1;
3512
53
    }
3513
    /* failure (or end of input buffer), check with full function */
3514
115
    ret = xmlParseName (ctxt);
3515
    /* strings coming from the dictionary direct compare possible */
3516
115
    if (ret == other) {
3517
2
  return (const xmlChar*) 1;
3518
2
    }
3519
113
    return ret;
3520
115
}
3521
3522
/**
3523
 * xmlParseStringName:
3524
 * @ctxt:  an XML parser context
3525
 * @str:  a pointer to the string pointer (IN/OUT)
3526
 *
3527
 * parse an XML name.
3528
 *
3529
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3530
 *                  CombiningChar | Extender
3531
 *
3532
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3533
 *
3534
 * [6] Names ::= Name (#x20 Name)*
3535
 *
3536
 * Returns the Name parsed or NULL. The @str pointer
3537
 * is updated to the current location in the string.
3538
 */
3539
3540
static xmlChar *
3541
0
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3542
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
3543
0
    const xmlChar *cur = *str;
3544
0
    int len = 0, l;
3545
0
    int c;
3546
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3547
0
                    XML_MAX_TEXT_LENGTH :
3548
0
                    XML_MAX_NAME_LENGTH;
3549
3550
#ifdef DEBUG
3551
    nbParseStringName++;
3552
#endif
3553
3554
0
    c = CUR_SCHAR(cur, l);
3555
0
    if (!xmlIsNameStartChar(ctxt, c)) {
3556
0
  return(NULL);
3557
0
    }
3558
3559
0
    COPY_BUF(l,buf,len,c);
3560
0
    cur += l;
3561
0
    c = CUR_SCHAR(cur, l);
3562
0
    while (xmlIsNameChar(ctxt, c)) {
3563
0
  COPY_BUF(l,buf,len,c);
3564
0
  cur += l;
3565
0
  c = CUR_SCHAR(cur, l);
3566
0
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3567
      /*
3568
       * Okay someone managed to make a huge name, so he's ready to pay
3569
       * for the processing speed.
3570
       */
3571
0
      xmlChar *buffer;
3572
0
      int max = len * 2;
3573
3574
0
      buffer = (xmlChar *) xmlMallocAtomic(max);
3575
0
      if (buffer == NULL) {
3576
0
          xmlErrMemory(ctxt, NULL);
3577
0
    return(NULL);
3578
0
      }
3579
0
      memcpy(buffer, buf, len);
3580
0
      while (xmlIsNameChar(ctxt, c)) {
3581
0
    if (len + 10 > max) {
3582
0
        xmlChar *tmp;
3583
3584
0
        max *= 2;
3585
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3586
0
        if (tmp == NULL) {
3587
0
      xmlErrMemory(ctxt, NULL);
3588
0
      xmlFree(buffer);
3589
0
      return(NULL);
3590
0
        }
3591
0
        buffer = tmp;
3592
0
    }
3593
0
    COPY_BUF(l,buffer,len,c);
3594
0
    cur += l;
3595
0
    c = CUR_SCHAR(cur, l);
3596
0
                if (len > maxLength) {
3597
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3598
0
                    xmlFree(buffer);
3599
0
                    return(NULL);
3600
0
                }
3601
0
      }
3602
0
      buffer[len] = 0;
3603
0
      *str = cur;
3604
0
      return(buffer);
3605
0
  }
3606
0
    }
3607
0
    if (len > maxLength) {
3608
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3609
0
        return(NULL);
3610
0
    }
3611
0
    *str = cur;
3612
0
    return(xmlStrndup(buf, len));
3613
0
}
3614
3615
/**
3616
 * xmlParseNmtoken:
3617
 * @ctxt:  an XML parser context
3618
 *
3619
 * DEPRECATED: Internal function, don't use.
3620
 *
3621
 * parse an XML Nmtoken.
3622
 *
3623
 * [7] Nmtoken ::= (NameChar)+
3624
 *
3625
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3626
 *
3627
 * Returns the Nmtoken parsed or NULL
3628
 */
3629
3630
xmlChar *
3631
1.69k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3632
1.69k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3633
1.69k
    int len = 0, l;
3634
1.69k
    int c;
3635
1.69k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3636
0
                    XML_MAX_TEXT_LENGTH :
3637
1.69k
                    XML_MAX_NAME_LENGTH;
3638
3639
#ifdef DEBUG
3640
    nbParseNmToken++;
3641
#endif
3642
3643
1.69k
    c = CUR_CHAR(l);
3644
3645
3.81k
    while (xmlIsNameChar(ctxt, c)) {
3646
2.14k
  COPY_BUF(l,buf,len,c);
3647
2.14k
  NEXTL(l);
3648
2.14k
  c = CUR_CHAR(l);
3649
2.14k
  if (len >= XML_MAX_NAMELEN) {
3650
      /*
3651
       * Okay someone managed to make a huge token, so he's ready to pay
3652
       * for the processing speed.
3653
       */
3654
17
      xmlChar *buffer;
3655
17
      int max = len * 2;
3656
3657
17
      buffer = (xmlChar *) xmlMallocAtomic(max);
3658
17
      if (buffer == NULL) {
3659
0
          xmlErrMemory(ctxt, NULL);
3660
0
    return(NULL);
3661
0
      }
3662
17
      memcpy(buffer, buf, len);
3663
241k
      while (xmlIsNameChar(ctxt, c)) {
3664
240k
    if (len + 10 > max) {
3665
87
        xmlChar *tmp;
3666
3667
87
        max *= 2;
3668
87
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3669
87
        if (tmp == NULL) {
3670
0
      xmlErrMemory(ctxt, NULL);
3671
0
      xmlFree(buffer);
3672
0
      return(NULL);
3673
0
        }
3674
87
        buffer = tmp;
3675
87
    }
3676
240k
    COPY_BUF(l,buffer,len,c);
3677
240k
                if (len > maxLength) {
3678
2
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3679
2
                    xmlFree(buffer);
3680
2
                    return(NULL);
3681
2
                }
3682
240k
    NEXTL(l);
3683
240k
    c = CUR_CHAR(l);
3684
240k
      }
3685
15
      buffer[len] = 0;
3686
15
            if (ctxt->instate == XML_PARSER_EOF) {
3687
2
                xmlFree(buffer);
3688
2
                return(NULL);
3689
2
            }
3690
13
      return(buffer);
3691
15
  }
3692
2.14k
    }
3693
1.67k
    if (ctxt->instate == XML_PARSER_EOF)
3694
0
        return(NULL);
3695
1.67k
    if (len == 0)
3696
1.56k
        return(NULL);
3697
112
    if (len > maxLength) {
3698
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3699
0
        return(NULL);
3700
0
    }
3701
112
    return(xmlStrndup(buf, len));
3702
112
}
3703
3704
/**
3705
 * xmlParseEntityValue:
3706
 * @ctxt:  an XML parser context
3707
 * @orig:  if non-NULL store a copy of the original entity value
3708
 *
3709
 * DEPRECATED: Internal function, don't use.
3710
 *
3711
 * parse a value for ENTITY declarations
3712
 *
3713
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3714
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3715
 *
3716
 * Returns the EntityValue parsed with reference substituted or NULL
3717
 */
3718
3719
xmlChar *
3720
0
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3721
0
    xmlChar *buf = NULL;
3722
0
    int len = 0;
3723
0
    int size = XML_PARSER_BUFFER_SIZE;
3724
0
    int c, l;
3725
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3726
0
                    XML_MAX_HUGE_LENGTH :
3727
0
                    XML_MAX_TEXT_LENGTH;
3728
0
    xmlChar stop;
3729
0
    xmlChar *ret = NULL;
3730
0
    const xmlChar *cur = NULL;
3731
0
    xmlParserInputPtr input;
3732
3733
0
    if (RAW == '"') stop = '"';
3734
0
    else if (RAW == '\'') stop = '\'';
3735
0
    else {
3736
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3737
0
  return(NULL);
3738
0
    }
3739
0
    buf = (xmlChar *) xmlMallocAtomic(size);
3740
0
    if (buf == NULL) {
3741
0
  xmlErrMemory(ctxt, NULL);
3742
0
  return(NULL);
3743
0
    }
3744
3745
    /*
3746
     * The content of the entity definition is copied in a buffer.
3747
     */
3748
3749
0
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3750
0
    input = ctxt->input;
3751
0
    GROW;
3752
0
    if (ctxt->instate == XML_PARSER_EOF)
3753
0
        goto error;
3754
0
    NEXT;
3755
0
    c = CUR_CHAR(l);
3756
    /*
3757
     * NOTE: 4.4.5 Included in Literal
3758
     * When a parameter entity reference appears in a literal entity
3759
     * value, ... a single or double quote character in the replacement
3760
     * text is always treated as a normal data character and will not
3761
     * terminate the literal.
3762
     * In practice it means we stop the loop only when back at parsing
3763
     * the initial entity and the quote is found
3764
     */
3765
0
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3766
0
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3767
0
  if (len + 5 >= size) {
3768
0
      xmlChar *tmp;
3769
3770
0
      size *= 2;
3771
0
      tmp = (xmlChar *) xmlRealloc(buf, size);
3772
0
      if (tmp == NULL) {
3773
0
    xmlErrMemory(ctxt, NULL);
3774
0
                goto error;
3775
0
      }
3776
0
      buf = tmp;
3777
0
  }
3778
0
  COPY_BUF(l,buf,len,c);
3779
0
  NEXTL(l);
3780
3781
0
  GROW;
3782
0
  c = CUR_CHAR(l);
3783
0
  if (c == 0) {
3784
0
      GROW;
3785
0
      c = CUR_CHAR(l);
3786
0
  }
3787
3788
0
        if (len > maxLength) {
3789
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3790
0
                           "entity value too long\n");
3791
0
            goto error;
3792
0
        }
3793
0
    }
3794
0
    buf[len] = 0;
3795
0
    if (ctxt->instate == XML_PARSER_EOF)
3796
0
        goto error;
3797
0
    if (c != stop) {
3798
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3799
0
        goto error;
3800
0
    }
3801
0
    NEXT;
3802
3803
    /*
3804
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3805
     * reference constructs. Note Charref will be handled in
3806
     * xmlStringDecodeEntities()
3807
     */
3808
0
    cur = buf;
3809
0
    while (*cur != 0) { /* non input consuming */
3810
0
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3811
0
      xmlChar *name;
3812
0
      xmlChar tmp = *cur;
3813
0
            int nameOk = 0;
3814
3815
0
      cur++;
3816
0
      name = xmlParseStringName(ctxt, &cur);
3817
0
            if (name != NULL) {
3818
0
                nameOk = 1;
3819
0
                xmlFree(name);
3820
0
            }
3821
0
            if ((nameOk == 0) || (*cur != ';')) {
3822
0
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3823
0
      "EntityValue: '%c' forbidden except for entities references\n",
3824
0
                            tmp);
3825
0
                goto error;
3826
0
      }
3827
0
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3828
0
    (ctxt->inputNr == 1)) {
3829
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3830
0
                goto error;
3831
0
      }
3832
0
      if (*cur == 0)
3833
0
          break;
3834
0
  }
3835
0
  cur++;
3836
0
    }
3837
3838
    /*
3839
     * Then PEReference entities are substituted.
3840
     *
3841
     * NOTE: 4.4.7 Bypassed
3842
     * When a general entity reference appears in the EntityValue in
3843
     * an entity declaration, it is bypassed and left as is.
3844
     * so XML_SUBSTITUTE_REF is not set here.
3845
     */
3846
0
    ++ctxt->depth;
3847
0
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3848
0
                                     0, 0, 0, /* check */ 1);
3849
0
    --ctxt->depth;
3850
3851
0
    if (orig != NULL) {
3852
0
        *orig = buf;
3853
0
        buf = NULL;
3854
0
    }
3855
3856
0
error:
3857
0
    if (buf != NULL)
3858
0
        xmlFree(buf);
3859
0
    return(ret);
3860
0
}
3861
3862
/**
3863
 * xmlParseAttValueComplex:
3864
 * @ctxt:  an XML parser context
3865
 * @len:   the resulting attribute len
3866
 * @normalize:  whether to apply the inner normalization
3867
 *
3868
 * parse a value for an attribute, this is the fallback function
3869
 * of xmlParseAttValue() when the attribute parsing requires handling
3870
 * of non-ASCII characters, or normalization compaction.
3871
 *
3872
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3873
 */
3874
static xmlChar *
3875
5.48k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3876
5.48k
    xmlChar limit = 0;
3877
5.48k
    xmlChar *buf = NULL;
3878
5.48k
    xmlChar *rep = NULL;
3879
5.48k
    size_t len = 0;
3880
5.48k
    size_t buf_size = 0;
3881
5.48k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3882
0
                       XML_MAX_HUGE_LENGTH :
3883
5.48k
                       XML_MAX_TEXT_LENGTH;
3884
5.48k
    int c, l, in_space = 0;
3885
5.48k
    xmlChar *current = NULL;
3886
5.48k
    xmlEntityPtr ent;
3887
3888
5.48k
    if (NXT(0) == '"') {
3889
15
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3890
15
  limit = '"';
3891
15
        NEXT;
3892
5.47k
    } else if (NXT(0) == '\'') {
3893
5.47k
  limit = '\'';
3894
5.47k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3895
5.47k
        NEXT;
3896
5.47k
    } else {
3897
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898
0
  return(NULL);
3899
0
    }
3900
3901
    /*
3902
     * allocate a translation buffer.
3903
     */
3904
5.48k
    buf_size = XML_PARSER_BUFFER_SIZE;
3905
5.48k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3906
5.48k
    if (buf == NULL) goto mem_error;
3907
3908
    /*
3909
     * OK loop until we reach one of the ending char or a size limit.
3910
     */
3911
5.48k
    c = CUR_CHAR(l);
3912
3.98M
    while (((NXT(0) != limit) && /* checked */
3913
3.98M
            (IS_CHAR(c)) && (c != '<')) &&
3914
3.98M
            (ctxt->instate != XML_PARSER_EOF)) {
3915
3.97M
  if (c == '&') {
3916
1.03k
      in_space = 0;
3917
1.03k
      if (NXT(1) == '#') {
3918
0
    int val = xmlParseCharRef(ctxt);
3919
3920
0
    if (val == '&') {
3921
0
        if (ctxt->replaceEntities) {
3922
0
      if (len + 10 > buf_size) {
3923
0
          growBuffer(buf, 10);
3924
0
      }
3925
0
      buf[len++] = '&';
3926
0
        } else {
3927
      /*
3928
       * The reparsing will be done in xmlStringGetNodeList()
3929
       * called by the attribute() function in SAX.c
3930
       */
3931
0
      if (len + 10 > buf_size) {
3932
0
          growBuffer(buf, 10);
3933
0
      }
3934
0
      buf[len++] = '&';
3935
0
      buf[len++] = '#';
3936
0
      buf[len++] = '3';
3937
0
      buf[len++] = '8';
3938
0
      buf[len++] = ';';
3939
0
        }
3940
0
    } else if (val != 0) {
3941
0
        if (len + 10 > buf_size) {
3942
0
      growBuffer(buf, 10);
3943
0
        }
3944
0
        len += xmlCopyChar(0, &buf[len], val);
3945
0
    }
3946
1.03k
      } else {
3947
1.03k
    ent = xmlParseEntityRef(ctxt);
3948
1.03k
    if ((ent != NULL) &&
3949
1.03k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3950
3
        if (len + 10 > buf_size) {
3951
0
      growBuffer(buf, 10);
3952
0
        }
3953
3
        if ((ctxt->replaceEntities == 0) &&
3954
3
            (ent->content[0] == '&')) {
3955
3
      buf[len++] = '&';
3956
3
      buf[len++] = '#';
3957
3
      buf[len++] = '3';
3958
3
      buf[len++] = '8';
3959
3
      buf[len++] = ';';
3960
3
        } else {
3961
0
      buf[len++] = ent->content[0];
3962
0
        }
3963
1.03k
    } else if ((ent != NULL) &&
3964
1.03k
               (ctxt->replaceEntities != 0)) {
3965
0
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3966
0
                        if (xmlParserEntityCheck(ctxt, ent->length))
3967
0
                            goto error;
3968
3969
0
      ++ctxt->depth;
3970
0
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
3971
0
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
3972
0
                                /* check */ 1);
3973
0
      --ctxt->depth;
3974
0
      if (rep != NULL) {
3975
0
          current = rep;
3976
0
          while (*current != 0) { /* non input consuming */
3977
0
                                if ((*current == 0xD) || (*current == 0xA) ||
3978
0
                                    (*current == 0x9)) {
3979
0
                                    buf[len++] = 0x20;
3980
0
                                    current++;
3981
0
                                } else
3982
0
                                    buf[len++] = *current++;
3983
0
        if (len + 10 > buf_size) {
3984
0
            growBuffer(buf, 10);
3985
0
        }
3986
0
          }
3987
0
          xmlFree(rep);
3988
0
          rep = NULL;
3989
0
      }
3990
0
        } else {
3991
0
      if (len + 10 > buf_size) {
3992
0
          growBuffer(buf, 10);
3993
0
      }
3994
0
      if (ent->content != NULL)
3995
0
          buf[len++] = ent->content[0];
3996
0
        }
3997
1.03k
    } else if (ent != NULL) {
3998
0
        int i = xmlStrlen(ent->name);
3999
0
        const xmlChar *cur = ent->name;
4000
4001
        /*
4002
                     * We also check for recursion and amplification
4003
                     * when entities are not substituted. They're
4004
                     * often expanded later.
4005
         */
4006
0
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4007
0
      (ent->content != NULL)) {
4008
0
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4009
0
                            unsigned long oldCopy = ctxt->sizeentcopy;
4010
4011
0
                            ctxt->sizeentcopy = ent->length;
4012
4013
0
                            ++ctxt->depth;
4014
0
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4015
0
                                    ent->content, ent->length,
4016
0
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4017
0
                                    /* check */ 1);
4018
0
                            --ctxt->depth;
4019
4020
                            /*
4021
                             * If we're parsing DTD content, the entity
4022
                             * might reference other entities which
4023
                             * weren't defined yet, so the check isn't
4024
                             * reliable.
4025
                             */
4026
0
                            if (ctxt->inSubset == 0) {
4027
0
                                ent->flags |= XML_ENT_CHECKED;
4028
0
                                ent->expandedSize = ctxt->sizeentcopy;
4029
0
                            }
4030
4031
0
                            if (rep != NULL) {
4032
0
                                xmlFree(rep);
4033
0
                                rep = NULL;
4034
0
                            } else {
4035
0
                                ent->content[0] = 0;
4036
0
                            }
4037
4038
0
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4039
0
                                goto error;
4040
0
                        } else {
4041
0
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4042
0
                                goto error;
4043
0
                        }
4044
0
        }
4045
4046
        /*
4047
         * Just output the reference
4048
         */
4049
0
        buf[len++] = '&';
4050
0
        while (len + i + 10 > buf_size) {
4051
0
      growBuffer(buf, i + 10);
4052
0
        }
4053
0
        for (;i > 0;i--)
4054
0
      buf[len++] = *cur++;
4055
0
        buf[len++] = ';';
4056
0
    }
4057
1.03k
      }
4058
3.97M
  } else {
4059
3.97M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4060
535k
          if ((len != 0) || (!normalize)) {
4061
535k
        if ((!normalize) || (!in_space)) {
4062
535k
      COPY_BUF(l,buf,len,0x20);
4063
535k
      while (len + 10 > buf_size) {
4064
306
          growBuffer(buf, 10);
4065
306
      }
4066
535k
        }
4067
535k
        in_space = 1;
4068
535k
    }
4069
3.44M
      } else {
4070
3.44M
          in_space = 0;
4071
3.44M
    COPY_BUF(l,buf,len,c);
4072
3.44M
    if (len + 10 > buf_size) {
4073
698
        growBuffer(buf, 10);
4074
698
    }
4075
3.44M
      }
4076
3.97M
      NEXTL(l);
4077
3.97M
  }
4078
3.97M
  GROW;
4079
3.97M
  c = CUR_CHAR(l);
4080
3.97M
        if (len > maxLength) {
4081
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4082
0
                           "AttValue length too long\n");
4083
0
            goto mem_error;
4084
0
        }
4085
3.97M
    }
4086
5.48k
    if (ctxt->instate == XML_PARSER_EOF)
4087
0
        goto error;
4088
4089
5.48k
    if ((in_space) && (normalize)) {
4090
0
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4091
0
    }
4092
5.48k
    buf[len] = 0;
4093
5.48k
    if (RAW == '<') {
4094
3.76k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4095
3.76k
    } else if (RAW != limit) {
4096
452
  if ((c != 0) && (!IS_CHAR(c))) {
4097
446
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4098
446
         "invalid character in attribute value\n");
4099
446
  } else {
4100
6
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4101
6
         "AttValue: ' expected\n");
4102
6
        }
4103
452
    } else
4104
1.27k
  NEXT;
4105
4106
5.48k
    if (attlen != NULL) *attlen = len;
4107
5.48k
    return(buf);
4108
4109
0
mem_error:
4110
0
    xmlErrMemory(ctxt, NULL);
4111
0
error:
4112
0
    if (buf != NULL)
4113
0
        xmlFree(buf);
4114
0
    if (rep != NULL)
4115
0
        xmlFree(rep);
4116
0
    return(NULL);
4117
0
}
4118
4119
/**
4120
 * xmlParseAttValue:
4121
 * @ctxt:  an XML parser context
4122
 *
4123
 * DEPRECATED: Internal function, don't use.
4124
 *
4125
 * parse a value for an attribute
4126
 * Note: the parser won't do substitution of entities here, this
4127
 * will be handled later in xmlStringGetNodeList
4128
 *
4129
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4130
 *                   "'" ([^<&'] | Reference)* "'"
4131
 *
4132
 * 3.3.3 Attribute-Value Normalization:
4133
 * Before the value of an attribute is passed to the application or
4134
 * checked for validity, the XML processor must normalize it as follows:
4135
 * - a character reference is processed by appending the referenced
4136
 *   character to the attribute value
4137
 * - an entity reference is processed by recursively processing the
4138
 *   replacement text of the entity
4139
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4140
 *   appending #x20 to the normalized value, except that only a single
4141
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4142
 *   parsed entity or the literal entity value of an internal parsed entity
4143
 * - other characters are processed by appending them to the normalized value
4144
 * If the declared value is not CDATA, then the XML processor must further
4145
 * process the normalized attribute value by discarding any leading and
4146
 * trailing space (#x20) characters, and by replacing sequences of space
4147
 * (#x20) characters by a single space (#x20) character.
4148
 * All attributes for which no declaration has been read should be treated
4149
 * by a non-validating parser as if declared CDATA.
4150
 *
4151
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4152
 */
4153
4154
4155
xmlChar *
4156
0
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4157
0
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4158
0
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4159
0
}
4160
4161
/**
4162
 * xmlParseSystemLiteral:
4163
 * @ctxt:  an XML parser context
4164
 *
4165
 * DEPRECATED: Internal function, don't use.
4166
 *
4167
 * parse an XML Literal
4168
 *
4169
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4170
 *
4171
 * Returns the SystemLiteral parsed or NULL
4172
 */
4173
4174
xmlChar *
4175
0
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4176
0
    xmlChar *buf = NULL;
4177
0
    int len = 0;
4178
0
    int size = XML_PARSER_BUFFER_SIZE;
4179
0
    int cur, l;
4180
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4181
0
                    XML_MAX_TEXT_LENGTH :
4182
0
                    XML_MAX_NAME_LENGTH;
4183
0
    xmlChar stop;
4184
0
    int state = ctxt->instate;
4185
4186
0
    if (RAW == '"') {
4187
0
        NEXT;
4188
0
  stop = '"';
4189
0
    } else if (RAW == '\'') {
4190
0
        NEXT;
4191
0
  stop = '\'';
4192
0
    } else {
4193
0
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4194
0
  return(NULL);
4195
0
    }
4196
4197
0
    buf = (xmlChar *) xmlMallocAtomic(size);
4198
0
    if (buf == NULL) {
4199
0
        xmlErrMemory(ctxt, NULL);
4200
0
  return(NULL);
4201
0
    }
4202
0
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4203
0
    cur = CUR_CHAR(l);
4204
0
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4205
0
  if (len + 5 >= size) {
4206
0
      xmlChar *tmp;
4207
4208
0
      size *= 2;
4209
0
      tmp = (xmlChar *) xmlRealloc(buf, size);
4210
0
      if (tmp == NULL) {
4211
0
          xmlFree(buf);
4212
0
    xmlErrMemory(ctxt, NULL);
4213
0
    ctxt->instate = (xmlParserInputState) state;
4214
0
    return(NULL);
4215
0
      }
4216
0
      buf = tmp;
4217
0
  }
4218
0
  COPY_BUF(l,buf,len,cur);
4219
0
        if (len > maxLength) {
4220
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4221
0
            xmlFree(buf);
4222
0
            ctxt->instate = (xmlParserInputState) state;
4223
0
            return(NULL);
4224
0
        }
4225
0
  NEXTL(l);
4226
0
  cur = CUR_CHAR(l);
4227
0
    }
4228
0
    buf[len] = 0;
4229
0
    if (ctxt->instate == XML_PARSER_EOF) {
4230
0
        xmlFree(buf);
4231
0
        return(NULL);
4232
0
    }
4233
0
    ctxt->instate = (xmlParserInputState) state;
4234
0
    if (!IS_CHAR(cur)) {
4235
0
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4236
0
    } else {
4237
0
  NEXT;
4238
0
    }
4239
0
    return(buf);
4240
0
}
4241
4242
/**
4243
 * xmlParsePubidLiteral:
4244
 * @ctxt:  an XML parser context
4245
 *
4246
 * DEPRECATED: Internal function, don't use.
4247
 *
4248
 * parse an XML public literal
4249
 *
4250
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4251
 *
4252
 * Returns the PubidLiteral parsed or NULL.
4253
 */
4254
4255
xmlChar *
4256
0
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4257
0
    xmlChar *buf = NULL;
4258
0
    int len = 0;
4259
0
    int size = XML_PARSER_BUFFER_SIZE;
4260
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4261
0
                    XML_MAX_TEXT_LENGTH :
4262
0
                    XML_MAX_NAME_LENGTH;
4263
0
    xmlChar cur;
4264
0
    xmlChar stop;
4265
0
    xmlParserInputState oldstate = ctxt->instate;
4266
4267
0
    if (RAW == '"') {
4268
0
        NEXT;
4269
0
  stop = '"';
4270
0
    } else if (RAW == '\'') {
4271
0
        NEXT;
4272
0
  stop = '\'';
4273
0
    } else {
4274
0
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
0
    buf = (xmlChar *) xmlMallocAtomic(size);
4278
0
    if (buf == NULL) {
4279
0
  xmlErrMemory(ctxt, NULL);
4280
0
  return(NULL);
4281
0
    }
4282
0
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4283
0
    cur = CUR;
4284
0
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4285
0
  if (len + 1 >= size) {
4286
0
      xmlChar *tmp;
4287
4288
0
      size *= 2;
4289
0
      tmp = (xmlChar *) xmlRealloc(buf, size);
4290
0
      if (tmp == NULL) {
4291
0
    xmlErrMemory(ctxt, NULL);
4292
0
    xmlFree(buf);
4293
0
    return(NULL);
4294
0
      }
4295
0
      buf = tmp;
4296
0
  }
4297
0
  buf[len++] = cur;
4298
0
        if (len > maxLength) {
4299
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4300
0
            xmlFree(buf);
4301
0
            return(NULL);
4302
0
        }
4303
0
  NEXT;
4304
0
  cur = CUR;
4305
0
    }
4306
0
    buf[len] = 0;
4307
0
    if (ctxt->instate == XML_PARSER_EOF) {
4308
0
        xmlFree(buf);
4309
0
        return(NULL);
4310
0
    }
4311
0
    if (cur != stop) {
4312
0
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4313
0
    } else {
4314
0
  NEXTL(1);
4315
0
    }
4316
0
    ctxt->instate = oldstate;
4317
0
    return(buf);
4318
0
}
4319
4320
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4321
4322
/*
 * Lookup table used for the test in the inner loop of the char data
 * testing. Entry i holds i when byte i can be consumed by that loop and
 * 0x00 otherwise.
 *
 * Only the ASCII half is spelled out; the entries for 0x80-0xFF
 * (non-ascii) are not listed and are therefore zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00-0x1F: only 0x9 is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20-0x3F: '&' (0x26) and '<' (0x3C) are excluded */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40-0x5F: ']' (0x5D) is excluded */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60-0x7F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
};
4359
4360
/**
4361
 * xmlParseCharData:
4362
 * @ctxt:  an XML parser context
4363
 * @cdata:  unused
4364
 *
4365
 * DEPRECATED: Internal function, don't use.
4366
 *
4367
 * Parse character data. Always makes progress if the first char isn't
4368
 * '<' or '&'.
4369
 *
 * This is the fast path: runs made of TAB, LF and the bytes accepted by
 * test_char_data are handed to the SAX callbacks directly from the input
 * buffer, without being copied. The function returns when it reaches
 * '<' or '&' (or a misplaced "]]>"). When the loop meets a byte it
 * does not handle, the line/column saved after the last delivery are
 * restored and xmlParseCharDataComplex() takes over.
 *
4370
 * if we are within a CDATA section ']]>' marks an end of section.
4371
 *
4372
 * The right angle bracket (>) may be represented using the string "&gt;",
4373
 * and must, for compatibility, be escaped using "&gt;" or a character
4374
 * reference when it appears in the string "]]>" in content, when that
4375
 * string is not marking the end of a CDATA section.
4376
 *
4377
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4378
 */
4379
4380
void
4381
174k
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4382
174k
    const xmlChar *in;
4383
174k
    int nbchar = 0;
4384
174k
    /* position to fall back to if the slow path has to take over */
    int line = ctxt->input->line;
4385
174k
    int col = ctxt->input->col;
4386
174k
    int ccol;
4387
4388
174k
    GROW;
4389
    /*
4390
     * Accelerated common case where input don't need to be
4391
     * modified before passing it to the handler.
4392
     */
4393
174k
    in = ctxt->input->cur;
4394
174k
    do {
4395
174k
get_more_space:
4396
176k
        /* step over leading spaces and line feeds, tracking line/col */
        while (*in == 0x20) { in++; ctxt->input->col++; }
4397
174k
        if (*in == 0xA) {
4398
71.8k
            do {
4399
71.8k
                ctxt->input->line++; ctxt->input->col = 1;
4400
71.8k
                in++;
4401
71.8k
            } while (*in == 0xA);
4402
366
            goto get_more_space;
4403
366
        }
4404
174k
        /*
         * '<' reached with only spaces/line feeds scanned since
         * input->cur: deliver that run, if any, and return. When the two
         * callbacks differ, areBlanks() chooses between them.
         */
        if (*in == '<') {
4405
168
            nbchar = in - ctxt->input->cur;
4406
168
            if (nbchar > 0) {
4407
168
                const xmlChar *tmp = ctxt->input->cur;
4408
168
                /* the input position is advanced before the callback runs */
                ctxt->input->cur = in;
4409
4410
168
                if ((ctxt->sax != NULL) &&
4411
168
                    (ctxt->sax->ignorableWhitespace !=
4412
168
                     ctxt->sax->characters)) {
4413
0
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4414
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4415
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4416
0
                                                   tmp, nbchar);
4417
0
                    } else {
4418
0
                        if (ctxt->sax->characters != NULL)
4419
0
                            ctxt->sax->characters(ctxt->userData,
4420
0
                                                  tmp, nbchar);
4421
0
                        if (*ctxt->space == -1)
4422
0
                            *ctxt->space = -2;
4423
0
                    }
4424
168
                } else if ((ctxt->sax != NULL) &&
4425
168
                           (ctxt->sax->characters != NULL)) {
4426
168
                    ctxt->sax->characters(ctxt->userData,
4427
168
                                          tmp, nbchar);
4428
168
                }
4429
168
            }
4430
168
            return;
4431
168
        }
4432
4433
175k
get_more:
4434
175k
        /* scan bytes accepted by test_char_data; column kept in a local */
        ccol = ctxt->input->col;
4435
5.23M
        while (test_char_data[*in]) {
4436
5.06M
            in++;
4437
5.06M
            ccol++;
4438
5.06M
        }
4439
175k
        ctxt->input->col = ccol;
4440
175k
        /* line feeds stay part of the run, only line/col are updated */
        if (*in == 0xA) {
4441
134k
            do {
4442
134k
                ctxt->input->line++; ctxt->input->col = 1;
4443
134k
                in++;
4444
134k
            } while (*in == 0xA);
4445
675
            goto get_more;
4446
675
        }
4447
175k
        /*
         * ']' stops the table scan so that "]]>" can be detected: that
         * sequence raises XML_ERR_MISPLACED_CDATA_END and the function
         * returns with input->cur set just past the first ']'. A lone
         * ']' is ordinary data and scanning resumes.
         */
        if (*in == ']') {
4448
1.15k
            if ((in[1] == ']') && (in[2] == '>')) {
4449
4
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4450
4
                ctxt->input->cur = in + 1;
4451
4
                return;
4452
4
            }
4453
1.14k
            in++;
4454
1.14k
            ctxt->input->col++;
4455
1.14k
            goto get_more;
4456
1.15k
        }
4457
173k
        /* deliver the run [input->cur, in) collected so far */
        nbchar = in - ctxt->input->cur;
4458
173k
        if (nbchar > 0) {
4459
22.3k
            if ((ctxt->sax != NULL) &&
4460
22.3k
                (ctxt->sax->ignorableWhitespace !=
4461
22.3k
                 ctxt->sax->characters) &&
4462
22.3k
                (IS_BLANK_CH(*ctxt->input->cur))) {
4463
0
                const xmlChar *tmp = ctxt->input->cur;
4464
0
                ctxt->input->cur = in;
4465
4466
0
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4467
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4468
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4469
0
                                                       tmp, nbchar);
4470
0
                } else {
4471
0
                    if (ctxt->sax->characters != NULL)
4472
0
                        ctxt->sax->characters(ctxt->userData,
4473
0
                                              tmp, nbchar);
4474
0
                    if (*ctxt->space == -1)
4475
0
                        *ctxt->space = -2;
4476
0
                }
4477
0
                /* remember the position reached by this delivery */
                line = ctxt->input->line;
4478
0
                col = ctxt->input->col;
4479
22.3k
            } else if (ctxt->sax != NULL) {
4480
22.3k
                /* here input->cur is only advanced after the callback */
                if (ctxt->sax->characters != NULL)
4481
22.3k
                    ctxt->sax->characters(ctxt->userData,
4482
22.3k
                                          ctxt->input->cur, nbchar);
4483
22.3k
                line = ctxt->input->line;
4484
22.3k
                col = ctxt->input->col;
4485
22.3k
            }
4486
22.3k
        }
4487
173k
        ctxt->input->cur = in;
4488
173k
        /*
         * CR immediately followed by LF: input->cur is moved onto the LF
         * so the CR is not part of the next run, then the loop condition
         * is re-evaluated on the byte after the LF. A lone CR is left in
         * place (in is moved back onto it).
         */
        if (*in == 0xD) {
4489
57
            in++;
4490
57
            if (*in == 0xA) {
4491
0
                ctxt->input->cur = in;
4492
0
                in++;
4493
0
                ctxt->input->line++; ctxt->input->col = 1;
4494
0
                continue; /* while */
4495
0
            }
4496
57
            in--;
4497
57
        }
4498
173k
        /* markup or a reference starts here: character data is over */
        if (*in == '<') {
4499
8.70k
            return;
4500
8.70k
        }
4501
165k
        if (*in == '&') {
4502
5.56k
            return;
4503
5.56k
        }
4504
159k
        /* SHRINK/GROW are macros defined elsewhere in this file; the scan
           pointer is re-read from input->cur once they have run */
        SHRINK;
4505
159k
        GROW;
4506
159k
        if (ctxt->instate == XML_PARSER_EOF)
4507
0
            return;
4508
159k
        in = ctxt->input->cur;
4509
159k
    /* keep going only while the next byte is printable ASCII, TAB or LF */
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4510
159k
             (*in == 0x09) || (*in == 0x0a));
4511
159k
    /* fall back to the slow path from the last recorded line/column */
    ctxt->input->line = line;
4512
159k
    ctxt->input->col = col;
4513
159k
    xmlParseCharDataComplex(ctxt);
4514
159k
}
4515
4516
/**
4517
 * xmlParseCharDataComplex:
4518
 * @ctxt:  an XML parser context
4519
 * @cdata:  int indicating whether we are within a CDATA section
4520
 *
4521
 * Always makes progress if the first char isn't '<' or '&'.
4522
 *
4523
 * parse a CharData section.this is the fallback function
4524
 * of xmlParseCharData() when the parsing requires handling
4525
 * of non-ASCII characters.
4526
 */
4527
static void
4528
159k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4529
159k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4530
159k
    int nbchar = 0;
4531
159k
    int cur, l;
4532
4533
159k
    cur = CUR_CHAR(l);
4534
10.8M
    while ((cur != '<') && /* checked */
4535
10.8M
           (cur != '&') &&
4536
10.8M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4537
10.6M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4538
0
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4539
0
  }
4540
10.6M
  COPY_BUF(l,buf,nbchar,cur);
4541
  /* move current position before possible calling of ctxt->sax->characters */
4542
10.6M
  NEXTL(l);
4543
10.6M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4544
65.4k
      buf[nbchar] = 0;
4545
4546
      /*
4547
       * OK the segment is to be consumed as chars.
4548
       */
4549
65.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4550
65.4k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4551
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4552
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4553
0
                                     buf, nbchar);
4554
65.4k
    } else {
4555
65.4k
        if (ctxt->sax->characters != NULL)
4556
65.4k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4557
65.4k
        if ((ctxt->sax->characters !=
4558
65.4k
             ctxt->sax->ignorableWhitespace) &&
4559
65.4k
      (*ctxt->space == -1))
4560
0
      *ctxt->space = -2;
4561
65.4k
    }
4562
65.4k
      }
4563
65.4k
      nbchar = 0;
4564
            /* something really bad happened in the SAX callback */
4565
65.4k
            if (ctxt->instate != XML_PARSER_CONTENT)
4566
0
                return;
4567
65.4k
            SHRINK;
4568
65.4k
  }
4569
10.6M
  cur = CUR_CHAR(l);
4570
10.6M
    }
4571
159k
    if (ctxt->instate == XML_PARSER_EOF)
4572
1
        return;
4573
159k
    if (nbchar != 0) {
4574
50.0k
        buf[nbchar] = 0;
4575
  /*
4576
   * OK the segment is to be consumed as chars.
4577
   */
4578
50.0k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4579
50.0k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4580
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4581
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4582
50.0k
      } else {
4583
50.0k
    if (ctxt->sax->characters != NULL)
4584
50.0k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4585
50.0k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4586
50.0k
        (*ctxt->space == -1))
4587
0
        *ctxt->space = -2;
4588
50.0k
      }
4589
50.0k
  }
4590
50.0k
    }
4591
159k
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4592
  /* Generate the error and skip the offending character */
4593
136k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4594
136k
                          "PCDATA invalid Char value %d\n",
4595
136k
                    cur ? cur : CUR);
4596
136k
  NEXT;
4597
136k
    }
4598
159k
}
4599
4600
/**
4601
 * xmlParseExternalID:
4602
 * @ctxt:  an XML parser context
4603
 * @publicID:  a xmlChar** receiving PubidLiteral
4604
 * @strict: indicate whether we should restrict parsing to only
4605
 *          production [75], see NOTE below
4606
 *
4607
 * DEPRECATED: Internal function, don't use.
4608
 *
4609
 * Parse an External ID or a Public ID
4610
 *
4611
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4612
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4613
 *
4614
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4615
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4616
 *
4617
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4618
 *
4619
 * Returns the function returns SystemLiteral and in the second
4620
 *                case publicID receives PubidLiteral, is strict is off
4621
 *                it is possible to return NULL and have publicID set.
4622
 */
4623
4624
xmlChar *
4625
0
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4626
0
    xmlChar *URI = NULL;
4627
4628
0
    *publicID = NULL;
4629
0
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4630
0
        SKIP(6);
4631
0
  if (SKIP_BLANKS == 0) {
4632
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4633
0
                     "Space required after 'SYSTEM'\n");
4634
0
  }
4635
0
  URI = xmlParseSystemLiteral(ctxt);
4636
0
  if (URI == NULL) {
4637
0
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4638
0
        }
4639
0
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4640
0
        SKIP(6);
4641
0
  if (SKIP_BLANKS == 0) {
4642
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4643
0
        "Space required after 'PUBLIC'\n");
4644
0
  }
4645
0
  *publicID = xmlParsePubidLiteral(ctxt);
4646
0
  if (*publicID == NULL) {
4647
0
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4648
0
  }
4649
0
  if (strict) {
4650
      /*
4651
       * We don't handle [83] so "S SystemLiteral" is required.
4652
       */
4653
0
      if (SKIP_BLANKS == 0) {
4654
0
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4655
0
      "Space required after the Public Identifier\n");
4656
0
      }
4657
0
  } else {
4658
      /*
4659
       * We handle [83] so we return immediately, if
4660
       * "S SystemLiteral" is not detected. We skip blanks if no
4661
             * system literal was found, but this is harmless since we must
4662
             * be at the end of a NotationDecl.
4663
       */
4664
0
      if (SKIP_BLANKS == 0) return(NULL);
4665
0
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4666
0
  }
4667
0
  URI = xmlParseSystemLiteral(ctxt);
4668
0
  if (URI == NULL) {
4669
0
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4670
0
        }
4671
0
    }
4672
0
    return(URI);
4673
0
}
4674
4675
/**
4676
 * xmlParseCommentComplex:
4677
 * @ctxt:  an XML parser context
4678
 * @buf:  the already parsed part of the buffer
4679
 * @len:  number of bytes in the buffer
4680
 * @size:  allocated size of the buffer
4681
 *
4682
 * Skip an XML (SGML) comment <!-- .... -->
4683
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4684
 *  must not occur within comments. "
4685
 * This is the slow routine in case the accelerator for ascii didn't work
4686
 *
4687
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4688
 */
4689
static void
4690
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4691
553
                       size_t len, size_t size) {
4692
553
    int q, ql;
4693
553
    int r, rl;
4694
553
    int cur, l;
4695
553
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4696
0
                       XML_MAX_HUGE_LENGTH :
4697
553
                       XML_MAX_TEXT_LENGTH;
4698
553
    int inputid;
4699
4700
553
    inputid = ctxt->input->id;
4701
4702
553
    if (buf == NULL) {
4703
7
        len = 0;
4704
7
  size = XML_PARSER_BUFFER_SIZE;
4705
7
  buf = (xmlChar *) xmlMallocAtomic(size);
4706
7
  if (buf == NULL) {
4707
0
      xmlErrMemory(ctxt, NULL);
4708
0
      return;
4709
0
  }
4710
7
    }
4711
553
    q = CUR_CHAR(ql);
4712
553
    if (q == 0)
4713
2
        goto not_terminated;
4714
551
    if (!IS_CHAR(q)) {
4715
4
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4716
4
                          "xmlParseComment: invalid xmlChar value %d\n",
4717
4
                    q);
4718
4
  xmlFree (buf);
4719
4
  return;
4720
4
    }
4721
547
    NEXTL(ql);
4722
547
    r = CUR_CHAR(rl);
4723
547
    if (r == 0)
4724
0
        goto not_terminated;
4725
547
    if (!IS_CHAR(r)) {
4726
1
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4727
1
                          "xmlParseComment: invalid xmlChar value %d\n",
4728
1
                    r);
4729
1
  xmlFree (buf);
4730
1
  return;
4731
1
    }
4732
546
    NEXTL(rl);
4733
546
    cur = CUR_CHAR(l);
4734
546
    if (cur == 0)
4735
0
        goto not_terminated;
4736
913k
    while (IS_CHAR(cur) && /* checked */
4737
913k
           ((cur != '>') ||
4738
912k
      (r != '-') || (q != '-'))) {
4739
912k
  if ((r == '-') && (q == '-')) {
4740
50.0k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4741
50.0k
  }
4742
912k
  if (len + 5 >= size) {
4743
35
      xmlChar *new_buf;
4744
35
            size_t new_size;
4745
4746
35
      new_size = size * 2;
4747
35
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4748
35
      if (new_buf == NULL) {
4749
0
    xmlFree (buf);
4750
0
    xmlErrMemory(ctxt, NULL);
4751
0
    return;
4752
0
      }
4753
35
      buf = new_buf;
4754
35
            size = new_size;
4755
35
  }
4756
912k
  COPY_BUF(ql,buf,len,q);
4757
912k
        if (len > maxLength) {
4758
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4759
0
                         "Comment too big found", NULL);
4760
0
            xmlFree (buf);
4761
0
            return;
4762
0
        }
4763
4764
912k
  q = r;
4765
912k
  ql = rl;
4766
912k
  r = cur;
4767
912k
  rl = l;
4768
4769
912k
  NEXTL(l);
4770
912k
  cur = CUR_CHAR(l);
4771
4772
912k
    }
4773
546
    buf[len] = 0;
4774
546
    if (ctxt->instate == XML_PARSER_EOF) {
4775
0
        xmlFree(buf);
4776
0
        return;
4777
0
    }
4778
546
    if (cur == 0) {
4779
4
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4780
4
                       "Comment not terminated \n<!--%.50s\n", buf);
4781
542
    } else if (!IS_CHAR(cur)) {
4782
542
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4783
542
                          "xmlParseComment: invalid xmlChar value %d\n",
4784
542
                    cur);
4785
542
    } else {
4786
0
  if (inputid != ctxt->input->id) {
4787
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4788
0
               "Comment doesn't start and stop in the same"
4789
0
                           " entity\n");
4790
0
  }
4791
0
        NEXT;
4792
0
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4793
0
      (!ctxt->disableSAX))
4794
0
      ctxt->sax->comment(ctxt->userData, buf);
4795
0
    }
4796
546
    xmlFree(buf);
4797
546
    return;
4798
2
not_terminated:
4799
2
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4800
2
       "Comment not terminated\n", NULL);
4801
2
    xmlFree(buf);
4802
2
    return;
4803
546
}
4804
4805
/**
4806
 * xmlParseComment:
4807
 * @ctxt:  an XML parser context
4808
 *
4809
 * DEPRECATED: Internal function, don't use.
4810
 *
4811
 * Parse an XML (SGML) comment. Always consumes '<!'.
4812
 *
4813
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4814
 *  must not occur within comments. "
4815
 *
4816
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4817
 */
4818
void
4819
622
xmlParseComment(xmlParserCtxtPtr ctxt) {
4820
622
    xmlChar *buf = NULL;
4821
622
    size_t size = XML_PARSER_BUFFER_SIZE;
4822
622
    size_t len = 0;
4823
622
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4824
0
                       XML_MAX_HUGE_LENGTH :
4825
622
                       XML_MAX_TEXT_LENGTH;
4826
622
    xmlParserInputState state;
4827
622
    const xmlChar *in;
4828
622
    size_t nbchar = 0;
4829
622
    int ccol;
4830
622
    int inputid;
4831
4832
    /*
4833
     * Check that there is a comment right here.
4834
     */
4835
622
    if ((RAW != '<') || (NXT(1) != '!'))
4836
0
        return;
4837
622
    SKIP(2);
4838
622
    if ((RAW != '-') || (NXT(1) != '-'))
4839
0
        return;
4840
622
    state = ctxt->instate;
4841
622
    ctxt->instate = XML_PARSER_COMMENT;
4842
622
    inputid = ctxt->input->id;
4843
622
    SKIP(2);
4844
622
    GROW;
4845
4846
    /*
4847
     * Accelerated common case where input don't need to be
4848
     * modified before passing it to the handler.
4849
     */
4850
622
    in = ctxt->input->cur;
4851
622
    do {
4852
622
  if (*in == 0xA) {
4853
0
      do {
4854
0
    ctxt->input->line++; ctxt->input->col = 1;
4855
0
    in++;
4856
0
      } while (*in == 0xA);
4857
0
  }
4858
1.51k
get_more:
4859
1.51k
        ccol = ctxt->input->col;
4860
236k
  while (((*in > '-') && (*in <= 0x7F)) ||
4861
236k
         ((*in >= 0x20) && (*in < '-')) ||
4862
236k
         (*in == 0x09)) {
4863
234k
        in++;
4864
234k
        ccol++;
4865
234k
  }
4866
1.51k
  ctxt->input->col = ccol;
4867
1.51k
  if (*in == 0xA) {
4868
0
      do {
4869
0
    ctxt->input->line++; ctxt->input->col = 1;
4870
0
    in++;
4871
0
      } while (*in == 0xA);
4872
0
      goto get_more;
4873
0
  }
4874
1.51k
  nbchar = in - ctxt->input->cur;
4875
  /*
4876
   * save current set of data
4877
   */
4878
1.51k
  if (nbchar > 0) {
4879
1.49k
      if ((ctxt->sax != NULL) &&
4880
1.49k
    (ctxt->sax->comment != NULL)) {
4881
1.49k
    if (buf == NULL) {
4882
612
        if ((*in == '-') && (in[1] == '-'))
4883
488
            size = nbchar + 1;
4884
124
        else
4885
124
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4886
612
        buf = (xmlChar *) xmlMallocAtomic(size);
4887
612
        if (buf == NULL) {
4888
0
            xmlErrMemory(ctxt, NULL);
4889
0
      ctxt->instate = state;
4890
0
      return;
4891
0
        }
4892
612
        len = 0;
4893
885
    } else if (len + nbchar + 1 >= size) {
4894
426
        xmlChar *new_buf;
4895
426
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4896
426
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4897
426
        if (new_buf == NULL) {
4898
0
            xmlFree (buf);
4899
0
      xmlErrMemory(ctxt, NULL);
4900
0
      ctxt->instate = state;
4901
0
      return;
4902
0
        }
4903
426
        buf = new_buf;
4904
426
    }
4905
1.49k
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4906
1.49k
    len += nbchar;
4907
1.49k
    buf[len] = 0;
4908
1.49k
      }
4909
1.49k
  }
4910
1.51k
        if (len > maxLength) {
4911
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4912
0
                         "Comment too big found", NULL);
4913
0
            xmlFree (buf);
4914
0
            return;
4915
0
        }
4916
1.51k
  ctxt->input->cur = in;
4917
1.51k
  if (*in == 0xA) {
4918
0
      in++;
4919
0
      ctxt->input->line++; ctxt->input->col = 1;
4920
0
  }
4921
1.51k
  if (*in == 0xD) {
4922
113
      in++;
4923
113
      if (*in == 0xA) {
4924
0
    ctxt->input->cur = in;
4925
0
    in++;
4926
0
    ctxt->input->line++; ctxt->input->col = 1;
4927
0
    goto get_more;
4928
0
      }
4929
113
      in--;
4930
113
  }
4931
1.51k
  SHRINK;
4932
1.51k
  GROW;
4933
1.51k
        if (ctxt->instate == XML_PARSER_EOF) {
4934
0
            xmlFree(buf);
4935
0
            return;
4936
0
        }
4937
1.51k
  in = ctxt->input->cur;
4938
1.51k
  if (*in == '-') {
4939
959
      if (in[1] == '-') {
4940
504
          if (in[2] == '>') {
4941
69
        if (ctxt->input->id != inputid) {
4942
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4943
0
                     "comment doesn't start and stop in the"
4944
0
                                       " same entity\n");
4945
0
        }
4946
69
        SKIP(3);
4947
69
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4948
69
            (!ctxt->disableSAX)) {
4949
69
      if (buf != NULL)
4950
66
          ctxt->sax->comment(ctxt->userData, buf);
4951
3
      else
4952
3
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4953
69
        }
4954
69
        if (buf != NULL)
4955
66
            xmlFree(buf);
4956
69
        if (ctxt->instate != XML_PARSER_EOF)
4957
69
      ctxt->instate = state;
4958
69
        return;
4959
69
    }
4960
435
    if (buf != NULL) {
4961
430
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4962
430
                          "Double hyphen within comment: "
4963
430
                                      "<!--%.50s\n",
4964
430
              buf);
4965
430
    } else
4966
5
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4967
5
                          "Double hyphen within comment\n", NULL);
4968
435
                if (ctxt->instate == XML_PARSER_EOF) {
4969
0
                    xmlFree(buf);
4970
0
                    return;
4971
0
                }
4972
435
    in++;
4973
435
    ctxt->input->col++;
4974
435
      }
4975
890
      in++;
4976
890
      ctxt->input->col++;
4977
890
      goto get_more;
4978
959
  }
4979
1.51k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4980
553
    xmlParseCommentComplex(ctxt, buf, len, size);
4981
553
    ctxt->instate = state;
4982
553
    return;
4983
622
}
4984
4985
4986
/**
4987
 * xmlParsePITarget:
4988
 * @ctxt:  an XML parser context
4989
 *
4990
 * DEPRECATED: Internal function, don't use.
4991
 *
4992
 * parse the name of a PI
4993
 *
4994
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4995
 *
4996
 * Returns the PITarget name or NULL
4997
 */
4998
4999
const xmlChar *
5000
1.91k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5001
1.91k
    const xmlChar *name;
5002
5003
1.91k
    name = xmlParseName(ctxt);
5004
1.91k
    if ((name != NULL) &&
5005
1.91k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5006
1.91k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5007
1.91k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5008
0
  int i;
5009
0
  if ((name[0] == 'x') && (name[1] == 'm') &&
5010
0
      (name[2] == 'l') && (name[3] == 0)) {
5011
0
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5012
0
     "XML declaration allowed only at the start of the document\n");
5013
0
      return(name);
5014
0
  } else if (name[3] == 0) {
5015
0
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5016
0
      return(name);
5017
0
  }
5018
0
  for (i = 0;;i++) {
5019
0
      if (xmlW3CPIs[i] == NULL) break;
5020
0
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5021
0
          return(name);
5022
0
  }
5023
0
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5024
0
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5025
0
          NULL, NULL);
5026
0
    }
5027
1.91k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5028
1
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5029
1
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5030
1
    }
5031
1.91k
    return(name);
5032
1.91k
}
5033
5034
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, e.g.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the pseudo-attribute 'catalog', an equal sign
 * and a quoted URL, with optional blanks around each of them and nothing
 * after the closing quote. On success the URL is added to the catalogs
 * of the parsing context, to be used if there is a resolution need
 * further down in the document. Syntax errors raise a warning, except
 * a missing '=' which is silently ignored.
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* equal sign */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5095
5096
/**
5097
 * xmlParsePI:
5098
 * @ctxt:  an XML parser context
5099
 *
5100
 * DEPRECATED: Internal function, don't use.
5101
 *
5102
 * parse an XML Processing Instruction.
5103
 *
5104
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5105
 *
5106
 * The processing is transferred to SAX once parsed.
5107
 */
5108
5109
void
5110
1.91k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5111
1.91k
    xmlChar *buf = NULL;
5112
1.91k
    size_t len = 0;
5113
1.91k
    size_t size = XML_PARSER_BUFFER_SIZE;
5114
1.91k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5115
0
                       XML_MAX_HUGE_LENGTH :
5116
1.91k
                       XML_MAX_TEXT_LENGTH;
5117
1.91k
    int cur, l;
5118
1.91k
    const xmlChar *target;
5119
1.91k
    xmlParserInputState state;
5120
5121
1.91k
    if ((RAW == '<') && (NXT(1) == '?')) {
5122
1.91k
  int inputid = ctxt->input->id;
5123
1.91k
  state = ctxt->instate;
5124
1.91k
        ctxt->instate = XML_PARSER_PI;
5125
  /*
5126
   * this is a Processing Instruction.
5127
   */
5128
1.91k
  SKIP(2);
5129
5130
  /*
5131
   * Parse the target name and check for special support like
5132
   * namespace.
5133
   */
5134
1.91k
        target = xmlParsePITarget(ctxt);
5135
1.91k
  if (target != NULL) {
5136
1.79k
      if ((RAW == '?') && (NXT(1) == '>')) {
5137
68
    if (inputid != ctxt->input->id) {
5138
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5139
0
                             "PI declaration doesn't start and stop in"
5140
0
                                   " the same entity\n");
5141
0
    }
5142
68
    SKIP(2);
5143
5144
    /*
5145
     * SAX: PI detected.
5146
     */
5147
68
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5148
68
        (ctxt->sax->processingInstruction != NULL))
5149
68
        ctxt->sax->processingInstruction(ctxt->userData,
5150
68
                                         target, NULL);
5151
68
    if (ctxt->instate != XML_PARSER_EOF)
5152
68
        ctxt->instate = state;
5153
68
    return;
5154
68
      }
5155
1.73k
      buf = (xmlChar *) xmlMallocAtomic(size);
5156
1.73k
      if (buf == NULL) {
5157
0
    xmlErrMemory(ctxt, NULL);
5158
0
    ctxt->instate = state;
5159
0
    return;
5160
0
      }
5161
1.73k
      if (SKIP_BLANKS == 0) {
5162
1.72k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5163
1.72k
        "ParsePI: PI %s space expected\n", target);
5164
1.72k
      }
5165
1.73k
      cur = CUR_CHAR(l);
5166
261k
      while (IS_CHAR(cur) && /* checked */
5167
261k
       ((cur != '?') || (NXT(1) != '>'))) {
5168
259k
    if (len + 5 >= size) {
5169
1.47k
        xmlChar *tmp;
5170
1.47k
                    size_t new_size = size * 2;
5171
1.47k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5172
1.47k
        if (tmp == NULL) {
5173
0
      xmlErrMemory(ctxt, NULL);
5174
0
      xmlFree(buf);
5175
0
      ctxt->instate = state;
5176
0
      return;
5177
0
        }
5178
1.47k
        buf = tmp;
5179
1.47k
                    size = new_size;
5180
1.47k
    }
5181
259k
    COPY_BUF(l,buf,len,cur);
5182
259k
                if (len > maxLength) {
5183
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5184
0
                                      "PI %s too big found", target);
5185
0
                    xmlFree(buf);
5186
0
                    ctxt->instate = state;
5187
0
                    return;
5188
0
                }
5189
259k
    NEXTL(l);
5190
259k
    cur = CUR_CHAR(l);
5191
259k
      }
5192
1.73k
      buf[len] = 0;
5193
1.73k
            if (ctxt->instate == XML_PARSER_EOF) {
5194
0
                xmlFree(buf);
5195
0
                return;
5196
0
            }
5197
1.73k
      if (cur != '?') {
5198
1.70k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5199
1.70k
          "ParsePI: PI %s never end ...\n", target);
5200
1.70k
      } else {
5201
22
    if (inputid != ctxt->input->id) {
5202
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5203
0
                             "PI declaration doesn't start and stop in"
5204
0
                                   " the same entity\n");
5205
0
    }
5206
22
    SKIP(2);
5207
5208
22
#ifdef LIBXML_CATALOG_ENABLED
5209
22
    if (((state == XML_PARSER_MISC) ||
5210
22
               (state == XML_PARSER_START)) &&
5211
22
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5212
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5213
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5214
0
      (allow == XML_CATA_ALLOW_ALL))
5215
0
      xmlParseCatalogPI(ctxt, buf);
5216
0
    }
5217
22
#endif
5218
5219
5220
    /*
5221
     * SAX: PI detected.
5222
     */
5223
22
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5224
22
        (ctxt->sax->processingInstruction != NULL))
5225
22
        ctxt->sax->processingInstruction(ctxt->userData,
5226
22
                                         target, buf);
5227
22
      }
5228
1.73k
      xmlFree(buf);
5229
1.73k
  } else {
5230
114
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5231
114
  }
5232
1.84k
  if (ctxt->instate != XML_PARSER_EOF)
5233
1.84k
      ctxt->instate = state;
5234
1.84k
    }
5235
1.91k
}
5236
5237
/**
5238
 * xmlParseNotationDecl:
5239
 * @ctxt:  an XML parser context
5240
 *
5241
 * DEPRECATED: Internal function, don't use.
5242
 *
5243
 * Parse a notation declaration. Always consumes '<!'.
5244
 *
5245
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5246
 *
5247
 * Hence there is actually 3 choices:
5248
 *     'PUBLIC' S PubidLiteral
5249
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5250
 * and 'SYSTEM' S SystemLiteral
5251
 *
5252
 * See the NOTE on xmlParseExternalID().
5253
 */
5254
5255
void
5256
0
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5257
0
    const xmlChar *name;
5258
0
    xmlChar *Pubid;
5259
0
    xmlChar *Systemid;
5260
5261
0
    if ((CUR != '<') || (NXT(1) != '!'))
5262
0
        return;
5263
0
    SKIP(2);
5264
5265
0
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5266
0
  int inputid = ctxt->input->id;
5267
0
  SKIP(8);
5268
0
  if (SKIP_BLANKS == 0) {
5269
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5270
0
         "Space required after '<!NOTATION'\n");
5271
0
      return;
5272
0
  }
5273
5274
0
        name = xmlParseName(ctxt);
5275
0
  if (name == NULL) {
5276
0
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5277
0
      return;
5278
0
  }
5279
0
  if (xmlStrchr(name, ':') != NULL) {
5280
0
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5281
0
         "colons are forbidden from notation names '%s'\n",
5282
0
         name, NULL, NULL);
5283
0
  }
5284
0
  if (SKIP_BLANKS == 0) {
5285
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5286
0
         "Space required after the NOTATION name'\n");
5287
0
      return;
5288
0
  }
5289
5290
  /*
5291
   * Parse the IDs.
5292
   */
5293
0
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5294
0
  SKIP_BLANKS;
5295
5296
0
  if (RAW == '>') {
5297
0
      if (inputid != ctxt->input->id) {
5298
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5299
0
                         "Notation declaration doesn't start and stop"
5300
0
                               " in the same entity\n");
5301
0
      }
5302
0
      NEXT;
5303
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5304
0
    (ctxt->sax->notationDecl != NULL))
5305
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5306
0
  } else {
5307
0
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5308
0
  }
5309
0
  if (Systemid != NULL) xmlFree(Systemid);
5310
0
  if (Pubid != NULL) xmlFree(Pubid);
5311
0
    }
5312
0
}
5313
5314
/**
5315
 * xmlParseEntityDecl:
5316
 * @ctxt:  an XML parser context
5317
 *
5318
 * DEPRECATED: Internal function, don't use.
5319
 *
5320
 * Parse an entity declaration. Always consumes '<!'.
5321
 *
5322
 * [70] EntityDecl ::= GEDecl | PEDecl
5323
 *
5324
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5325
 *
5326
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5327
 *
5328
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5329
 *
5330
 * [74] PEDef ::= EntityValue | ExternalID
5331
 *
5332
 * [76] NDataDecl ::= S 'NDATA' S Name
5333
 *
5334
 * [ VC: Notation Declared ]
5335
 * The Name must match the declared name of a notation.
5336
 */
5337
5338
void
5339
0
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5340
0
    const xmlChar *name = NULL;
5341
0
    xmlChar *value = NULL;
5342
0
    xmlChar *URI = NULL, *literal = NULL;
5343
0
    const xmlChar *ndata = NULL;
5344
0
    int isParameter = 0;
5345
0
    xmlChar *orig = NULL;
5346
5347
0
    if ((CUR != '<') || (NXT(1) != '!'))
5348
0
        return;
5349
0
    SKIP(2);
5350
5351
    /* GROW; done in the caller */
5352
0
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5353
0
  int inputid = ctxt->input->id;
5354
0
  SKIP(6);
5355
0
  if (SKIP_BLANKS == 0) {
5356
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5357
0
         "Space required after '<!ENTITY'\n");
5358
0
  }
5359
5360
0
  if (RAW == '%') {
5361
0
      NEXT;
5362
0
      if (SKIP_BLANKS == 0) {
5363
0
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5364
0
             "Space required after '%%'\n");
5365
0
      }
5366
0
      isParameter = 1;
5367
0
  }
5368
5369
0
        name = xmlParseName(ctxt);
5370
0
  if (name == NULL) {
5371
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5372
0
                     "xmlParseEntityDecl: no name\n");
5373
0
            return;
5374
0
  }
5375
0
  if (xmlStrchr(name, ':') != NULL) {
5376
0
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5377
0
         "colons are forbidden from entities names '%s'\n",
5378
0
         name, NULL, NULL);
5379
0
  }
5380
0
  if (SKIP_BLANKS == 0) {
5381
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5382
0
         "Space required after the entity name\n");
5383
0
  }
5384
5385
0
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5386
  /*
5387
   * handle the various case of definitions...
5388
   */
5389
0
  if (isParameter) {
5390
0
      if ((RAW == '"') || (RAW == '\'')) {
5391
0
          value = xmlParseEntityValue(ctxt, &orig);
5392
0
    if (value) {
5393
0
        if ((ctxt->sax != NULL) &&
5394
0
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5395
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5396
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5397
0
            NULL, NULL, value);
5398
0
    }
5399
0
      } else {
5400
0
          URI = xmlParseExternalID(ctxt, &literal, 1);
5401
0
    if ((URI == NULL) && (literal == NULL)) {
5402
0
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5403
0
    }
5404
0
    if (URI) {
5405
0
        xmlURIPtr uri;
5406
5407
0
        uri = xmlParseURI((const char *) URI);
5408
0
        if (uri == NULL) {
5409
0
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5410
0
             "Invalid URI: %s\n", URI);
5411
      /*
5412
       * This really ought to be a well formedness error
5413
       * but the XML Core WG decided otherwise c.f. issue
5414
       * E26 of the XML erratas.
5415
       */
5416
0
        } else {
5417
0
      if (uri->fragment != NULL) {
5418
          /*
5419
           * Okay this is foolish to block those but not
5420
           * invalid URIs.
5421
           */
5422
0
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5423
0
      } else {
5424
0
          if ((ctxt->sax != NULL) &&
5425
0
        (!ctxt->disableSAX) &&
5426
0
        (ctxt->sax->entityDecl != NULL))
5427
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5428
0
              XML_EXTERNAL_PARAMETER_ENTITY,
5429
0
              literal, URI, NULL);
5430
0
      }
5431
0
      xmlFreeURI(uri);
5432
0
        }
5433
0
    }
5434
0
      }
5435
0
  } else {
5436
0
      if ((RAW == '"') || (RAW == '\'')) {
5437
0
          value = xmlParseEntityValue(ctxt, &orig);
5438
0
    if ((ctxt->sax != NULL) &&
5439
0
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5440
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5441
0
        XML_INTERNAL_GENERAL_ENTITY,
5442
0
        NULL, NULL, value);
5443
    /*
5444
     * For expat compatibility in SAX mode.
5445
     */
5446
0
    if ((ctxt->myDoc == NULL) ||
5447
0
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5448
0
        if (ctxt->myDoc == NULL) {
5449
0
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5450
0
      if (ctxt->myDoc == NULL) {
5451
0
          xmlErrMemory(ctxt, "New Doc failed");
5452
0
          goto done;
5453
0
      }
5454
0
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5455
0
        }
5456
0
        if (ctxt->myDoc->intSubset == NULL)
5457
0
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5458
0
              BAD_CAST "fake", NULL, NULL);
5459
5460
0
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5461
0
                    NULL, NULL, value);
5462
0
    }
5463
0
      } else {
5464
0
          URI = xmlParseExternalID(ctxt, &literal, 1);
5465
0
    if ((URI == NULL) && (literal == NULL)) {
5466
0
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5467
0
    }
5468
0
    if (URI) {
5469
0
        xmlURIPtr uri;
5470
5471
0
        uri = xmlParseURI((const char *)URI);
5472
0
        if (uri == NULL) {
5473
0
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5474
0
             "Invalid URI: %s\n", URI);
5475
      /*
5476
       * This really ought to be a well formedness error
5477
       * but the XML Core WG decided otherwise c.f. issue
5478
       * E26 of the XML erratas.
5479
       */
5480
0
        } else {
5481
0
      if (uri->fragment != NULL) {
5482
          /*
5483
           * Okay this is foolish to block those but not
5484
           * invalid URIs.
5485
           */
5486
0
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5487
0
      }
5488
0
      xmlFreeURI(uri);
5489
0
        }
5490
0
    }
5491
0
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5492
0
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5493
0
           "Space required before 'NDATA'\n");
5494
0
    }
5495
0
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5496
0
        SKIP(5);
5497
0
        if (SKIP_BLANKS == 0) {
5498
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5499
0
               "Space required after 'NDATA'\n");
5500
0
        }
5501
0
        ndata = xmlParseName(ctxt);
5502
0
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5503
0
            (ctxt->sax->unparsedEntityDecl != NULL))
5504
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5505
0
            literal, URI, ndata);
5506
0
    } else {
5507
0
        if ((ctxt->sax != NULL) &&
5508
0
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5509
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5510
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5511
0
            literal, URI, NULL);
5512
        /*
5513
         * For expat compatibility in SAX mode.
5514
         * assuming the entity replacement was asked for
5515
         */
5516
0
        if ((ctxt->replaceEntities != 0) &&
5517
0
      ((ctxt->myDoc == NULL) ||
5518
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5519
0
      if (ctxt->myDoc == NULL) {
5520
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5521
0
          if (ctxt->myDoc == NULL) {
5522
0
              xmlErrMemory(ctxt, "New Doc failed");
5523
0
        goto done;
5524
0
          }
5525
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5526
0
      }
5527
5528
0
      if (ctxt->myDoc->intSubset == NULL)
5529
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5530
0
            BAD_CAST "fake", NULL, NULL);
5531
0
      xmlSAX2EntityDecl(ctxt, name,
5532
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5533
0
                  literal, URI, NULL);
5534
0
        }
5535
0
    }
5536
0
      }
5537
0
  }
5538
0
  if (ctxt->instate == XML_PARSER_EOF)
5539
0
      goto done;
5540
0
  SKIP_BLANKS;
5541
0
  if (RAW != '>') {
5542
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5543
0
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5544
0
      xmlHaltParser(ctxt);
5545
0
  } else {
5546
0
      if (inputid != ctxt->input->id) {
5547
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5548
0
                         "Entity declaration doesn't start and stop in"
5549
0
                               " the same entity\n");
5550
0
      }
5551
0
      NEXT;
5552
0
  }
5553
0
  if (orig != NULL) {
5554
      /*
5555
       * Ugly mechanism to save the raw entity value.
5556
       */
5557
0
      xmlEntityPtr cur = NULL;
5558
5559
0
      if (isParameter) {
5560
0
          if ((ctxt->sax != NULL) &&
5561
0
        (ctxt->sax->getParameterEntity != NULL))
5562
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5563
0
      } else {
5564
0
          if ((ctxt->sax != NULL) &&
5565
0
        (ctxt->sax->getEntity != NULL))
5566
0
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5567
0
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5568
0
        cur = xmlSAX2GetEntity(ctxt, name);
5569
0
    }
5570
0
      }
5571
0
            if ((cur != NULL) && (cur->orig == NULL)) {
5572
0
    cur->orig = orig;
5573
0
                orig = NULL;
5574
0
      }
5575
0
  }
5576
5577
0
done:
5578
0
  if (value != NULL) xmlFree(value);
5579
0
  if (URI != NULL) xmlFree(URI);
5580
0
  if (literal != NULL) xmlFree(literal);
5581
0
        if (orig != NULL) xmlFree(orig);
5582
0
    }
5583
0
}
5584
5585
/**
5586
 * xmlParseDefaultDecl:
5587
 * @ctxt:  an XML parser context
5588
 * @value:  Receive a possible fixed default value for the attribute
5589
 *
5590
 * DEPRECATED: Internal function, don't use.
5591
 *
5592
 * Parse an attribute default declaration
5593
 *
5594
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5595
 *
5596
 * [ VC: Required Attribute ]
5597
 * if the default declaration is the keyword #REQUIRED, then the
5598
 * attribute must be specified for all elements of the type in the
5599
 * attribute-list declaration.
5600
 *
5601
 * [ VC: Attribute Default Legal ]
5602
 * The declared default value must meet the lexical constraints of
5603
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5604
 *
5605
 * [ VC: Fixed Attribute Default ]
5606
 * if an attribute has a default value declared with the #FIXED
5607
 * keyword, instances of that attribute must match the default value.
5608
 *
5609
 * [ WFC: No < in Attribute Values ]
5610
 * handled in xmlParseAttValue()
5611
 *
5612
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5613
 *          or XML_ATTRIBUTE_FIXED.
5614
 */
5615
5616
int
5617
0
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5618
0
    int val;
5619
0
    xmlChar *ret;
5620
5621
0
    *value = NULL;
5622
0
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5623
0
  SKIP(9);
5624
0
  return(XML_ATTRIBUTE_REQUIRED);
5625
0
    }
5626
0
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5627
0
  SKIP(8);
5628
0
  return(XML_ATTRIBUTE_IMPLIED);
5629
0
    }
5630
0
    val = XML_ATTRIBUTE_NONE;
5631
0
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5632
0
  SKIP(6);
5633
0
  val = XML_ATTRIBUTE_FIXED;
5634
0
  if (SKIP_BLANKS == 0) {
5635
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5636
0
         "Space required after '#FIXED'\n");
5637
0
  }
5638
0
    }
5639
0
    ret = xmlParseAttValue(ctxt);
5640
0
    ctxt->instate = XML_PARSER_DTD;
5641
0
    if (ret == NULL) {
5642
0
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5643
0
           "Attribute default value declaration error\n");
5644
0
    } else
5645
0
        *value = ret;
5646
0
    return(val);
5647
0
}
5648
5649
/**
5650
 * xmlParseNotationType:
5651
 * @ctxt:  an XML parser context
5652
 *
5653
 * DEPRECATED: Internal function, don't use.
5654
 *
5655
 * parse an Notation attribute type.
5656
 *
5657
 * Note: the leading 'NOTATION' S part has already being parsed...
5658
 *
5659
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5660
 *
5661
 * [ VC: Notation Attributes ]
5662
 * Values of this type must match one of the notation names included
5663
 * in the declaration; all notation names in the declaration must be declared.
5664
 *
5665
 * Returns: the notation attribute tree built while parsing
5666
 */
5667
5668
xmlEnumerationPtr
5669
0
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5670
0
    const xmlChar *name;
5671
0
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5672
5673
0
    if (RAW != '(') {
5674
0
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5675
0
  return(NULL);
5676
0
    }
5677
0
    do {
5678
0
        NEXT;
5679
0
  SKIP_BLANKS;
5680
0
        name = xmlParseName(ctxt);
5681
0
  if (name == NULL) {
5682
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5683
0
         "Name expected in NOTATION declaration\n");
5684
0
            xmlFreeEnumeration(ret);
5685
0
      return(NULL);
5686
0
  }
5687
0
  tmp = ret;
5688
0
  while (tmp != NULL) {
5689
0
      if (xmlStrEqual(name, tmp->name)) {
5690
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5691
0
    "standalone: attribute notation value token %s duplicated\n",
5692
0
         name, NULL);
5693
0
    if (!xmlDictOwns(ctxt->dict, name))
5694
0
        xmlFree((xmlChar *) name);
5695
0
    break;
5696
0
      }
5697
0
      tmp = tmp->next;
5698
0
  }
5699
0
  if (tmp == NULL) {
5700
0
      cur = xmlCreateEnumeration(name);
5701
0
      if (cur == NULL) {
5702
0
                xmlFreeEnumeration(ret);
5703
0
                return(NULL);
5704
0
            }
5705
0
      if (last == NULL) ret = last = cur;
5706
0
      else {
5707
0
    last->next = cur;
5708
0
    last = cur;
5709
0
      }
5710
0
  }
5711
0
  SKIP_BLANKS;
5712
0
    } while (RAW == '|');
5713
0
    if (RAW != ')') {
5714
0
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5715
0
        xmlFreeEnumeration(ret);
5716
0
  return(NULL);
5717
0
    }
5718
0
    NEXT;
5719
0
    return(ret);
5720
0
}
5721
5722
/**
5723
 * xmlParseEnumerationType:
5724
 * @ctxt:  an XML parser context
5725
 *
5726
 * DEPRECATED: Internal function, don't use.
5727
 *
5728
 * parse an Enumeration attribute type.
5729
 *
5730
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5731
 *
5732
 * [ VC: Enumeration ]
5733
 * Values of this type must match one of the Nmtoken tokens in
5734
 * the declaration
5735
 *
5736
 * Returns: the enumeration attribute tree built while parsing
5737
 */
5738
5739
xmlEnumerationPtr
5740
0
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5741
0
    xmlChar *name;
5742
0
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5743
5744
0
    if (RAW != '(') {
5745
0
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5746
0
  return(NULL);
5747
0
    }
5748
0
    do {
5749
0
        NEXT;
5750
0
  SKIP_BLANKS;
5751
0
        name = xmlParseNmtoken(ctxt);
5752
0
  if (name == NULL) {
5753
0
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5754
0
      return(ret);
5755
0
  }
5756
0
  tmp = ret;
5757
0
  while (tmp != NULL) {
5758
0
      if (xmlStrEqual(name, tmp->name)) {
5759
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5760
0
    "standalone: attribute enumeration value token %s duplicated\n",
5761
0
         name, NULL);
5762
0
    if (!xmlDictOwns(ctxt->dict, name))
5763
0
        xmlFree(name);
5764
0
    break;
5765
0
      }
5766
0
      tmp = tmp->next;
5767
0
  }
5768
0
  if (tmp == NULL) {
5769
0
      cur = xmlCreateEnumeration(name);
5770
0
      if (!xmlDictOwns(ctxt->dict, name))
5771
0
    xmlFree(name);
5772
0
      if (cur == NULL) {
5773
0
                xmlFreeEnumeration(ret);
5774
0
                return(NULL);
5775
0
            }
5776
0
      if (last == NULL) ret = last = cur;
5777
0
      else {
5778
0
    last->next = cur;
5779
0
    last = cur;
5780
0
      }
5781
0
  }
5782
0
  SKIP_BLANKS;
5783
0
    } while (RAW == '|');
5784
0
    if (RAW != ')') {
5785
0
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5786
0
  return(ret);
5787
0
    }
5788
0
    NEXT;
5789
0
    return(ret);
5790
0
}
5791
5792
/**
5793
 * xmlParseEnumeratedType:
5794
 * @ctxt:  an XML parser context
5795
 * @tree:  the enumeration tree built while parsing
5796
 *
5797
 * DEPRECATED: Internal function, don't use.
5798
 *
5799
 * parse an Enumerated attribute type.
5800
 *
5801
 * [57] EnumeratedType ::= NotationType | Enumeration
5802
 *
5803
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5804
 *
5805
 *
5806
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5807
 */
5808
5809
int
5810
0
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5811
0
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5812
0
  SKIP(8);
5813
0
  if (SKIP_BLANKS == 0) {
5814
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5815
0
         "Space required after 'NOTATION'\n");
5816
0
      return(0);
5817
0
  }
5818
0
  *tree = xmlParseNotationType(ctxt);
5819
0
  if (*tree == NULL) return(0);
5820
0
  return(XML_ATTRIBUTE_NOTATION);
5821
0
    }
5822
0
    *tree = xmlParseEnumerationType(ctxt);
5823
0
    if (*tree == NULL) return(0);
5824
0
    return(XML_ATTRIBUTE_ENUMERATION);
5825
0
}
5826
5827
/**
5828
 * xmlParseAttributeType:
5829
 * @ctxt:  an XML parser context
5830
 * @tree:  the enumeration tree built while parsing
5831
 *
5832
 * DEPRECATED: Internal function, don't use.
5833
 *
5834
 * parse the Attribute list def for an element
5835
 *
5836
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5837
 *
5838
 * [55] StringType ::= 'CDATA'
5839
 *
5840
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5841
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5842
 *
5843
 * Validity constraints for attribute values syntax are checked in
5844
 * xmlValidateAttributeValue()
5845
 *
5846
 * [ VC: ID ]
5847
 * Values of type ID must match the Name production. A name must not
5848
 * appear more than once in an XML document as a value of this type;
5849
 * i.e., ID values must uniquely identify the elements which bear them.
5850
 *
5851
 * [ VC: One ID per Element Type ]
5852
 * No element type may have more than one ID attribute specified.
5853
 *
5854
 * [ VC: ID Attribute Default ]
5855
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5856
 *
5857
 * [ VC: IDREF ]
5858
 * Values of type IDREF must match the Name production, and values
5859
 * of type IDREFS must match Names; each IDREF Name must match the value
5860
 * of an ID attribute on some element in the XML document; i.e. IDREF
5861
 * values must match the value of some ID attribute.
5862
 *
5863
 * [ VC: Entity Name ]
5864
 * Values of type ENTITY must match the Name production, values
5865
 * of type ENTITIES must match Names; each Entity Name must match the
5866
 * name of an unparsed entity declared in the DTD.
5867
 *
5868
 * [ VC: Name Token ]
5869
 * Values of type NMTOKEN must match the Nmtoken production; values
5870
 * of type NMTOKENS must match Nmtokens.
5871
 *
5872
 * Returns the attribute type
5873
 */
5874
int
5875
0
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5876
0
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5877
0
  SKIP(5);
5878
0
  return(XML_ATTRIBUTE_CDATA);
5879
0
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5880
0
  SKIP(6);
5881
0
  return(XML_ATTRIBUTE_IDREFS);
5882
0
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5883
0
  SKIP(5);
5884
0
  return(XML_ATTRIBUTE_IDREF);
5885
0
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5886
0
        SKIP(2);
5887
0
  return(XML_ATTRIBUTE_ID);
5888
0
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5889
0
  SKIP(6);
5890
0
  return(XML_ATTRIBUTE_ENTITY);
5891
0
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5892
0
  SKIP(8);
5893
0
  return(XML_ATTRIBUTE_ENTITIES);
5894
0
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5895
0
  SKIP(8);
5896
0
  return(XML_ATTRIBUTE_NMTOKENS);
5897
0
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5898
0
  SKIP(7);
5899
0
  return(XML_ATTRIBUTE_NMTOKEN);
5900
0
     }
5901
0
     return(xmlParseEnumeratedType(ctxt, tree));
5902
0
}
5903
5904
/**
5905
 * xmlParseAttributeListDecl:
5906
 * @ctxt:  an XML parser context
5907
 *
5908
 * DEPRECATED: Internal function, don't use.
5909
 *
5910
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5911
 *
5912
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5913
 *
5914
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5915
 *
5916
 */
5917
void
5918
0
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5919
0
    const xmlChar *elemName;
5920
0
    const xmlChar *attrName;
5921
0
    xmlEnumerationPtr tree;
5922
5923
0
    if ((CUR != '<') || (NXT(1) != '!'))
5924
0
        return;
5925
0
    SKIP(2);
5926
5927
0
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5928
0
  int inputid = ctxt->input->id;
5929
5930
0
  SKIP(7);
5931
0
  if (SKIP_BLANKS == 0) {
5932
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5933
0
                     "Space required after '<!ATTLIST'\n");
5934
0
  }
5935
0
        elemName = xmlParseName(ctxt);
5936
0
  if (elemName == NULL) {
5937
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5938
0
         "ATTLIST: no name for Element\n");
5939
0
      return;
5940
0
  }
5941
0
  SKIP_BLANKS;
5942
0
  GROW;
5943
0
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5944
0
      int type;
5945
0
      int def;
5946
0
      xmlChar *defaultValue = NULL;
5947
5948
0
      GROW;
5949
0
            tree = NULL;
5950
0
      attrName = xmlParseName(ctxt);
5951
0
      if (attrName == NULL) {
5952
0
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5953
0
             "ATTLIST: no name for Attribute\n");
5954
0
    break;
5955
0
      }
5956
0
      GROW;
5957
0
      if (SKIP_BLANKS == 0) {
5958
0
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5959
0
            "Space required after the attribute name\n");
5960
0
    break;
5961
0
      }
5962
5963
0
      type = xmlParseAttributeType(ctxt, &tree);
5964
0
      if (type <= 0) {
5965
0
          break;
5966
0
      }
5967
5968
0
      GROW;
5969
0
      if (SKIP_BLANKS == 0) {
5970
0
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5971
0
             "Space required after the attribute type\n");
5972
0
          if (tree != NULL)
5973
0
        xmlFreeEnumeration(tree);
5974
0
    break;
5975
0
      }
5976
5977
0
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
5978
0
      if (def <= 0) {
5979
0
                if (defaultValue != NULL)
5980
0
        xmlFree(defaultValue);
5981
0
          if (tree != NULL)
5982
0
        xmlFreeEnumeration(tree);
5983
0
          break;
5984
0
      }
5985
0
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5986
0
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
5987
5988
0
      GROW;
5989
0
            if (RAW != '>') {
5990
0
    if (SKIP_BLANKS == 0) {
5991
0
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5992
0
      "Space required after the attribute default value\n");
5993
0
        if (defaultValue != NULL)
5994
0
      xmlFree(defaultValue);
5995
0
        if (tree != NULL)
5996
0
      xmlFreeEnumeration(tree);
5997
0
        break;
5998
0
    }
5999
0
      }
6000
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6001
0
    (ctxt->sax->attributeDecl != NULL))
6002
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6003
0
                          type, def, defaultValue, tree);
6004
0
      else if (tree != NULL)
6005
0
    xmlFreeEnumeration(tree);
6006
6007
0
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6008
0
          (def != XML_ATTRIBUTE_IMPLIED) &&
6009
0
    (def != XML_ATTRIBUTE_REQUIRED)) {
6010
0
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6011
0
      }
6012
0
      if (ctxt->sax2) {
6013
0
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6014
0
      }
6015
0
      if (defaultValue != NULL)
6016
0
          xmlFree(defaultValue);
6017
0
      GROW;
6018
0
  }
6019
0
  if (RAW == '>') {
6020
0
      if (inputid != ctxt->input->id) {
6021
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6022
0
                               "Attribute list declaration doesn't start and"
6023
0
                               " stop in the same entity\n");
6024
0
      }
6025
0
      NEXT;
6026
0
  }
6027
0
    }
6028
0
}
6029
6030
/**
6031
 * xmlParseElementMixedContentDecl:
6032
 * @ctxt:  an XML parser context
6033
 * @inputchk:  the input used for the current entity, needed for boundary checks
6034
 *
6035
 * DEPRECATED: Internal function, don't use.
6036
 *
6037
 * parse the declaration for a Mixed Element content
6038
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6039
 *
6040
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6041
 *                '(' S? '#PCDATA' S? ')'
6042
 *
6043
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6044
 *
6045
 * [ VC: No Duplicate Types ]
6046
 * The same name must not appear more than once in a single
6047
 * mixed-content declaration.
6048
 *
6049
 * returns: the list of the xmlElementContentPtr describing the element choices
6050
 */
6051
xmlElementContentPtr
6052
0
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6053
0
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6054
0
    const xmlChar *elem = NULL;
6055
6056
0
    GROW;
6057
0
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6058
0
  SKIP(7);
6059
0
  SKIP_BLANKS;
6060
0
  if (RAW == ')') {
6061
0
      if (ctxt->input->id != inputchk) {
6062
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6063
0
                               "Element content declaration doesn't start and"
6064
0
                               " stop in the same entity\n");
6065
0
      }
6066
0
      NEXT;
6067
0
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6068
0
      if (ret == NULL)
6069
0
          return(NULL);
6070
0
      if (RAW == '*') {
6071
0
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6072
0
    NEXT;
6073
0
      }
6074
0
      return(ret);
6075
0
  }
6076
0
  if ((RAW == '(') || (RAW == '|')) {
6077
0
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6078
0
      if (ret == NULL) return(NULL);
6079
0
  }
6080
0
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6081
0
      NEXT;
6082
0
      if (elem == NULL) {
6083
0
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6084
0
    if (ret == NULL) {
6085
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6086
0
                    return(NULL);
6087
0
                }
6088
0
    ret->c1 = cur;
6089
0
    if (cur != NULL)
6090
0
        cur->parent = ret;
6091
0
    cur = ret;
6092
0
      } else {
6093
0
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6094
0
    if (n == NULL) {
6095
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6096
0
                    return(NULL);
6097
0
                }
6098
0
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6099
0
    if (n->c1 != NULL)
6100
0
        n->c1->parent = n;
6101
0
          cur->c2 = n;
6102
0
    if (n != NULL)
6103
0
        n->parent = cur;
6104
0
    cur = n;
6105
0
      }
6106
0
      SKIP_BLANKS;
6107
0
      elem = xmlParseName(ctxt);
6108
0
      if (elem == NULL) {
6109
0
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6110
0
      "xmlParseElementMixedContentDecl : Name expected\n");
6111
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6112
0
    return(NULL);
6113
0
      }
6114
0
      SKIP_BLANKS;
6115
0
      GROW;
6116
0
  }
6117
0
  if ((RAW == ')') && (NXT(1) == '*')) {
6118
0
      if (elem != NULL) {
6119
0
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6120
0
                                   XML_ELEMENT_CONTENT_ELEMENT);
6121
0
    if (cur->c2 != NULL)
6122
0
        cur->c2->parent = cur;
6123
0
            }
6124
0
            if (ret != NULL)
6125
0
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6126
0
      if (ctxt->input->id != inputchk) {
6127
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6128
0
                               "Element content declaration doesn't start and"
6129
0
                               " stop in the same entity\n");
6130
0
      }
6131
0
      SKIP(2);
6132
0
  } else {
6133
0
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6134
0
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6135
0
      return(NULL);
6136
0
  }
6137
6138
0
    } else {
6139
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6140
0
    }
6141
0
    return(ret);
6142
0
}
6143
6144
/**
6145
 * xmlParseElementChildrenContentDeclPriv:
6146
 * @ctxt:  an XML parser context
6147
 * @inputchk:  the input used for the current entity, needed for boundary checks
6148
 * @depth: the level of recursion
6149
 *
6150
 * parse the declaration for an Element children content
6151
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6152
 *
6153
 *
6154
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6155
 *
6156
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6157
 *
6158
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6159
 *
6160
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6161
 *
6162
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6163
 * TODO Parameter-entity replacement text must be properly nested
6164
 *  with parenthesized groups. That is to say, if either of the
6165
 *  opening or closing parentheses in a choice, seq, or Mixed
6166
 *  construct is contained in the replacement text for a parameter
6167
 *  entity, both must be contained in the same replacement text. For
6168
 *  interoperability, if a parameter-entity reference appears in a
6169
 *  choice, seq, or Mixed construct, its replacement text should not
6170
 *  be empty, and neither the first nor last non-blank character of
6171
 *  the replacement text should be a connector (| or ,).
6172
 *
6173
 * Returns the tree of xmlElementContentPtr describing the element
6174
 *          hierarchy.
6175
 */
6176
static xmlElementContentPtr
6177
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6178
0
                                       int depth) {
6179
0
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6180
0
    const xmlChar *elem;
6181
0
    xmlChar type = 0;
6182
6183
0
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6184
0
        (depth >  2048)) {
6185
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6186
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6187
0
                          depth);
6188
0
  return(NULL);
6189
0
    }
6190
0
    SKIP_BLANKS;
6191
0
    GROW;
6192
0
    if (RAW == '(') {
6193
0
  int inputid = ctxt->input->id;
6194
6195
        /* Recurse on first child */
6196
0
  NEXT;
6197
0
  SKIP_BLANKS;
6198
0
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6199
0
                                                           depth + 1);
6200
0
        if (cur == NULL)
6201
0
            return(NULL);
6202
0
  SKIP_BLANKS;
6203
0
  GROW;
6204
0
    } else {
6205
0
  elem = xmlParseName(ctxt);
6206
0
  if (elem == NULL) {
6207
0
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6208
0
      return(NULL);
6209
0
  }
6210
0
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6211
0
  if (cur == NULL) {
6212
0
      xmlErrMemory(ctxt, NULL);
6213
0
      return(NULL);
6214
0
  }
6215
0
  GROW;
6216
0
  if (RAW == '?') {
6217
0
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6218
0
      NEXT;
6219
0
  } else if (RAW == '*') {
6220
0
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6221
0
      NEXT;
6222
0
  } else if (RAW == '+') {
6223
0
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6224
0
      NEXT;
6225
0
  } else {
6226
0
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6227
0
  }
6228
0
  GROW;
6229
0
    }
6230
0
    SKIP_BLANKS;
6231
0
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6232
        /*
6233
   * Each loop we parse one separator and one element.
6234
   */
6235
0
        if (RAW == ',') {
6236
0
      if (type == 0) type = CUR;
6237
6238
      /*
6239
       * Detect "Name | Name , Name" error
6240
       */
6241
0
      else if (type != CUR) {
6242
0
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6243
0
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6244
0
                      type);
6245
0
    if ((last != NULL) && (last != ret))
6246
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6247
0
    if (ret != NULL)
6248
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6249
0
    return(NULL);
6250
0
      }
6251
0
      NEXT;
6252
6253
0
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6254
0
      if (op == NULL) {
6255
0
    if ((last != NULL) && (last != ret))
6256
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6257
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6258
0
    return(NULL);
6259
0
      }
6260
0
      if (last == NULL) {
6261
0
    op->c1 = ret;
6262
0
    if (ret != NULL)
6263
0
        ret->parent = op;
6264
0
    ret = cur = op;
6265
0
      } else {
6266
0
          cur->c2 = op;
6267
0
    if (op != NULL)
6268
0
        op->parent = cur;
6269
0
    op->c1 = last;
6270
0
    if (last != NULL)
6271
0
        last->parent = op;
6272
0
    cur =op;
6273
0
    last = NULL;
6274
0
      }
6275
0
  } else if (RAW == '|') {
6276
0
      if (type == 0) type = CUR;
6277
6278
      /*
6279
       * Detect "Name , Name | Name" error
6280
       */
6281
0
      else if (type != CUR) {
6282
0
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6283
0
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6284
0
          type);
6285
0
    if ((last != NULL) && (last != ret))
6286
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6287
0
    if (ret != NULL)
6288
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6289
0
    return(NULL);
6290
0
      }
6291
0
      NEXT;
6292
6293
0
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6294
0
      if (op == NULL) {
6295
0
    if ((last != NULL) && (last != ret))
6296
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6297
0
    if (ret != NULL)
6298
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6299
0
    return(NULL);
6300
0
      }
6301
0
      if (last == NULL) {
6302
0
    op->c1 = ret;
6303
0
    if (ret != NULL)
6304
0
        ret->parent = op;
6305
0
    ret = cur = op;
6306
0
      } else {
6307
0
          cur->c2 = op;
6308
0
    if (op != NULL)
6309
0
        op->parent = cur;
6310
0
    op->c1 = last;
6311
0
    if (last != NULL)
6312
0
        last->parent = op;
6313
0
    cur =op;
6314
0
    last = NULL;
6315
0
      }
6316
0
  } else {
6317
0
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6318
0
      if ((last != NULL) && (last != ret))
6319
0
          xmlFreeDocElementContent(ctxt->myDoc, last);
6320
0
      if (ret != NULL)
6321
0
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6322
0
      return(NULL);
6323
0
  }
6324
0
  GROW;
6325
0
  SKIP_BLANKS;
6326
0
  GROW;
6327
0
  if (RAW == '(') {
6328
0
      int inputid = ctxt->input->id;
6329
      /* Recurse on second child */
6330
0
      NEXT;
6331
0
      SKIP_BLANKS;
6332
0
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6333
0
                                                          depth + 1);
6334
0
            if (last == NULL) {
6335
0
    if (ret != NULL)
6336
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6337
0
    return(NULL);
6338
0
            }
6339
0
      SKIP_BLANKS;
6340
0
  } else {
6341
0
      elem = xmlParseName(ctxt);
6342
0
      if (elem == NULL) {
6343
0
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6344
0
    if (ret != NULL)
6345
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6346
0
    return(NULL);
6347
0
      }
6348
0
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6349
0
      if (last == NULL) {
6350
0
    if (ret != NULL)
6351
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6352
0
    return(NULL);
6353
0
      }
6354
0
      if (RAW == '?') {
6355
0
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6356
0
    NEXT;
6357
0
      } else if (RAW == '*') {
6358
0
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6359
0
    NEXT;
6360
0
      } else if (RAW == '+') {
6361
0
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6362
0
    NEXT;
6363
0
      } else {
6364
0
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6365
0
      }
6366
0
  }
6367
0
  SKIP_BLANKS;
6368
0
  GROW;
6369
0
    }
6370
0
    if ((cur != NULL) && (last != NULL)) {
6371
0
        cur->c2 = last;
6372
0
  if (last != NULL)
6373
0
      last->parent = cur;
6374
0
    }
6375
0
    if (ctxt->input->id != inputchk) {
6376
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6377
0
                       "Element content declaration doesn't start and stop in"
6378
0
                       " the same entity\n");
6379
0
    }
6380
0
    NEXT;
6381
0
    if (RAW == '?') {
6382
0
  if (ret != NULL) {
6383
0
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6384
0
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6385
0
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6386
0
      else
6387
0
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6388
0
  }
6389
0
  NEXT;
6390
0
    } else if (RAW == '*') {
6391
0
  if (ret != NULL) {
6392
0
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6393
0
      cur = ret;
6394
      /*
6395
       * Some normalization:
6396
       * (a | b* | c?)* == (a | b | c)*
6397
       */
6398
0
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6399
0
    if ((cur->c1 != NULL) &&
6400
0
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6401
0
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6402
0
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6403
0
    if ((cur->c2 != NULL) &&
6404
0
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6405
0
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6406
0
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6407
0
    cur = cur->c2;
6408
0
      }
6409
0
  }
6410
0
  NEXT;
6411
0
    } else if (RAW == '+') {
6412
0
  if (ret != NULL) {
6413
0
      int found = 0;
6414
6415
0
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6416
0
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6417
0
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6418
0
      else
6419
0
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6420
      /*
6421
       * Some normalization:
6422
       * (a | b*)+ == (a | b)*
6423
       * (a | b?)+ == (a | b)*
6424
       */
6425
0
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6426
0
    if ((cur->c1 != NULL) &&
6427
0
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6428
0
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6429
0
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6430
0
        found = 1;
6431
0
    }
6432
0
    if ((cur->c2 != NULL) &&
6433
0
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6434
0
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6435
0
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6436
0
        found = 1;
6437
0
    }
6438
0
    cur = cur->c2;
6439
0
      }
6440
0
      if (found)
6441
0
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6442
0
  }
6443
0
  NEXT;
6444
0
    }
6445
0
    return(ret);
6446
0
}
6447
6448
/**
6449
 * xmlParseElementChildrenContentDecl:
6450
 * @ctxt:  an XML parser context
6451
 * @inputchk:  the input used for the current entity, needed for boundary checks
6452
 *
6453
 * DEPRECATED: Internal function, don't use.
6454
 *
6455
 * parse the declaration for a Mixed Element content
6456
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6457
 *
6458
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6459
 *
6460
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6461
 *
6462
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6463
 *
6464
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6465
 *
6466
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6467
 * TODO Parameter-entity replacement text must be properly nested
6468
 *  with parenthesized groups. That is to say, if either of the
6469
 *  opening or closing parentheses in a choice, seq, or Mixed
6470
 *  construct is contained in the replacement text for a parameter
6471
 *  entity, both must be contained in the same replacement text. For
6472
 *  interoperability, if a parameter-entity reference appears in a
6473
 *  choice, seq, or Mixed construct, its replacement text should not
6474
 *  be empty, and neither the first nor last non-blank character of
6475
 *  the replacement text should be a connector (| or ,).
6476
 *
6477
 * Returns the tree of xmlElementContentPtr describing the element
6478
 *          hierarchy.
6479
 */
6480
xmlElementContentPtr
6481
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6482
    /* stub left for API/ABI compat */
6483
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6484
0
}
6485
6486
/**
6487
 * xmlParseElementContentDecl:
6488
 * @ctxt:  an XML parser context
6489
 * @name:  the name of the element being defined.
6490
 * @result:  the Element Content pointer will be stored here if any
6491
 *
6492
 * DEPRECATED: Internal function, don't use.
6493
 *
6494
 * parse the declaration for an Element content either Mixed or Children,
6495
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6496
 *
6497
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6498
 *
6499
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6500
 */
6501
6502
int
6503
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6504
0
                           xmlElementContentPtr *result) {
6505
6506
0
    xmlElementContentPtr tree = NULL;
6507
0
    int inputid = ctxt->input->id;
6508
0
    int res;
6509
6510
0
    *result = NULL;
6511
6512
0
    if (RAW != '(') {
6513
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6514
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6515
0
  return(-1);
6516
0
    }
6517
0
    NEXT;
6518
0
    GROW;
6519
0
    if (ctxt->instate == XML_PARSER_EOF)
6520
0
        return(-1);
6521
0
    SKIP_BLANKS;
6522
0
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6523
0
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6524
0
  res = XML_ELEMENT_TYPE_MIXED;
6525
0
    } else {
6526
0
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6527
0
  res = XML_ELEMENT_TYPE_ELEMENT;
6528
0
    }
6529
0
    SKIP_BLANKS;
6530
0
    *result = tree;
6531
0
    return(res);
6532
0
}
6533
6534
/**
6535
 * xmlParseElementDecl:
6536
 * @ctxt:  an XML parser context
6537
 *
6538
 * DEPRECATED: Internal function, don't use.
6539
 *
6540
 * Parse an element declaration. Always consumes '<!'.
6541
 *
6542
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6543
 *
6544
 * [ VC: Unique Element Type Declaration ]
6545
 * No element type may be declared more than once
6546
 *
6547
 * Returns the type of the element, or -1 in case of error
6548
 */
6549
int
6550
0
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6551
0
    const xmlChar *name;
6552
0
    int ret = -1;
6553
0
    xmlElementContentPtr content  = NULL;
6554
6555
0
    if ((CUR != '<') || (NXT(1) != '!'))
6556
0
        return(ret);
6557
0
    SKIP(2);
6558
6559
    /* GROW; done in the caller */
6560
0
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6561
0
  int inputid = ctxt->input->id;
6562
6563
0
  SKIP(7);
6564
0
  if (SKIP_BLANKS == 0) {
6565
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6566
0
               "Space required after 'ELEMENT'\n");
6567
0
      return(-1);
6568
0
  }
6569
0
        name = xmlParseName(ctxt);
6570
0
  if (name == NULL) {
6571
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6572
0
         "xmlParseElementDecl: no name for Element\n");
6573
0
      return(-1);
6574
0
  }
6575
0
  if (SKIP_BLANKS == 0) {
6576
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6577
0
         "Space required after the element name\n");
6578
0
  }
6579
0
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6580
0
      SKIP(5);
6581
      /*
6582
       * Element must always be empty.
6583
       */
6584
0
      ret = XML_ELEMENT_TYPE_EMPTY;
6585
0
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6586
0
             (NXT(2) == 'Y')) {
6587
0
      SKIP(3);
6588
      /*
6589
       * Element is a generic container.
6590
       */
6591
0
      ret = XML_ELEMENT_TYPE_ANY;
6592
0
  } else if (RAW == '(') {
6593
0
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6594
0
  } else {
6595
      /*
6596
       * [ WFC: PEs in Internal Subset ] error handling.
6597
       */
6598
0
      if ((RAW == '%') && (ctxt->external == 0) &&
6599
0
          (ctxt->inputNr == 1)) {
6600
0
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6601
0
    "PEReference: forbidden within markup decl in internal subset\n");
6602
0
      } else {
6603
0
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6604
0
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6605
0
            }
6606
0
      return(-1);
6607
0
  }
6608
6609
0
  SKIP_BLANKS;
6610
6611
0
  if (RAW != '>') {
6612
0
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6613
0
      if (content != NULL) {
6614
0
    xmlFreeDocElementContent(ctxt->myDoc, content);
6615
0
      }
6616
0
  } else {
6617
0
      if (inputid != ctxt->input->id) {
6618
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6619
0
                               "Element declaration doesn't start and stop in"
6620
0
                               " the same entity\n");
6621
0
      }
6622
6623
0
      NEXT;
6624
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6625
0
    (ctxt->sax->elementDecl != NULL)) {
6626
0
    if (content != NULL)
6627
0
        content->parent = NULL;
6628
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6629
0
                           content);
6630
0
    if ((content != NULL) && (content->parent == NULL)) {
6631
        /*
6632
         * this is a trick: if xmlAddElementDecl is called,
6633
         * instead of copying the full tree it is plugged directly
6634
         * if called from the parser. Avoid duplicating the
6635
         * interfaces or change the API/ABI
6636
         */
6637
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6638
0
    }
6639
0
      } else if (content != NULL) {
6640
0
    xmlFreeDocElementContent(ctxt->myDoc, content);
6641
0
      }
6642
0
  }
6643
0
    }
6644
0
    return(ret);
6645
0
}
6646
6647
/**
6648
 * xmlParseConditionalSections
6649
 * @ctxt:  an XML parser context
6650
 *
6651
 * Parse a conditional section. Always consumes '<!['.
6652
 *
6653
 * [61] conditionalSect ::= includeSect | ignoreSect
6654
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6655
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6656
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6657
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6658
 */
6659
6660
static void
6661
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6662
0
    int *inputIds = NULL;
6663
0
    size_t inputIdsSize = 0;
6664
0
    size_t depth = 0;
6665
6666
0
    while (ctxt->instate != XML_PARSER_EOF) {
6667
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6668
0
            int id = ctxt->input->id;
6669
6670
0
            SKIP(3);
6671
0
            SKIP_BLANKS;
6672
6673
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6674
0
                SKIP(7);
6675
0
                SKIP_BLANKS;
6676
0
                if (RAW != '[') {
6677
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6678
0
                    xmlHaltParser(ctxt);
6679
0
                    goto error;
6680
0
                }
6681
0
                if (ctxt->input->id != id) {
6682
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6683
0
                                   "All markup of the conditional section is"
6684
0
                                   " not in the same entity\n");
6685
0
                }
6686
0
                NEXT;
6687
6688
0
                if (inputIdsSize <= depth) {
6689
0
                    int *tmp;
6690
6691
0
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6692
0
                    tmp = (int *) xmlRealloc(inputIds,
6693
0
                            inputIdsSize * sizeof(int));
6694
0
                    if (tmp == NULL) {
6695
0
                        xmlErrMemory(ctxt, NULL);
6696
0
                        goto error;
6697
0
                    }
6698
0
                    inputIds = tmp;
6699
0
                }
6700
0
                inputIds[depth] = id;
6701
0
                depth++;
6702
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6703
0
                size_t ignoreDepth = 0;
6704
6705
0
                SKIP(6);
6706
0
                SKIP_BLANKS;
6707
0
                if (RAW != '[') {
6708
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6709
0
                    xmlHaltParser(ctxt);
6710
0
                    goto error;
6711
0
                }
6712
0
                if (ctxt->input->id != id) {
6713
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6714
0
                                   "All markup of the conditional section is"
6715
0
                                   " not in the same entity\n");
6716
0
                }
6717
0
                NEXT;
6718
6719
0
                while (RAW != 0) {
6720
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6721
0
                        SKIP(3);
6722
0
                        ignoreDepth++;
6723
                        /* Check for integer overflow */
6724
0
                        if (ignoreDepth == 0) {
6725
0
                            xmlErrMemory(ctxt, NULL);
6726
0
                            goto error;
6727
0
                        }
6728
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6729
0
                               (NXT(2) == '>')) {
6730
0
                        if (ignoreDepth == 0)
6731
0
                            break;
6732
0
                        SKIP(3);
6733
0
                        ignoreDepth--;
6734
0
                    } else {
6735
0
                        NEXT;
6736
0
                    }
6737
0
                }
6738
6739
0
    if (RAW == 0) {
6740
0
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6741
0
                    goto error;
6742
0
    }
6743
0
                if (ctxt->input->id != id) {
6744
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6745
0
                                   "All markup of the conditional section is"
6746
0
                                   " not in the same entity\n");
6747
0
                }
6748
0
                SKIP(3);
6749
0
            } else {
6750
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6751
0
                xmlHaltParser(ctxt);
6752
0
                goto error;
6753
0
            }
6754
0
        } else if ((depth > 0) &&
6755
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6756
0
            depth--;
6757
0
            if (ctxt->input->id != inputIds[depth]) {
6758
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6759
0
                               "All markup of the conditional section is not"
6760
0
                               " in the same entity\n");
6761
0
            }
6762
0
            SKIP(3);
6763
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6764
0
            xmlParseMarkupDecl(ctxt);
6765
0
        } else {
6766
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6767
0
            xmlHaltParser(ctxt);
6768
0
            goto error;
6769
0
        }
6770
6771
0
        if (depth == 0)
6772
0
            break;
6773
6774
0
        SKIP_BLANKS;
6775
0
        SHRINK;
6776
0
        GROW;
6777
0
    }
6778
6779
0
error:
6780
0
    xmlFree(inputIds);
6781
0
}
6782
6783
/**
6784
 * xmlParseMarkupDecl:
6785
 * @ctxt:  an XML parser context
6786
 *
6787
 * DEPRECATED: Internal function, don't use.
6788
 *
6789
 * Parse markup declarations. Always consumes '<!' or '<?'.
6790
 *
6791
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6792
 *                     NotationDecl | PI | Comment
6793
 *
6794
 * [ VC: Proper Declaration/PE Nesting ]
6795
 * Parameter-entity replacement text must be properly nested with
6796
 * markup declarations. That is to say, if either the first character
6797
 * or the last character of a markup declaration (markupdecl above) is
6798
 * contained in the replacement text for a parameter-entity reference,
6799
 * both must be contained in the same replacement text.
6800
 *
6801
 * [ WFC: PEs in Internal Subset ]
6802
 * In the internal DTD subset, parameter-entity references can occur
6803
 * only where markup declarations can occur, not within markup declarations.
6804
 * (This does not apply to references that occur in external parameter
6805
 * entities or to the external subset.)
6806
 */
6807
void
6808
0
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6809
0
    GROW;
6810
0
    if (CUR == '<') {
6811
0
        if (NXT(1) == '!') {
6812
0
      switch (NXT(2)) {
6813
0
          case 'E':
6814
0
        if (NXT(3) == 'L')
6815
0
      xmlParseElementDecl(ctxt);
6816
0
        else if (NXT(3) == 'N')
6817
0
      xmlParseEntityDecl(ctxt);
6818
0
                    else
6819
0
                        SKIP(2);
6820
0
        break;
6821
0
          case 'A':
6822
0
        xmlParseAttributeListDecl(ctxt);
6823
0
        break;
6824
0
          case 'N':
6825
0
        xmlParseNotationDecl(ctxt);
6826
0
        break;
6827
0
          case '-':
6828
0
        xmlParseComment(ctxt);
6829
0
        break;
6830
0
    default:
6831
        /* there is an error but it will be detected later */
6832
0
                    SKIP(2);
6833
0
        break;
6834
0
      }
6835
0
  } else if (NXT(1) == '?') {
6836
0
      xmlParsePI(ctxt);
6837
0
  }
6838
0
    }
6839
6840
    /*
6841
     * detect requirement to exit there and act accordingly
6842
     * and avoid having instate overridden later on
6843
     */
6844
0
    if (ctxt->instate == XML_PARSER_EOF)
6845
0
        return;
6846
6847
0
    ctxt->instate = XML_PARSER_DTD;
6848
0
}
6849
6850
/**
6851
 * xmlParseTextDecl:
6852
 * @ctxt:  an XML parser context
6853
 *
6854
 * DEPRECATED: Internal function, don't use.
6855
 *
6856
 * parse an XML declaration header for external entities
6857
 *
6858
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6859
 */
6860
6861
void
6862
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6863
0
    xmlChar *version;
6864
0
    const xmlChar *encoding;
6865
0
    int oldstate;
6866
6867
    /*
6868
     * We know that '<?xml' is here.
6869
     */
6870
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6871
0
  SKIP(5);
6872
0
    } else {
6873
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6874
0
  return;
6875
0
    }
6876
6877
    /* Avoid expansion of parameter entities when skipping blanks. */
6878
0
    oldstate = ctxt->instate;
6879
0
    ctxt->instate = XML_PARSER_START;
6880
6881
0
    if (SKIP_BLANKS == 0) {
6882
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6883
0
           "Space needed after '<?xml'\n");
6884
0
    }
6885
6886
    /*
6887
     * We may have the VersionInfo here.
6888
     */
6889
0
    version = xmlParseVersionInfo(ctxt);
6890
0
    if (version == NULL)
6891
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6892
0
    else {
6893
0
  if (SKIP_BLANKS == 0) {
6894
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6895
0
               "Space needed here\n");
6896
0
  }
6897
0
    }
6898
0
    ctxt->input->version = version;
6899
6900
    /*
6901
     * We must have the encoding declaration
6902
     */
6903
0
    encoding = xmlParseEncodingDecl(ctxt);
6904
0
    if (ctxt->instate == XML_PARSER_EOF)
6905
0
        return;
6906
0
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6907
  /*
6908
   * The XML REC instructs us to stop parsing right here
6909
   */
6910
0
        ctxt->instate = oldstate;
6911
0
        return;
6912
0
    }
6913
0
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6914
0
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6915
0
           "Missing encoding in text declaration\n");
6916
0
    }
6917
6918
0
    SKIP_BLANKS;
6919
0
    if ((RAW == '?') && (NXT(1) == '>')) {
6920
0
        SKIP(2);
6921
0
    } else if (RAW == '>') {
6922
        /* Deprecated old WD ... */
6923
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6924
0
  NEXT;
6925
0
    } else {
6926
0
        int c;
6927
6928
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6929
0
        while ((c = CUR) != 0) {
6930
0
            NEXT;
6931
0
            if (c == '>')
6932
0
                break;
6933
0
        }
6934
0
    }
6935
6936
0
    ctxt->instate = oldstate;
6937
0
}
6938
6939
/**
6940
 * xmlParseExternalSubset:
6941
 * @ctxt:  an XML parser context
6942
 * @ExternalID: the external identifier
6943
 * @SystemID: the system identifier (or URL)
6944
 *
6945
 * parse Markup declarations from an external subset
6946
 *
6947
 * [30] extSubset ::= textDecl? extSubsetDecl
6948
 *
6949
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6950
 */
6951
void
6952
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6953
0
                       const xmlChar *SystemID) {
6954
0
    xmlDetectSAX2(ctxt);
6955
0
    GROW;
6956
6957
0
    if ((ctxt->encoding == NULL) &&
6958
0
        (ctxt->input->end - ctxt->input->cur >= 4)) {
6959
0
        xmlChar start[4];
6960
0
  xmlCharEncoding enc;
6961
6962
0
  start[0] = RAW;
6963
0
  start[1] = NXT(1);
6964
0
  start[2] = NXT(2);
6965
0
  start[3] = NXT(3);
6966
0
  enc = xmlDetectCharEncoding(start, 4);
6967
0
  if (enc != XML_CHAR_ENCODING_NONE)
6968
0
      xmlSwitchEncoding(ctxt, enc);
6969
0
    }
6970
6971
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6972
0
  xmlParseTextDecl(ctxt);
6973
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6974
      /*
6975
       * The XML REC instructs us to stop parsing right here
6976
       */
6977
0
      xmlHaltParser(ctxt);
6978
0
      return;
6979
0
  }
6980
0
    }
6981
0
    if (ctxt->myDoc == NULL) {
6982
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6983
0
  if (ctxt->myDoc == NULL) {
6984
0
      xmlErrMemory(ctxt, "New Doc failed");
6985
0
      return;
6986
0
  }
6987
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
6988
0
    }
6989
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6990
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6991
6992
0
    ctxt->instate = XML_PARSER_DTD;
6993
0
    ctxt->external = 1;
6994
0
    SKIP_BLANKS;
6995
0
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
6996
0
  GROW;
6997
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6998
0
            xmlParseConditionalSections(ctxt);
6999
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7000
0
            xmlParseMarkupDecl(ctxt);
7001
0
        } else {
7002
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7003
0
            xmlHaltParser(ctxt);
7004
0
            return;
7005
0
        }
7006
0
        SKIP_BLANKS;
7007
0
        SHRINK;
7008
0
    }
7009
7010
0
    if (RAW != 0) {
7011
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7012
0
    }
7013
7014
0
}
7015
7016
/**
7017
 * xmlParseReference:
7018
 * @ctxt:  an XML parser context
7019
 *
7020
 * DEPRECATED: Internal function, don't use.
7021
 *
7022
 * parse and handle entity references in content, depending on the SAX
7023
 * interface, this may end-up in a call to character() if this is a
7024
 * CharRef, a predefined entity, if there is no reference() callback.
7025
 * or if the parser was asked to switch to that mode.
7026
 *
7027
 * Always consumes '&'.
7028
 *
7029
 * [67] Reference ::= EntityRef | CharRef
7030
 */
7031
void
7032
19.1k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7033
19.1k
    xmlEntityPtr ent;
7034
19.1k
    xmlChar *val;
7035
19.1k
    int was_checked;
7036
19.1k
    xmlNodePtr list = NULL;
7037
19.1k
    xmlParserErrors ret = XML_ERR_OK;
7038
7039
7040
19.1k
    if (RAW != '&')
7041
0
        return;
7042
7043
    /*
7044
     * Simple case of a CharRef
7045
     */
7046
19.1k
    if (NXT(1) == '#') {
7047
440
  int i = 0;
7048
440
  xmlChar out[16];
7049
440
  int hex = NXT(2);
7050
440
  int value = xmlParseCharRef(ctxt);
7051
7052
440
  if (value == 0)
7053
428
      return;
7054
12
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7055
      /*
7056
       * So we are using non-UTF-8 buffers
7057
       * Check that the char fit on 8bits, if not
7058
       * generate a CharRef.
7059
       */
7060
12
      if (value <= 0xFF) {
7061
5
    out[0] = value;
7062
5
    out[1] = 0;
7063
5
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7064
5
        (!ctxt->disableSAX))
7065
5
        ctxt->sax->characters(ctxt->userData, out, 1);
7066
7
      } else {
7067
7
    if ((hex == 'x') || (hex == 'X'))
7068
0
        snprintf((char *)out, sizeof(out), "#x%X", value);
7069
7
    else
7070
7
        snprintf((char *)out, sizeof(out), "#%d", value);
7071
7
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7072
7
        (!ctxt->disableSAX))
7073
7
        ctxt->sax->reference(ctxt->userData, out);
7074
7
      }
7075
12
  } else {
7076
      /*
7077
       * Just encode the value in UTF-8
7078
       */
7079
0
      COPY_BUF(0 ,out, i, value);
7080
0
      out[i] = 0;
7081
0
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7082
0
    (!ctxt->disableSAX))
7083
0
    ctxt->sax->characters(ctxt->userData, out, i);
7084
0
  }
7085
12
  return;
7086
440
    }
7087
7088
    /*
7089
     * We are seeing an entity reference
7090
     */
7091
18.7k
    ent = xmlParseEntityRef(ctxt);
7092
18.7k
    if (ent == NULL) return;
7093
0
    if (!ctxt->wellFormed)
7094
0
  return;
7095
0
    was_checked = ent->flags & XML_ENT_PARSED;
7096
7097
    /* special case of predefined entities */
7098
0
    if ((ent->name == NULL) ||
7099
0
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7100
0
  val = ent->content;
7101
0
  if (val == NULL) return;
7102
  /*
7103
   * inline the entity.
7104
   */
7105
0
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7106
0
      (!ctxt->disableSAX))
7107
0
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7108
0
  return;
7109
0
    }
7110
7111
    /*
7112
     * The first reference to the entity trigger a parsing phase
7113
     * where the ent->children is filled with the result from
7114
     * the parsing.
7115
     * Note: external parsed entities will not be loaded, it is not
7116
     * required for a non-validating parser, unless the parsing option
7117
     * of validating, or substituting entities were given. Doing so is
7118
     * far more secure as the parser will only process data coming from
7119
     * the document entity by default.
7120
     */
7121
0
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7122
0
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7123
0
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7124
0
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7125
7126
  /*
7127
   * This is a bit hackish but this seems the best
7128
   * way to make sure both SAX and DOM entity support
7129
   * behaves okay.
7130
   */
7131
0
  void *user_data;
7132
0
  if (ctxt->userData == ctxt)
7133
0
      user_data = NULL;
7134
0
  else
7135
0
      user_data = ctxt->userData;
7136
7137
        /* Avoid overflow as much as possible */
7138
0
        ctxt->sizeentcopy = 0;
7139
7140
0
        if (ent->flags & XML_ENT_EXPANDING) {
7141
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7142
0
            xmlHaltParser(ctxt);
7143
0
            return;
7144
0
        }
7145
7146
0
        ent->flags |= XML_ENT_EXPANDING;
7147
7148
  /*
7149
   * Check that this entity is well formed
7150
   * 4.3.2: An internal general parsed entity is well-formed
7151
   * if its replacement text matches the production labeled
7152
   * content.
7153
   */
7154
0
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7155
0
      ctxt->depth++;
7156
0
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7157
0
                                                user_data, &list);
7158
0
      ctxt->depth--;
7159
7160
0
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7161
0
      ctxt->depth++;
7162
0
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7163
0
                                     user_data, ctxt->depth, ent->URI,
7164
0
             ent->ExternalID, &list);
7165
0
      ctxt->depth--;
7166
0
  } else {
7167
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7168
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7169
0
       "invalid entity type found\n", NULL);
7170
0
  }
7171
7172
0
        ent->flags &= ~XML_ENT_EXPANDING;
7173
0
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7174
0
        ent->expandedSize = ctxt->sizeentcopy;
7175
0
  if (ret == XML_ERR_ENTITY_LOOP) {
7176
0
            xmlHaltParser(ctxt);
7177
0
      xmlFreeNodeList(list);
7178
0
      return;
7179
0
  }
7180
0
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7181
0
      xmlFreeNodeList(list);
7182
0
      return;
7183
0
  }
7184
7185
0
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7186
0
            ent->children = list;
7187
            /*
7188
             * Prune it directly in the generated document
7189
             * except for single text nodes.
7190
             */
7191
0
            if ((ctxt->replaceEntities == 0) ||
7192
0
                (ctxt->parseMode == XML_PARSE_READER) ||
7193
0
                ((list->type == XML_TEXT_NODE) &&
7194
0
                 (list->next == NULL))) {
7195
0
                ent->owner = 1;
7196
0
                while (list != NULL) {
7197
0
                    list->parent = (xmlNodePtr) ent;
7198
0
                    if (list->doc != ent->doc)
7199
0
                        xmlSetTreeDoc(list, ent->doc);
7200
0
                    if (list->next == NULL)
7201
0
                        ent->last = list;
7202
0
                    list = list->next;
7203
0
                }
7204
0
                list = NULL;
7205
0
            } else {
7206
0
                ent->owner = 0;
7207
0
                while (list != NULL) {
7208
0
                    list->parent = (xmlNodePtr) ctxt->node;
7209
0
                    list->doc = ctxt->myDoc;
7210
0
                    if (list->next == NULL)
7211
0
                        ent->last = list;
7212
0
                    list = list->next;
7213
0
                }
7214
0
                list = ent->children;
7215
#ifdef LIBXML_LEGACY_ENABLED
7216
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7217
                    xmlAddEntityReference(ent, list, NULL);
7218
#endif /* LIBXML_LEGACY_ENABLED */
7219
0
            }
7220
0
  } else if ((ret != XML_ERR_OK) &&
7221
0
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7222
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7223
0
         "Entity '%s' failed to parse\n", ent->name);
7224
0
            if (ent->content != NULL)
7225
0
                ent->content[0] = 0;
7226
0
  } else if (list != NULL) {
7227
0
      xmlFreeNodeList(list);
7228
0
      list = NULL;
7229
0
  }
7230
7231
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7232
0
        was_checked = 0;
7233
0
    }
7234
7235
    /*
7236
     * Now that the entity content has been gathered
7237
     * provide it to the application, this can take different forms based
7238
     * on the parsing modes.
7239
     */
7240
0
    if (ent->children == NULL) {
7241
  /*
7242
   * Probably running in SAX mode and the callbacks don't
7243
   * build the entity content. So unless we already went
7244
   * though parsing for first checking go though the entity
7245
   * content to generate callbacks associated to the entity
7246
   */
7247
0
  if (was_checked != 0) {
7248
0
      void *user_data;
7249
      /*
7250
       * This is a bit hackish but this seems the best
7251
       * way to make sure both SAX and DOM entity support
7252
       * behaves okay.
7253
       */
7254
0
      if (ctxt->userData == ctxt)
7255
0
    user_data = NULL;
7256
0
      else
7257
0
    user_data = ctxt->userData;
7258
7259
0
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7260
0
    ctxt->depth++;
7261
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7262
0
           ent->content, user_data, NULL);
7263
0
    ctxt->depth--;
7264
0
      } else if (ent->etype ==
7265
0
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7266
0
          unsigned long oldsizeentities = ctxt->sizeentities;
7267
7268
0
    ctxt->depth++;
7269
0
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7270
0
         ctxt->sax, user_data, ctxt->depth,
7271
0
         ent->URI, ent->ExternalID, NULL);
7272
0
    ctxt->depth--;
7273
7274
                /* Undo the change to sizeentities */
7275
0
                ctxt->sizeentities = oldsizeentities;
7276
0
      } else {
7277
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7278
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7279
0
           "invalid entity type found\n", NULL);
7280
0
      }
7281
0
      if (ret == XML_ERR_ENTITY_LOOP) {
7282
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
0
    return;
7284
0
      }
7285
0
            if (xmlParserEntityCheck(ctxt, 0))
7286
0
                return;
7287
0
  }
7288
0
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7289
0
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7290
      /*
7291
       * Entity reference callback comes second, it's somewhat
7292
       * superfluous but a compatibility to historical behaviour
7293
       */
7294
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7295
0
  }
7296
0
  return;
7297
0
    }
7298
7299
    /*
7300
     * We also check for amplification if entities aren't substituted.
7301
     * They might be expanded later.
7302
     */
7303
0
    if ((was_checked != 0) &&
7304
0
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7305
0
        return;
7306
7307
    /*
7308
     * If we didn't get any children for the entity being built
7309
     */
7310
0
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7311
0
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7312
  /*
7313
   * Create a node.
7314
   */
7315
0
  ctxt->sax->reference(ctxt->userData, ent->name);
7316
0
  return;
7317
0
    }
7318
7319
0
    if (ctxt->replaceEntities)  {
7320
  /*
7321
   * There is a problem on the handling of _private for entities
7322
   * (bug 155816): Should we copy the content of the field from
7323
   * the entity (possibly overwriting some value set by the user
7324
   * when a copy is created), should we leave it alone, or should
7325
   * we try to take care of different situations?  The problem
7326
   * is exacerbated by the usage of this field by the xmlReader.
7327
   * To fix this bug, we look at _private on the created node
7328
   * and, if it's NULL, we copy in whatever was in the entity.
7329
   * If it's not NULL we leave it alone.  This is somewhat of a
7330
   * hack - maybe we should have further tests to determine
7331
   * what to do.
7332
   */
7333
0
  if (ctxt->node != NULL) {
7334
      /*
7335
       * Seems we are generating the DOM content, do
7336
       * a simple tree copy for all references except the first
7337
       * In the first occurrence list contains the replacement.
7338
       */
7339
0
      if (((list == NULL) && (ent->owner == 0)) ||
7340
0
    (ctxt->parseMode == XML_PARSE_READER)) {
7341
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7342
7343
    /*
7344
     * when operating on a reader, the entities definitions
7345
     * are always owning the entities subtree.
7346
    if (ctxt->parseMode == XML_PARSE_READER)
7347
        ent->owner = 1;
7348
     */
7349
7350
0
    cur = ent->children;
7351
0
    while (cur != NULL) {
7352
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7353
0
        if (nw != NULL) {
7354
0
      if (nw->_private == NULL)
7355
0
          nw->_private = cur->_private;
7356
0
      if (firstChild == NULL){
7357
0
          firstChild = nw;
7358
0
      }
7359
0
      nw = xmlAddChild(ctxt->node, nw);
7360
0
        }
7361
0
        if (cur == ent->last) {
7362
      /*
7363
       * needed to detect some strange empty
7364
       * node cases in the reader tests
7365
       */
7366
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7367
0
          (nw != NULL) &&
7368
0
          (nw->type == XML_ELEMENT_NODE) &&
7369
0
          (nw->children == NULL))
7370
0
          nw->extra = 1;
7371
7372
0
      break;
7373
0
        }
7374
0
        cur = cur->next;
7375
0
    }
7376
#ifdef LIBXML_LEGACY_ENABLED
7377
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7378
      xmlAddEntityReference(ent, firstChild, nw);
7379
#endif /* LIBXML_LEGACY_ENABLED */
7380
0
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7381
0
    xmlNodePtr nw = NULL, cur, next, last,
7382
0
         firstChild = NULL;
7383
7384
    /*
7385
     * Copy the entity child list and make it the new
7386
     * entity child list. The goal is to make sure any
7387
     * ID or REF referenced will be the one from the
7388
     * document content and not the entity copy.
7389
     */
7390
0
    cur = ent->children;
7391
0
    ent->children = NULL;
7392
0
    last = ent->last;
7393
0
    ent->last = NULL;
7394
0
    while (cur != NULL) {
7395
0
        next = cur->next;
7396
0
        cur->next = NULL;
7397
0
        cur->parent = NULL;
7398
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7399
0
        if (nw != NULL) {
7400
0
      if (nw->_private == NULL)
7401
0
          nw->_private = cur->_private;
7402
0
      if (firstChild == NULL){
7403
0
          firstChild = cur;
7404
0
      }
7405
0
      xmlAddChild((xmlNodePtr) ent, nw);
7406
0
        }
7407
0
        xmlAddChild(ctxt->node, cur);
7408
0
        if (cur == last)
7409
0
      break;
7410
0
        cur = next;
7411
0
    }
7412
0
    if (ent->owner == 0)
7413
0
        ent->owner = 1;
7414
#ifdef LIBXML_LEGACY_ENABLED
7415
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7416
      xmlAddEntityReference(ent, firstChild, nw);
7417
#endif /* LIBXML_LEGACY_ENABLED */
7418
0
      } else {
7419
0
    const xmlChar *nbktext;
7420
7421
    /*
7422
     * the name change is to avoid coalescing of the
7423
     * node with a possible previous text one which
7424
     * would make ent->children a dangling pointer
7425
     */
7426
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7427
0
          -1);
7428
0
    if (ent->children->type == XML_TEXT_NODE)
7429
0
        ent->children->name = nbktext;
7430
0
    if ((ent->last != ent->children) &&
7431
0
        (ent->last->type == XML_TEXT_NODE))
7432
0
        ent->last->name = nbktext;
7433
0
    xmlAddChildList(ctxt->node, ent->children);
7434
0
      }
7435
7436
      /*
7437
       * This is to avoid a nasty side effect, see
7438
       * characters() in SAX.c
7439
       */
7440
0
      ctxt->nodemem = 0;
7441
0
      ctxt->nodelen = 0;
7442
0
      return;
7443
0
  }
7444
0
    }
7445
0
}
7446
7447
/**
7448
 * xmlParseEntityRef:
7449
 * @ctxt:  an XML parser context
7450
 *
7451
 * DEPRECATED: Internal function, don't use.
7452
 *
7453
 * Parse an entitiy reference. Always consumes '&'.
7454
 *
7455
 * [68] EntityRef ::= '&' Name ';'
7456
 *
7457
 * [ WFC: Entity Declared ]
7458
 * In a document without any DTD, a document with only an internal DTD
7459
 * subset which contains no parameter entity references, or a document
7460
 * with "standalone='yes'", the Name given in the entity reference
7461
 * must match that in an entity declaration, except that well-formed
7462
 * documents need not declare any of the following entities: amp, lt,
7463
 * gt, apos, quot.  The declaration of a parameter entity must precede
7464
 * any reference to it.  Similarly, the declaration of a general entity
7465
 * must precede any reference to it which appears in a default value in an
7466
 * attribute-list declaration. Note that if entities are declared in the
7467
 * external subset or in external parameter entities, a non-validating
7468
 * processor is not obligated to read and process their declarations;
7469
 * for such documents, the rule that an entity must be declared is a
7470
 * well-formedness constraint only if standalone='yes'.
7471
 *
7472
 * [ WFC: Parsed Entity ]
7473
 * An entity reference must not contain the name of an unparsed entity
7474
 *
7475
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7476
 */
7477
xmlEntityPtr
7478
19.7k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7479
19.7k
    const xmlChar *name;
7480
19.7k
    xmlEntityPtr ent = NULL;
7481
7482
19.7k
    GROW;
7483
19.7k
    if (ctxt->instate == XML_PARSER_EOF)
7484
0
        return(NULL);
7485
7486
19.7k
    if (RAW != '&')
7487
0
        return(NULL);
7488
19.7k
    NEXT;
7489
19.7k
    name = xmlParseName(ctxt);
7490
19.7k
    if (name == NULL) {
7491
9.29k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7492
9.29k
           "xmlParseEntityRef: no name\n");
7493
9.29k
        return(NULL);
7494
9.29k
    }
7495
10.4k
    if (RAW != ';') {
7496
10.4k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7497
10.4k
  return(NULL);
7498
10.4k
    }
7499
3
    NEXT;
7500
7501
    /*
7502
     * Predefined entities override any extra definition
7503
     */
7504
3
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7505
3
        ent = xmlGetPredefinedEntity(name);
7506
3
        if (ent != NULL)
7507
3
            return(ent);
7508
3
    }
7509
7510
    /*
7511
     * Ask first SAX for entity resolution, otherwise try the
7512
     * entities which may have stored in the parser context.
7513
     */
7514
0
    if (ctxt->sax != NULL) {
7515
0
  if (ctxt->sax->getEntity != NULL)
7516
0
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7517
0
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7518
0
      (ctxt->options & XML_PARSE_OLDSAX))
7519
0
      ent = xmlGetPredefinedEntity(name);
7520
0
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7521
0
      (ctxt->userData==ctxt)) {
7522
0
      ent = xmlSAX2GetEntity(ctxt, name);
7523
0
  }
7524
0
    }
7525
0
    if (ctxt->instate == XML_PARSER_EOF)
7526
0
  return(NULL);
7527
    /*
7528
     * [ WFC: Entity Declared ]
7529
     * In a document without any DTD, a document with only an
7530
     * internal DTD subset which contains no parameter entity
7531
     * references, or a document with "standalone='yes'", the
7532
     * Name given in the entity reference must match that in an
7533
     * entity declaration, except that well-formed documents
7534
     * need not declare any of the following entities: amp, lt,
7535
     * gt, apos, quot.
7536
     * The declaration of a parameter entity must precede any
7537
     * reference to it.
7538
     * Similarly, the declaration of a general entity must
7539
     * precede any reference to it which appears in a default
7540
     * value in an attribute-list declaration. Note that if
7541
     * entities are declared in the external subset or in
7542
     * external parameter entities, a non-validating processor
7543
     * is not obligated to read and process their declarations;
7544
     * for such documents, the rule that an entity must be
7545
     * declared is a well-formedness constraint only if
7546
     * standalone='yes'.
7547
     */
7548
0
    if (ent == NULL) {
7549
0
  if ((ctxt->standalone == 1) ||
7550
0
      ((ctxt->hasExternalSubset == 0) &&
7551
0
       (ctxt->hasPErefs == 0))) {
7552
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7553
0
         "Entity '%s' not defined\n", name);
7554
0
  } else {
7555
0
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7556
0
         "Entity '%s' not defined\n", name);
7557
0
      if ((ctxt->inSubset == 0) &&
7558
0
    (ctxt->sax != NULL) &&
7559
0
    (ctxt->sax->reference != NULL)) {
7560
0
    ctxt->sax->reference(ctxt->userData, name);
7561
0
      }
7562
0
  }
7563
0
  ctxt->valid = 0;
7564
0
    }
7565
7566
    /*
7567
     * [ WFC: Parsed Entity ]
7568
     * An entity reference must not contain the name of an
7569
     * unparsed entity
7570
     */
7571
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7572
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7573
0
     "Entity reference to unparsed entity %s\n", name);
7574
0
    }
7575
7576
    /*
7577
     * [ WFC: No External Entity References ]
7578
     * Attribute values cannot contain direct or indirect
7579
     * entity references to external entities.
7580
     */
7581
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7582
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7583
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7584
0
       "Attribute references external entity '%s'\n", name);
7585
0
    }
7586
    /*
7587
     * [ WFC: No < in Attribute Values ]
7588
     * The replacement text of any entity referred to directly or
7589
     * indirectly in an attribute value (other than "&lt;") must
7590
     * not contain a <.
7591
     */
7592
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7593
0
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7594
0
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7595
0
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7596
0
                ent->flags |= XML_ENT_CONTAINS_LT;
7597
0
            ent->flags |= XML_ENT_CHECKED_LT;
7598
0
        }
7599
0
        if (ent->flags & XML_ENT_CONTAINS_LT)
7600
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7601
0
                    "'<' in entity '%s' is not allowed in attributes "
7602
0
                    "values\n", name);
7603
0
    }
7604
7605
    /*
7606
     * Internal check, no parameter entities here ...
7607
     */
7608
0
    else {
7609
0
  switch (ent->etype) {
7610
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7611
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7612
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7613
0
       "Attempt to reference the parameter entity '%s'\n",
7614
0
            name);
7615
0
      break;
7616
0
      default:
7617
0
      break;
7618
0
  }
7619
0
    }
7620
7621
    /*
7622
     * [ WFC: No Recursion ]
7623
     * A parsed entity must not contain a recursive reference
7624
     * to itself, either directly or indirectly.
7625
     * Done somewhere else
7626
     */
7627
0
    return(ent);
7628
0
}
7629
7630
/**
7631
 * xmlParseStringEntityRef:
7632
 * @ctxt:  an XML parser context
7633
 * @str:  a pointer to an index in the string
7634
 *
7635
 * parse ENTITY references declarations, but this version parses it from
7636
 * a string value.
7637
 *
7638
 * [68] EntityRef ::= '&' Name ';'
7639
 *
7640
 * [ WFC: Entity Declared ]
7641
 * In a document without any DTD, a document with only an internal DTD
7642
 * subset which contains no parameter entity references, or a document
7643
 * with "standalone='yes'", the Name given in the entity reference
7644
 * must match that in an entity declaration, except that well-formed
7645
 * documents need not declare any of the following entities: amp, lt,
7646
 * gt, apos, quot.  The declaration of a parameter entity must precede
7647
 * any reference to it.  Similarly, the declaration of a general entity
7648
 * must precede any reference to it which appears in a default value in an
7649
 * attribute-list declaration. Note that if entities are declared in the
7650
 * external subset or in external parameter entities, a non-validating
7651
 * processor is not obligated to read and process their declarations;
7652
 * for such documents, the rule that an entity must be declared is a
7653
 * well-formedness constraint only if standalone='yes'.
7654
 *
7655
 * [ WFC: Parsed Entity ]
7656
 * An entity reference must not contain the name of an unparsed entity
7657
 *
7658
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7659
 * is updated to the current location in the string.
7660
 */
7661
static xmlEntityPtr
7662
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7663
0
    xmlChar *name;
7664
0
    const xmlChar *ptr;
7665
0
    xmlChar cur;
7666
0
    xmlEntityPtr ent = NULL;
7667
7668
0
    if ((str == NULL) || (*str == NULL))
7669
0
        return(NULL);
7670
0
    ptr = *str;
7671
0
    cur = *ptr;
7672
0
    if (cur != '&')
7673
0
  return(NULL);
7674
7675
0
    ptr++;
7676
0
    name = xmlParseStringName(ctxt, &ptr);
7677
0
    if (name == NULL) {
7678
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7679
0
           "xmlParseStringEntityRef: no name\n");
7680
0
  *str = ptr;
7681
0
  return(NULL);
7682
0
    }
7683
0
    if (*ptr != ';') {
7684
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7685
0
        xmlFree(name);
7686
0
  *str = ptr;
7687
0
  return(NULL);
7688
0
    }
7689
0
    ptr++;
7690
7691
7692
    /*
7693
     * Predefined entities override any extra definition
7694
     */
7695
0
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7696
0
        ent = xmlGetPredefinedEntity(name);
7697
0
        if (ent != NULL) {
7698
0
            xmlFree(name);
7699
0
            *str = ptr;
7700
0
            return(ent);
7701
0
        }
7702
0
    }
7703
7704
    /*
7705
     * Ask first SAX for entity resolution, otherwise try the
7706
     * entities which may have stored in the parser context.
7707
     */
7708
0
    if (ctxt->sax != NULL) {
7709
0
  if (ctxt->sax->getEntity != NULL)
7710
0
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7711
0
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7712
0
      ent = xmlGetPredefinedEntity(name);
7713
0
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7714
0
      ent = xmlSAX2GetEntity(ctxt, name);
7715
0
  }
7716
0
    }
7717
0
    if (ctxt->instate == XML_PARSER_EOF) {
7718
0
  xmlFree(name);
7719
0
  return(NULL);
7720
0
    }
7721
7722
    /*
7723
     * [ WFC: Entity Declared ]
7724
     * In a document without any DTD, a document with only an
7725
     * internal DTD subset which contains no parameter entity
7726
     * references, or a document with "standalone='yes'", the
7727
     * Name given in the entity reference must match that in an
7728
     * entity declaration, except that well-formed documents
7729
     * need not declare any of the following entities: amp, lt,
7730
     * gt, apos, quot.
7731
     * The declaration of a parameter entity must precede any
7732
     * reference to it.
7733
     * Similarly, the declaration of a general entity must
7734
     * precede any reference to it which appears in a default
7735
     * value in an attribute-list declaration. Note that if
7736
     * entities are declared in the external subset or in
7737
     * external parameter entities, a non-validating processor
7738
     * is not obligated to read and process their declarations;
7739
     * for such documents, the rule that an entity must be
7740
     * declared is a well-formedness constraint only if
7741
     * standalone='yes'.
7742
     */
7743
0
    if (ent == NULL) {
7744
0
  if ((ctxt->standalone == 1) ||
7745
0
      ((ctxt->hasExternalSubset == 0) &&
7746
0
       (ctxt->hasPErefs == 0))) {
7747
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7748
0
         "Entity '%s' not defined\n", name);
7749
0
  } else {
7750
0
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7751
0
        "Entity '%s' not defined\n",
7752
0
        name);
7753
0
  }
7754
  /* TODO ? check regressions ctxt->valid = 0; */
7755
0
    }
7756
7757
    /*
7758
     * [ WFC: Parsed Entity ]
7759
     * An entity reference must not contain the name of an
7760
     * unparsed entity
7761
     */
7762
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7763
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7764
0
     "Entity reference to unparsed entity %s\n", name);
7765
0
    }
7766
7767
    /*
7768
     * [ WFC: No External Entity References ]
7769
     * Attribute values cannot contain direct or indirect
7770
     * entity references to external entities.
7771
     */
7772
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7773
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7774
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7775
0
   "Attribute references external entity '%s'\n", name);
7776
0
    }
7777
    /*
7778
     * [ WFC: No < in Attribute Values ]
7779
     * The replacement text of any entity referred to directly or
7780
     * indirectly in an attribute value (other than "&lt;") must
7781
     * not contain a <.
7782
     */
7783
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7784
0
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7785
0
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7786
0
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7787
0
                ent->flags |= XML_ENT_CONTAINS_LT;
7788
0
            ent->flags |= XML_ENT_CHECKED_LT;
7789
0
        }
7790
0
        if (ent->flags & XML_ENT_CONTAINS_LT)
7791
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7792
0
                    "'<' in entity '%s' is not allowed in attributes "
7793
0
                    "values\n", name);
7794
0
    }
7795
7796
    /*
7797
     * Internal check, no parameter entities here ...
7798
     */
7799
0
    else {
7800
0
  switch (ent->etype) {
7801
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7802
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7803
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7804
0
       "Attempt to reference the parameter entity '%s'\n",
7805
0
          name);
7806
0
      break;
7807
0
      default:
7808
0
      break;
7809
0
  }
7810
0
    }
7811
7812
    /*
7813
     * [ WFC: No Recursion ]
7814
     * A parsed entity must not contain a recursive reference
7815
     * to itself, either directly or indirectly.
7816
     * Done somewhere else
7817
     */
7818
7819
0
    xmlFree(name);
7820
0
    *str = ptr;
7821
0
    return(ent);
7822
0
}
7823
7824
/**
7825
 * xmlParsePEReference:
7826
 * @ctxt:  an XML parser context
7827
 *
7828
 * DEPRECATED: Internal function, don't use.
7829
 *
7830
 * Parse a parameter entity reference. Always consumes '%'.
7831
 *
7832
 * The entity content is handled directly by pushing its content as
7833
 * a new input stream.
7834
 *
7835
 * [69] PEReference ::= '%' Name ';'
7836
 *
7837
 * [ WFC: No Recursion ]
7838
 * A parsed entity must not contain a recursive
7839
 * reference to itself, either directly or indirectly.
7840
 *
7841
 * [ WFC: Entity Declared ]
7842
 * In a document without any DTD, a document with only an internal DTD
7843
 * subset which contains no parameter entity references, or a document
7844
 * with "standalone='yes'", ...  ... The declaration of a parameter
7845
 * entity must precede any reference to it...
7846
 *
7847
 * [ VC: Entity Declared ]
7848
 * In a document with an external subset or external parameter entities
7849
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7850
 * must precede any reference to it...
7851
 *
7852
 * [ WFC: In DTD ]
7853
 * Parameter-entity references may only appear in the DTD.
7854
 * NOTE: misleading but this is handled.
7855
 */
7856
void
7857
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7858
0
{
7859
0
    const xmlChar *name;
7860
0
    xmlEntityPtr entity = NULL;
7861
0
    xmlParserInputPtr input;
7862
7863
0
    if (RAW != '%')
7864
0
        return;
7865
0
    NEXT;
7866
0
    name = xmlParseName(ctxt);
7867
0
    if (name == NULL) {
7868
0
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7869
0
  return;
7870
0
    }
7871
0
    if (xmlParserDebugEntities)
7872
0
  xmlGenericError(xmlGenericErrorContext,
7873
0
    "PEReference: %s\n", name);
7874
0
    if (RAW != ';') {
7875
0
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7876
0
        return;
7877
0
    }
7878
7879
0
    NEXT;
7880
7881
    /*
7882
     * Request the entity from SAX
7883
     */
7884
0
    if ((ctxt->sax != NULL) &&
7885
0
  (ctxt->sax->getParameterEntity != NULL))
7886
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7887
0
    if (ctxt->instate == XML_PARSER_EOF)
7888
0
  return;
7889
0
    if (entity == NULL) {
7890
  /*
7891
   * [ WFC: Entity Declared ]
7892
   * In a document without any DTD, a document with only an
7893
   * internal DTD subset which contains no parameter entity
7894
   * references, or a document with "standalone='yes'", ...
7895
   * ... The declaration of a parameter entity must precede
7896
   * any reference to it...
7897
   */
7898
0
  if ((ctxt->standalone == 1) ||
7899
0
      ((ctxt->hasExternalSubset == 0) &&
7900
0
       (ctxt->hasPErefs == 0))) {
7901
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7902
0
            "PEReference: %%%s; not found\n",
7903
0
            name);
7904
0
  } else {
7905
      /*
7906
       * [ VC: Entity Declared ]
7907
       * In a document with an external subset or external
7908
       * parameter entities with "standalone='no'", ...
7909
       * ... The declaration of a parameter entity must
7910
       * precede any reference to it...
7911
       */
7912
0
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7913
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7914
0
                                 "PEReference: %%%s; not found\n",
7915
0
                                 name, NULL);
7916
0
            } else
7917
0
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7918
0
                              "PEReference: %%%s; not found\n",
7919
0
                              name, NULL);
7920
0
            ctxt->valid = 0;
7921
0
  }
7922
0
    } else {
7923
  /*
7924
   * Internal checking in case the entity quest barfed
7925
   */
7926
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7927
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7928
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7929
0
      "Internal: %%%s; is not a parameter entity\n",
7930
0
        name, NULL);
7931
0
  } else {
7932
0
            xmlChar start[4];
7933
0
            xmlCharEncoding enc;
7934
0
            unsigned long parentConsumed;
7935
0
            xmlEntityPtr oldEnt;
7936
7937
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7938
0
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7939
0
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7940
0
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7941
0
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7942
0
    (ctxt->replaceEntities == 0) &&
7943
0
    (ctxt->validate == 0))
7944
0
    return;
7945
7946
0
            if (entity->flags & XML_ENT_EXPANDING) {
7947
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7948
0
                xmlHaltParser(ctxt);
7949
0
                return;
7950
0
            }
7951
7952
            /* Must be computed from old input before pushing new input. */
7953
0
            parentConsumed = ctxt->input->parentConsumed;
7954
0
            oldEnt = ctxt->input->entity;
7955
0
            if ((oldEnt == NULL) ||
7956
0
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7957
0
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
7958
0
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
7959
0
                xmlSaturatedAddSizeT(&parentConsumed,
7960
0
                                     ctxt->input->cur - ctxt->input->base);
7961
0
            }
7962
7963
0
      input = xmlNewEntityInputStream(ctxt, entity);
7964
0
      if (xmlPushInput(ctxt, input) < 0) {
7965
0
                xmlFreeInputStream(input);
7966
0
    return;
7967
0
            }
7968
7969
0
            entity->flags |= XML_ENT_EXPANDING;
7970
7971
0
            input->parentConsumed = parentConsumed;
7972
7973
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7974
                /*
7975
                 * Get the 4 first bytes and decode the charset
7976
                 * if enc != XML_CHAR_ENCODING_NONE
7977
                 * plug some encoding conversion routines.
7978
                 * Note that, since we may have some non-UTF8
7979
                 * encoding (like UTF16, bug 135229), the 'length'
7980
                 * is not known, but we can calculate based upon
7981
                 * the amount of data in the buffer.
7982
                 */
7983
0
                GROW
7984
0
                if (ctxt->instate == XML_PARSER_EOF)
7985
0
                    return;
7986
0
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
7987
0
                    start[0] = RAW;
7988
0
                    start[1] = NXT(1);
7989
0
                    start[2] = NXT(2);
7990
0
                    start[3] = NXT(3);
7991
0
                    enc = xmlDetectCharEncoding(start, 4);
7992
0
                    if (enc != XML_CHAR_ENCODING_NONE) {
7993
0
                        xmlSwitchEncoding(ctxt, enc);
7994
0
                    }
7995
0
                }
7996
7997
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7998
0
                    (IS_BLANK_CH(NXT(5)))) {
7999
0
                    xmlParseTextDecl(ctxt);
8000
0
                }
8001
0
            }
8002
0
  }
8003
0
    }
8004
0
    ctxt->hasPErefs = 1;
8005
0
}
8006
8007
/**
8008
 * xmlLoadEntityContent:
8009
 * @ctxt:  an XML parser context
8010
 * @entity: an unloaded system entity
8011
 *
8012
 * Load the original content of the given system entity from the
8013
 * ExternalID/SystemID given. This is to be used for Included in Literal
8014
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8015
 *
8016
 * Returns 0 in case of success and -1 in case of failure
8017
 */
8018
static int
8019
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8020
0
    xmlParserInputPtr input;
8021
0
    xmlBufferPtr buf;
8022
0
    int l, c;
8023
8024
0
    if ((ctxt == NULL) || (entity == NULL) ||
8025
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8026
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8027
0
  (entity->content != NULL)) {
8028
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8029
0
              "xmlLoadEntityContent parameter error");
8030
0
        return(-1);
8031
0
    }
8032
8033
0
    if (xmlParserDebugEntities)
8034
0
  xmlGenericError(xmlGenericErrorContext,
8035
0
    "Reading %s entity content input\n", entity->name);
8036
8037
0
    buf = xmlBufferCreate();
8038
0
    if (buf == NULL) {
8039
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8040
0
              "xmlLoadEntityContent parameter error");
8041
0
        return(-1);
8042
0
    }
8043
0
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8044
8045
0
    input = xmlNewEntityInputStream(ctxt, entity);
8046
0
    if (input == NULL) {
8047
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8048
0
              "xmlLoadEntityContent input error");
8049
0
  xmlBufferFree(buf);
8050
0
        return(-1);
8051
0
    }
8052
8053
    /*
8054
     * Push the entity as the current input, read char by char
8055
     * saving to the buffer until the end of the entity or an error
8056
     */
8057
0
    if (xmlPushInput(ctxt, input) < 0) {
8058
0
        xmlBufferFree(buf);
8059
0
  xmlFreeInputStream(input);
8060
0
  return(-1);
8061
0
    }
8062
8063
0
    GROW;
8064
0
    c = CUR_CHAR(l);
8065
0
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8066
0
           (IS_CHAR(c))) {
8067
0
        xmlBufferAdd(buf, ctxt->input->cur, l);
8068
0
  NEXTL(l);
8069
0
  c = CUR_CHAR(l);
8070
0
    }
8071
0
    if (ctxt->instate == XML_PARSER_EOF) {
8072
0
  xmlBufferFree(buf);
8073
0
  return(-1);
8074
0
    }
8075
8076
0
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8077
0
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8078
0
        xmlPopInput(ctxt);
8079
0
    } else if (!IS_CHAR(c)) {
8080
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8081
0
                          "xmlLoadEntityContent: invalid char value %d\n",
8082
0
                    c);
8083
0
  xmlBufferFree(buf);
8084
0
  return(-1);
8085
0
    }
8086
0
    entity->content = buf->content;
8087
0
    entity->length = buf->use;
8088
0
    buf->content = NULL;
8089
0
    xmlBufferFree(buf);
8090
8091
0
    return(0);
8092
0
}
8093
8094
/**
8095
 * xmlParseStringPEReference:
8096
 * @ctxt:  an XML parser context
8097
 * @str:  a pointer to an index in the string
8098
 *
8099
 * parse PEReference declarations
8100
 *
8101
 * [69] PEReference ::= '%' Name ';'
8102
 *
8103
 * [ WFC: No Recursion ]
8104
 * A parsed entity must not contain a recursive
8105
 * reference to itself, either directly or indirectly.
8106
 *
8107
 * [ WFC: Entity Declared ]
8108
 * In a document without any DTD, a document with only an internal DTD
8109
 * subset which contains no parameter entity references, or a document
8110
 * with "standalone='yes'", ...  ... The declaration of a parameter
8111
 * entity must precede any reference to it...
8112
 *
8113
 * [ VC: Entity Declared ]
8114
 * In a document with an external subset or external parameter entities
8115
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8116
 * must precede any reference to it...
8117
 *
8118
 * [ WFC: In DTD ]
8119
 * Parameter-entity references may only appear in the DTD.
8120
 * NOTE: misleading but this is handled.
8121
 *
8122
 * Returns the string of the entity content.
8123
 *         str is updated to the current value of the index
8124
 */
8125
static xmlEntityPtr
8126
0
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8127
0
    const xmlChar *ptr;
8128
0
    xmlChar cur;
8129
0
    xmlChar *name;
8130
0
    xmlEntityPtr entity = NULL;
8131
8132
0
    if ((str == NULL) || (*str == NULL)) return(NULL);
8133
0
    ptr = *str;
8134
0
    cur = *ptr;
8135
0
    if (cur != '%')
8136
0
        return(NULL);
8137
0
    ptr++;
8138
0
    name = xmlParseStringName(ctxt, &ptr);
8139
0
    if (name == NULL) {
8140
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8141
0
           "xmlParseStringPEReference: no name\n");
8142
0
  *str = ptr;
8143
0
  return(NULL);
8144
0
    }
8145
0
    cur = *ptr;
8146
0
    if (cur != ';') {
8147
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8148
0
  xmlFree(name);
8149
0
  *str = ptr;
8150
0
  return(NULL);
8151
0
    }
8152
0
    ptr++;
8153
8154
    /*
8155
     * Request the entity from SAX
8156
     */
8157
0
    if ((ctxt->sax != NULL) &&
8158
0
  (ctxt->sax->getParameterEntity != NULL))
8159
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8160
0
    if (ctxt->instate == XML_PARSER_EOF) {
8161
0
  xmlFree(name);
8162
0
  *str = ptr;
8163
0
  return(NULL);
8164
0
    }
8165
0
    if (entity == NULL) {
8166
  /*
8167
   * [ WFC: Entity Declared ]
8168
   * In a document without any DTD, a document with only an
8169
   * internal DTD subset which contains no parameter entity
8170
   * references, or a document with "standalone='yes'", ...
8171
   * ... The declaration of a parameter entity must precede
8172
   * any reference to it...
8173
   */
8174
0
  if ((ctxt->standalone == 1) ||
8175
0
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8176
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8177
0
     "PEReference: %%%s; not found\n", name);
8178
0
  } else {
8179
      /*
8180
       * [ VC: Entity Declared ]
8181
       * In a document with an external subset or external
8182
       * parameter entities with "standalone='no'", ...
8183
       * ... The declaration of a parameter entity must
8184
       * precede any reference to it...
8185
       */
8186
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8187
0
        "PEReference: %%%s; not found\n",
8188
0
        name, NULL);
8189
0
      ctxt->valid = 0;
8190
0
  }
8191
0
    } else {
8192
  /*
8193
   * Internal checking in case the entity quest barfed
8194
   */
8195
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8196
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8197
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8198
0
        "%%%s; is not a parameter entity\n",
8199
0
        name, NULL);
8200
0
  }
8201
0
    }
8202
0
    ctxt->hasPErefs = 1;
8203
0
    xmlFree(name);
8204
0
    *str = ptr;
8205
0
    return(entity);
8206
0
}
8207
8208
/**
8209
 * xmlParseDocTypeDecl:
8210
 * @ctxt:  an XML parser context
8211
 *
8212
 * DEPRECATED: Internal function, don't use.
8213
 *
8214
 * parse a DOCTYPE declaration
8215
 *
8216
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8217
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8218
 *
8219
 * [ VC: Root Element Type ]
8220
 * The Name in the document type declaration must match the element
8221
 * type of the root element.
8222
 */
8223
8224
void
8225
0
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8226
0
    const xmlChar *name = NULL;
8227
0
    xmlChar *ExternalID = NULL;
8228
0
    xmlChar *URI = NULL;
8229
8230
    /*
8231
     * We know that '<!DOCTYPE' has been detected.
8232
     */
8233
0
    SKIP(9);
8234
8235
0
    SKIP_BLANKS;
8236
8237
    /*
8238
     * Parse the DOCTYPE name.
8239
     */
8240
0
    name = xmlParseName(ctxt);
8241
0
    if (name == NULL) {
8242
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8243
0
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8244
0
    }
8245
0
    ctxt->intSubName = name;
8246
8247
0
    SKIP_BLANKS;
8248
8249
    /*
8250
     * Check for SystemID and ExternalID
8251
     */
8252
0
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8253
8254
0
    if ((URI != NULL) || (ExternalID != NULL)) {
8255
0
        ctxt->hasExternalSubset = 1;
8256
0
    }
8257
0
    ctxt->extSubURI = URI;
8258
0
    ctxt->extSubSystem = ExternalID;
8259
8260
0
    SKIP_BLANKS;
8261
8262
    /*
8263
     * Create and update the internal subset.
8264
     */
8265
0
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8266
0
  (!ctxt->disableSAX))
8267
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8268
0
    if (ctxt->instate == XML_PARSER_EOF)
8269
0
  return;
8270
8271
    /*
8272
     * Is there any internal subset declarations ?
8273
     * they are handled separately in xmlParseInternalSubset()
8274
     */
8275
0
    if (RAW == '[')
8276
0
  return;
8277
8278
    /*
8279
     * We should be at the end of the DOCTYPE declaration.
8280
     */
8281
0
    if (RAW != '>') {
8282
0
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8283
0
    }
8284
0
    NEXT;
8285
0
}
8286
8287
/**
8288
 * xmlParseInternalSubset:
8289
 * @ctxt:  an XML parser context
8290
 *
8291
 * parse the internal subset declaration
8292
 *
8293
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8294
 */
8295
8296
static void
8297
0
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8298
    /*
8299
     * Is there any DTD definition ?
8300
     */
8301
0
    if (RAW == '[') {
8302
0
        int baseInputNr = ctxt->inputNr;
8303
0
        ctxt->instate = XML_PARSER_DTD;
8304
0
        NEXT;
8305
  /*
8306
   * Parse the succession of Markup declarations and
8307
   * PEReferences.
8308
   * Subsequence (markupdecl | PEReference | S)*
8309
   */
8310
0
  SKIP_BLANKS;
8311
0
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8312
0
               (ctxt->instate != XML_PARSER_EOF)) {
8313
8314
            /*
8315
             * Conditional sections are allowed from external entities included
8316
             * by PE References in the internal subset.
8317
             */
8318
0
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8319
0
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8320
0
                xmlParseConditionalSections(ctxt);
8321
0
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8322
0
          xmlParseMarkupDecl(ctxt);
8323
0
            } else if (RAW == '%') {
8324
0
          xmlParsePEReference(ctxt);
8325
0
            } else {
8326
0
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8327
0
                        "xmlParseInternalSubset: error detected in"
8328
0
                        " Markup declaration\n");
8329
0
                xmlHaltParser(ctxt);
8330
0
                return;
8331
0
            }
8332
0
      SKIP_BLANKS;
8333
0
            SHRINK;
8334
0
            GROW;
8335
0
  }
8336
0
  if (RAW == ']') {
8337
0
      NEXT;
8338
0
      SKIP_BLANKS;
8339
0
  }
8340
0
    }
8341
8342
    /*
8343
     * We should be at the end of the DOCTYPE declaration.
8344
     */
8345
0
    if (RAW != '>') {
8346
0
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8347
0
  return;
8348
0
    }
8349
0
    NEXT;
8350
0
}
8351
8352
#ifdef LIBXML_SAX1_ENABLED
8353
/**
8354
 * xmlParseAttribute:
8355
 * @ctxt:  an XML parser context
8356
 * @value:  a xmlChar ** used to store the value of the attribute
8357
 *
8358
 * DEPRECATED: Internal function, don't use.
8359
 *
8360
 * parse an attribute
8361
 *
8362
 * [41] Attribute ::= Name Eq AttValue
8363
 *
8364
 * [ WFC: No External Entity References ]
8365
 * Attribute values cannot contain direct or indirect entity references
8366
 * to external entities.
8367
 *
8368
 * [ WFC: No < in Attribute Values ]
8369
 * The replacement text of any entity referred to directly or indirectly in
8370
 * an attribute value (other than "&lt;") must not contain a <.
8371
 *
8372
 * [ VC: Attribute Value Type ]
8373
 * The attribute must have been declared; the value must be of the type
8374
 * declared for it.
8375
 *
8376
 * [25] Eq ::= S? '=' S?
8377
 *
8378
 * With namespace:
8379
 *
8380
 * [NS 11] Attribute ::= QName Eq AttValue
8381
 *
8382
 * Also the case QName == xmlns:??? is handled independently as a namespace
8383
 * definition.
8384
 *
8385
 * Returns the attribute name, and the value in *value.
8386
 */
8387
8388
const xmlChar *
8389
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8390
    const xmlChar *name;
8391
    xmlChar *val;
8392
8393
    *value = NULL;
8394
    GROW;
8395
    name = xmlParseName(ctxt);
8396
    if (name == NULL) {
8397
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
                 "error parsing attribute name\n");
8399
        return(NULL);
8400
    }
8401
8402
    /*
8403
     * read the value
8404
     */
8405
    SKIP_BLANKS;
8406
    if (RAW == '=') {
8407
        NEXT;
8408
  SKIP_BLANKS;
8409
  val = xmlParseAttValue(ctxt);
8410
  ctxt->instate = XML_PARSER_CONTENT;
8411
    } else {
8412
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8413
         "Specification mandates value for attribute %s\n", name);
8414
  return(name);
8415
    }
8416
8417
    /*
8418
     * Check that xml:lang conforms to the specification
8419
     * No more registered as an error, just generate a warning now
8420
     * since this was deprecated in XML second edition
8421
     */
8422
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8423
  if (!xmlCheckLanguageID(val)) {
8424
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8425
              "Malformed value for xml:lang : %s\n",
8426
        val, NULL);
8427
  }
8428
    }
8429
8430
    /*
8431
     * Check that xml:space conforms to the specification
8432
     */
8433
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8434
  if (xmlStrEqual(val, BAD_CAST "default"))
8435
      *(ctxt->space) = 0;
8436
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8437
      *(ctxt->space) = 1;
8438
  else {
8439
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8440
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8441
                                 val, NULL);
8442
  }
8443
    }
8444
8445
    *value = val;
8446
    return(name);
8447
}
8448
8449
/**
8450
 * xmlParseStartTag:
8451
 * @ctxt:  an XML parser context
8452
 *
8453
 * DEPRECATED: Internal function, don't use.
8454
 *
8455
 * Parse a start tag. Always consumes '<'.
8456
 *
8457
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8458
 *
8459
 * [ WFC: Unique Att Spec ]
8460
 * No attribute name may appear more than once in the same start-tag or
8461
 * empty-element tag.
8462
 *
8463
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8464
 *
8465
 * [ WFC: Unique Att Spec ]
8466
 * No attribute name may appear more than once in the same start-tag or
8467
 * empty-element tag.
8468
 *
8469
 * With namespace:
8470
 *
8471
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8472
 *
8473
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8474
 *
8475
 * Returns the element name parsed
8476
 */
8477
8478
const xmlChar *
8479
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8480
    const xmlChar *name;
8481
    const xmlChar *attname;
8482
    xmlChar *attvalue;
8483
    const xmlChar **atts = ctxt->atts;
8484
    int nbatts = 0;
8485
    int maxatts = ctxt->maxatts;
8486
    int i;
8487
8488
    if (RAW != '<') return(NULL);
8489
    NEXT1;
8490
8491
    name = xmlParseName(ctxt);
8492
    if (name == NULL) {
8493
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8494
       "xmlParseStartTag: invalid element name\n");
8495
        return(NULL);
8496
    }
8497
8498
    /*
8499
     * Now parse the attributes, it ends up with the ending
8500
     *
8501
     * (S Attribute)* S?
8502
     */
8503
    SKIP_BLANKS;
8504
    GROW;
8505
8506
    while (((RAW != '>') &&
8507
     ((RAW != '/') || (NXT(1) != '>')) &&
8508
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8509
  attname = xmlParseAttribute(ctxt, &attvalue);
8510
        if (attname == NULL) {
8511
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8512
         "xmlParseStartTag: problem parsing attributes\n");
8513
      break;
8514
  }
8515
        if (attvalue != NULL) {
8516
      /*
8517
       * [ WFC: Unique Att Spec ]
8518
       * No attribute name may appear more than once in the same
8519
       * start-tag or empty-element tag.
8520
       */
8521
      for (i = 0; i < nbatts;i += 2) {
8522
          if (xmlStrEqual(atts[i], attname)) {
8523
        xmlErrAttributeDup(ctxt, NULL, attname);
8524
        xmlFree(attvalue);
8525
        goto failed;
8526
    }
8527
      }
8528
      /*
8529
       * Add the pair to atts
8530
       */
8531
      if (atts == NULL) {
8532
          maxatts = 22; /* allow for 10 attrs by default */
8533
          atts = (const xmlChar **)
8534
           xmlMalloc(maxatts * sizeof(xmlChar *));
8535
    if (atts == NULL) {
8536
        xmlErrMemory(ctxt, NULL);
8537
        if (attvalue != NULL)
8538
      xmlFree(attvalue);
8539
        goto failed;
8540
    }
8541
    ctxt->atts = atts;
8542
    ctxt->maxatts = maxatts;
8543
      } else if (nbatts + 4 > maxatts) {
8544
          const xmlChar **n;
8545
8546
          maxatts *= 2;
8547
          n = (const xmlChar **) xmlRealloc((void *) atts,
8548
               maxatts * sizeof(const xmlChar *));
8549
    if (n == NULL) {
8550
        xmlErrMemory(ctxt, NULL);
8551
        if (attvalue != NULL)
8552
      xmlFree(attvalue);
8553
        goto failed;
8554
    }
8555
    atts = n;
8556
    ctxt->atts = atts;
8557
    ctxt->maxatts = maxatts;
8558
      }
8559
      atts[nbatts++] = attname;
8560
      atts[nbatts++] = attvalue;
8561
      atts[nbatts] = NULL;
8562
      atts[nbatts + 1] = NULL;
8563
  } else {
8564
      if (attvalue != NULL)
8565
    xmlFree(attvalue);
8566
  }
8567
8568
failed:
8569
8570
  GROW
8571
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8572
      break;
8573
  if (SKIP_BLANKS == 0) {
8574
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8575
         "attributes construct error\n");
8576
  }
8577
  SHRINK;
8578
        GROW;
8579
    }
8580
8581
    /*
8582
     * SAX: Start of Element !
8583
     */
8584
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8585
  (!ctxt->disableSAX)) {
8586
  if (nbatts > 0)
8587
      ctxt->sax->startElement(ctxt->userData, name, atts);
8588
  else
8589
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8590
    }
8591
8592
    if (atts != NULL) {
8593
        /* Free only the content strings */
8594
        for (i = 1;i < nbatts;i+=2)
8595
      if (atts[i] != NULL)
8596
         xmlFree((xmlChar *) atts[i]);
8597
    }
8598
    return(name);
8599
}
8600
8601
/**
8602
 * xmlParseEndTag1:
8603
 * @ctxt:  an XML parser context
8604
 * @line:  line of the start tag
8605
 * @nsNr:  number of namespaces on the start tag
8606
 *
8607
 * Parse an end tag. Always consumes '</'.
8608
 *
8609
 * [42] ETag ::= '</' Name S? '>'
8610
 *
8611
 * With namespace
8612
 *
8613
 * [NS 9] ETag ::= '</' QName S? '>'
8614
 */
8615
8616
static void
8617
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8618
    const xmlChar *name;
8619
8620
    GROW;
8621
    if ((RAW != '<') || (NXT(1) != '/')) {
8622
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8623
           "xmlParseEndTag: '</' not found\n");
8624
  return;
8625
    }
8626
    SKIP(2);
8627
8628
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8629
8630
    /*
8631
     * We should definitely be at the ending "S? '>'" part
8632
     */
8633
    GROW;
8634
    SKIP_BLANKS;
8635
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8636
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8637
    } else
8638
  NEXT1;
8639
8640
    /*
8641
     * [ WFC: Element Type Match ]
8642
     * The Name in an element's end-tag must match the element type in the
8643
     * start-tag.
8644
     *
8645
     */
8646
    if (name != (xmlChar*)1) {
8647
        if (name == NULL) name = BAD_CAST "unparsable";
8648
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8649
         "Opening and ending tag mismatch: %s line %d and %s\n",
8650
                    ctxt->name, line, name);
8651
    }
8652
8653
    /*
8654
     * SAX: End of Tag
8655
     */
8656
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8657
  (!ctxt->disableSAX))
8658
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8659
8660
    namePop(ctxt);
8661
    spacePop(ctxt);
8662
    return;
8663
}
8664
8665
/**
8666
 * xmlParseEndTag:
8667
 * @ctxt:  an XML parser context
8668
 *
8669
 * DEPRECATED: Internal function, don't use.
8670
 *
8671
 * parse an end of tag
8672
 *
8673
 * [42] ETag ::= '</' Name S? '>'
8674
 *
8675
 * With namespace
8676
 *
8677
 * [NS 9] ETag ::= '</' QName S? '>'
8678
 */
8679
8680
void
8681
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8682
    xmlParseEndTag1(ctxt, 0);
8683
}
8684
#endif /* LIBXML_SAX1_ENABLED */
8685
8686
/************************************************************************
8687
 *                  *
8688
 *          SAX 2 specific operations       *
8689
 *                  *
8690
 ************************************************************************/
8691
8692
/*
8693
 * xmlGetNamespace:
8694
 * @ctxt:  an XML parser context
8695
 * @prefix:  the prefix to lookup
8696
 *
8697
 * Lookup the namespace name for the @prefix (which ca be NULL)
8698
 * The prefix must come from the @ctxt->dict dictionary
8699
 *
8700
 * Returns the namespace name or NULL if not bound
8701
 */
8702
static const xmlChar *
8703
28.3k
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8704
28.3k
    int i;
8705
8706
28.3k
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8707
51.2k
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8708
27.5k
        if (ctxt->nsTab[i] == prefix) {
8709
4.54k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8710
0
          return(NULL);
8711
4.54k
      return(ctxt->nsTab[i + 1]);
8712
4.54k
  }
8713
23.6k
    return(NULL);
8714
28.1k
}
8715
8716
/**
8717
 * xmlParseQName:
8718
 * @ctxt:  an XML parser context
8719
 * @prefix:  pointer to store the prefix part
8720
 *
8721
 * parse an XML Namespace QName
8722
 *
8723
 * [6]  QName  ::= (Prefix ':')? LocalPart
8724
 * [7]  Prefix  ::= NCName
8725
 * [8]  LocalPart  ::= NCName
8726
 *
8727
 * Returns the Name parsed or NULL
8728
 */
8729
8730
static const xmlChar *
8731
58.5k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8732
58.5k
    const xmlChar *l, *p;
8733
8734
58.5k
    GROW;
8735
58.5k
    if (ctxt->instate == XML_PARSER_EOF)
8736
0
        return(NULL);
8737
8738
58.5k
    l = xmlParseNCName(ctxt);
8739
58.5k
    if (l == NULL) {
8740
13.1k
        if (CUR == ':') {
8741
666
      l = xmlParseName(ctxt);
8742
666
      if (l != NULL) {
8743
656
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8744
656
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8745
656
    *prefix = NULL;
8746
656
    return(l);
8747
656
      }
8748
666
  }
8749
12.4k
        return(NULL);
8750
13.1k
    }
8751
45.3k
    if (CUR == ':') {
8752
13.2k
        NEXT;
8753
13.2k
  p = l;
8754
13.2k
  l = xmlParseNCName(ctxt);
8755
13.2k
  if (l == NULL) {
8756
1.69k
      xmlChar *tmp;
8757
8758
1.69k
            if (ctxt->instate == XML_PARSER_EOF)
8759
0
                return(NULL);
8760
1.69k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8761
1.69k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8762
1.69k
      l = xmlParseNmtoken(ctxt);
8763
1.69k
      if (l == NULL) {
8764
1.56k
                if (ctxt->instate == XML_PARSER_EOF)
8765
2
                    return(NULL);
8766
1.56k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8767
1.56k
            } else {
8768
125
    tmp = xmlBuildQName(l, p, NULL, 0);
8769
125
    xmlFree((char *)l);
8770
125
      }
8771
1.69k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8772
1.69k
      if (tmp != NULL) xmlFree(tmp);
8773
1.69k
      *prefix = NULL;
8774
1.69k
      return(p);
8775
1.69k
  }
8776
11.5k
  if (CUR == ':') {
8777
1.27k
      xmlChar *tmp;
8778
8779
1.27k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8780
1.27k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8781
1.27k
      NEXT;
8782
1.27k
      tmp = (xmlChar *) xmlParseName(ctxt);
8783
1.27k
      if (tmp != NULL) {
8784
1.18k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8785
1.18k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8786
1.18k
    if (tmp != NULL) xmlFree(tmp);
8787
1.18k
    *prefix = p;
8788
1.18k
    return(l);
8789
1.18k
      }
8790
87
            if (ctxt->instate == XML_PARSER_EOF)
8791
0
                return(NULL);
8792
87
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8793
87
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8794
87
      if (tmp != NULL) xmlFree(tmp);
8795
87
      *prefix = p;
8796
87
      return(l);
8797
87
  }
8798
10.3k
  *prefix = p;
8799
10.3k
    } else
8800
32.1k
        *prefix = NULL;
8801
42.4k
    return(l);
8802
45.3k
}
8803
8804
/**
8805
 * xmlParseQNameAndCompare:
8806
 * @ctxt:  an XML parser context
8807
 * @name:  the localname
8808
 * @prefix:  the prefix, if any.
8809
 *
8810
 * parse an XML name and compares for match
8811
 * (specialized for endtag parsing)
8812
 *
8813
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8814
 * and the name for mismatch
8815
 */
8816
8817
static const xmlChar *
8818
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8819
71
                        xmlChar const *prefix) {
8820
71
    const xmlChar *cmp;
8821
71
    const xmlChar *in;
8822
71
    const xmlChar *ret;
8823
71
    const xmlChar *prefix2;
8824
8825
71
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8826
8827
71
    GROW;
8828
71
    in = ctxt->input->cur;
8829
8830
71
    cmp = prefix;
8831
106
    while (*in != 0 && *in == *cmp) {
8832
35
  ++in;
8833
35
  ++cmp;
8834
35
    }
8835
71
    if ((*cmp == 0) && (*in == ':')) {
8836
34
        in++;
8837
34
  cmp = name;
8838
54
  while (*in != 0 && *in == *cmp) {
8839
20
      ++in;
8840
20
      ++cmp;
8841
20
  }
8842
34
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8843
      /* success */
8844
3
            ctxt->input->col += in - ctxt->input->cur;
8845
3
      ctxt->input->cur = in;
8846
3
      return((const xmlChar*) 1);
8847
3
  }
8848
34
    }
8849
    /*
8850
     * all strings coms from the dictionary, equality can be done directly
8851
     */
8852
68
    ret = xmlParseQName (ctxt, &prefix2);
8853
68
    if ((ret == name) && (prefix == prefix2))
8854
0
  return((const xmlChar*) 1);
8855
68
    return ret;
8856
68
}
8857
8858
/**
8859
 * xmlParseAttValueInternal:
8860
 * @ctxt:  an XML parser context
8861
 * @len:  attribute len result
8862
 * @alloc:  whether the attribute was reallocated as a new string
8863
 * @normalize:  if 1 then further non-CDATA normalization must be done
8864
 *
8865
 * parse a value for an attribute.
8866
 * NOTE: if no normalization is needed, the routine will return pointers
8867
 *       directly from the data buffer.
8868
 *
8869
 * 3.3.3 Attribute-Value Normalization:
8870
 * Before the value of an attribute is passed to the application or
8871
 * checked for validity, the XML processor must normalize it as follows:
8872
 * - a character reference is processed by appending the referenced
8873
 *   character to the attribute value
8874
 * - an entity reference is processed by recursively processing the
8875
 *   replacement text of the entity
8876
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8877
 *   appending #x20 to the normalized value, except that only a single
8878
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8879
 *   parsed entity or the literal entity value of an internal parsed entity
8880
 * - other characters are processed by appending them to the normalized value
8881
 * If the declared value is not CDATA, then the XML processor must further
8882
 * process the normalized attribute value by discarding any leading and
8883
 * trailing space (#x20) characters, and by replacing sequences of space
8884
 * (#x20) characters by a single space (#x20) character.
8885
 * All attributes for which no declaration has been read should be treated
8886
 * by a non-validating parser as if declared CDATA.
8887
 *
8888
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8889
 *     caller if it was copied, this can be detected by val[*len] == 0.
8890
 */
8891
8892
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8893
2
    const xmlChar *oldbase = ctxt->input->base;\
8894
2
    GROW;\
8895
2
    if (ctxt->instate == XML_PARSER_EOF)\
8896
2
        return(NULL);\
8897
2
    if (oldbase != ctxt->input->base) {\
8898
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
8899
0
        start = start + delta;\
8900
0
        in = in + delta;\
8901
0
    }\
8902
2
    end = ctxt->input->end;
8903
8904
static xmlChar *
8905
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8906
                         int normalize)
8907
13.2k
{
8908
13.2k
    xmlChar limit = 0;
8909
13.2k
    const xmlChar *in = NULL, *start, *end, *last;
8910
13.2k
    xmlChar *ret = NULL;
8911
13.2k
    int line, col;
8912
13.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8913
0
                    XML_MAX_HUGE_LENGTH :
8914
13.2k
                    XML_MAX_TEXT_LENGTH;
8915
8916
13.2k
    GROW;
8917
13.2k
    in = (xmlChar *) CUR_PTR;
8918
13.2k
    line = ctxt->input->line;
8919
13.2k
    col = ctxt->input->col;
8920
13.2k
    if (*in != '"' && *in != '\'') {
8921
1.26k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8922
1.26k
        return (NULL);
8923
1.26k
    }
8924
11.9k
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8925
8926
    /*
8927
     * try to handle in this routine the most common case where no
8928
     * allocation of a new string is required and where content is
8929
     * pure ASCII.
8930
     */
8931
11.9k
    limit = *in++;
8932
11.9k
    col++;
8933
11.9k
    end = ctxt->input->end;
8934
11.9k
    start = in;
8935
11.9k
    if (in >= end) {
8936
0
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8937
0
    }
8938
11.9k
    if (normalize) {
8939
        /*
8940
   * Skip any leading spaces
8941
   */
8942
0
  while ((in < end) && (*in != limit) &&
8943
0
         ((*in == 0x20) || (*in == 0x9) ||
8944
0
          (*in == 0xA) || (*in == 0xD))) {
8945
0
      if (*in == 0xA) {
8946
0
          line++; col = 1;
8947
0
      } else {
8948
0
          col++;
8949
0
      }
8950
0
      in++;
8951
0
      start = in;
8952
0
      if (in >= end) {
8953
0
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8954
0
                if ((in - start) > maxLength) {
8955
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8956
0
                                   "AttValue length too long\n");
8957
0
                    return(NULL);
8958
0
                }
8959
0
      }
8960
0
  }
8961
0
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8962
0
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8963
0
      col++;
8964
0
      if ((*in++ == 0x20) && (*in == 0x20)) break;
8965
0
      if (in >= end) {
8966
0
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8967
0
                if ((in - start) > maxLength) {
8968
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8969
0
                                   "AttValue length too long\n");
8970
0
                    return(NULL);
8971
0
                }
8972
0
      }
8973
0
  }
8974
0
  last = in;
8975
  /*
8976
   * skip the trailing blanks
8977
   */
8978
0
  while ((last[-1] == 0x20) && (last > start)) last--;
8979
0
  while ((in < end) && (*in != limit) &&
8980
0
         ((*in == 0x20) || (*in == 0x9) ||
8981
0
          (*in == 0xA) || (*in == 0xD))) {
8982
0
      if (*in == 0xA) {
8983
0
          line++, col = 1;
8984
0
      } else {
8985
0
          col++;
8986
0
      }
8987
0
      in++;
8988
0
      if (in >= end) {
8989
0
    const xmlChar *oldbase = ctxt->input->base;
8990
0
    GROW;
8991
0
                if (ctxt->instate == XML_PARSER_EOF)
8992
0
                    return(NULL);
8993
0
    if (oldbase != ctxt->input->base) {
8994
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
8995
0
        start = start + delta;
8996
0
        in = in + delta;
8997
0
        last = last + delta;
8998
0
    }
8999
0
    end = ctxt->input->end;
9000
0
                if ((in - start) > maxLength) {
9001
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9002
0
                                   "AttValue length too long\n");
9003
0
                    return(NULL);
9004
0
                }
9005
0
      }
9006
0
  }
9007
0
        if ((in - start) > maxLength) {
9008
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9009
0
                           "AttValue length too long\n");
9010
0
            return(NULL);
9011
0
        }
9012
0
  if (*in != limit) goto need_complex;
9013
11.9k
    } else {
9014
168k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9015
168k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9016
156k
      in++;
9017
156k
      col++;
9018
156k
      if (in >= end) {
9019
2
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9020
2
                if ((in - start) > maxLength) {
9021
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9022
0
                                   "AttValue length too long\n");
9023
0
                    return(NULL);
9024
0
                }
9025
2
      }
9026
156k
  }
9027
11.9k
  last = in;
9028
11.9k
        if ((in - start) > maxLength) {
9029
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9030
0
                           "AttValue length too long\n");
9031
0
            return(NULL);
9032
0
        }
9033
11.9k
  if (*in != limit) goto need_complex;
9034
11.9k
    }
9035
6.44k
    in++;
9036
6.44k
    col++;
9037
6.44k
    if (len != NULL) {
9038
6.44k
        if (alloc) *alloc = 0;
9039
6.44k
        *len = last - start;
9040
6.44k
        ret = (xmlChar *) start;
9041
6.44k
    } else {
9042
0
        if (alloc) *alloc = 1;
9043
0
        ret = xmlStrndup(start, last - start);
9044
0
    }
9045
6.44k
    CUR_PTR = in;
9046
6.44k
    ctxt->input->line = line;
9047
6.44k
    ctxt->input->col = col;
9048
6.44k
    return ret;
9049
5.48k
need_complex:
9050
5.48k
    if (alloc) *alloc = 1;
9051
5.48k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9052
11.9k
}
9053
9054
/**
9055
 * xmlParseAttribute2:
9056
 * @ctxt:  an XML parser context
9057
 * @pref:  the element prefix
9058
 * @elem:  the element name
9059
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9060
 * @value:  a xmlChar ** used to store the value of the attribute
9061
 * @len:  an int * to save the length of the attribute
9062
 * @alloc:  an int * to indicate if the attribute was allocated
9063
 *
9064
 * parse an attribute in the new SAX2 framework.
9065
 *
9066
 * Returns the attribute name, and the value in *value, .
9067
 */
9068
9069
static const xmlChar *
9070
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9071
                   const xmlChar * pref, const xmlChar * elem,
9072
                   const xmlChar ** prefix, xmlChar ** value,
9073
                   int *len, int *alloc)
9074
26.8k
{
9075
26.8k
    const xmlChar *name;
9076
26.8k
    xmlChar *val, *internal_val = NULL;
9077
26.8k
    int normalize = 0;
9078
9079
26.8k
    *value = NULL;
9080
26.8k
    GROW;
9081
26.8k
    name = xmlParseQName(ctxt, prefix);
9082
26.8k
    if (name == NULL) {
9083
8.65k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9084
8.65k
                       "error parsing attribute name\n");
9085
8.65k
        return (NULL);
9086
8.65k
    }
9087
9088
    /*
9089
     * get the type if needed
9090
     */
9091
18.2k
    if (ctxt->attsSpecial != NULL) {
9092
0
        int type;
9093
9094
0
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9095
0
                                                 pref, elem, *prefix, name);
9096
0
        if (type != 0)
9097
0
            normalize = 1;
9098
0
    }
9099
9100
    /*
9101
     * read the value
9102
     */
9103
18.2k
    SKIP_BLANKS;
9104
18.2k
    if (RAW == '=') {
9105
13.2k
        NEXT;
9106
13.2k
        SKIP_BLANKS;
9107
13.2k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9108
13.2k
        if (val == NULL)
9109
1.26k
            return (NULL);
9110
11.9k
  if (normalize) {
9111
      /*
9112
       * Sometimes a second normalisation pass for spaces is needed
9113
       * but that only happens if charrefs or entities references
9114
       * have been used in the attribute value, i.e. the attribute
9115
       * value have been extracted in an allocated string already.
9116
       */
9117
0
      if (*alloc) {
9118
0
          const xmlChar *val2;
9119
9120
0
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9121
0
    if ((val2 != NULL) && (val2 != val)) {
9122
0
        xmlFree(val);
9123
0
        val = (xmlChar *) val2;
9124
0
    }
9125
0
      }
9126
0
  }
9127
11.9k
        ctxt->instate = XML_PARSER_CONTENT;
9128
11.9k
    } else {
9129
5.00k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9130
5.00k
                          "Specification mandates value for attribute %s\n",
9131
5.00k
                          name);
9132
5.00k
        return (name);
9133
5.00k
    }
9134
9135
11.9k
    if (*prefix == ctxt->str_xml) {
9136
        /*
9137
         * Check that xml:lang conforms to the specification
9138
         * No more registered as an error, just generate a warning now
9139
         * since this was deprecated in XML second edition
9140
         */
9141
181
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9142
0
            internal_val = xmlStrndup(val, *len);
9143
0
            if (!xmlCheckLanguageID(internal_val)) {
9144
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9145
0
                              "Malformed value for xml:lang : %s\n",
9146
0
                              internal_val, NULL);
9147
0
            }
9148
0
        }
9149
9150
        /*
9151
         * Check that xml:space conforms to the specification
9152
         */
9153
181
        if (xmlStrEqual(name, BAD_CAST "space")) {
9154
0
            internal_val = xmlStrndup(val, *len);
9155
0
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9156
0
                *(ctxt->space) = 0;
9157
0
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9158
0
                *(ctxt->space) = 1;
9159
0
            else {
9160
0
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9161
0
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9162
0
                              internal_val, NULL);
9163
0
            }
9164
0
        }
9165
181
        if (internal_val) {
9166
0
            xmlFree(internal_val);
9167
0
        }
9168
181
    }
9169
9170
11.9k
    *value = val;
9171
11.9k
    return (name);
9172
18.2k
}
9173
/**
9174
 * xmlParseStartTag2:
9175
 * @ctxt:  an XML parser context
9176
 *
9177
 * Parse a start tag. Always consumes '<'.
9178
 *
9179
 * This routine is called when running SAX2 parsing
9180
 *
9181
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9182
 *
9183
 * [ WFC: Unique Att Spec ]
9184
 * No attribute name may appear more than once in the same start-tag or
9185
 * empty-element tag.
9186
 *
9187
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9188
 *
9189
 * [ WFC: Unique Att Spec ]
9190
 * No attribute name may appear more than once in the same start-tag or
9191
 * empty-element tag.
9192
 *
9193
 * With namespace:
9194
 *
9195
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9196
 *
9197
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9198
 *
9199
 * Returns the element name parsed
9200
 */
9201
9202
static const xmlChar *
9203
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9204
31.5k
                  const xmlChar **URI, int *tlen) {
9205
31.5k
    const xmlChar *localname;
9206
31.5k
    const xmlChar *prefix;
9207
31.5k
    const xmlChar *attname;
9208
31.5k
    const xmlChar *aprefix;
9209
31.5k
    const xmlChar *nsname;
9210
31.5k
    xmlChar *attvalue;
9211
31.5k
    const xmlChar **atts = ctxt->atts;
9212
31.5k
    int maxatts = ctxt->maxatts;
9213
31.5k
    int nratts, nbatts, nbdef, inputid;
9214
31.5k
    int i, j, nbNs, attval;
9215
31.5k
    size_t cur;
9216
31.5k
    int nsNr = ctxt->nsNr;
9217
9218
31.5k
    if (RAW != '<') return(NULL);
9219
31.5k
    NEXT1;
9220
9221
31.5k
    cur = ctxt->input->cur - ctxt->input->base;
9222
31.5k
    inputid = ctxt->input->id;
9223
31.5k
    nbatts = 0;
9224
31.5k
    nratts = 0;
9225
31.5k
    nbdef = 0;
9226
31.5k
    nbNs = 0;
9227
31.5k
    attval = 0;
9228
    /* Forget any namespaces added during an earlier parse of this element. */
9229
31.5k
    ctxt->nsNr = nsNr;
9230
9231
31.5k
    localname = xmlParseQName(ctxt, &prefix);
9232
31.5k
    if (localname == NULL) {
9233
3.75k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9234
3.75k
           "StartTag: invalid element name\n");
9235
3.75k
        return(NULL);
9236
3.75k
    }
9237
27.8k
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9238
9239
    /*
9240
     * Now parse the attributes, it ends up with the ending
9241
     *
9242
     * (S Attribute)* S?
9243
     */
9244
27.8k
    SKIP_BLANKS;
9245
27.8k
    GROW;
9246
9247
31.1k
    while (((RAW != '>') &&
9248
31.1k
     ((RAW != '/') || (NXT(1) != '>')) &&
9249
31.1k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9250
26.8k
  int len = -1, alloc = 0;
9251
9252
26.8k
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9253
26.8k
                               &aprefix, &attvalue, &len, &alloc);
9254
26.8k
        if (attname == NULL) {
9255
9.92k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9256
9.92k
           "xmlParseStartTag: problem parsing attributes\n");
9257
9.92k
      break;
9258
9.92k
  }
9259
16.9k
        if (attvalue == NULL)
9260
5.00k
            goto next_attr;
9261
11.9k
  if (len < 0) len = xmlStrlen(attvalue);
9262
9263
11.9k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9264
6.05k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9265
6.05k
            xmlURIPtr uri;
9266
9267
6.05k
            if (URL == NULL) {
9268
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9269
0
                if ((attvalue != NULL) && (alloc != 0))
9270
0
                    xmlFree(attvalue);
9271
0
                localname = NULL;
9272
0
                goto done;
9273
0
            }
9274
6.05k
            if (*URL != 0) {
9275
6.05k
                uri = xmlParseURI((const char *) URL);
9276
6.05k
                if (uri == NULL) {
9277
3.84k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9278
3.84k
                             "xmlns: '%s' is not a valid URI\n",
9279
3.84k
                                       URL, NULL, NULL);
9280
3.84k
                } else {
9281
2.20k
                    if (uri->scheme == NULL) {
9282
1.36k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9283
1.36k
                                  "xmlns: URI %s is not absolute\n",
9284
1.36k
                                  URL, NULL, NULL);
9285
1.36k
                    }
9286
2.20k
                    xmlFreeURI(uri);
9287
2.20k
                }
9288
6.05k
                if (URL == ctxt->str_xml_ns) {
9289
0
                    if (attname != ctxt->str_xml) {
9290
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9291
0
                     "xml namespace URI cannot be the default namespace\n",
9292
0
                                 NULL, NULL, NULL);
9293
0
                    }
9294
0
                    goto next_attr;
9295
0
                }
9296
6.05k
                if ((len == 29) &&
9297
6.05k
                    (xmlStrEqual(URL,
9298
145
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9299
36
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9300
36
                         "reuse of the xmlns namespace name is forbidden\n",
9301
36
                             NULL, NULL, NULL);
9302
36
                    goto next_attr;
9303
36
                }
9304
6.05k
            }
9305
            /*
9306
             * check that it's not a defined namespace
9307
             */
9308
6.93k
            for (j = 1;j <= nbNs;j++)
9309
1.83k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9310
914
                    break;
9311
6.01k
            if (j <= nbNs)
9312
914
                xmlErrAttributeDup(ctxt, NULL, attname);
9313
5.10k
            else
9314
5.10k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9315
9316
6.01k
        } else if (aprefix == ctxt->str_xmlns) {
9317
5.06k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9318
5.06k
            xmlURIPtr uri;
9319
9320
5.06k
            if (attname == ctxt->str_xml) {
9321
0
                if (URL != ctxt->str_xml_ns) {
9322
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9323
0
                             "xml namespace prefix mapped to wrong URI\n",
9324
0
                             NULL, NULL, NULL);
9325
0
                }
9326
                /*
9327
                 * Do not keep a namespace definition node
9328
                 */
9329
0
                goto next_attr;
9330
0
            }
9331
5.06k
            if (URL == ctxt->str_xml_ns) {
9332
0
                if (attname != ctxt->str_xml) {
9333
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9334
0
                             "xml namespace URI mapped to wrong prefix\n",
9335
0
                             NULL, NULL, NULL);
9336
0
                }
9337
0
                goto next_attr;
9338
0
            }
9339
5.06k
            if (attname == ctxt->str_xmlns) {
9340
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9341
0
                         "redefinition of the xmlns prefix is forbidden\n",
9342
0
                         NULL, NULL, NULL);
9343
0
                goto next_attr;
9344
0
            }
9345
5.06k
            if ((len == 29) &&
9346
5.06k
                (xmlStrEqual(URL,
9347
54
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9348
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9349
0
                         "reuse of the xmlns namespace name is forbidden\n",
9350
0
                         NULL, NULL, NULL);
9351
0
                goto next_attr;
9352
0
            }
9353
5.06k
            if ((URL == NULL) || (URL[0] == 0)) {
9354
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9355
0
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9356
0
                              attname, NULL, NULL);
9357
0
                goto next_attr;
9358
5.06k
            } else {
9359
5.06k
                uri = xmlParseURI((const char *) URL);
9360
5.06k
                if (uri == NULL) {
9361
3.18k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9362
3.18k
                         "xmlns:%s: '%s' is not a valid URI\n",
9363
3.18k
                                       attname, URL, NULL);
9364
3.18k
                } else {
9365
1.88k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9366
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9367
0
                                  "xmlns:%s: URI %s is not absolute\n",
9368
0
                                  attname, URL, NULL);
9369
0
                    }
9370
1.88k
                    xmlFreeURI(uri);
9371
1.88k
                }
9372
5.06k
            }
9373
9374
            /*
9375
             * check that it's not a defined namespace
9376
             */
9377
7.21k
            for (j = 1;j <= nbNs;j++)
9378
2.34k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9379
195
                    break;
9380
5.06k
            if (j <= nbNs)
9381
195
                xmlErrAttributeDup(ctxt, aprefix, attname);
9382
4.86k
            else
9383
4.86k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9384
9385
5.06k
        } else {
9386
            /*
9387
             * Add the pair to atts
9388
             */
9389
820
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9390
29
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9391
0
                    goto next_attr;
9392
0
                }
9393
29
                maxatts = ctxt->maxatts;
9394
29
                atts = ctxt->atts;
9395
29
            }
9396
820
            ctxt->attallocs[nratts++] = alloc;
9397
820
            atts[nbatts++] = attname;
9398
820
            atts[nbatts++] = aprefix;
9399
            /*
9400
             * The namespace URI field is used temporarily to point at the
9401
             * base of the current input buffer for non-alloced attributes.
9402
             * When the input buffer is reallocated, all the pointers become
9403
             * invalid, but they can be reconstructed later.
9404
             */
9405
820
            if (alloc)
9406
277
                atts[nbatts++] = NULL;
9407
543
            else
9408
543
                atts[nbatts++] = ctxt->input->base;
9409
820
            atts[nbatts++] = attvalue;
9410
820
            attvalue += len;
9411
820
            atts[nbatts++] = attvalue;
9412
            /*
9413
             * tag if some deallocation is needed
9414
             */
9415
820
            if (alloc != 0) attval = 1;
9416
820
            attvalue = NULL; /* moved into atts */
9417
820
        }
9418
9419
16.9k
next_attr:
9420
16.9k
        if ((attvalue != NULL) && (alloc != 0)) {
9421
5.21k
            xmlFree(attvalue);
9422
5.21k
            attvalue = NULL;
9423
5.21k
        }
9424
9425
16.9k
  GROW
9426
16.9k
        if (ctxt->instate == XML_PARSER_EOF)
9427
0
            break;
9428
16.9k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9429
1.30k
      break;
9430
15.6k
  if (SKIP_BLANKS == 0) {
9431
12.2k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9432
12.2k
         "attributes construct error\n");
9433
12.2k
      break;
9434
12.2k
  }
9435
3.36k
        GROW;
9436
3.36k
    }
9437
9438
27.8k
    if (ctxt->input->id != inputid) {
9439
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9440
0
                    "Unexpected change of input\n");
9441
0
        localname = NULL;
9442
0
        goto done;
9443
0
    }
9444
9445
    /* Reconstruct attribute value pointers. */
9446
28.6k
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9447
820
        if (atts[i+2] != NULL) {
9448
            /*
9449
             * Arithmetic on dangling pointers is technically undefined
9450
             * behavior, but well...
9451
             */
9452
543
            const xmlChar *old = atts[i+2];
9453
543
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9454
543
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9455
543
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9456
543
        }
9457
820
    }
9458
9459
    /*
9460
     * The attributes defaulting
9461
     */
9462
27.8k
    if (ctxt->attsDefault != NULL) {
9463
0
        xmlDefAttrsPtr defaults;
9464
9465
0
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9466
0
  if (defaults != NULL) {
9467
0
      for (i = 0;i < defaults->nbAttrs;i++) {
9468
0
          attname = defaults->values[5 * i];
9469
0
    aprefix = defaults->values[5 * i + 1];
9470
9471
                /*
9472
     * special work for namespaces defaulted defs
9473
     */
9474
0
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9475
        /*
9476
         * check that it's not a defined namespace
9477
         */
9478
0
        for (j = 1;j <= nbNs;j++)
9479
0
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9480
0
          break;
9481
0
              if (j <= nbNs) continue;
9482
9483
0
        nsname = xmlGetNamespace(ctxt, NULL);
9484
0
        if (nsname != defaults->values[5 * i + 2]) {
9485
0
      if (nsPush(ctxt, NULL,
9486
0
                 defaults->values[5 * i + 2]) > 0)
9487
0
          nbNs++;
9488
0
        }
9489
0
    } else if (aprefix == ctxt->str_xmlns) {
9490
        /*
9491
         * check that it's not a defined namespace
9492
         */
9493
0
        for (j = 1;j <= nbNs;j++)
9494
0
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9495
0
          break;
9496
0
              if (j <= nbNs) continue;
9497
9498
0
        nsname = xmlGetNamespace(ctxt, attname);
9499
0
        if (nsname != defaults->values[5 * i + 2]) {
9500
0
      if (nsPush(ctxt, attname,
9501
0
                 defaults->values[5 * i + 2]) > 0)
9502
0
          nbNs++;
9503
0
        }
9504
0
    } else {
9505
        /*
9506
         * check that it's not a defined attribute
9507
         */
9508
0
        for (j = 0;j < nbatts;j+=5) {
9509
0
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9510
0
          break;
9511
0
        }
9512
0
        if (j < nbatts) continue;
9513
9514
0
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9515
0
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9516
0
                            localname = NULL;
9517
0
                            goto done;
9518
0
      }
9519
0
      maxatts = ctxt->maxatts;
9520
0
      atts = ctxt->atts;
9521
0
        }
9522
0
        atts[nbatts++] = attname;
9523
0
        atts[nbatts++] = aprefix;
9524
0
        if (aprefix == NULL)
9525
0
      atts[nbatts++] = NULL;
9526
0
        else
9527
0
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9528
0
        atts[nbatts++] = defaults->values[5 * i + 2];
9529
0
        atts[nbatts++] = defaults->values[5 * i + 3];
9530
0
        if ((ctxt->standalone == 1) &&
9531
0
            (defaults->values[5 * i + 4] != NULL)) {
9532
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9533
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9534
0
                                   attname, localname);
9535
0
        }
9536
0
        nbdef++;
9537
0
    }
9538
0
      }
9539
0
  }
9540
0
    }
9541
9542
    /*
9543
     * The attributes checkings
9544
     */
9545
28.6k
    for (i = 0; i < nbatts;i += 5) {
9546
        /*
9547
  * The default namespace does not apply to attribute names.
9548
  */
9549
820
  if (atts[i + 1] != NULL) {
9550
563
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9551
563
      if (nsname == NULL) {
9552
382
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9553
382
        "Namespace prefix %s for %s on %s is not defined\n",
9554
382
        atts[i + 1], atts[i], localname);
9555
382
      }
9556
563
      atts[i + 2] = nsname;
9557
563
  } else
9558
257
      nsname = NULL;
9559
  /*
9560
   * [ WFC: Unique Att Spec ]
9561
   * No attribute name may appear more than once in the same
9562
   * start-tag or empty-element tag.
9563
   * As extended by the Namespace in XML REC.
9564
   */
9565
1.14k
        for (j = 0; j < i;j += 5) {
9566
490
      if (atts[i] == atts[j]) {
9567
162
          if (atts[i+1] == atts[j+1]) {
9568
162
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9569
162
        break;
9570
162
    }
9571
0
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9572
0
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9573
0
           "Namespaced Attribute %s in '%s' redefined\n",
9574
0
           atts[i], nsname, NULL);
9575
0
        break;
9576
0
    }
9577
0
      }
9578
490
  }
9579
820
    }
9580
9581
27.8k
    nsname = xmlGetNamespace(ctxt, prefix);
9582
27.8k
    if ((prefix != NULL) && (nsname == NULL)) {
9583
5.44k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9584
5.44k
           "Namespace prefix %s on %s is not defined\n",
9585
5.44k
     prefix, localname, NULL);
9586
5.44k
    }
9587
27.8k
    *pref = prefix;
9588
27.8k
    *URI = nsname;
9589
9590
    /*
9591
     * SAX: Start of Element !
9592
     */
9593
27.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9594
27.8k
  (!ctxt->disableSAX)) {
9595
27.8k
  if (nbNs > 0)
9596
8.50k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9597
8.50k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9598
8.50k
        nbatts / 5, nbdef, atts);
9599
19.3k
  else
9600
19.3k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9601
19.3k
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9602
27.8k
    }
9603
9604
27.8k
done:
9605
    /*
9606
     * Free up attribute allocated strings if needed
9607
     */
9608
27.8k
    if (attval != 0) {
9609
491
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9610
337
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9611
277
          xmlFree((xmlChar *) atts[i]);
9612
154
    }
9613
9614
27.8k
    return(localname);
9615
27.8k
}
9616
9617
/**
9618
 * xmlParseEndTag2:
9619
 * @ctxt:  an XML parser context
9620
 * @line:  line of the start tag
9621
 * @nsNr:  number of namespaces on the start tag
9622
 *
9623
 * Parse an end tag. Always consumes '</'.
9624
 *
9625
 * [42] ETag ::= '</' Name S? '>'
9626
 *
9627
 * With namespace
9628
 *
9629
 * [NS 9] ETag ::= '</' QName S? '>'
9630
 */
9631
9632
static void
9633
239
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9634
239
    const xmlChar *name;
9635
9636
239
    GROW;
9637
239
    if ((RAW != '<') || (NXT(1) != '/')) {
9638
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9639
0
  return;
9640
0
    }
9641
239
    SKIP(2);
9642
9643
239
    if (tag->prefix == NULL)
9644
168
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9645
71
    else
9646
71
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9647
9648
    /*
9649
     * We should definitely be at the ending "S? '>'" part
9650
     */
9651
239
    GROW;
9652
239
    if (ctxt->instate == XML_PARSER_EOF)
9653
0
        return;
9654
239
    SKIP_BLANKS;
9655
239
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9656
130
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9657
130
    } else
9658
109
  NEXT1;
9659
9660
    /*
9661
     * [ WFC: Element Type Match ]
9662
     * The Name in an element's end-tag must match the element type in the
9663
     * start-tag.
9664
     *
9665
     */
9666
239
    if (name != (xmlChar*)1) {
9667
181
        if (name == NULL) name = BAD_CAST "unparsable";
9668
181
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9669
181
         "Opening and ending tag mismatch: %s line %d and %s\n",
9670
181
                    ctxt->name, tag->line, name);
9671
181
    }
9672
9673
    /*
9674
     * SAX: End of Tag
9675
     */
9676
239
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9677
239
  (!ctxt->disableSAX))
9678
239
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9679
239
                                tag->URI);
9680
9681
239
    spacePop(ctxt);
9682
239
    if (tag->nsNr != 0)
9683
21
  nsPop(ctxt, tag->nsNr);
9684
239
}
9685
9686
/**
9687
 * xmlParseCDSect:
9688
 * @ctxt:  an XML parser context
9689
 *
9690
 * DEPRECATED: Internal function, don't use.
9691
 *
9692
 * Parse escaped pure raw content. Always consumes '<!['.
9693
 *
9694
 * [18] CDSect ::= CDStart CData CDEnd
9695
 *
9696
 * [19] CDStart ::= '<![CDATA['
9697
 *
9698
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9699
 *
9700
 * [21] CDEnd ::= ']]>'
9701
 */
9702
void
9703
22
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9704
22
    xmlChar *buf = NULL;
9705
22
    int len = 0;
9706
22
    int size = XML_PARSER_BUFFER_SIZE;
9707
22
    int r, rl;
9708
22
    int s, sl;
9709
22
    int cur, l;
9710
22
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9711
0
                    XML_MAX_HUGE_LENGTH :
9712
22
                    XML_MAX_TEXT_LENGTH;
9713
9714
22
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9715
0
        return;
9716
22
    SKIP(3);
9717
9718
22
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9719
0
        return;
9720
22
    SKIP(6);
9721
9722
22
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9723
22
    r = CUR_CHAR(rl);
9724
22
    if (!IS_CHAR(r)) {
9725
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9726
0
        goto out;
9727
0
    }
9728
22
    NEXTL(rl);
9729
22
    s = CUR_CHAR(sl);
9730
22
    if (!IS_CHAR(s)) {
9731
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9732
0
        goto out;
9733
0
    }
9734
22
    NEXTL(sl);
9735
22
    cur = CUR_CHAR(l);
9736
22
    buf = (xmlChar *) xmlMallocAtomic(size);
9737
22
    if (buf == NULL) {
9738
0
  xmlErrMemory(ctxt, NULL);
9739
0
        goto out;
9740
0
    }
9741
752k
    while (IS_CHAR(cur) &&
9742
752k
           ((r != ']') || (s != ']') || (cur != '>'))) {
9743
752k
  if (len + 5 >= size) {
9744
42
      xmlChar *tmp;
9745
9746
42
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9747
42
      if (tmp == NULL) {
9748
0
    xmlErrMemory(ctxt, NULL);
9749
0
                goto out;
9750
0
      }
9751
42
      buf = tmp;
9752
42
      size *= 2;
9753
42
  }
9754
752k
  COPY_BUF(rl,buf,len,r);
9755
752k
        if (len > maxLength) {
9756
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9757
0
                           "CData section too big found\n");
9758
0
            goto out;
9759
0
        }
9760
752k
  r = s;
9761
752k
  rl = sl;
9762
752k
  s = cur;
9763
752k
  sl = l;
9764
752k
  NEXTL(l);
9765
752k
  cur = CUR_CHAR(l);
9766
752k
    }
9767
22
    buf[len] = 0;
9768
22
    if (ctxt->instate == XML_PARSER_EOF) {
9769
0
        xmlFree(buf);
9770
0
        return;
9771
0
    }
9772
22
    if (cur != '>') {
9773
8
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9774
8
                       "CData section not finished\n%.50s\n", buf);
9775
8
        goto out;
9776
8
    }
9777
14
    NEXTL(l);
9778
9779
    /*
9780
     * OK the buffer is to be consumed as cdata.
9781
     */
9782
14
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9783
14
  if (ctxt->sax->cdataBlock != NULL)
9784
14
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9785
0
  else if (ctxt->sax->characters != NULL)
9786
0
      ctxt->sax->characters(ctxt->userData, buf, len);
9787
14
    }
9788
9789
22
out:
9790
22
    if (ctxt->instate != XML_PARSER_EOF)
9791
22
        ctxt->instate = XML_PARSER_CONTENT;
9792
22
    xmlFree(buf);
9793
22
}
9794
9795
/**
9796
 * xmlParseContentInternal:
9797
 * @ctxt:  an XML parser context
9798
 *
9799
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9800
 * unexpected EOF to the caller.
9801
 */
9802
9803
static void
9804
113
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9805
113
    int nameNr = ctxt->nameNr;
9806
9807
113
    GROW;
9808
227k
    while ((RAW != 0) &&
9809
227k
     (ctxt->instate != XML_PARSER_EOF)) {
9810
227k
  const xmlChar *cur = ctxt->input->cur;
9811
9812
  /*
9813
   * First case : a Processing Instruction.
9814
   */
9815
227k
  if ((*cur == '<') && (cur[1] == '?')) {
9816
1.80k
      xmlParsePI(ctxt);
9817
1.80k
  }
9818
9819
  /*
9820
   * Second case : a CDSection
9821
   */
9822
  /* 2.6.0 test was *cur not RAW */
9823
225k
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9824
22
      xmlParseCDSect(ctxt);
9825
22
  }
9826
9827
  /*
9828
   * Third case :  a comment
9829
   */
9830
225k
  else if ((*cur == '<') && (NXT(1) == '!') &&
9831
225k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9832
620
      xmlParseComment(ctxt);
9833
620
      ctxt->instate = XML_PARSER_CONTENT;
9834
620
  }
9835
9836
  /*
9837
   * Fourth case :  a sub-element.
9838
   */
9839
224k
  else if (*cur == '<') {
9840
31.6k
            if (NXT(1) == '/') {
9841
239
                if (ctxt->nameNr <= nameNr)
9842
0
                    break;
9843
239
          xmlParseElementEnd(ctxt);
9844
31.4k
            } else {
9845
31.4k
          xmlParseElementStart(ctxt);
9846
31.4k
            }
9847
31.6k
  }
9848
9849
  /*
9850
   * Fifth case : a reference. If if has not been resolved,
9851
   *    parsing returns it's Name, create the node
9852
   */
9853
9854
193k
  else if (*cur == '&') {
9855
19.1k
      xmlParseReference(ctxt);
9856
19.1k
  }
9857
9858
  /*
9859
   * Last case, text. Note that References are handled directly.
9860
   */
9861
174k
  else {
9862
174k
      xmlParseCharData(ctxt, 0);
9863
174k
  }
9864
9865
227k
  SHRINK;
9866
227k
  GROW;
9867
227k
    }
9868
113
}
9869
9870
/**
9871
 * xmlParseContent:
9872
 * @ctxt:  an XML parser context
9873
 *
9874
 * Parse a content sequence. Stops at EOF or '</'.
9875
 *
9876
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9877
 */
9878
9879
void
9880
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9881
0
    int nameNr = ctxt->nameNr;
9882
9883
0
    xmlParseContentInternal(ctxt);
9884
9885
0
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9886
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9887
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9888
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9889
0
                "Premature end of data in tag %s line %d\n",
9890
0
    name, line, NULL);
9891
0
    }
9892
0
}
9893
9894
/**
9895
 * xmlParseElement:
9896
 * @ctxt:  an XML parser context
9897
 *
9898
 * DEPRECATED: Internal function, don't use.
9899
 *
9900
 * parse an XML element
9901
 *
9902
 * [39] element ::= EmptyElemTag | STag content ETag
9903
 *
9904
 * [ WFC: Element Type Match ]
9905
 * The Name in an element's end-tag must match the element type in the
9906
 * start-tag.
9907
 *
9908
 */
9909
9910
void
9911
122
xmlParseElement(xmlParserCtxtPtr ctxt) {
9912
122
    if (xmlParseElementStart(ctxt) != 0)
9913
9
        return;
9914
9915
113
    xmlParseContentInternal(ctxt);
9916
113
    if (ctxt->instate == XML_PARSER_EOF)
9917
7
  return;
9918
9919
106
    if (CUR == 0) {
9920
106
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9921
106
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9922
106
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9923
106
                "Premature end of data in tag %s line %d\n",
9924
106
    name, line, NULL);
9925
106
        return;
9926
106
    }
9927
9928
0
    xmlParseElementEnd(ctxt);
9929
0
}
9930
9931
/**
9932
 * xmlParseElementStart:
9933
 * @ctxt:  an XML parser context
9934
 *
9935
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9936
 * opening tag was parsed, 1 if an empty element was parsed.
9937
 *
9938
 * Always consumes '<'.
9939
 */
9940
static int
9941
31.5k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9942
31.5k
    const xmlChar *name;
9943
31.5k
    const xmlChar *prefix = NULL;
9944
31.5k
    const xmlChar *URI = NULL;
9945
31.5k
    xmlParserNodeInfo node_info;
9946
31.5k
    int line, tlen = 0;
9947
31.5k
    xmlNodePtr ret;
9948
31.5k
    int nsNr = ctxt->nsNr;
9949
9950
31.5k
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9951
31.5k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9952
6
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9953
6
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9954
6
        xmlParserMaxDepth);
9955
6
  xmlHaltParser(ctxt);
9956
6
  return(-1);
9957
6
    }
9958
9959
    /* Capture start position */
9960
31.5k
    if (ctxt->record_info) {
9961
0
        node_info.begin_pos = ctxt->input->consumed +
9962
0
                          (CUR_PTR - ctxt->input->base);
9963
0
  node_info.begin_line = ctxt->input->line;
9964
0
    }
9965
9966
31.5k
    if (ctxt->spaceNr == 0)
9967
0
  spacePush(ctxt, -1);
9968
31.5k
    else if (*ctxt->space == -2)
9969
0
  spacePush(ctxt, -1);
9970
31.5k
    else
9971
31.5k
  spacePush(ctxt, *ctxt->space);
9972
9973
31.5k
    line = ctxt->input->line;
9974
#ifdef LIBXML_SAX1_ENABLED
9975
    if (ctxt->sax2)
9976
#endif /* LIBXML_SAX1_ENABLED */
9977
31.5k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9978
#ifdef LIBXML_SAX1_ENABLED
9979
    else
9980
  name = xmlParseStartTag(ctxt);
9981
#endif /* LIBXML_SAX1_ENABLED */
9982
31.5k
    if (ctxt->instate == XML_PARSER_EOF)
9983
2
  return(-1);
9984
31.5k
    if (name == NULL) {
9985
3.75k
  spacePop(ctxt);
9986
3.75k
        return(-1);
9987
3.75k
    }
9988
27.8k
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
9989
27.8k
    ret = ctxt->node;
9990
9991
#ifdef LIBXML_VALID_ENABLED
9992
    /*
9993
     * [ VC: Root Element Type ]
9994
     * The Name in the document type declaration must match the element
9995
     * type of the root element.
9996
     */
9997
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9998
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9999
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10000
#endif /* LIBXML_VALID_ENABLED */
10001
10002
    /*
10003
     * Check for an Empty Element.
10004
     */
10005
27.8k
    if ((RAW == '/') && (NXT(1) == '>')) {
10006
1.16k
        SKIP(2);
10007
1.16k
  if (ctxt->sax2) {
10008
1.16k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10009
1.16k
    (!ctxt->disableSAX))
10010
1.16k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10011
#ifdef LIBXML_SAX1_ENABLED
10012
  } else {
10013
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10014
    (!ctxt->disableSAX))
10015
    ctxt->sax->endElement(ctxt->userData, name);
10016
#endif /* LIBXML_SAX1_ENABLED */
10017
1.16k
  }
10018
1.16k
  namePop(ctxt);
10019
1.16k
  spacePop(ctxt);
10020
1.16k
  if (nsNr != ctxt->nsNr)
10021
1.11k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10022
1.16k
  if ( ret != NULL && ctxt->record_info ) {
10023
0
     node_info.end_pos = ctxt->input->consumed +
10024
0
            (CUR_PTR - ctxt->input->base);
10025
0
     node_info.end_line = ctxt->input->line;
10026
0
     node_info.node = ret;
10027
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10028
0
  }
10029
1.16k
  return(1);
10030
1.16k
    }
10031
26.6k
    if (RAW == '>') {
10032
4.35k
        NEXT1;
10033
22.2k
    } else {
10034
22.2k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10035
22.2k
         "Couldn't find end of Start Tag %s line %d\n",
10036
22.2k
                    name, line, NULL);
10037
10038
  /*
10039
   * end of parsing of this node.
10040
   */
10041
22.2k
  nodePop(ctxt);
10042
22.2k
  namePop(ctxt);
10043
22.2k
  spacePop(ctxt);
10044
22.2k
  if (nsNr != ctxt->nsNr)
10045
7.27k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10046
10047
  /*
10048
   * Capture end position and add node
10049
   */
10050
22.2k
  if ( ret != NULL && ctxt->record_info ) {
10051
0
     node_info.end_pos = ctxt->input->consumed +
10052
0
            (CUR_PTR - ctxt->input->base);
10053
0
     node_info.end_line = ctxt->input->line;
10054
0
     node_info.node = ret;
10055
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10056
0
  }
10057
22.2k
  return(-1);
10058
22.2k
    }
10059
10060
4.35k
    return(0);
10061
26.6k
}
10062
10063
/**
10064
 * xmlParseElementEnd:
10065
 * @ctxt:  an XML parser context
10066
 *
10067
 * Parse the end of an XML element. Always consumes '</'.
10068
 */
10069
static void
10070
239
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10071
239
    xmlParserNodeInfo node_info;
10072
239
    xmlNodePtr ret = ctxt->node;
10073
10074
239
    if (ctxt->nameNr <= 0) {
10075
0
        if ((RAW == '<') && (NXT(1) == '/'))
10076
0
            SKIP(2);
10077
0
        return;
10078
0
    }
10079
10080
    /*
10081
     * parse the end of tag: '</' should be here.
10082
     */
10083
239
    if (ctxt->sax2) {
10084
239
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10085
239
  namePop(ctxt);
10086
239
    }
10087
#ifdef LIBXML_SAX1_ENABLED
10088
    else
10089
  xmlParseEndTag1(ctxt, 0);
10090
#endif /* LIBXML_SAX1_ENABLED */
10091
10092
    /*
10093
     * Capture end position and add node
10094
     */
10095
239
    if ( ret != NULL && ctxt->record_info ) {
10096
0
       node_info.end_pos = ctxt->input->consumed +
10097
0
                          (CUR_PTR - ctxt->input->base);
10098
0
       node_info.end_line = ctxt->input->line;
10099
0
       node_info.node = ret;
10100
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10101
0
    }
10102
239
}
10103
10104
/**
10105
 * xmlParseVersionNum:
10106
 * @ctxt:  an XML parser context
10107
 *
10108
 * DEPRECATED: Internal function, don't use.
10109
 *
10110
 * parse the XML version value.
10111
 *
10112
 * [26] VersionNum ::= '1.' [0-9]+
10113
 *
10114
 * In practice allow [0-9].[0-9]+ at that level
10115
 *
10116
 * Returns the string giving the XML version number, or NULL
10117
 */
10118
xmlChar *
10119
0
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10120
0
    xmlChar *buf = NULL;
10121
0
    int len = 0;
10122
0
    int size = 10;
10123
0
    xmlChar cur;
10124
10125
0
    buf = (xmlChar *) xmlMallocAtomic(size);
10126
0
    if (buf == NULL) {
10127
0
  xmlErrMemory(ctxt, NULL);
10128
0
  return(NULL);
10129
0
    }
10130
0
    cur = CUR;
10131
0
    if (!((cur >= '0') && (cur <= '9'))) {
10132
0
  xmlFree(buf);
10133
0
  return(NULL);
10134
0
    }
10135
0
    buf[len++] = cur;
10136
0
    NEXT;
10137
0
    cur=CUR;
10138
0
    if (cur != '.') {
10139
0
  xmlFree(buf);
10140
0
  return(NULL);
10141
0
    }
10142
0
    buf[len++] = cur;
10143
0
    NEXT;
10144
0
    cur=CUR;
10145
0
    while ((cur >= '0') && (cur <= '9')) {
10146
0
  if (len + 1 >= size) {
10147
0
      xmlChar *tmp;
10148
10149
0
      size *= 2;
10150
0
      tmp = (xmlChar *) xmlRealloc(buf, size);
10151
0
      if (tmp == NULL) {
10152
0
          xmlFree(buf);
10153
0
    xmlErrMemory(ctxt, NULL);
10154
0
    return(NULL);
10155
0
      }
10156
0
      buf = tmp;
10157
0
  }
10158
0
  buf[len++] = cur;
10159
0
  NEXT;
10160
0
  cur=CUR;
10161
0
    }
10162
0
    buf[len] = 0;
10163
0
    return(buf);
10164
0
}
10165
10166
/**
10167
 * xmlParseVersionInfo:
10168
 * @ctxt:  an XML parser context
10169
 *
10170
 * DEPRECATED: Internal function, don't use.
10171
 *
10172
 * parse the XML version.
10173
 *
10174
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10175
 *
10176
 * [25] Eq ::= S? '=' S?
10177
 *
10178
 * Returns the version string, e.g. "1.0"
10179
 */
10180
10181
xmlChar *
10182
0
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10183
0
    xmlChar *version = NULL;
10184
10185
0
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10186
0
  SKIP(7);
10187
0
  SKIP_BLANKS;
10188
0
  if (RAW != '=') {
10189
0
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10190
0
      return(NULL);
10191
0
        }
10192
0
  NEXT;
10193
0
  SKIP_BLANKS;
10194
0
  if (RAW == '"') {
10195
0
      NEXT;
10196
0
      version = xmlParseVersionNum(ctxt);
10197
0
      if (RAW != '"') {
10198
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10199
0
      } else
10200
0
          NEXT;
10201
0
  } else if (RAW == '\''){
10202
0
      NEXT;
10203
0
      version = xmlParseVersionNum(ctxt);
10204
0
      if (RAW != '\'') {
10205
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10206
0
      } else
10207
0
          NEXT;
10208
0
  } else {
10209
0
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10210
0
  }
10211
0
    }
10212
0
    return(version);
10213
0
}
10214
10215
/**
10216
 * xmlParseEncName:
10217
 * @ctxt:  an XML parser context
10218
 *
10219
 * DEPRECATED: Internal function, don't use.
10220
 *
10221
 * parse the XML encoding name
10222
 *
10223
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10224
 *
10225
 * Returns the encoding name value or NULL
10226
 */
10227
xmlChar *
10228
0
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10229
0
    xmlChar *buf = NULL;
10230
0
    int len = 0;
10231
0
    int size = 10;
10232
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10233
0
                    XML_MAX_TEXT_LENGTH :
10234
0
                    XML_MAX_NAME_LENGTH;
10235
0
    xmlChar cur;
10236
10237
0
    cur = CUR;
10238
0
    if (((cur >= 'a') && (cur <= 'z')) ||
10239
0
        ((cur >= 'A') && (cur <= 'Z'))) {
10240
0
  buf = (xmlChar *) xmlMallocAtomic(size);
10241
0
  if (buf == NULL) {
10242
0
      xmlErrMemory(ctxt, NULL);
10243
0
      return(NULL);
10244
0
  }
10245
10246
0
  buf[len++] = cur;
10247
0
  NEXT;
10248
0
  cur = CUR;
10249
0
  while (((cur >= 'a') && (cur <= 'z')) ||
10250
0
         ((cur >= 'A') && (cur <= 'Z')) ||
10251
0
         ((cur >= '0') && (cur <= '9')) ||
10252
0
         (cur == '.') || (cur == '_') ||
10253
0
         (cur == '-')) {
10254
0
      if (len + 1 >= size) {
10255
0
          xmlChar *tmp;
10256
10257
0
    size *= 2;
10258
0
    tmp = (xmlChar *) xmlRealloc(buf, size);
10259
0
    if (tmp == NULL) {
10260
0
        xmlErrMemory(ctxt, NULL);
10261
0
        xmlFree(buf);
10262
0
        return(NULL);
10263
0
    }
10264
0
    buf = tmp;
10265
0
      }
10266
0
      buf[len++] = cur;
10267
0
            if (len > maxLength) {
10268
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10269
0
                xmlFree(buf);
10270
0
                return(NULL);
10271
0
            }
10272
0
      NEXT;
10273
0
      cur = CUR;
10274
0
        }
10275
0
  buf[len] = 0;
10276
0
    } else {
10277
0
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10278
0
    }
10279
0
    return(buf);
10280
0
}
10281
10282
/**
10283
 * xmlParseEncodingDecl:
10284
 * @ctxt:  an XML parser context
10285
 *
10286
 * DEPRECATED: Internal function, don't use.
10287
 *
10288
 * parse the XML encoding declaration
10289
 *
10290
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10291
 *
10292
 * this setups the conversion filters.
10293
 *
10294
 * Returns the encoding value or NULL
10295
 */
10296
10297
const xmlChar *
10298
0
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10299
0
    xmlChar *encoding = NULL;
10300
10301
0
    SKIP_BLANKS;
10302
0
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10303
0
  SKIP(8);
10304
0
  SKIP_BLANKS;
10305
0
  if (RAW != '=') {
10306
0
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10307
0
      return(NULL);
10308
0
        }
10309
0
  NEXT;
10310
0
  SKIP_BLANKS;
10311
0
  if (RAW == '"') {
10312
0
      NEXT;
10313
0
      encoding = xmlParseEncName(ctxt);
10314
0
      if (RAW != '"') {
10315
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10316
0
    xmlFree((xmlChar *) encoding);
10317
0
    return(NULL);
10318
0
      } else
10319
0
          NEXT;
10320
0
  } else if (RAW == '\''){
10321
0
      NEXT;
10322
0
      encoding = xmlParseEncName(ctxt);
10323
0
      if (RAW != '\'') {
10324
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10325
0
    xmlFree((xmlChar *) encoding);
10326
0
    return(NULL);
10327
0
      } else
10328
0
          NEXT;
10329
0
  } else {
10330
0
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10331
0
  }
10332
10333
        /*
10334
         * Non standard parsing, allowing the user to ignore encoding
10335
         */
10336
0
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10337
0
      xmlFree((xmlChar *) encoding);
10338
0
            return(NULL);
10339
0
  }
10340
10341
  /*
10342
   * UTF-16 encoding switch has already taken place at this stage,
10343
   * more over the little-endian/big-endian selection is already done
10344
   */
10345
0
        if ((encoding != NULL) &&
10346
0
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10347
0
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10348
      /*
10349
       * If no encoding was passed to the parser, that we are
10350
       * using UTF-16 and no decoder is present i.e. the
10351
       * document is apparently UTF-8 compatible, then raise an
10352
       * encoding mismatch fatal error
10353
       */
10354
0
      if ((ctxt->encoding == NULL) &&
10355
0
          (ctxt->input->buf != NULL) &&
10356
0
          (ctxt->input->buf->encoder == NULL)) {
10357
0
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10358
0
      "Document labelled UTF-16 but has UTF-8 content\n");
10359
0
      }
10360
0
      if (ctxt->encoding != NULL)
10361
0
    xmlFree((xmlChar *) ctxt->encoding);
10362
0
      ctxt->encoding = encoding;
10363
0
  }
10364
  /*
10365
   * UTF-8 encoding is handled natively
10366
   */
10367
0
        else if ((encoding != NULL) &&
10368
0
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10369
0
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10370
            /* TODO: Check for encoding mismatch. */
10371
0
      if (ctxt->encoding != NULL)
10372
0
    xmlFree((xmlChar *) ctxt->encoding);
10373
0
      ctxt->encoding = encoding;
10374
0
  }
10375
0
  else if (encoding != NULL) {
10376
0
      xmlCharEncodingHandlerPtr handler;
10377
10378
0
      if (ctxt->input->encoding != NULL)
10379
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10380
0
      ctxt->input->encoding = encoding;
10381
10382
0
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10383
0
      if (handler != NULL) {
10384
0
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10385
        /* failed to convert */
10386
0
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10387
0
        return(NULL);
10388
0
    }
10389
0
      } else {
10390
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10391
0
      "Unsupported encoding %s\n", encoding);
10392
0
    return(NULL);
10393
0
      }
10394
0
  }
10395
0
    }
10396
0
    return(encoding);
10397
0
}
10398
10399
/**
10400
 * xmlParseSDDecl:
10401
 * @ctxt:  an XML parser context
10402
 *
10403
 * DEPRECATED: Internal function, don't use.
10404
 *
10405
 * parse the XML standalone declaration
10406
 *
10407
 * [32] SDDecl ::= S 'standalone' Eq
10408
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10409
 *
10410
 * [ VC: Standalone Document Declaration ]
10411
 * TODO The standalone document declaration must have the value "no"
10412
 * if any external markup declarations contain declarations of:
10413
 *  - attributes with default values, if elements to which these
10414
 *    attributes apply appear in the document without specifications
10415
 *    of values for these attributes, or
10416
 *  - entities (other than amp, lt, gt, apos, quot), if references
10417
 *    to those entities appear in the document, or
10418
 *  - attributes with values subject to normalization, where the
10419
 *    attribute appears in the document with a value which will change
10420
 *    as a result of normalization, or
10421
 *  - element types with element content, if white space occurs directly
10422
 *    within any instance of those types.
10423
 *
10424
 * Returns:
10425
 *   1 if standalone="yes"
10426
 *   0 if standalone="no"
10427
 *  -2 if standalone attribute is missing or invalid
10428
 *    (A standalone value of -2 means that the XML declaration was found,
10429
 *     but no value was specified for the standalone attribute).
10430
 */
10431
10432
int
10433
0
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10434
0
    int standalone = -2;
10435
10436
0
    SKIP_BLANKS;
10437
0
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10438
0
  SKIP(10);
10439
0
        SKIP_BLANKS;
10440
0
  if (RAW != '=') {
10441
0
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10442
0
      return(standalone);
10443
0
        }
10444
0
  NEXT;
10445
0
  SKIP_BLANKS;
10446
0
        if (RAW == '\''){
10447
0
      NEXT;
10448
0
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10449
0
          standalone = 0;
10450
0
                SKIP(2);
10451
0
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10452
0
                 (NXT(2) == 's')) {
10453
0
          standalone = 1;
10454
0
    SKIP(3);
10455
0
            } else {
10456
0
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10457
0
      }
10458
0
      if (RAW != '\'') {
10459
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10460
0
      } else
10461
0
          NEXT;
10462
0
  } else if (RAW == '"'){
10463
0
      NEXT;
10464
0
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10465
0
          standalone = 0;
10466
0
    SKIP(2);
10467
0
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10468
0
                 (NXT(2) == 's')) {
10469
0
          standalone = 1;
10470
0
                SKIP(3);
10471
0
            } else {
10472
0
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10473
0
      }
10474
0
      if (RAW != '"') {
10475
0
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10476
0
      } else
10477
0
          NEXT;
10478
0
  } else {
10479
0
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10480
0
        }
10481
0
    }
10482
0
    return(standalone);
10483
0
}
10484
10485
/**
10486
 * xmlParseXMLDecl:
10487
 * @ctxt:  an XML parser context
10488
 *
10489
 * DEPRECATED: Internal function, don't use.
10490
 *
10491
 * parse an XML declaration header
10492
 *
10493
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10494
 */
10495
10496
void
10497
0
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10498
0
    xmlChar *version;
10499
10500
    /*
10501
     * This value for standalone indicates that the document has an
10502
     * XML declaration but it does not have a standalone attribute.
10503
     * It will be overwritten later if a standalone attribute is found.
10504
     */
10505
0
    ctxt->input->standalone = -2;
10506
10507
    /*
10508
     * We know that '<?xml' is here.
10509
     */
10510
0
    SKIP(5);
10511
10512
0
    if (!IS_BLANK_CH(RAW)) {
10513
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10514
0
                 "Blank needed after '<?xml'\n");
10515
0
    }
10516
0
    SKIP_BLANKS;
10517
10518
    /*
10519
     * We must have the VersionInfo here.
10520
     */
10521
0
    version = xmlParseVersionInfo(ctxt);
10522
0
    if (version == NULL) {
10523
0
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10524
0
    } else {
10525
0
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10526
      /*
10527
       * Changed here for XML-1.0 5th edition
10528
       */
10529
0
      if (ctxt->options & XML_PARSE_OLD10) {
10530
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10531
0
                "Unsupported version '%s'\n",
10532
0
                version);
10533
0
      } else {
10534
0
          if ((version[0] == '1') && ((version[1] == '.'))) {
10535
0
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10536
0
                      "Unsupported version '%s'\n",
10537
0
          version, NULL);
10538
0
    } else {
10539
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10540
0
              "Unsupported version '%s'\n",
10541
0
              version);
10542
0
    }
10543
0
      }
10544
0
  }
10545
0
  if (ctxt->version != NULL)
10546
0
      xmlFree((void *) ctxt->version);
10547
0
  ctxt->version = version;
10548
0
    }
10549
10550
    /*
10551
     * We may have the encoding declaration
10552
     */
10553
0
    if (!IS_BLANK_CH(RAW)) {
10554
0
        if ((RAW == '?') && (NXT(1) == '>')) {
10555
0
      SKIP(2);
10556
0
      return;
10557
0
  }
10558
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10559
0
    }
10560
0
    xmlParseEncodingDecl(ctxt);
10561
0
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10562
0
         (ctxt->instate == XML_PARSER_EOF)) {
10563
  /*
10564
   * The XML REC instructs us to stop parsing right here
10565
   */
10566
0
        return;
10567
0
    }
10568
10569
    /*
10570
     * We may have the standalone status.
10571
     */
10572
0
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10573
0
        if ((RAW == '?') && (NXT(1) == '>')) {
10574
0
      SKIP(2);
10575
0
      return;
10576
0
  }
10577
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10578
0
    }
10579
10580
    /*
10581
     * We can grow the input buffer freely at that point
10582
     */
10583
0
    GROW;
10584
10585
0
    SKIP_BLANKS;
10586
0
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10587
10588
0
    SKIP_BLANKS;
10589
0
    if ((RAW == '?') && (NXT(1) == '>')) {
10590
0
        SKIP(2);
10591
0
    } else if (RAW == '>') {
10592
        /* Deprecated old WD ... */
10593
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10594
0
  NEXT;
10595
0
    } else {
10596
0
        int c;
10597
10598
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10599
0
        while ((c = CUR) != 0) {
10600
0
            NEXT;
10601
0
            if (c == '>')
10602
0
                break;
10603
0
        }
10604
0
    }
10605
0
}
10606
10607
/**
10608
 * xmlParseMisc:
10609
 * @ctxt:  an XML parser context
10610
 *
10611
 * DEPRECATED: Internal function, don't use.
10612
 *
10613
 * parse an XML Misc* optional field.
10614
 *
10615
 * [27] Misc ::= Comment | PI |  S
10616
 */
10617
10618
void
10619
274
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10620
383
    while (ctxt->instate != XML_PARSER_EOF) {
10621
383
        SKIP_BLANKS;
10622
383
        GROW;
10623
383
        if ((RAW == '<') && (NXT(1) == '?')) {
10624
107
      xmlParsePI(ctxt);
10625
276
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10626
2
      xmlParseComment(ctxt);
10627
274
        } else {
10628
274
            break;
10629
274
        }
10630
383
    }
10631
274
}
10632
10633
/**
10634
 * xmlParseDocument:
10635
 * @ctxt:  an XML parser context
10636
 *
10637
 * parse an XML document (and build a tree if using the standard SAX
10638
 * interface).
10639
 *
10640
 * [1] document ::= prolog element Misc*
10641
 *
10642
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10643
 *
10644
 * Returns 0, -1 in case of error. the parser context is augmented
10645
 *                as a result of the parsing.
10646
 */
10647
10648
int
10649
157
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10650
157
    xmlChar start[4];
10651
157
    xmlCharEncoding enc;
10652
10653
157
    xmlInitParser();
10654
10655
157
    if ((ctxt == NULL) || (ctxt->input == NULL))
10656
0
        return(-1);
10657
10658
157
    GROW;
10659
10660
    /*
10661
     * SAX: detecting the level.
10662
     */
10663
157
    xmlDetectSAX2(ctxt);
10664
10665
    /*
10666
     * SAX: beginning of the document processing.
10667
     */
10668
157
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10669
157
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10670
157
    if (ctxt->instate == XML_PARSER_EOF)
10671
0
  return(-1);
10672
10673
157
    if ((ctxt->encoding == NULL) &&
10674
157
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10675
  /*
10676
   * Get the 4 first bytes and decode the charset
10677
   * if enc != XML_CHAR_ENCODING_NONE
10678
   * plug some encoding conversion routines.
10679
   */
10680
156
  start[0] = RAW;
10681
156
  start[1] = NXT(1);
10682
156
  start[2] = NXT(2);
10683
156
  start[3] = NXT(3);
10684
156
  enc = xmlDetectCharEncoding(&start[0], 4);
10685
156
  if (enc != XML_CHAR_ENCODING_NONE) {
10686
62
      xmlSwitchEncoding(ctxt, enc);
10687
62
  }
10688
156
    }
10689
10690
10691
157
    if (CUR == 0) {
10692
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10693
0
  return(-1);
10694
0
    }
10695
10696
157
    GROW;
10697
157
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10698
10699
  /*
10700
   * Note that we will switch encoding on the fly.
10701
   */
10702
0
  xmlParseXMLDecl(ctxt);
10703
0
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10704
0
      (ctxt->instate == XML_PARSER_EOF)) {
10705
      /*
10706
       * The XML REC instructs us to stop parsing right here
10707
       */
10708
0
      return(-1);
10709
0
  }
10710
0
  ctxt->standalone = ctxt->input->standalone;
10711
0
  SKIP_BLANKS;
10712
157
    } else {
10713
157
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10714
157
    }
10715
157
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10716
152
        ctxt->sax->startDocument(ctxt->userData);
10717
157
    if (ctxt->instate == XML_PARSER_EOF)
10718
5
  return(-1);
10719
152
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10720
152
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10721
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10722
0
    }
10723
10724
    /*
10725
     * The Misc part of the Prolog
10726
     */
10727
152
    xmlParseMisc(ctxt);
10728
10729
    /*
10730
     * Then possibly doc type declaration(s) and more Misc
10731
     * (doctypedecl Misc*)?
10732
     */
10733
152
    GROW;
10734
152
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10735
10736
0
  ctxt->inSubset = 1;
10737
0
  xmlParseDocTypeDecl(ctxt);
10738
0
  if (RAW == '[') {
10739
0
      ctxt->instate = XML_PARSER_DTD;
10740
0
      xmlParseInternalSubset(ctxt);
10741
0
      if (ctxt->instate == XML_PARSER_EOF)
10742
0
    return(-1);
10743
0
  }
10744
10745
  /*
10746
   * Create and update the external subset.
10747
   */
10748
0
  ctxt->inSubset = 2;
10749
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10750
0
      (!ctxt->disableSAX))
10751
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10752
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10753
0
  if (ctxt->instate == XML_PARSER_EOF)
10754
0
      return(-1);
10755
0
  ctxt->inSubset = 0;
10756
10757
0
        xmlCleanSpecialAttr(ctxt);
10758
10759
0
  ctxt->instate = XML_PARSER_PROLOG;
10760
0
  xmlParseMisc(ctxt);
10761
0
    }
10762
10763
    /*
10764
     * Time to start parsing the tree itself
10765
     */
10766
152
    GROW;
10767
152
    if (RAW != '<') {
10768
30
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10769
30
           "Start tag expected, '<' not found\n");
10770
122
    } else {
10771
122
  ctxt->instate = XML_PARSER_CONTENT;
10772
122
  xmlParseElement(ctxt);
10773
122
  ctxt->instate = XML_PARSER_EPILOG;
10774
10775
10776
  /*
10777
   * The Misc part at the end
10778
   */
10779
122
  xmlParseMisc(ctxt);
10780
10781
122
  if (RAW != 0) {
10782
4
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10783
4
  }
10784
122
  ctxt->instate = XML_PARSER_EOF;
10785
122
    }
10786
10787
    /*
10788
     * SAX: end of the document processing.
10789
     */
10790
152
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10791
152
        ctxt->sax->endDocument(ctxt->userData);
10792
10793
    /*
10794
     * Remove locally kept entity definitions if the tree was not built
10795
     */
10796
152
    if ((ctxt->myDoc != NULL) &&
10797
152
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10798
0
  xmlFreeDoc(ctxt->myDoc);
10799
0
  ctxt->myDoc = NULL;
10800
0
    }
10801
10802
152
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10803
0
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10804
0
  if (ctxt->valid)
10805
0
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10806
0
  if (ctxt->nsWellFormed)
10807
0
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10808
0
  if (ctxt->options & XML_PARSE_OLD10)
10809
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10810
0
    }
10811
152
    if (! ctxt->wellFormed) {
10812
152
  ctxt->valid = 0;
10813
152
  return(-1);
10814
152
    }
10815
0
    return(0);
10816
152
}
10817
10818
/**
10819
 * xmlParseExtParsedEnt:
10820
 * @ctxt:  an XML parser context
10821
 *
10822
 * parse a general parsed entity
10823
 * An external general parsed entity is well-formed if it matches the
10824
 * production labeled extParsedEnt.
10825
 *
10826
 * [78] extParsedEnt ::= TextDecl? content
10827
 *
10828
 * Returns 0, -1 in case of error. the parser context is augmented
10829
 *                as a result of the parsing.
10830
 */
10831
10832
int
10833
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10834
0
    xmlChar start[4];
10835
0
    xmlCharEncoding enc;
10836
10837
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10838
0
        return(-1);
10839
10840
0
    xmlDetectSAX2(ctxt);
10841
10842
0
    GROW;
10843
10844
    /*
10845
     * SAX: beginning of the document processing.
10846
     */
10847
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10848
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10849
10850
    /*
10851
     * Get the 4 first bytes and decode the charset
10852
     * if enc != XML_CHAR_ENCODING_NONE
10853
     * plug some encoding conversion routines.
10854
     */
10855
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10856
0
  start[0] = RAW;
10857
0
  start[1] = NXT(1);
10858
0
  start[2] = NXT(2);
10859
0
  start[3] = NXT(3);
10860
0
  enc = xmlDetectCharEncoding(start, 4);
10861
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10862
0
      xmlSwitchEncoding(ctxt, enc);
10863
0
  }
10864
0
    }
10865
10866
10867
0
    if (CUR == 0) {
10868
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10869
0
    }
10870
10871
    /*
10872
     * Check for the XMLDecl in the Prolog.
10873
     */
10874
0
    GROW;
10875
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10876
10877
  /*
10878
   * Note that we will switch encoding on the fly.
10879
   */
10880
0
  xmlParseXMLDecl(ctxt);
10881
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10882
      /*
10883
       * The XML REC instructs us to stop parsing right here
10884
       */
10885
0
      return(-1);
10886
0
  }
10887
0
  SKIP_BLANKS;
10888
0
    } else {
10889
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10890
0
    }
10891
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10892
0
        ctxt->sax->startDocument(ctxt->userData);
10893
0
    if (ctxt->instate == XML_PARSER_EOF)
10894
0
  return(-1);
10895
10896
    /*
10897
     * Doing validity checking on chunk doesn't make sense
10898
     */
10899
0
    ctxt->instate = XML_PARSER_CONTENT;
10900
0
    ctxt->validate = 0;
10901
0
    ctxt->loadsubset = 0;
10902
0
    ctxt->depth = 0;
10903
10904
0
    xmlParseContent(ctxt);
10905
0
    if (ctxt->instate == XML_PARSER_EOF)
10906
0
  return(-1);
10907
10908
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10909
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10910
0
    } else if (RAW != 0) {
10911
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10912
0
    }
10913
10914
    /*
10915
     * SAX: end of the document processing.
10916
     */
10917
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10918
0
        ctxt->sax->endDocument(ctxt->userData);
10919
10920
0
    if (! ctxt->wellFormed) return(-1);
10921
0
    return(0);
10922
0
}
10923
10924
#ifdef LIBXML_PUSH_ENABLED
10925
/************************************************************************
10926
 *                  *
10927
 *    Progressive parsing interfaces        *
10928
 *                  *
10929
 ************************************************************************/
10930
10931
/**
10932
 * xmlParseLookupChar:
10933
 * @ctxt:  an XML parser context
10934
 * @c:  character
10935
 *
10936
 * Check whether the input buffer contains a character.
10937
 */
10938
static int
10939
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10940
    const xmlChar *cur;
10941
10942
    if (ctxt->checkIndex == 0) {
10943
        cur = ctxt->input->cur + 1;
10944
    } else {
10945
        cur = ctxt->input->cur + ctxt->checkIndex;
10946
    }
10947
10948
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10949
        size_t index = ctxt->input->end - ctxt->input->cur;
10950
10951
        if (index > LONG_MAX) {
10952
            ctxt->checkIndex = 0;
10953
            return(1);
10954
        }
10955
        ctxt->checkIndex = index;
10956
        return(0);
10957
    } else {
10958
        ctxt->checkIndex = 0;
10959
        return(1);
10960
    }
10961
}
10962
10963
/**
10964
 * xmlParseLookupString:
10965
 * @ctxt:  an XML parser context
10966
 * @startDelta: delta to apply at the start
10967
 * @str:  string
10968
 * @strLen:  length of string
10969
 *
10970
 * Check whether the input buffer contains a string.
10971
 */
10972
static const xmlChar *
10973
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10974
                     const char *str, size_t strLen) {
10975
    const xmlChar *cur, *term;
10976
10977
    if (ctxt->checkIndex == 0) {
10978
        cur = ctxt->input->cur + startDelta;
10979
    } else {
10980
        cur = ctxt->input->cur + ctxt->checkIndex;
10981
    }
10982
10983
    term = BAD_CAST strstr((const char *) cur, str);
10984
    if (term == NULL) {
10985
        const xmlChar *end = ctxt->input->end;
10986
        size_t index;
10987
10988
        /* Rescan (strLen - 1) characters. */
10989
        if ((size_t) (end - cur) < strLen)
10990
            end = cur;
10991
        else
10992
            end -= strLen - 1;
10993
        index = end - ctxt->input->cur;
10994
        if (index > LONG_MAX) {
10995
            ctxt->checkIndex = 0;
10996
            return(ctxt->input->end - strLen);
10997
        }
10998
        ctxt->checkIndex = index;
10999
    } else {
11000
        ctxt->checkIndex = 0;
11001
    }
11002
11003
    return(term);
11004
}
11005
11006
/**
11007
 * xmlParseLookupCharData:
11008
 * @ctxt:  an XML parser context
11009
 *
11010
 * Check whether the input buffer contains terminated char data.
11011
 */
11012
static int
11013
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11014
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11015
    const xmlChar *end = ctxt->input->end;
11016
    size_t index;
11017
11018
    while (cur < end) {
11019
        if ((*cur == '<') || (*cur == '&')) {
11020
            ctxt->checkIndex = 0;
11021
            return(1);
11022
        }
11023
        cur++;
11024
    }
11025
11026
    index = cur - ctxt->input->cur;
11027
    if (index > LONG_MAX) {
11028
        ctxt->checkIndex = 0;
11029
        return(1);
11030
    }
11031
    ctxt->checkIndex = index;
11032
    return(0);
11033
}
11034
11035
/**
11036
 * xmlParseLookupGt:
11037
 * @ctxt:  an XML parser context
11038
 *
11039
 * Check whether there's enough data in the input buffer to finish parsing
11040
 * a start tag. This has to take quotes into account.
11041
 */
11042
static int
11043
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11044
    const xmlChar *cur;
11045
    const xmlChar *end = ctxt->input->end;
11046
    int state = ctxt->endCheckState;
11047
    size_t index;
11048
11049
    if (ctxt->checkIndex == 0)
11050
        cur = ctxt->input->cur + 1;
11051
    else
11052
        cur = ctxt->input->cur + ctxt->checkIndex;
11053
11054
    while (cur < end) {
11055
        if (state) {
11056
            if (*cur == state)
11057
                state = 0;
11058
        } else if (*cur == '\'' || *cur == '"') {
11059
            state = *cur;
11060
        } else if (*cur == '>') {
11061
            ctxt->checkIndex = 0;
11062
            ctxt->endCheckState = 0;
11063
            return(1);
11064
        }
11065
        cur++;
11066
    }
11067
11068
    index = cur - ctxt->input->cur;
11069
    if (index > LONG_MAX) {
11070
        ctxt->checkIndex = 0;
11071
        ctxt->endCheckState = 0;
11072
        return(1);
11073
    }
11074
    ctxt->checkIndex = index;
11075
    ctxt->endCheckState = state;
11076
    return(0);
11077
}
11078
11079
/**
11080
 * xmlParseLookupInternalSubset:
11081
 * @ctxt:  an XML parser context
11082
 *
11083
 * Check whether there's enough data in the input buffer to finish parsing
11084
 * the internal subset.
11085
 */
11086
static int
11087
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11088
    /*
11089
     * Sorry, but progressive parsing of the internal subset is not
11090
     * supported. We first check that the full content of the internal
11091
     * subset is available and parsing is launched only at that point.
11092
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11093
     * not in a ']]>' sequence which are conditional sections.
11094
     */
11095
    const xmlChar *cur, *start;
11096
    const xmlChar *end = ctxt->input->end;
11097
    int state = ctxt->endCheckState;
11098
    size_t index;
11099
11100
    if (ctxt->checkIndex == 0) {
11101
        cur = ctxt->input->cur + 1;
11102
    } else {
11103
        cur = ctxt->input->cur + ctxt->checkIndex;
11104
    }
11105
    start = cur;
11106
11107
    while (cur < end) {
11108
        if (state == '-') {
11109
            if ((*cur == '-') &&
11110
                (cur[1] == '-') &&
11111
                (cur[2] == '>')) {
11112
                state = 0;
11113
                cur += 3;
11114
                start = cur;
11115
                continue;
11116
            }
11117
        }
11118
        else if (state == ']') {
11119
            if (*cur == '>') {
11120
                ctxt->checkIndex = 0;
11121
                ctxt->endCheckState = 0;
11122
                return(1);
11123
            }
11124
            if (IS_BLANK_CH(*cur)) {
11125
                state = ' ';
11126
            } else if (*cur != ']') {
11127
                state = 0;
11128
                start = cur;
11129
                continue;
11130
            }
11131
        }
11132
        else if (state == ' ') {
11133
            if (*cur == '>') {
11134
                ctxt->checkIndex = 0;
11135
                ctxt->endCheckState = 0;
11136
                return(1);
11137
            }
11138
            if (!IS_BLANK_CH(*cur)) {
11139
                state = 0;
11140
                start = cur;
11141
                continue;
11142
            }
11143
        }
11144
        else if (state != 0) {
11145
            if (*cur == state) {
11146
                state = 0;
11147
                start = cur + 1;
11148
            }
11149
        }
11150
        else if (*cur == '<') {
11151
            if ((cur[1] == '!') &&
11152
                (cur[2] == '-') &&
11153
                (cur[3] == '-')) {
11154
                state = '-';
11155
                cur += 4;
11156
                /* Don't treat <!--> as comment */
11157
                start = cur;
11158
                continue;
11159
            }
11160
        }
11161
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11162
            state = *cur;
11163
        }
11164
11165
        cur++;
11166
    }
11167
11168
    /*
11169
     * Rescan the three last characters to detect "<!--" and "-->"
11170
     * split across chunks.
11171
     */
11172
    if ((state == 0) || (state == '-')) {
11173
        if (cur - start < 3)
11174
            cur = start;
11175
        else
11176
            cur -= 3;
11177
    }
11178
    index = cur - ctxt->input->cur;
11179
    if (index > LONG_MAX) {
11180
        ctxt->checkIndex = 0;
11181
        ctxt->endCheckState = 0;
11182
        return(1);
11183
    }
11184
    ctxt->checkIndex = index;
11185
    ctxt->endCheckState = state;
11186
    return(0);
11187
}
11188
11189
/**
11190
 * xmlCheckCdataPush:
11191
 * @cur: pointer to the block of characters
11192
 * @len: length of the block in bytes
11193
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11194
 *
11195
 * Check that the block of characters is okay as SCdata content [20]
11196
 *
11197
 * Returns the number of bytes to pass if okay, a negative index where an
11198
 *         UTF-8 error occurred otherwise
11199
 */
11200
static int
11201
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11202
    int ix;
11203
    unsigned char c;
11204
    int codepoint;
11205
11206
    if ((utf == NULL) || (len <= 0))
11207
        return(0);
11208
11209
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11210
        c = utf[ix];
11211
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11212
      if (c >= 0x20)
11213
    ix++;
11214
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11215
          ix++;
11216
      else
11217
          return(-ix);
11218
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11219
      if (ix + 2 > len) return(complete ? -ix : ix);
11220
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11221
          return(-ix);
11222
      codepoint = (utf[ix] & 0x1f) << 6;
11223
      codepoint |= utf[ix+1] & 0x3f;
11224
      if (!xmlIsCharQ(codepoint))
11225
          return(-ix);
11226
      ix += 2;
11227
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11228
      if (ix + 3 > len) return(complete ? -ix : ix);
11229
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11230
          ((utf[ix+2] & 0xc0) != 0x80))
11231
        return(-ix);
11232
      codepoint = (utf[ix] & 0xf) << 12;
11233
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11234
      codepoint |= utf[ix+2] & 0x3f;
11235
      if (!xmlIsCharQ(codepoint))
11236
          return(-ix);
11237
      ix += 3;
11238
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11239
      if (ix + 4 > len) return(complete ? -ix : ix);
11240
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11241
          ((utf[ix+2] & 0xc0) != 0x80) ||
11242
    ((utf[ix+3] & 0xc0) != 0x80))
11243
        return(-ix);
11244
      codepoint = (utf[ix] & 0x7) << 18;
11245
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11246
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11247
      codepoint |= utf[ix+3] & 0x3f;
11248
      if (!xmlIsCharQ(codepoint))
11249
          return(-ix);
11250
      ix += 4;
11251
  } else        /* unknown encoding */
11252
      return(-ix);
11253
      }
11254
      return(ix);
11255
}
11256
11257
/**
11258
 * xmlParseTryOrFinish:
11259
 * @ctxt:  an XML parser context
11260
 * @terminate:  last chunk indicator
11261
 *
11262
 * Try to progress on parsing
11263
 *
11264
 * Returns zero if no parsing was possible
11265
 */
11266
static int
11267
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11268
    int ret = 0;
11269
    int tlen;
11270
    size_t avail;
11271
    xmlChar cur, next;
11272
11273
    if (ctxt->input == NULL)
11274
        return(0);
11275
11276
#ifdef DEBUG_PUSH
11277
    switch (ctxt->instate) {
11278
  case XML_PARSER_EOF:
11279
      xmlGenericError(xmlGenericErrorContext,
11280
        "PP: try EOF\n"); break;
11281
  case XML_PARSER_START:
11282
      xmlGenericError(xmlGenericErrorContext,
11283
        "PP: try START\n"); break;
11284
  case XML_PARSER_MISC:
11285
      xmlGenericError(xmlGenericErrorContext,
11286
        "PP: try MISC\n");break;
11287
  case XML_PARSER_COMMENT:
11288
      xmlGenericError(xmlGenericErrorContext,
11289
        "PP: try COMMENT\n");break;
11290
  case XML_PARSER_PROLOG:
11291
      xmlGenericError(xmlGenericErrorContext,
11292
        "PP: try PROLOG\n");break;
11293
  case XML_PARSER_START_TAG:
11294
      xmlGenericError(xmlGenericErrorContext,
11295
        "PP: try START_TAG\n");break;
11296
  case XML_PARSER_CONTENT:
11297
      xmlGenericError(xmlGenericErrorContext,
11298
        "PP: try CONTENT\n");break;
11299
  case XML_PARSER_CDATA_SECTION:
11300
      xmlGenericError(xmlGenericErrorContext,
11301
        "PP: try CDATA_SECTION\n");break;
11302
  case XML_PARSER_END_TAG:
11303
      xmlGenericError(xmlGenericErrorContext,
11304
        "PP: try END_TAG\n");break;
11305
  case XML_PARSER_ENTITY_DECL:
11306
      xmlGenericError(xmlGenericErrorContext,
11307
        "PP: try ENTITY_DECL\n");break;
11308
  case XML_PARSER_ENTITY_VALUE:
11309
      xmlGenericError(xmlGenericErrorContext,
11310
        "PP: try ENTITY_VALUE\n");break;
11311
  case XML_PARSER_ATTRIBUTE_VALUE:
11312
      xmlGenericError(xmlGenericErrorContext,
11313
        "PP: try ATTRIBUTE_VALUE\n");break;
11314
  case XML_PARSER_DTD:
11315
      xmlGenericError(xmlGenericErrorContext,
11316
        "PP: try DTD\n");break;
11317
  case XML_PARSER_EPILOG:
11318
      xmlGenericError(xmlGenericErrorContext,
11319
        "PP: try EPILOG\n");break;
11320
  case XML_PARSER_PI:
11321
      xmlGenericError(xmlGenericErrorContext,
11322
        "PP: try PI\n");break;
11323
        case XML_PARSER_IGNORE:
11324
            xmlGenericError(xmlGenericErrorContext,
11325
        "PP: try IGNORE\n");break;
11326
    }
11327
#endif
11328
11329
    if ((ctxt->input != NULL) &&
11330
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11331
        xmlParserShrink(ctxt);
11332
    }
11333
11334
    while (ctxt->instate != XML_PARSER_EOF) {
11335
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11336
      return(0);
11337
11338
  if (ctxt->input == NULL) break;
11339
  if (ctxt->input->buf != NULL) {
11340
      /*
11341
       * If we are operating on converted input, try to flush
11342
       * remaining chars to avoid them stalling in the non-converted
11343
       * buffer.
11344
       */
11345
      if ((ctxt->input->buf->raw != NULL) &&
11346
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11347
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11348
                                                 ctxt->input);
11349
    size_t current = ctxt->input->cur - ctxt->input->base;
11350
11351
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11352
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11353
                                      base, current);
11354
      }
11355
  }
11356
        avail = ctxt->input->end - ctxt->input->cur;
11357
        if (avail < 1)
11358
      goto done;
11359
        switch (ctxt->instate) {
11360
            case XML_PARSER_EOF:
11361
          /*
11362
     * Document parsing is done !
11363
     */
11364
          goto done;
11365
            case XML_PARSER_START:
11366
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11367
        xmlChar start[4];
11368
        xmlCharEncoding enc;
11369
11370
        /*
11371
         * Very first chars read from the document flow.
11372
         */
11373
        if (avail < 4)
11374
      goto done;
11375
11376
        /*
11377
         * Get the 4 first bytes and decode the charset
11378
         * if enc != XML_CHAR_ENCODING_NONE
11379
         * plug some encoding conversion routines,
11380
         * else xmlSwitchEncoding will set to (default)
11381
         * UTF8.
11382
         */
11383
        start[0] = RAW;
11384
        start[1] = NXT(1);
11385
        start[2] = NXT(2);
11386
        start[3] = NXT(3);
11387
        enc = xmlDetectCharEncoding(start, 4);
11388
                    /*
11389
                     * We need more bytes to detect EBCDIC code pages.
11390
                     * See xmlDetectEBCDIC.
11391
                     */
11392
                    if ((enc == XML_CHAR_ENCODING_EBCDIC) &&
11393
                        (!terminate) && (avail < 200))
11394
                        goto done;
11395
        xmlSwitchEncoding(ctxt, enc);
11396
        break;
11397
    }
11398
11399
    if (avail < 2)
11400
        goto done;
11401
    cur = ctxt->input->cur[0];
11402
    next = ctxt->input->cur[1];
11403
    if (cur == 0) {
11404
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11405
      ctxt->sax->setDocumentLocator(ctxt->userData,
11406
                  &xmlDefaultSAXLocator);
11407
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11408
        xmlHaltParser(ctxt);
11409
#ifdef DEBUG_PUSH
11410
        xmlGenericError(xmlGenericErrorContext,
11411
          "PP: entering EOF\n");
11412
#endif
11413
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11414
      ctxt->sax->endDocument(ctxt->userData);
11415
        goto done;
11416
    }
11417
          if ((cur == '<') && (next == '?')) {
11418
        /* PI or XML decl */
11419
        if (avail < 5) goto done;
11420
        if ((!terminate) &&
11421
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11422
      goto done;
11423
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11424
      ctxt->sax->setDocumentLocator(ctxt->userData,
11425
                  &xmlDefaultSAXLocator);
11426
        if ((ctxt->input->cur[2] == 'x') &&
11427
      (ctxt->input->cur[3] == 'm') &&
11428
      (ctxt->input->cur[4] == 'l') &&
11429
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11430
      ret += 5;
11431
#ifdef DEBUG_PUSH
11432
      xmlGenericError(xmlGenericErrorContext,
11433
        "PP: Parsing XML Decl\n");
11434
#endif
11435
      xmlParseXMLDecl(ctxt);
11436
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11437
          /*
11438
           * The XML REC instructs us to stop parsing right
11439
           * here
11440
           */
11441
          xmlHaltParser(ctxt);
11442
          return(0);
11443
      }
11444
      ctxt->standalone = ctxt->input->standalone;
11445
      if ((ctxt->encoding == NULL) &&
11446
          (ctxt->input->encoding != NULL))
11447
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11448
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11449
          (!ctxt->disableSAX))
11450
          ctxt->sax->startDocument(ctxt->userData);
11451
      ctxt->instate = XML_PARSER_MISC;
11452
#ifdef DEBUG_PUSH
11453
      xmlGenericError(xmlGenericErrorContext,
11454
        "PP: entering MISC\n");
11455
#endif
11456
        } else {
11457
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11458
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11459
          (!ctxt->disableSAX))
11460
          ctxt->sax->startDocument(ctxt->userData);
11461
      ctxt->instate = XML_PARSER_MISC;
11462
#ifdef DEBUG_PUSH
11463
      xmlGenericError(xmlGenericErrorContext,
11464
        "PP: entering MISC\n");
11465
#endif
11466
        }
11467
    } else {
11468
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11469
      ctxt->sax->setDocumentLocator(ctxt->userData,
11470
                  &xmlDefaultSAXLocator);
11471
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11472
        if (ctxt->version == NULL) {
11473
            xmlErrMemory(ctxt, NULL);
11474
      break;
11475
        }
11476
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11477
            (!ctxt->disableSAX))
11478
      ctxt->sax->startDocument(ctxt->userData);
11479
        ctxt->instate = XML_PARSER_MISC;
11480
#ifdef DEBUG_PUSH
11481
        xmlGenericError(xmlGenericErrorContext,
11482
          "PP: entering MISC\n");
11483
#endif
11484
    }
11485
    break;
11486
            case XML_PARSER_START_TAG: {
11487
          const xmlChar *name;
11488
    const xmlChar *prefix = NULL;
11489
    const xmlChar *URI = NULL;
11490
                int line = ctxt->input->line;
11491
    int nsNr = ctxt->nsNr;
11492
11493
    if ((avail < 2) && (ctxt->inputNr == 1))
11494
        goto done;
11495
    cur = ctxt->input->cur[0];
11496
          if (cur != '<') {
11497
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11498
        xmlHaltParser(ctxt);
11499
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11500
      ctxt->sax->endDocument(ctxt->userData);
11501
        goto done;
11502
    }
11503
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11504
                    goto done;
11505
    if (ctxt->spaceNr == 0)
11506
        spacePush(ctxt, -1);
11507
    else if (*ctxt->space == -2)
11508
        spacePush(ctxt, -1);
11509
    else
11510
        spacePush(ctxt, *ctxt->space);
11511
#ifdef LIBXML_SAX1_ENABLED
11512
    if (ctxt->sax2)
11513
#endif /* LIBXML_SAX1_ENABLED */
11514
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11515
#ifdef LIBXML_SAX1_ENABLED
11516
    else
11517
        name = xmlParseStartTag(ctxt);
11518
#endif /* LIBXML_SAX1_ENABLED */
11519
    if (ctxt->instate == XML_PARSER_EOF)
11520
        goto done;
11521
    if (name == NULL) {
11522
        spacePop(ctxt);
11523
        xmlHaltParser(ctxt);
11524
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11525
      ctxt->sax->endDocument(ctxt->userData);
11526
        goto done;
11527
    }
11528
#ifdef LIBXML_VALID_ENABLED
11529
    /*
11530
     * [ VC: Root Element Type ]
11531
     * The Name in the document type declaration must match
11532
     * the element type of the root element.
11533
     */
11534
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11535
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11536
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11537
#endif /* LIBXML_VALID_ENABLED */
11538
11539
    /*
11540
     * Check for an Empty Element.
11541
     */
11542
    if ((RAW == '/') && (NXT(1) == '>')) {
11543
        SKIP(2);
11544
11545
        if (ctxt->sax2) {
11546
      if ((ctxt->sax != NULL) &&
11547
          (ctxt->sax->endElementNs != NULL) &&
11548
          (!ctxt->disableSAX))
11549
          ctxt->sax->endElementNs(ctxt->userData, name,
11550
                                  prefix, URI);
11551
      if (ctxt->nsNr - nsNr > 0)
11552
          nsPop(ctxt, ctxt->nsNr - nsNr);
11553
#ifdef LIBXML_SAX1_ENABLED
11554
        } else {
11555
      if ((ctxt->sax != NULL) &&
11556
          (ctxt->sax->endElement != NULL) &&
11557
          (!ctxt->disableSAX))
11558
          ctxt->sax->endElement(ctxt->userData, name);
11559
#endif /* LIBXML_SAX1_ENABLED */
11560
        }
11561
        if (ctxt->instate == XML_PARSER_EOF)
11562
      goto done;
11563
        spacePop(ctxt);
11564
        if (ctxt->nameNr == 0) {
11565
      ctxt->instate = XML_PARSER_EPILOG;
11566
        } else {
11567
      ctxt->instate = XML_PARSER_CONTENT;
11568
        }
11569
        break;
11570
    }
11571
    if (RAW == '>') {
11572
        NEXT;
11573
    } else {
11574
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11575
           "Couldn't find end of Start Tag %s\n",
11576
           name);
11577
        nodePop(ctxt);
11578
        spacePop(ctxt);
11579
    }
11580
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11581
11582
    ctxt->instate = XML_PARSER_CONTENT;
11583
                break;
11584
      }
11585
            case XML_PARSER_CONTENT: {
11586
    if ((avail < 2) && (ctxt->inputNr == 1))
11587
        goto done;
11588
    cur = ctxt->input->cur[0];
11589
    next = ctxt->input->cur[1];
11590
11591
    if ((cur == '<') && (next == '/')) {
11592
        ctxt->instate = XML_PARSER_END_TAG;
11593
        break;
11594
          } else if ((cur == '<') && (next == '?')) {
11595
        if ((!terminate) &&
11596
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11597
      goto done;
11598
        xmlParsePI(ctxt);
11599
        ctxt->instate = XML_PARSER_CONTENT;
11600
    } else if ((cur == '<') && (next != '!')) {
11601
        ctxt->instate = XML_PARSER_START_TAG;
11602
        break;
11603
    } else if ((cur == '<') && (next == '!') &&
11604
               (ctxt->input->cur[2] == '-') &&
11605
         (ctxt->input->cur[3] == '-')) {
11606
        if ((!terminate) &&
11607
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11608
      goto done;
11609
        xmlParseComment(ctxt);
11610
        ctxt->instate = XML_PARSER_CONTENT;
11611
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11612
        (ctxt->input->cur[2] == '[') &&
11613
        (ctxt->input->cur[3] == 'C') &&
11614
        (ctxt->input->cur[4] == 'D') &&
11615
        (ctxt->input->cur[5] == 'A') &&
11616
        (ctxt->input->cur[6] == 'T') &&
11617
        (ctxt->input->cur[7] == 'A') &&
11618
        (ctxt->input->cur[8] == '[')) {
11619
        SKIP(9);
11620
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11621
        break;
11622
    } else if ((cur == '<') && (next == '!') &&
11623
               (avail < 9)) {
11624
        goto done;
11625
    } else if (cur == '<') {
11626
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11627
                    "detected an error in element content\n");
11628
                    SKIP(1);
11629
    } else if (cur == '&') {
11630
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11631
      goto done;
11632
        xmlParseReference(ctxt);
11633
    } else {
11634
        /* TODO Avoid the extra copy, handle directly !!! */
11635
        /*
11636
         * Goal of the following test is:
11637
         *  - minimize calls to the SAX 'character' callback
11638
         *    when they are mergeable
11639
         *  - handle an problem for isBlank when we only parse
11640
         *    a sequence of blank chars and the next one is
11641
         *    not available to check against '<' presence.
11642
         *  - tries to homogenize the differences in SAX
11643
         *    callbacks between the push and pull versions
11644
         *    of the parser.
11645
         */
11646
        if ((ctxt->inputNr == 1) &&
11647
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11648
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11649
          goto done;
11650
                    }
11651
                    ctxt->checkIndex = 0;
11652
        xmlParseCharData(ctxt, 0);
11653
    }
11654
    break;
11655
      }
11656
            case XML_PARSER_END_TAG:
11657
    if (avail < 2)
11658
        goto done;
11659
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11660
        goto done;
11661
    if (ctxt->sax2) {
11662
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11663
        nameNsPop(ctxt);
11664
    }
11665
#ifdef LIBXML_SAX1_ENABLED
11666
      else
11667
        xmlParseEndTag1(ctxt, 0);
11668
#endif /* LIBXML_SAX1_ENABLED */
11669
    if (ctxt->instate == XML_PARSER_EOF) {
11670
        /* Nothing */
11671
    } else if (ctxt->nameNr == 0) {
11672
        ctxt->instate = XML_PARSER_EPILOG;
11673
    } else {
11674
        ctxt->instate = XML_PARSER_CONTENT;
11675
    }
11676
    break;
11677
            case XML_PARSER_CDATA_SECTION: {
11678
          /*
11679
     * The Push mode need to have the SAX callback for
11680
     * cdataBlock merge back contiguous callbacks.
11681
     */
11682
    const xmlChar *term;
11683
11684
                if (terminate) {
11685
                    /*
11686
                     * Don't call xmlParseLookupString. If 'terminate'
11687
                     * is set, checkIndex is invalid.
11688
                     */
11689
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11690
                                           "]]>");
11691
                } else {
11692
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11693
                }
11694
11695
    if (term == NULL) {
11696
        int tmp, size;
11697
11698
                    if (terminate) {
11699
                        /* Unfinished CDATA section */
11700
                        size = ctxt->input->end - ctxt->input->cur;
11701
                    } else {
11702
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11703
                            goto done;
11704
                        ctxt->checkIndex = 0;
11705
                        /* XXX: Why don't we pass the full buffer? */
11706
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11707
                    }
11708
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11709
                    if (tmp <= 0) {
11710
                        tmp = -tmp;
11711
                        ctxt->input->cur += tmp;
11712
                        goto encoding_error;
11713
                    }
11714
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11715
                        if (ctxt->sax->cdataBlock != NULL)
11716
                            ctxt->sax->cdataBlock(ctxt->userData,
11717
                                                  ctxt->input->cur, tmp);
11718
                        else if (ctxt->sax->characters != NULL)
11719
                            ctxt->sax->characters(ctxt->userData,
11720
                                                  ctxt->input->cur, tmp);
11721
                    }
11722
                    if (ctxt->instate == XML_PARSER_EOF)
11723
                        goto done;
11724
                    SKIPL(tmp);
11725
    } else {
11726
                    int base = term - CUR_PTR;
11727
        int tmp;
11728
11729
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11730
        if ((tmp < 0) || (tmp != base)) {
11731
      tmp = -tmp;
11732
      ctxt->input->cur += tmp;
11733
      goto encoding_error;
11734
        }
11735
        if ((ctxt->sax != NULL) && (base == 0) &&
11736
            (ctxt->sax->cdataBlock != NULL) &&
11737
            (!ctxt->disableSAX)) {
11738
      /*
11739
       * Special case to provide identical behaviour
11740
       * between pull and push parsers on enpty CDATA
11741
       * sections
11742
       */
11743
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11744
           (!strncmp((const char *)&ctxt->input->cur[-9],
11745
                     "<![CDATA[", 9)))
11746
           ctxt->sax->cdataBlock(ctxt->userData,
11747
                                 BAD_CAST "", 0);
11748
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11749
      (!ctxt->disableSAX)) {
11750
      if (ctxt->sax->cdataBlock != NULL)
11751
          ctxt->sax->cdataBlock(ctxt->userData,
11752
              ctxt->input->cur, base);
11753
      else if (ctxt->sax->characters != NULL)
11754
          ctxt->sax->characters(ctxt->userData,
11755
              ctxt->input->cur, base);
11756
        }
11757
        if (ctxt->instate == XML_PARSER_EOF)
11758
      goto done;
11759
        SKIPL(base + 3);
11760
        ctxt->instate = XML_PARSER_CONTENT;
11761
#ifdef DEBUG_PUSH
11762
        xmlGenericError(xmlGenericErrorContext,
11763
          "PP: entering CONTENT\n");
11764
#endif
11765
    }
11766
    break;
11767
      }
11768
            case XML_PARSER_MISC:
11769
            case XML_PARSER_PROLOG:
11770
            case XML_PARSER_EPILOG:
11771
    SKIP_BLANKS;
11772
                avail = ctxt->input->end - ctxt->input->cur;
11773
    if (avail < 2)
11774
        goto done;
11775
    cur = ctxt->input->cur[0];
11776
    next = ctxt->input->cur[1];
11777
          if ((cur == '<') && (next == '?')) {
11778
        if ((!terminate) &&
11779
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11780
      goto done;
11781
#ifdef DEBUG_PUSH
11782
        xmlGenericError(xmlGenericErrorContext,
11783
          "PP: Parsing PI\n");
11784
#endif
11785
        xmlParsePI(ctxt);
11786
        if (ctxt->instate == XML_PARSER_EOF)
11787
      goto done;
11788
    } else if ((cur == '<') && (next == '!') &&
11789
        (ctxt->input->cur[2] == '-') &&
11790
        (ctxt->input->cur[3] == '-')) {
11791
        if ((!terminate) &&
11792
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11793
      goto done;
11794
#ifdef DEBUG_PUSH
11795
        xmlGenericError(xmlGenericErrorContext,
11796
          "PP: Parsing Comment\n");
11797
#endif
11798
        xmlParseComment(ctxt);
11799
        if (ctxt->instate == XML_PARSER_EOF)
11800
      goto done;
11801
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11802
                    (cur == '<') && (next == '!') &&
11803
        (ctxt->input->cur[2] == 'D') &&
11804
        (ctxt->input->cur[3] == 'O') &&
11805
        (ctxt->input->cur[4] == 'C') &&
11806
        (ctxt->input->cur[5] == 'T') &&
11807
        (ctxt->input->cur[6] == 'Y') &&
11808
        (ctxt->input->cur[7] == 'P') &&
11809
        (ctxt->input->cur[8] == 'E')) {
11810
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11811
                        goto done;
11812
#ifdef DEBUG_PUSH
11813
        xmlGenericError(xmlGenericErrorContext,
11814
          "PP: Parsing internal subset\n");
11815
#endif
11816
        ctxt->inSubset = 1;
11817
        xmlParseDocTypeDecl(ctxt);
11818
        if (ctxt->instate == XML_PARSER_EOF)
11819
      goto done;
11820
        if (RAW == '[') {
11821
      ctxt->instate = XML_PARSER_DTD;
11822
#ifdef DEBUG_PUSH
11823
      xmlGenericError(xmlGenericErrorContext,
11824
        "PP: entering DTD\n");
11825
#endif
11826
        } else {
11827
      /*
11828
       * Create and update the external subset.
11829
       */
11830
      ctxt->inSubset = 2;
11831
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11832
          (ctxt->sax->externalSubset != NULL))
11833
          ctxt->sax->externalSubset(ctxt->userData,
11834
            ctxt->intSubName, ctxt->extSubSystem,
11835
            ctxt->extSubURI);
11836
      ctxt->inSubset = 0;
11837
      xmlCleanSpecialAttr(ctxt);
11838
      ctxt->instate = XML_PARSER_PROLOG;
11839
#ifdef DEBUG_PUSH
11840
      xmlGenericError(xmlGenericErrorContext,
11841
        "PP: entering PROLOG\n");
11842
#endif
11843
        }
11844
    } else if ((cur == '<') && (next == '!') &&
11845
               (avail <
11846
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11847
        goto done;
11848
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11849
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11850
        xmlHaltParser(ctxt);
11851
#ifdef DEBUG_PUSH
11852
        xmlGenericError(xmlGenericErrorContext,
11853
          "PP: entering EOF\n");
11854
#endif
11855
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11856
      ctxt->sax->endDocument(ctxt->userData);
11857
        goto done;
11858
                } else {
11859
        ctxt->instate = XML_PARSER_START_TAG;
11860
#ifdef DEBUG_PUSH
11861
        xmlGenericError(xmlGenericErrorContext,
11862
          "PP: entering START_TAG\n");
11863
#endif
11864
    }
11865
    break;
11866
            case XML_PARSER_DTD: {
11867
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11868
                    goto done;
11869
    xmlParseInternalSubset(ctxt);
11870
    if (ctxt->instate == XML_PARSER_EOF)
11871
        goto done;
11872
    ctxt->inSubset = 2;
11873
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11874
        (ctxt->sax->externalSubset != NULL))
11875
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11876
          ctxt->extSubSystem, ctxt->extSubURI);
11877
    ctxt->inSubset = 0;
11878
    xmlCleanSpecialAttr(ctxt);
11879
    if (ctxt->instate == XML_PARSER_EOF)
11880
        goto done;
11881
    ctxt->instate = XML_PARSER_PROLOG;
11882
#ifdef DEBUG_PUSH
11883
    xmlGenericError(xmlGenericErrorContext,
11884
      "PP: entering PROLOG\n");
11885
#endif
11886
                break;
11887
      }
11888
            case XML_PARSER_COMMENT:
11889
    xmlGenericError(xmlGenericErrorContext,
11890
      "PP: internal error, state == COMMENT\n");
11891
    ctxt->instate = XML_PARSER_CONTENT;
11892
#ifdef DEBUG_PUSH
11893
    xmlGenericError(xmlGenericErrorContext,
11894
      "PP: entering CONTENT\n");
11895
#endif
11896
    break;
11897
            case XML_PARSER_IGNORE:
11898
    xmlGenericError(xmlGenericErrorContext,
11899
      "PP: internal error, state == IGNORE");
11900
          ctxt->instate = XML_PARSER_DTD;
11901
#ifdef DEBUG_PUSH
11902
    xmlGenericError(xmlGenericErrorContext,
11903
      "PP: entering DTD\n");
11904
#endif
11905
          break;
11906
            case XML_PARSER_PI:
11907
    xmlGenericError(xmlGenericErrorContext,
11908
      "PP: internal error, state == PI\n");
11909
    ctxt->instate = XML_PARSER_CONTENT;
11910
#ifdef DEBUG_PUSH
11911
    xmlGenericError(xmlGenericErrorContext,
11912
      "PP: entering CONTENT\n");
11913
#endif
11914
    break;
11915
            case XML_PARSER_ENTITY_DECL:
11916
    xmlGenericError(xmlGenericErrorContext,
11917
      "PP: internal error, state == ENTITY_DECL\n");
11918
    ctxt->instate = XML_PARSER_DTD;
11919
#ifdef DEBUG_PUSH
11920
    xmlGenericError(xmlGenericErrorContext,
11921
      "PP: entering DTD\n");
11922
#endif
11923
    break;
11924
            case XML_PARSER_ENTITY_VALUE:
11925
    xmlGenericError(xmlGenericErrorContext,
11926
      "PP: internal error, state == ENTITY_VALUE\n");
11927
    ctxt->instate = XML_PARSER_CONTENT;
11928
#ifdef DEBUG_PUSH
11929
    xmlGenericError(xmlGenericErrorContext,
11930
      "PP: entering DTD\n");
11931
#endif
11932
    break;
11933
            case XML_PARSER_ATTRIBUTE_VALUE:
11934
    xmlGenericError(xmlGenericErrorContext,
11935
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
11936
    ctxt->instate = XML_PARSER_START_TAG;
11937
#ifdef DEBUG_PUSH
11938
    xmlGenericError(xmlGenericErrorContext,
11939
      "PP: entering START_TAG\n");
11940
#endif
11941
    break;
11942
            case XML_PARSER_SYSTEM_LITERAL:
11943
    xmlGenericError(xmlGenericErrorContext,
11944
      "PP: internal error, state == SYSTEM_LITERAL\n");
11945
    ctxt->instate = XML_PARSER_START_TAG;
11946
#ifdef DEBUG_PUSH
11947
    xmlGenericError(xmlGenericErrorContext,
11948
      "PP: entering START_TAG\n");
11949
#endif
11950
    break;
11951
            case XML_PARSER_PUBLIC_LITERAL:
11952
    xmlGenericError(xmlGenericErrorContext,
11953
      "PP: internal error, state == PUBLIC_LITERAL\n");
11954
    ctxt->instate = XML_PARSER_START_TAG;
11955
#ifdef DEBUG_PUSH
11956
    xmlGenericError(xmlGenericErrorContext,
11957
      "PP: entering START_TAG\n");
11958
#endif
11959
    break;
11960
  }
11961
    }
11962
done:
11963
#ifdef DEBUG_PUSH
11964
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11965
#endif
11966
    return(ret);
11967
encoding_error:
11968
    if (ctxt->input->end - ctxt->input->cur < 4) {
11969
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11970
         "Input is not proper UTF-8, indicate encoding !\n",
11971
         NULL, NULL);
11972
    } else {
11973
        char buffer[150];
11974
11975
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11976
      ctxt->input->cur[0], ctxt->input->cur[1],
11977
      ctxt->input->cur[2], ctxt->input->cur[3]);
11978
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11979
         "Input is not proper UTF-8, indicate encoding !\n%s",
11980
         BAD_CAST buffer, NULL);
11981
    }
11982
    return(0);
11983
}
11984
11985
/**
11986
 * xmlParseChunk:
11987
 * @ctxt:  an XML parser context
11988
 * @chunk:  an char array
11989
 * @size:  the size in byte of the chunk
11990
 * @terminate:  last chunk indicator
11991
 *
11992
 * Parse a Chunk of memory
11993
 *
11994
 * Returns zero if no error, the xmlParserErrors otherwise.
11995
 */
11996
int
11997
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11998
              int terminate) {
11999
    int end_in_lf = 0;
12000
    int remain = 0;
12001
12002
    if (ctxt == NULL)
12003
        return(XML_ERR_INTERNAL_ERROR);
12004
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12005
        return(ctxt->errNo);
12006
    if (ctxt->instate == XML_PARSER_EOF)
12007
        return(-1);
12008
    if (ctxt->input == NULL)
12009
        return(-1);
12010
12011
    ctxt->progressive = 1;
12012
    if (ctxt->instate == XML_PARSER_START)
12013
        xmlDetectSAX2(ctxt);
12014
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12015
        (chunk[size - 1] == '\r')) {
12016
  end_in_lf = 1;
12017
  size--;
12018
    }
12019
12020
xmldecl_done:
12021
12022
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12023
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12024
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12025
  size_t cur = ctxt->input->cur - ctxt->input->base;
12026
  int res;
12027
12028
        /*
12029
         * Specific handling if we autodetected an encoding, we should not
12030
         * push more than the first line ... which depend on the encoding
12031
         * And only push the rest once the final encoding was detected
12032
         */
12033
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12034
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12035
            unsigned int len = 45;
12036
12037
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12038
                               BAD_CAST "UTF-16")) ||
12039
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12040
                               BAD_CAST "UTF16")))
12041
                len = 90;
12042
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12043
                                    BAD_CAST "UCS-4")) ||
12044
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12045
                                    BAD_CAST "UCS4")))
12046
                len = 180;
12047
12048
            if (ctxt->input->buf->rawconsumed < len)
12049
                len -= ctxt->input->buf->rawconsumed;
12050
12051
            /*
12052
             * Change size for reading the initial declaration only
12053
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12054
             * will blindly copy extra bytes from memory.
12055
             */
12056
            if ((unsigned int) size > len) {
12057
                remain = size - len;
12058
                size = len;
12059
            } else {
12060
                remain = 0;
12061
            }
12062
        }
12063
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12064
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12065
  if (res < 0) {
12066
      ctxt->errNo = XML_PARSER_EOF;
12067
      xmlHaltParser(ctxt);
12068
      return (XML_PARSER_EOF);
12069
  }
12070
#ifdef DEBUG_PUSH
12071
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12072
#endif
12073
12074
    } else if (ctxt->instate != XML_PARSER_EOF) {
12075
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12076
      xmlParserInputBufferPtr in = ctxt->input->buf;
12077
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12078
        (in->raw != NULL)) {
12079
    int nbchars;
12080
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12081
    size_t current = ctxt->input->cur - ctxt->input->base;
12082
12083
    nbchars = xmlCharEncInput(in, terminate);
12084
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12085
    if (nbchars < 0) {
12086
        /* TODO 2.6.0 */
12087
        xmlGenericError(xmlGenericErrorContext,
12088
            "xmlParseChunk: encoder error\n");
12089
                    xmlHaltParser(ctxt);
12090
        return(XML_ERR_INVALID_ENCODING);
12091
    }
12092
      }
12093
  }
12094
    }
12095
12096
    if (remain != 0) {
12097
        xmlParseTryOrFinish(ctxt, 0);
12098
    } else {
12099
        xmlParseTryOrFinish(ctxt, terminate);
12100
    }
12101
    if (ctxt->instate == XML_PARSER_EOF)
12102
        return(ctxt->errNo);
12103
12104
    if ((ctxt->input != NULL) &&
12105
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12106
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12107
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12108
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12109
        xmlHaltParser(ctxt);
12110
    }
12111
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12112
        return(ctxt->errNo);
12113
12114
    if (remain != 0) {
12115
        chunk += size;
12116
        size = remain;
12117
        remain = 0;
12118
        goto xmldecl_done;
12119
    }
12120
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12121
        (ctxt->input->buf != NULL)) {
12122
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12123
           ctxt->input);
12124
  size_t current = ctxt->input->cur - ctxt->input->base;
12125
12126
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12127
12128
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12129
            base, current);
12130
    }
12131
    if (terminate) {
12132
  /*
12133
   * Check for termination
12134
   */
12135
  if ((ctxt->instate != XML_PARSER_EOF) &&
12136
      (ctxt->instate != XML_PARSER_EPILOG)) {
12137
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12138
  }
12139
  if ((ctxt->instate == XML_PARSER_EPILOG) &&
12140
            (ctxt->input->cur < ctxt->input->end)) {
12141
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12142
  }
12143
  if (ctxt->instate != XML_PARSER_EOF) {
12144
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12145
    ctxt->sax->endDocument(ctxt->userData);
12146
  }
12147
  ctxt->instate = XML_PARSER_EOF;
12148
    }
12149
    if (ctxt->wellFormed == 0)
12150
  return((xmlParserErrors) ctxt->errNo);
12151
    else
12152
        return(0);
12153
}
12154
12155
/************************************************************************
12156
 *                  *
12157
 *    I/O front end functions to the parser     *
12158
 *                  *
12159
 ************************************************************************/
12160
12161
/**
12162
 * xmlCreatePushParserCtxt:
12163
 * @sax:  a SAX handler
12164
 * @user_data:  The user data returned on SAX callbacks
12165
 * @chunk:  a pointer to an array of chars
12166
 * @size:  number of chars in the array
12167
 * @filename:  an optional file name or URI
12168
 *
12169
 * Create a parser context for using the XML parser in push mode.
12170
 * If @buffer and @size are non-NULL, the data is used to detect
12171
 * the encoding.  The remaining characters will be parsed so they
12172
 * don't need to be fed in again through xmlParseChunk.
12173
 * To allow content encoding detection, @size should be >= 4
12174
 * The value of @filename is used for fetching external entities
12175
 * and error/warning reports.
12176
 *
12177
 * Returns the new parser context or NULL
12178
 */
12179
12180
xmlParserCtxtPtr
12181
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12182
                        const char *chunk, int size, const char *filename) {
12183
    xmlParserCtxtPtr ctxt;
12184
    xmlParserInputPtr inputStream;
12185
    xmlParserInputBufferPtr buf;
12186
12187
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
12188
    if (buf == NULL) return(NULL);
12189
12190
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12191
    if (ctxt == NULL) {
12192
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12193
  xmlFreeParserInputBuffer(buf);
12194
  return(NULL);
12195
    }
12196
    ctxt->dictNames = 1;
12197
    if (filename == NULL) {
12198
  ctxt->directory = NULL;
12199
    } else {
12200
        ctxt->directory = xmlParserGetDirectory(filename);
12201
    }
12202
12203
    inputStream = xmlNewInputStream(ctxt);
12204
    if (inputStream == NULL) {
12205
  xmlFreeParserCtxt(ctxt);
12206
  xmlFreeParserInputBuffer(buf);
12207
  return(NULL);
12208
    }
12209
12210
    if (filename == NULL)
12211
  inputStream->filename = NULL;
12212
    else {
12213
  inputStream->filename = (char *)
12214
      xmlCanonicPath((const xmlChar *) filename);
12215
  if (inputStream->filename == NULL) {
12216
            xmlFreeInputStream(inputStream);
12217
      xmlFreeParserCtxt(ctxt);
12218
      xmlFreeParserInputBuffer(buf);
12219
      return(NULL);
12220
  }
12221
    }
12222
    inputStream->buf = buf;
12223
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12224
    inputPush(ctxt, inputStream);
12225
12226
    /*
12227
     * If the caller didn't provide an initial 'chunk' for determining
12228
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12229
     * that it can be automatically determined later
12230
     */
12231
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12232
12233
    if ((size != 0) && (chunk != NULL) &&
12234
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12235
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12236
  size_t cur = ctxt->input->cur - ctxt->input->base;
12237
12238
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12239
12240
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12241
#ifdef DEBUG_PUSH
12242
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12243
#endif
12244
    }
12245
12246
    return(ctxt);
12247
}
12248
#endif /* LIBXML_PUSH_ENABLED */
12249
12250
/**
12251
 * xmlStopParser:
12252
 * @ctxt:  an XML parser context
12253
 *
12254
 * Blocks further parser processing
12255
 */
12256
void
12257
68.8k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12258
68.8k
    if (ctxt == NULL)
12259
68.8k
        return;
12260
0
    xmlHaltParser(ctxt);
12261
0
    ctxt->errNo = XML_ERR_USER_STOP;
12262
0
}
12263
12264
/**
12265
 * xmlCreateIOParserCtxt:
12266
 * @sax:  a SAX handler
12267
 * @user_data:  The user data returned on SAX callbacks
12268
 * @ioread:  an I/O read function
12269
 * @ioclose:  an I/O close function
12270
 * @ioctx:  an I/O handler
12271
 * @enc:  the charset encoding if known
12272
 *
12273
 * Create a parser context for using the XML parser with an existing
12274
 * I/O stream
12275
 *
12276
 * Returns the new parser context or NULL
12277
 */
12278
xmlParserCtxtPtr
12279
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12280
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12281
0
  void *ioctx, xmlCharEncoding enc) {
12282
0
    xmlParserCtxtPtr ctxt;
12283
0
    xmlParserInputPtr inputStream;
12284
0
    xmlParserInputBufferPtr buf;
12285
12286
0
    if (ioread == NULL) return(NULL);
12287
12288
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12289
0
    if (buf == NULL) {
12290
0
        if (ioclose != NULL)
12291
0
            ioclose(ioctx);
12292
0
        return (NULL);
12293
0
    }
12294
12295
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12296
0
    if (ctxt == NULL) {
12297
0
  xmlFreeParserInputBuffer(buf);
12298
0
  return(NULL);
12299
0
    }
12300
12301
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12302
0
    if (inputStream == NULL) {
12303
0
  xmlFreeParserCtxt(ctxt);
12304
0
  return(NULL);
12305
0
    }
12306
0
    inputPush(ctxt, inputStream);
12307
12308
0
    return(ctxt);
12309
0
}
12310
12311
#ifdef LIBXML_VALID_ENABLED
12312
/************************************************************************
12313
 *                  *
12314
 *    Front ends when parsing a DTD       *
12315
 *                  *
12316
 ************************************************************************/
12317
12318
/**
12319
 * xmlIOParseDTD:
12320
 * @sax:  the SAX handler block or NULL
12321
 * @input:  an Input Buffer
12322
 * @enc:  the charset encoding if known
12323
 *
12324
 * Load and parse a DTD
12325
 *
12326
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12327
 * @input will be freed by the function in any case.
12328
 */
12329
12330
xmlDtdPtr
12331
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12332
        xmlCharEncoding enc) {
12333
    xmlDtdPtr ret = NULL;
12334
    xmlParserCtxtPtr ctxt;
12335
    xmlParserInputPtr pinput = NULL;
12336
    xmlChar start[4];
12337
12338
    if (input == NULL)
12339
  return(NULL);
12340
12341
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12342
    if (ctxt == NULL) {
12343
        xmlFreeParserInputBuffer(input);
12344
  return(NULL);
12345
    }
12346
12347
    /* We are loading a DTD */
12348
    ctxt->options |= XML_PARSE_DTDLOAD;
12349
12350
    xmlDetectSAX2(ctxt);
12351
12352
    /*
12353
     * generate a parser input from the I/O handler
12354
     */
12355
12356
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12357
    if (pinput == NULL) {
12358
        xmlFreeParserInputBuffer(input);
12359
  xmlFreeParserCtxt(ctxt);
12360
  return(NULL);
12361
    }
12362
12363
    /*
12364
     * plug some encoding conversion routines here.
12365
     */
12366
    if (xmlPushInput(ctxt, pinput) < 0) {
12367
  xmlFreeParserCtxt(ctxt);
12368
  return(NULL);
12369
    }
12370
    if (enc != XML_CHAR_ENCODING_NONE) {
12371
        xmlSwitchEncoding(ctxt, enc);
12372
    }
12373
12374
    pinput->filename = NULL;
12375
    pinput->line = 1;
12376
    pinput->col = 1;
12377
    pinput->base = ctxt->input->cur;
12378
    pinput->cur = ctxt->input->cur;
12379
    pinput->free = NULL;
12380
12381
    /*
12382
     * let's parse that entity knowing it's an external subset.
12383
     */
12384
    ctxt->inSubset = 2;
12385
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12386
    if (ctxt->myDoc == NULL) {
12387
  xmlErrMemory(ctxt, "New Doc failed");
12388
  return(NULL);
12389
    }
12390
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12391
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12392
                                 BAD_CAST "none", BAD_CAST "none");
12393
12394
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12395
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12396
  /*
12397
   * Get the 4 first bytes and decode the charset
12398
   * if enc != XML_CHAR_ENCODING_NONE
12399
   * plug some encoding conversion routines.
12400
   */
12401
  start[0] = RAW;
12402
  start[1] = NXT(1);
12403
  start[2] = NXT(2);
12404
  start[3] = NXT(3);
12405
  enc = xmlDetectCharEncoding(start, 4);
12406
  if (enc != XML_CHAR_ENCODING_NONE) {
12407
      xmlSwitchEncoding(ctxt, enc);
12408
  }
12409
    }
12410
12411
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12412
12413
    if (ctxt->myDoc != NULL) {
12414
  if (ctxt->wellFormed) {
12415
      ret = ctxt->myDoc->extSubset;
12416
      ctxt->myDoc->extSubset = NULL;
12417
      if (ret != NULL) {
12418
    xmlNodePtr tmp;
12419
12420
    ret->doc = NULL;
12421
    tmp = ret->children;
12422
    while (tmp != NULL) {
12423
        tmp->doc = NULL;
12424
        tmp = tmp->next;
12425
    }
12426
      }
12427
  } else {
12428
      ret = NULL;
12429
  }
12430
        xmlFreeDoc(ctxt->myDoc);
12431
        ctxt->myDoc = NULL;
12432
    }
12433
    xmlFreeParserCtxt(ctxt);
12434
12435
    return(ret);
12436
}
12437
12438
/**
12439
 * xmlSAXParseDTD:
12440
 * @sax:  the SAX handler block
12441
 * @ExternalID:  a NAME* containing the External ID of the DTD
12442
 * @SystemID:  a NAME* containing the URL to the DTD
12443
 *
12444
 * DEPRECATED: Don't use.
12445
 *
12446
 * Load and parse an external subset.
12447
 *
12448
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12449
 */
12450
12451
xmlDtdPtr
12452
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12453
                          const xmlChar *SystemID) {
12454
    xmlDtdPtr ret = NULL;
12455
    xmlParserCtxtPtr ctxt;
12456
    xmlParserInputPtr input = NULL;
12457
    xmlCharEncoding enc;
12458
    xmlChar* systemIdCanonic;
12459
12460
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12461
12462
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12463
    if (ctxt == NULL) {
12464
  return(NULL);
12465
    }
12466
12467
    /* We are loading a DTD */
12468
    ctxt->options |= XML_PARSE_DTDLOAD;
12469
12470
    /*
12471
     * Canonicalise the system ID
12472
     */
12473
    systemIdCanonic = xmlCanonicPath(SystemID);
12474
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12475
  xmlFreeParserCtxt(ctxt);
12476
  return(NULL);
12477
    }
12478
12479
    /*
12480
     * Ask the Entity resolver to load the damn thing
12481
     */
12482
12483
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12484
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12485
                                   systemIdCanonic);
12486
    if (input == NULL) {
12487
  xmlFreeParserCtxt(ctxt);
12488
  if (systemIdCanonic != NULL)
12489
      xmlFree(systemIdCanonic);
12490
  return(NULL);
12491
    }
12492
12493
    /*
12494
     * plug some encoding conversion routines here.
12495
     */
12496
    if (xmlPushInput(ctxt, input) < 0) {
12497
  xmlFreeParserCtxt(ctxt);
12498
  if (systemIdCanonic != NULL)
12499
      xmlFree(systemIdCanonic);
12500
  return(NULL);
12501
    }
12502
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12503
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12504
  xmlSwitchEncoding(ctxt, enc);
12505
    }
12506
12507
    if (input->filename == NULL)
12508
  input->filename = (char *) systemIdCanonic;
12509
    else
12510
  xmlFree(systemIdCanonic);
12511
    input->line = 1;
12512
    input->col = 1;
12513
    input->base = ctxt->input->cur;
12514
    input->cur = ctxt->input->cur;
12515
    input->free = NULL;
12516
12517
    /*
12518
     * let's parse that entity knowing it's an external subset.
12519
     */
12520
    ctxt->inSubset = 2;
12521
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12522
    if (ctxt->myDoc == NULL) {
12523
  xmlErrMemory(ctxt, "New Doc failed");
12524
  xmlFreeParserCtxt(ctxt);
12525
  return(NULL);
12526
    }
12527
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12528
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12529
                                 ExternalID, SystemID);
12530
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12531
12532
    if (ctxt->myDoc != NULL) {
12533
  if (ctxt->wellFormed) {
12534
      ret = ctxt->myDoc->extSubset;
12535
      ctxt->myDoc->extSubset = NULL;
12536
      if (ret != NULL) {
12537
    xmlNodePtr tmp;
12538
12539
    ret->doc = NULL;
12540
    tmp = ret->children;
12541
    while (tmp != NULL) {
12542
        tmp->doc = NULL;
12543
        tmp = tmp->next;
12544
    }
12545
      }
12546
  } else {
12547
      ret = NULL;
12548
  }
12549
        xmlFreeDoc(ctxt->myDoc);
12550
        ctxt->myDoc = NULL;
12551
    }
12552
    xmlFreeParserCtxt(ctxt);
12553
12554
    return(ret);
12555
}
12556
12557
12558
/**
12559
 * xmlParseDTD:
12560
 * @ExternalID:  a NAME* containing the External ID of the DTD
12561
 * @SystemID:  a NAME* containing the URL to the DTD
12562
 *
12563
 * Load and parse an external subset.
12564
 *
12565
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12566
 */
12567
12568
xmlDtdPtr
12569
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12570
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12571
}
12572
#endif /* LIBXML_VALID_ENABLED */
12573
12574
/************************************************************************
12575
 *                  *
12576
 *    Front ends when parsing an Entity     *
12577
 *                  *
12578
 ************************************************************************/
12579
12580
/**
12581
 * xmlParseCtxtExternalEntity:
12582
 * @ctx:  the existing parsing context
12583
 * @URL:  the URL for the entity to load
12584
 * @ID:  the System ID for the entity to load
12585
 * @lst:  the return value for the set of parsed nodes
12586
 *
12587
 * Parse an external general entity within an existing parsing context
12588
 * An external general parsed entity is well-formed if it matches the
12589
 * production labeled extParsedEnt.
12590
 *
12591
 * [78] extParsedEnt ::= TextDecl? content
12592
 *
12593
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12594
 *    the parser error code otherwise
12595
 */
12596
12597
int
12598
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12599
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12600
0
    void *userData;
12601
12602
0
    if (ctx == NULL) return(-1);
12603
    /*
12604
     * If the user provided their own SAX callbacks, then reuse the
12605
     * userData callback field, otherwise the expected setup in a
12606
     * DOM builder is to have userData == ctxt
12607
     */
12608
0
    if (ctx->userData == ctx)
12609
0
        userData = NULL;
12610
0
    else
12611
0
        userData = ctx->userData;
12612
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12613
0
                                         userData, ctx->depth + 1,
12614
0
                                         URL, ID, lst);
12615
0
}
12616
12617
/**
12618
 * xmlParseExternalEntityPrivate:
12619
 * @doc:  the document the chunk pertains to
12620
 * @oldctxt:  the previous parser context if available
12621
 * @sax:  the SAX handler block (possibly NULL)
12622
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12623
 * @depth:  Used for loop detection, use 0
12624
 * @URL:  the URL for the entity to load
12625
 * @ID:  the System ID for the entity to load
12626
 * @list:  the return value for the set of parsed nodes
12627
 *
12628
 * Private version of xmlParseExternalEntity()
12629
 *
12630
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12631
 *    the parser error code otherwise
12632
 */
12633
12634
static xmlParserErrors
12635
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12636
                xmlSAXHandlerPtr sax,
12637
          void *user_data, int depth, const xmlChar *URL,
12638
0
          const xmlChar *ID, xmlNodePtr *list) {
12639
0
    xmlParserCtxtPtr ctxt;
12640
0
    xmlDocPtr newDoc;
12641
0
    xmlNodePtr newRoot;
12642
0
    xmlParserErrors ret = XML_ERR_OK;
12643
0
    xmlChar start[4];
12644
0
    xmlCharEncoding enc;
12645
12646
0
    if (((depth > 40) &&
12647
0
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12648
0
  (depth > 100)) {
12649
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12650
0
                       "Maximum entity nesting depth exceeded");
12651
0
        return(XML_ERR_ENTITY_LOOP);
12652
0
    }
12653
12654
0
    if (list != NULL)
12655
0
        *list = NULL;
12656
0
    if ((URL == NULL) && (ID == NULL))
12657
0
  return(XML_ERR_INTERNAL_ERROR);
12658
0
    if (doc == NULL)
12659
0
  return(XML_ERR_INTERNAL_ERROR);
12660
12661
0
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12662
0
                                             oldctxt);
12663
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12664
0
    if (oldctxt != NULL) {
12665
0
        ctxt->nbErrors = oldctxt->nbErrors;
12666
0
        ctxt->nbWarnings = oldctxt->nbWarnings;
12667
0
    }
12668
0
    xmlDetectSAX2(ctxt);
12669
12670
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12671
0
    if (newDoc == NULL) {
12672
0
  xmlFreeParserCtxt(ctxt);
12673
0
  return(XML_ERR_INTERNAL_ERROR);
12674
0
    }
12675
0
    newDoc->properties = XML_DOC_INTERNAL;
12676
0
    if (doc) {
12677
0
        newDoc->intSubset = doc->intSubset;
12678
0
        newDoc->extSubset = doc->extSubset;
12679
0
        if (doc->dict) {
12680
0
            newDoc->dict = doc->dict;
12681
0
            xmlDictReference(newDoc->dict);
12682
0
        }
12683
0
        if (doc->URL != NULL) {
12684
0
            newDoc->URL = xmlStrdup(doc->URL);
12685
0
        }
12686
0
    }
12687
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12688
0
    if (newRoot == NULL) {
12689
0
  if (sax != NULL)
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  newDoc->intSubset = NULL;
12692
0
  newDoc->extSubset = NULL;
12693
0
        xmlFreeDoc(newDoc);
12694
0
  return(XML_ERR_INTERNAL_ERROR);
12695
0
    }
12696
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12697
0
    nodePush(ctxt, newDoc->children);
12698
0
    if (doc == NULL) {
12699
0
        ctxt->myDoc = newDoc;
12700
0
    } else {
12701
0
        ctxt->myDoc = doc;
12702
0
        newRoot->doc = doc;
12703
0
    }
12704
12705
    /*
12706
     * Get the 4 first bytes and decode the charset
12707
     * if enc != XML_CHAR_ENCODING_NONE
12708
     * plug some encoding conversion routines.
12709
     */
12710
0
    GROW;
12711
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12712
0
  start[0] = RAW;
12713
0
  start[1] = NXT(1);
12714
0
  start[2] = NXT(2);
12715
0
  start[3] = NXT(3);
12716
0
  enc = xmlDetectCharEncoding(start, 4);
12717
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12718
0
      xmlSwitchEncoding(ctxt, enc);
12719
0
  }
12720
0
    }
12721
12722
    /*
12723
     * Parse a possible text declaration first
12724
     */
12725
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12726
0
  xmlParseTextDecl(ctxt);
12727
        /*
12728
         * An XML-1.0 document can't reference an entity not XML-1.0
12729
         */
12730
0
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12731
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12732
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12733
0
                           "Version mismatch between document and entity\n");
12734
0
        }
12735
0
    }
12736
12737
0
    ctxt->instate = XML_PARSER_CONTENT;
12738
0
    ctxt->depth = depth;
12739
0
    if (oldctxt != NULL) {
12740
0
  ctxt->_private = oldctxt->_private;
12741
0
  ctxt->loadsubset = oldctxt->loadsubset;
12742
0
  ctxt->validate = oldctxt->validate;
12743
0
  ctxt->valid = oldctxt->valid;
12744
0
  ctxt->replaceEntities = oldctxt->replaceEntities;
12745
0
        if (oldctxt->validate) {
12746
0
            ctxt->vctxt.error = oldctxt->vctxt.error;
12747
0
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12748
0
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12749
0
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12750
0
        }
12751
0
  ctxt->external = oldctxt->external;
12752
0
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12753
0
        ctxt->dict = oldctxt->dict;
12754
0
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12755
0
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12756
0
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12757
0
        ctxt->dictNames = oldctxt->dictNames;
12758
0
        ctxt->attsDefault = oldctxt->attsDefault;
12759
0
        ctxt->attsSpecial = oldctxt->attsSpecial;
12760
0
        ctxt->linenumbers = oldctxt->linenumbers;
12761
0
  ctxt->record_info = oldctxt->record_info;
12762
0
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12763
0
  ctxt->node_seq.length = oldctxt->node_seq.length;
12764
0
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12765
0
    } else {
12766
  /*
12767
   * Doing validity checking on chunk without context
12768
   * doesn't make sense
12769
   */
12770
0
  ctxt->_private = NULL;
12771
0
  ctxt->validate = 0;
12772
0
  ctxt->external = 2;
12773
0
  ctxt->loadsubset = 0;
12774
0
    }
12775
12776
0
    xmlParseContent(ctxt);
12777
12778
0
    if ((RAW == '<') && (NXT(1) == '/')) {
12779
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12780
0
    } else if (RAW != 0) {
12781
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12782
0
    }
12783
0
    if (ctxt->node != newDoc->children) {
12784
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12785
0
    }
12786
12787
0
    if (!ctxt->wellFormed) {
12788
0
  ret = (xmlParserErrors)ctxt->errNo;
12789
0
        if (oldctxt != NULL) {
12790
0
            oldctxt->errNo = ctxt->errNo;
12791
0
            oldctxt->wellFormed = 0;
12792
0
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12793
0
        }
12794
0
    } else {
12795
0
  if (list != NULL) {
12796
0
      xmlNodePtr cur;
12797
12798
      /*
12799
       * Return the newly created nodeset after unlinking it from
12800
       * they pseudo parent.
12801
       */
12802
0
      cur = newDoc->children->children;
12803
0
      *list = cur;
12804
0
      while (cur != NULL) {
12805
0
    cur->parent = NULL;
12806
0
    cur = cur->next;
12807
0
      }
12808
0
            newDoc->children->children = NULL;
12809
0
  }
12810
0
  ret = XML_ERR_OK;
12811
0
    }
12812
12813
    /*
12814
     * Also record the size of the entity parsed
12815
     */
12816
0
    if (ctxt->input != NULL && oldctxt != NULL) {
12817
0
        unsigned long consumed = ctxt->input->consumed;
12818
12819
0
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12820
12821
0
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12822
0
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12823
12824
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12825
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12826
0
    }
12827
12828
0
    if (oldctxt != NULL) {
12829
0
        ctxt->dict = NULL;
12830
0
        ctxt->attsDefault = NULL;
12831
0
        ctxt->attsSpecial = NULL;
12832
0
        oldctxt->nbErrors = ctxt->nbErrors;
12833
0
        oldctxt->nbWarnings = ctxt->nbWarnings;
12834
0
        oldctxt->validate = ctxt->validate;
12835
0
        oldctxt->valid = ctxt->valid;
12836
0
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12837
0
        oldctxt->node_seq.length = ctxt->node_seq.length;
12838
0
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12839
0
    }
12840
0
    ctxt->node_seq.maximum = 0;
12841
0
    ctxt->node_seq.length = 0;
12842
0
    ctxt->node_seq.buffer = NULL;
12843
0
    xmlFreeParserCtxt(ctxt);
12844
0
    newDoc->intSubset = NULL;
12845
0
    newDoc->extSubset = NULL;
12846
0
    xmlFreeDoc(newDoc);
12847
12848
0
    return(ret);
12849
0
}
12850
12851
#ifdef LIBXML_SAX1_ENABLED
12852
/**
12853
 * xmlParseExternalEntity:
12854
 * @doc:  the document the chunk pertains to
12855
 * @sax:  the SAX handler block (possibly NULL)
12856
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12857
 * @depth:  Used for loop detection, use 0
12858
 * @URL:  the URL for the entity to load
12859
 * @ID:  the System ID for the entity to load
12860
 * @lst:  the return value for the set of parsed nodes
12861
 *
12862
 * Parse an external general entity
12863
 * An external general parsed entity is well-formed if it matches the
12864
 * production labeled extParsedEnt.
12865
 *
12866
 * [78] extParsedEnt ::= TextDecl? content
12867
 *
12868
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12869
 *    the parser error code otherwise
12870
 */
12871
12872
int
12873
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12874
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12875
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12876
                           ID, lst));
12877
}
12878
12879
/**
12880
 * xmlParseBalancedChunkMemory:
12881
 * @doc:  the document the chunk pertains to (must not be NULL)
12882
 * @sax:  the SAX handler block (possibly NULL)
12883
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12884
 * @depth:  Used for loop detection, use 0
12885
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12886
 * @lst:  the return value for the set of parsed nodes
12887
 *
12888
 * Parse a well-balanced chunk of an XML document
12889
 * called by the parser
12890
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12891
 * the content production in the XML grammar:
12892
 *
12893
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12894
 *
12895
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12896
 *    the parser error code otherwise
12897
 */
12898
12899
int
12900
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12901
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12902
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12903
                                                depth, string, lst, 0 );
12904
}
12905
#endif /* LIBXML_SAX1_ENABLED */
12906
12907
/**
12908
 * xmlParseBalancedChunkMemoryInternal:
12909
 * @oldctxt:  the existing parsing context
12910
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12911
 * @user_data:  the user data field for the parser context
12912
 * @lst:  the return value for the set of parsed nodes
12913
 *
12914
 *
12915
 * Parse a well-balanced chunk of an XML document
12916
 * called by the parser
12917
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12918
 * the content production in the XML grammar:
12919
 *
12920
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12921
 *
12922
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12923
 * error code otherwise
12924
 *
12925
 * In case recover is set to 1, the nodelist will not be empty even if
12926
 * the parsed chunk is not well balanced.
12927
 */
12928
static xmlParserErrors
12929
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12930
0
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12931
0
    xmlParserCtxtPtr ctxt;
12932
0
    xmlDocPtr newDoc = NULL;
12933
0
    xmlNodePtr newRoot;
12934
0
    xmlSAXHandlerPtr oldsax = NULL;
12935
0
    xmlNodePtr content = NULL;
12936
0
    xmlNodePtr last = NULL;
12937
0
    int size;
12938
0
    xmlParserErrors ret = XML_ERR_OK;
12939
0
#ifdef SAX2
12940
0
    int i;
12941
0
#endif
12942
12943
0
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12944
0
        (oldctxt->depth >  100)) {
12945
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12946
0
                       "Maximum entity nesting depth exceeded");
12947
0
  return(XML_ERR_ENTITY_LOOP);
12948
0
    }
12949
12950
12951
0
    if (lst != NULL)
12952
0
        *lst = NULL;
12953
0
    if (string == NULL)
12954
0
        return(XML_ERR_INTERNAL_ERROR);
12955
12956
0
    size = xmlStrlen(string);
12957
12958
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12959
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12960
0
    ctxt->nbErrors = oldctxt->nbErrors;
12961
0
    ctxt->nbWarnings = oldctxt->nbWarnings;
12962
0
    if (user_data != NULL)
12963
0
  ctxt->userData = user_data;
12964
0
    else
12965
0
  ctxt->userData = ctxt;
12966
0
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12967
0
    ctxt->dict = oldctxt->dict;
12968
0
    ctxt->input_id = oldctxt->input_id;
12969
0
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12970
0
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12971
0
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12972
12973
0
#ifdef SAX2
12974
    /* propagate namespaces down the entity */
12975
0
    for (i = 0;i < oldctxt->nsNr;i += 2) {
12976
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12977
0
    }
12978
0
#endif
12979
12980
0
    oldsax = ctxt->sax;
12981
0
    ctxt->sax = oldctxt->sax;
12982
0
    xmlDetectSAX2(ctxt);
12983
0
    ctxt->replaceEntities = oldctxt->replaceEntities;
12984
0
    ctxt->options = oldctxt->options;
12985
12986
0
    ctxt->_private = oldctxt->_private;
12987
0
    if (oldctxt->myDoc == NULL) {
12988
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
12989
0
  if (newDoc == NULL) {
12990
0
      ctxt->sax = oldsax;
12991
0
      ctxt->dict = NULL;
12992
0
      xmlFreeParserCtxt(ctxt);
12993
0
      return(XML_ERR_INTERNAL_ERROR);
12994
0
  }
12995
0
  newDoc->properties = XML_DOC_INTERNAL;
12996
0
  newDoc->dict = ctxt->dict;
12997
0
  xmlDictReference(newDoc->dict);
12998
0
  ctxt->myDoc = newDoc;
12999
0
    } else {
13000
0
  ctxt->myDoc = oldctxt->myDoc;
13001
0
        content = ctxt->myDoc->children;
13002
0
  last = ctxt->myDoc->last;
13003
0
    }
13004
0
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13005
0
    if (newRoot == NULL) {
13006
0
  ctxt->sax = oldsax;
13007
0
  ctxt->dict = NULL;
13008
0
  xmlFreeParserCtxt(ctxt);
13009
0
  if (newDoc != NULL) {
13010
0
      xmlFreeDoc(newDoc);
13011
0
  }
13012
0
  return(XML_ERR_INTERNAL_ERROR);
13013
0
    }
13014
0
    ctxt->myDoc->children = NULL;
13015
0
    ctxt->myDoc->last = NULL;
13016
0
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13017
0
    nodePush(ctxt, ctxt->myDoc->children);
13018
0
    ctxt->instate = XML_PARSER_CONTENT;
13019
0
    ctxt->depth = oldctxt->depth;
13020
13021
0
    ctxt->validate = 0;
13022
0
    ctxt->loadsubset = oldctxt->loadsubset;
13023
0
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13024
  /*
13025
   * ID/IDREF registration will be done in xmlValidateElement below
13026
   */
13027
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13028
0
    }
13029
0
    ctxt->dictNames = oldctxt->dictNames;
13030
0
    ctxt->attsDefault = oldctxt->attsDefault;
13031
0
    ctxt->attsSpecial = oldctxt->attsSpecial;
13032
13033
0
    xmlParseContent(ctxt);
13034
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13035
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13036
0
    } else if (RAW != 0) {
13037
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13038
0
    }
13039
0
    if (ctxt->node != ctxt->myDoc->children) {
13040
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13041
0
    }
13042
13043
0
    if (!ctxt->wellFormed) {
13044
0
  ret = (xmlParserErrors)ctxt->errNo;
13045
0
        oldctxt->errNo = ctxt->errNo;
13046
0
        oldctxt->wellFormed = 0;
13047
0
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13048
0
    } else {
13049
0
        ret = XML_ERR_OK;
13050
0
    }
13051
13052
0
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13053
0
  xmlNodePtr cur;
13054
13055
  /*
13056
   * Return the newly created nodeset after unlinking it from
13057
   * they pseudo parent.
13058
   */
13059
0
  cur = ctxt->myDoc->children->children;
13060
0
  *lst = cur;
13061
0
  while (cur != NULL) {
13062
#ifdef LIBXML_VALID_ENABLED
13063
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13064
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13065
    (cur->type == XML_ELEMENT_NODE)) {
13066
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13067
      oldctxt->myDoc, cur);
13068
      }
13069
#endif /* LIBXML_VALID_ENABLED */
13070
0
      cur->parent = NULL;
13071
0
      cur = cur->next;
13072
0
  }
13073
0
  ctxt->myDoc->children->children = NULL;
13074
0
    }
13075
0
    if (ctxt->myDoc != NULL) {
13076
0
  xmlFreeNode(ctxt->myDoc->children);
13077
0
        ctxt->myDoc->children = content;
13078
0
        ctxt->myDoc->last = last;
13079
0
    }
13080
13081
    /*
13082
     * Also record the size of the entity parsed
13083
     */
13084
0
    if (ctxt->input != NULL && oldctxt != NULL) {
13085
0
        unsigned long consumed = ctxt->input->consumed;
13086
13087
0
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13088
13089
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13090
0
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13091
0
    }
13092
13093
0
    oldctxt->nbErrors = ctxt->nbErrors;
13094
0
    oldctxt->nbWarnings = ctxt->nbWarnings;
13095
0
    ctxt->sax = oldsax;
13096
0
    ctxt->dict = NULL;
13097
0
    ctxt->attsDefault = NULL;
13098
0
    ctxt->attsSpecial = NULL;
13099
0
    xmlFreeParserCtxt(ctxt);
13100
0
    if (newDoc != NULL) {
13101
0
  xmlFreeDoc(newDoc);
13102
0
    }
13103
13104
0
    return(ret);
13105
0
}
13106
13107
/**
13108
 * xmlParseInNodeContext:
13109
 * @node:  the context node
13110
 * @data:  the input string
13111
 * @datalen:  the input string length in bytes
13112
 * @options:  a combination of xmlParserOption
13113
 * @lst:  the return value for the set of parsed nodes
13114
 *
13115
 * Parse a well-balanced chunk of an XML document
13116
 * within the context (DTD, namespaces, etc ...) of the given node.
13117
 *
13118
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13119
 * the content production in the XML grammar:
13120
 *
13121
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13122
 *
13123
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13124
 * error code otherwise
13125
 */
13126
xmlParserErrors
13127
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13128
0
                      int options, xmlNodePtr *lst) {
13129
0
#ifdef SAX2
13130
0
    xmlParserCtxtPtr ctxt;
13131
0
    xmlDocPtr doc = NULL;
13132
0
    xmlNodePtr fake, cur;
13133
0
    int nsnr = 0;
13134
13135
0
    xmlParserErrors ret = XML_ERR_OK;
13136
13137
    /*
13138
     * check all input parameters, grab the document
13139
     */
13140
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13141
0
        return(XML_ERR_INTERNAL_ERROR);
13142
0
    switch (node->type) {
13143
0
        case XML_ELEMENT_NODE:
13144
0
        case XML_ATTRIBUTE_NODE:
13145
0
        case XML_TEXT_NODE:
13146
0
        case XML_CDATA_SECTION_NODE:
13147
0
        case XML_ENTITY_REF_NODE:
13148
0
        case XML_PI_NODE:
13149
0
        case XML_COMMENT_NODE:
13150
0
        case XML_DOCUMENT_NODE:
13151
0
        case XML_HTML_DOCUMENT_NODE:
13152
0
      break;
13153
0
  default:
13154
0
      return(XML_ERR_INTERNAL_ERROR);
13155
13156
0
    }
13157
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13158
0
           (node->type != XML_DOCUMENT_NODE) &&
13159
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13160
0
  node = node->parent;
13161
0
    if (node == NULL)
13162
0
  return(XML_ERR_INTERNAL_ERROR);
13163
0
    if (node->type == XML_ELEMENT_NODE)
13164
0
  doc = node->doc;
13165
0
    else
13166
0
        doc = (xmlDocPtr) node;
13167
0
    if (doc == NULL)
13168
0
  return(XML_ERR_INTERNAL_ERROR);
13169
13170
    /*
13171
     * allocate a context and set-up everything not related to the
13172
     * node position in the tree
13173
     */
13174
0
    if (doc->type == XML_DOCUMENT_NODE)
13175
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13176
0
#ifdef LIBXML_HTML_ENABLED
13177
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13178
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13179
        /*
13180
         * When parsing in context, it makes no sense to add implied
13181
         * elements like html/body/etc...
13182
         */
13183
0
        options |= HTML_PARSE_NOIMPLIED;
13184
0
    }
13185
0
#endif
13186
0
    else
13187
0
        return(XML_ERR_INTERNAL_ERROR);
13188
13189
0
    if (ctxt == NULL)
13190
0
        return(XML_ERR_NO_MEMORY);
13191
13192
    /*
13193
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13194
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13195
     * we must wait until the last moment to free the original one.
13196
     */
13197
0
    if (doc->dict != NULL) {
13198
0
        if (ctxt->dict != NULL)
13199
0
      xmlDictFree(ctxt->dict);
13200
0
  ctxt->dict = doc->dict;
13201
0
    } else
13202
0
        options |= XML_PARSE_NODICT;
13203
13204
0
    if (doc->encoding != NULL) {
13205
0
        xmlCharEncodingHandlerPtr hdlr;
13206
13207
0
        if (ctxt->encoding != NULL)
13208
0
      xmlFree((xmlChar *) ctxt->encoding);
13209
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13210
13211
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13212
0
        if (hdlr != NULL) {
13213
0
            xmlSwitchToEncoding(ctxt, hdlr);
13214
0
  } else {
13215
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13216
0
        }
13217
0
    }
13218
13219
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13220
0
    xmlDetectSAX2(ctxt);
13221
0
    ctxt->myDoc = doc;
13222
    /* parsing in context, i.e. as within existing content */
13223
0
    ctxt->input_id = 2;
13224
0
    ctxt->instate = XML_PARSER_CONTENT;
13225
13226
0
    fake = xmlNewDocComment(node->doc, NULL);
13227
0
    if (fake == NULL) {
13228
0
        xmlFreeParserCtxt(ctxt);
13229
0
  return(XML_ERR_NO_MEMORY);
13230
0
    }
13231
0
    xmlAddChild(node, fake);
13232
13233
0
    if (node->type == XML_ELEMENT_NODE) {
13234
0
  nodePush(ctxt, node);
13235
  /*
13236
   * initialize the SAX2 namespaces stack
13237
   */
13238
0
  cur = node;
13239
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13240
0
      xmlNsPtr ns = cur->nsDef;
13241
0
      const xmlChar *iprefix, *ihref;
13242
13243
0
      while (ns != NULL) {
13244
0
    if (ctxt->dict) {
13245
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13246
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13247
0
    } else {
13248
0
        iprefix = ns->prefix;
13249
0
        ihref = ns->href;
13250
0
    }
13251
13252
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13253
0
        nsPush(ctxt, iprefix, ihref);
13254
0
        nsnr++;
13255
0
    }
13256
0
    ns = ns->next;
13257
0
      }
13258
0
      cur = cur->parent;
13259
0
  }
13260
0
    }
13261
13262
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13263
  /*
13264
   * ID/IDREF registration will be done in xmlValidateElement below
13265
   */
13266
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13267
0
    }
13268
13269
0
#ifdef LIBXML_HTML_ENABLED
13270
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13271
0
        __htmlParseContent(ctxt);
13272
0
    else
13273
0
#endif
13274
0
  xmlParseContent(ctxt);
13275
13276
0
    nsPop(ctxt, nsnr);
13277
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13278
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13279
0
    } else if (RAW != 0) {
13280
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13281
0
    }
13282
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13283
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13284
0
  ctxt->wellFormed = 0;
13285
0
    }
13286
13287
0
    if (!ctxt->wellFormed) {
13288
0
        if (ctxt->errNo == 0)
13289
0
      ret = XML_ERR_INTERNAL_ERROR;
13290
0
  else
13291
0
      ret = (xmlParserErrors)ctxt->errNo;
13292
0
    } else {
13293
0
        ret = XML_ERR_OK;
13294
0
    }
13295
13296
    /*
13297
     * Return the newly created nodeset after unlinking it from
13298
     * the pseudo sibling.
13299
     */
13300
13301
0
    cur = fake->next;
13302
0
    fake->next = NULL;
13303
0
    node->last = fake;
13304
13305
0
    if (cur != NULL) {
13306
0
  cur->prev = NULL;
13307
0
    }
13308
13309
0
    *lst = cur;
13310
13311
0
    while (cur != NULL) {
13312
0
  cur->parent = NULL;
13313
0
  cur = cur->next;
13314
0
    }
13315
13316
0
    xmlUnlinkNode(fake);
13317
0
    xmlFreeNode(fake);
13318
13319
13320
0
    if (ret != XML_ERR_OK) {
13321
0
        xmlFreeNodeList(*lst);
13322
0
  *lst = NULL;
13323
0
    }
13324
13325
0
    if (doc->dict != NULL)
13326
0
        ctxt->dict = NULL;
13327
0
    xmlFreeParserCtxt(ctxt);
13328
13329
0
    return(ret);
13330
#else /* !SAX2 */
13331
    return(XML_ERR_INTERNAL_ERROR);
13332
#endif
13333
0
}
13334
13335
#ifdef LIBXML_SAX1_ENABLED
13336
/**
13337
 * xmlParseBalancedChunkMemoryRecover:
13338
 * @doc:  the document the chunk pertains to (must not be NULL)
13339
 * @sax:  the SAX handler block (possibly NULL)
13340
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13341
 * @depth:  Used for loop detection, use 0
13342
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13343
 * @lst:  the return value for the set of parsed nodes
13344
 * @recover: return nodes even if the data is broken (use 0)
13345
 *
13346
 *
13347
 * Parse a well-balanced chunk of an XML document
13348
 * called by the parser
13349
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13350
 * the content production in the XML grammar:
13351
 *
13352
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13353
 *
13354
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13355
 *    the parser error code otherwise
13356
 *
13357
 * In case recover is set to 1, the nodelist will not be empty even if
13358
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13359
 * some extent.
13360
 */
13361
int
13362
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13363
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13364
     int recover) {
13365
    xmlParserCtxtPtr ctxt;
13366
    xmlDocPtr newDoc;
13367
    xmlSAXHandlerPtr oldsax = NULL;
13368
    xmlNodePtr content, newRoot;
13369
    int size;
13370
    int ret = 0;
13371
13372
    if (depth > 40) {
13373
  return(XML_ERR_ENTITY_LOOP);
13374
    }
13375
13376
13377
    if (lst != NULL)
13378
        *lst = NULL;
13379
    if (string == NULL)
13380
        return(-1);
13381
13382
    size = xmlStrlen(string);
13383
13384
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13385
    if (ctxt == NULL) return(-1);
13386
    ctxt->userData = ctxt;
13387
    if (sax != NULL) {
13388
  oldsax = ctxt->sax;
13389
        ctxt->sax = sax;
13390
  if (user_data != NULL)
13391
      ctxt->userData = user_data;
13392
    }
13393
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13394
    if (newDoc == NULL) {
13395
  xmlFreeParserCtxt(ctxt);
13396
  return(-1);
13397
    }
13398
    newDoc->properties = XML_DOC_INTERNAL;
13399
    if ((doc != NULL) && (doc->dict != NULL)) {
13400
        xmlDictFree(ctxt->dict);
13401
  ctxt->dict = doc->dict;
13402
  xmlDictReference(ctxt->dict);
13403
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13404
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13405
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13406
  ctxt->dictNames = 1;
13407
    } else {
13408
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13409
    }
13410
    /* doc == NULL is only supported for historic reasons */
13411
    if (doc != NULL) {
13412
  newDoc->intSubset = doc->intSubset;
13413
  newDoc->extSubset = doc->extSubset;
13414
    }
13415
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13416
    if (newRoot == NULL) {
13417
  if (sax != NULL)
13418
      ctxt->sax = oldsax;
13419
  xmlFreeParserCtxt(ctxt);
13420
  newDoc->intSubset = NULL;
13421
  newDoc->extSubset = NULL;
13422
        xmlFreeDoc(newDoc);
13423
  return(-1);
13424
    }
13425
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13426
    nodePush(ctxt, newRoot);
13427
    /* doc == NULL is only supported for historic reasons */
13428
    if (doc == NULL) {
13429
  ctxt->myDoc = newDoc;
13430
    } else {
13431
  ctxt->myDoc = newDoc;
13432
  newDoc->children->doc = doc;
13433
  /* Ensure that doc has XML spec namespace */
13434
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13435
  newDoc->oldNs = doc->oldNs;
13436
    }
13437
    ctxt->instate = XML_PARSER_CONTENT;
13438
    ctxt->input_id = 2;
13439
    ctxt->depth = depth;
13440
13441
    /*
13442
     * Doing validity checking on chunk doesn't make sense
13443
     */
13444
    ctxt->validate = 0;
13445
    ctxt->loadsubset = 0;
13446
    xmlDetectSAX2(ctxt);
13447
13448
    if ( doc != NULL ){
13449
        content = doc->children;
13450
        doc->children = NULL;
13451
        xmlParseContent(ctxt);
13452
        doc->children = content;
13453
    }
13454
    else {
13455
        xmlParseContent(ctxt);
13456
    }
13457
    if ((RAW == '<') && (NXT(1) == '/')) {
13458
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13459
    } else if (RAW != 0) {
13460
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13461
    }
13462
    if (ctxt->node != newDoc->children) {
13463
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13464
    }
13465
13466
    if (!ctxt->wellFormed) {
13467
        if (ctxt->errNo == 0)
13468
      ret = 1;
13469
  else
13470
      ret = ctxt->errNo;
13471
    } else {
13472
      ret = 0;
13473
    }
13474
13475
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13476
  xmlNodePtr cur;
13477
13478
  /*
13479
   * Return the newly created nodeset after unlinking it from
13480
   * they pseudo parent.
13481
   */
13482
  cur = newDoc->children->children;
13483
  *lst = cur;
13484
  while (cur != NULL) {
13485
      xmlSetTreeDoc(cur, doc);
13486
      cur->parent = NULL;
13487
      cur = cur->next;
13488
  }
13489
  newDoc->children->children = NULL;
13490
    }
13491
13492
    if (sax != NULL)
13493
  ctxt->sax = oldsax;
13494
    xmlFreeParserCtxt(ctxt);
13495
    newDoc->intSubset = NULL;
13496
    newDoc->extSubset = NULL;
13497
    /* This leaks the namespace list if doc == NULL */
13498
    newDoc->oldNs = NULL;
13499
    xmlFreeDoc(newDoc);
13500
13501
    return(ret);
13502
}
13503
13504
/**
13505
 * xmlSAXParseEntity:
13506
 * @sax:  the SAX handler block
13507
 * @filename:  the filename
13508
 *
13509
 * DEPRECATED: Don't use.
13510
 *
13511
 * parse an XML external entity out of context and build a tree.
13512
 * It use the given SAX function block to handle the parsing callback.
13513
 * If sax is NULL, fallback to the default DOM tree building routines.
13514
 *
13515
 * [78] extParsedEnt ::= TextDecl? content
13516
 *
13517
 * This correspond to a "Well Balanced" chunk
13518
 *
13519
 * Returns the resulting document tree
13520
 */
13521
13522
xmlDocPtr
13523
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13524
    xmlDocPtr ret;
13525
    xmlParserCtxtPtr ctxt;
13526
13527
    ctxt = xmlCreateFileParserCtxt(filename);
13528
    if (ctxt == NULL) {
13529
  return(NULL);
13530
    }
13531
    if (sax != NULL) {
13532
  if (ctxt->sax != NULL)
13533
      xmlFree(ctxt->sax);
13534
        ctxt->sax = sax;
13535
        ctxt->userData = NULL;
13536
    }
13537
13538
    xmlParseExtParsedEnt(ctxt);
13539
13540
    if (ctxt->wellFormed)
13541
  ret = ctxt->myDoc;
13542
    else {
13543
        ret = NULL;
13544
        xmlFreeDoc(ctxt->myDoc);
13545
        ctxt->myDoc = NULL;
13546
    }
13547
    if (sax != NULL)
13548
        ctxt->sax = NULL;
13549
    xmlFreeParserCtxt(ctxt);
13550
13551
    return(ret);
13552
}
13553
13554
/**
13555
 * xmlParseEntity:
13556
 * @filename:  the filename
13557
 *
13558
 * parse an XML external entity out of context and build a tree.
13559
 *
13560
 * [78] extParsedEnt ::= TextDecl? content
13561
 *
13562
 * This correspond to a "Well Balanced" chunk
13563
 *
13564
 * Returns the resulting document tree
13565
 */
13566
13567
xmlDocPtr
13568
xmlParseEntity(const char *filename) {
13569
    return(xmlSAXParseEntity(NULL, filename));
13570
}
13571
#endif /* LIBXML_SAX1_ENABLED */
13572
13573
/**
13574
 * xmlCreateEntityParserCtxtInternal:
13575
 * @URL:  the entity URL
13576
 * @ID:  the entity PUBLIC ID
13577
 * @base:  a possible base for the target URI
13578
 * @pctx:  parser context used to set options on new context
13579
 *
13580
 * Create a parser context for an external entity
13581
 * Automatic support for ZLIB/Compress compressed document is provided
13582
 * by default if found at compile-time.
13583
 *
13584
 * Returns the new parser context or NULL
13585
 */
13586
static xmlParserCtxtPtr
13587
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13588
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13589
0
        xmlParserCtxtPtr pctx) {
13590
0
    xmlParserCtxtPtr ctxt;
13591
0
    xmlParserInputPtr inputStream;
13592
0
    char *directory = NULL;
13593
0
    xmlChar *uri;
13594
13595
0
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13596
0
    if (ctxt == NULL) {
13597
0
  return(NULL);
13598
0
    }
13599
13600
0
    if (pctx != NULL) {
13601
0
        ctxt->options = pctx->options;
13602
0
        ctxt->_private = pctx->_private;
13603
0
  ctxt->input_id = pctx->input_id;
13604
0
    }
13605
13606
    /* Don't read from stdin. */
13607
0
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13608
0
        URL = BAD_CAST "./-";
13609
13610
0
    uri = xmlBuildURI(URL, base);
13611
13612
0
    if (uri == NULL) {
13613
0
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13614
0
  if (inputStream == NULL) {
13615
0
      xmlFreeParserCtxt(ctxt);
13616
0
      return(NULL);
13617
0
  }
13618
13619
0
  inputPush(ctxt, inputStream);
13620
13621
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13622
0
      directory = xmlParserGetDirectory((char *)URL);
13623
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13624
0
      ctxt->directory = directory;
13625
0
    } else {
13626
0
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13627
0
  if (inputStream == NULL) {
13628
0
      xmlFree(uri);
13629
0
      xmlFreeParserCtxt(ctxt);
13630
0
      return(NULL);
13631
0
  }
13632
13633
0
  inputPush(ctxt, inputStream);
13634
13635
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13636
0
      directory = xmlParserGetDirectory((char *)uri);
13637
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13638
0
      ctxt->directory = directory;
13639
0
  xmlFree(uri);
13640
0
    }
13641
0
    return(ctxt);
13642
0
}
13643
13644
/**
13645
 * xmlCreateEntityParserCtxt:
13646
 * @URL:  the entity URL
13647
 * @ID:  the entity PUBLIC ID
13648
 * @base:  a possible base for the target URI
13649
 *
13650
 * Create a parser context for an external entity
13651
 * Automatic support for ZLIB/Compress compressed document is provided
13652
 * by default if found at compile-time.
13653
 *
13654
 * Returns the new parser context or NULL
13655
 */
13656
xmlParserCtxtPtr
13657
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13658
0
                    const xmlChar *base) {
13659
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13660
13661
0
}
13662
13663
/************************************************************************
13664
 *                  *
13665
 *    Front ends when parsing from a file     *
13666
 *                  *
13667
 ************************************************************************/
13668
13669
/**
13670
 * xmlCreateURLParserCtxt:
13671
 * @filename:  the filename or URL
13672
 * @options:  a combination of xmlParserOption
13673
 *
13674
 * Create a parser context for a file or URL content.
13675
 * Automatic support for ZLIB/Compress compressed document is provided
13676
 * by default if found at compile-time and for file accesses
13677
 *
13678
 * Returns the new parser context or NULL
13679
 */
13680
xmlParserCtxtPtr
13681
xmlCreateURLParserCtxt(const char *filename, int options)
13682
0
{
13683
0
    xmlParserCtxtPtr ctxt;
13684
0
    xmlParserInputPtr inputStream;
13685
0
    char *directory = NULL;
13686
13687
0
    ctxt = xmlNewParserCtxt();
13688
0
    if (ctxt == NULL) {
13689
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13690
0
  return(NULL);
13691
0
    }
13692
13693
0
    if (options)
13694
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13695
0
    ctxt->linenumbers = 1;
13696
13697
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13698
0
    if (inputStream == NULL) {
13699
0
  xmlFreeParserCtxt(ctxt);
13700
0
  return(NULL);
13701
0
    }
13702
13703
0
    inputPush(ctxt, inputStream);
13704
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13705
0
        directory = xmlParserGetDirectory(filename);
13706
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13707
0
        ctxt->directory = directory;
13708
13709
0
    return(ctxt);
13710
0
}
13711
13712
/**
13713
 * xmlCreateFileParserCtxt:
13714
 * @filename:  the filename
13715
 *
13716
 * Create a parser context for a file content.
13717
 * Automatic support for ZLIB/Compress compressed document is provided
13718
 * by default if found at compile-time.
13719
 *
13720
 * Returns the new parser context or NULL
13721
 */
13722
xmlParserCtxtPtr
13723
xmlCreateFileParserCtxt(const char *filename)
13724
0
{
13725
0
    return(xmlCreateURLParserCtxt(filename, 0));
13726
0
}
13727
13728
#ifdef LIBXML_SAX1_ENABLED
13729
/**
13730
 * xmlSAXParseFileWithData:
13731
 * @sax:  the SAX handler block
13732
 * @filename:  the filename
13733
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13734
 *             documents
13735
 * @data:  the userdata
13736
 *
13737
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13738
 *
13739
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13740
 * compressed document is provided by default if found at compile-time.
13741
 * It use the given SAX function block to handle the parsing callback.
13742
 * If sax is NULL, fallback to the default DOM tree building routines.
13743
 *
13744
 * User data (void *) is stored within the parser context in the
13745
 * context's _private member, so it is available nearly everywhere in libxml
13746
 *
13747
 * Returns the resulting document tree
13748
 */
13749
13750
xmlDocPtr
13751
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13752
                        int recovery, void *data) {
13753
    xmlDocPtr ret;
13754
    xmlParserCtxtPtr ctxt;
13755
13756
    xmlInitParser();
13757
13758
    ctxt = xmlCreateFileParserCtxt(filename);
13759
    if (ctxt == NULL) {
13760
  return(NULL);
13761
    }
13762
    if (sax != NULL) {
13763
  if (ctxt->sax != NULL)
13764
      xmlFree(ctxt->sax);
13765
        ctxt->sax = sax;
13766
    }
13767
    xmlDetectSAX2(ctxt);
13768
    if (data!=NULL) {
13769
  ctxt->_private = data;
13770
    }
13771
13772
    if (ctxt->directory == NULL)
13773
        ctxt->directory = xmlParserGetDirectory(filename);
13774
13775
    ctxt->recovery = recovery;
13776
13777
    xmlParseDocument(ctxt);
13778
13779
    if ((ctxt->wellFormed) || recovery) {
13780
        ret = ctxt->myDoc;
13781
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13782
      if (ctxt->input->buf->compressed > 0)
13783
    ret->compression = 9;
13784
      else
13785
    ret->compression = ctxt->input->buf->compressed;
13786
  }
13787
    }
13788
    else {
13789
       ret = NULL;
13790
       xmlFreeDoc(ctxt->myDoc);
13791
       ctxt->myDoc = NULL;
13792
    }
13793
    if (sax != NULL)
13794
        ctxt->sax = NULL;
13795
    xmlFreeParserCtxt(ctxt);
13796
13797
    return(ret);
13798
}
13799
13800
/**
13801
 * xmlSAXParseFile:
13802
 * @sax:  the SAX handler block
13803
 * @filename:  the filename
13804
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13805
 *             documents
13806
 *
13807
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13808
 *
13809
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13810
 * compressed document is provided by default if found at compile-time.
13811
 * It use the given SAX function block to handle the parsing callback.
13812
 * If sax is NULL, fallback to the default DOM tree building routines.
13813
 *
13814
 * Returns the resulting document tree
13815
 */
13816
13817
xmlDocPtr
13818
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13819
                          int recovery) {
13820
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13821
}
13822
13823
/**
13824
 * xmlRecoverDoc:
13825
 * @cur:  a pointer to an array of xmlChar
13826
 *
13827
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13828
 *
13829
 * parse an XML in-memory document and build a tree.
13830
 * In the case the document is not Well Formed, a attempt to build a
13831
 * tree is tried anyway
13832
 *
13833
 * Returns the resulting document tree or NULL in case of failure
13834
 */
13835
13836
xmlDocPtr
13837
xmlRecoverDoc(const xmlChar *cur) {
13838
    return(xmlSAXParseDoc(NULL, cur, 1));
13839
}
13840
13841
/**
13842
 * xmlParseFile:
13843
 * @filename:  the filename
13844
 *
13845
 * DEPRECATED: Use xmlReadFile.
13846
 *
13847
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13848
 * compressed document is provided by default if found at compile-time.
13849
 *
13850
 * Returns the resulting document tree if the file was wellformed,
13851
 * NULL otherwise.
13852
 */
13853
13854
xmlDocPtr
13855
xmlParseFile(const char *filename) {
13856
    return(xmlSAXParseFile(NULL, filename, 0));
13857
}
13858
13859
/**
13860
 * xmlRecoverFile:
13861
 * @filename:  the filename
13862
 *
13863
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13864
 *
13865
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13866
 * compressed document is provided by default if found at compile-time.
13867
 * In the case the document is not Well Formed, it attempts to build
13868
 * a tree anyway
13869
 *
13870
 * Returns the resulting document tree or NULL in case of failure
13871
 */
13872
13873
xmlDocPtr
13874
xmlRecoverFile(const char *filename) {
13875
    return(xmlSAXParseFile(NULL, filename, 1));
13876
}
13877
13878
13879
/**
13880
 * xmlSetupParserForBuffer:
13881
 * @ctxt:  an XML parser context
13882
 * @buffer:  a xmlChar * buffer
13883
 * @filename:  a file name
13884
 *
13885
 * DEPRECATED: Don't use.
13886
 *
13887
 * Setup the parser context to parse a new buffer; Clears any prior
13888
 * contents from the parser context. The buffer parameter must not be
13889
 * NULL, but the filename parameter can be
13890
 */
13891
void
13892
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13893
                             const char* filename)
13894
{
13895
    xmlParserInputPtr input;
13896
13897
    if ((ctxt == NULL) || (buffer == NULL))
13898
        return;
13899
13900
    input = xmlNewInputStream(ctxt);
13901
    if (input == NULL) {
13902
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13903
        xmlClearParserCtxt(ctxt);
13904
        return;
13905
    }
13906
13907
    xmlClearParserCtxt(ctxt);
13908
    if (filename != NULL)
13909
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13910
    input->base = buffer;
13911
    input->cur = buffer;
13912
    input->end = &buffer[xmlStrlen(buffer)];
13913
    inputPush(ctxt, input);
13914
}
13915
13916
/**
13917
 * xmlSAXUserParseFile:
13918
 * @sax:  a SAX handler
13919
 * @user_data:  The user data returned on SAX callbacks
13920
 * @filename:  a file name
13921
 *
13922
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13923
 *
13924
 * parse an XML file and call the given SAX handler routines.
13925
 * Automatic support for ZLIB/Compress compressed document is provided
13926
 *
13927
 * Returns 0 in case of success or a error number otherwise
13928
 */
13929
int
13930
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13931
                    const char *filename) {
13932
    int ret = 0;
13933
    xmlParserCtxtPtr ctxt;
13934
13935
    ctxt = xmlCreateFileParserCtxt(filename);
13936
    if (ctxt == NULL) return -1;
13937
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13938
  xmlFree(ctxt->sax);
13939
    ctxt->sax = sax;
13940
    xmlDetectSAX2(ctxt);
13941
13942
    if (user_data != NULL)
13943
  ctxt->userData = user_data;
13944
13945
    xmlParseDocument(ctxt);
13946
13947
    if (ctxt->wellFormed)
13948
  ret = 0;
13949
    else {
13950
        if (ctxt->errNo != 0)
13951
      ret = ctxt->errNo;
13952
  else
13953
      ret = -1;
13954
    }
13955
    if (sax != NULL)
13956
  ctxt->sax = NULL;
13957
    if (ctxt->myDoc != NULL) {
13958
        xmlFreeDoc(ctxt->myDoc);
13959
  ctxt->myDoc = NULL;
13960
    }
13961
    xmlFreeParserCtxt(ctxt);
13962
13963
    return ret;
13964
}
13965
#endif /* LIBXML_SAX1_ENABLED */
13966
13967
/************************************************************************
13968
 *                  *
13969
 *    Front ends when parsing from memory     *
13970
 *                  *
13971
 ************************************************************************/
13972
13973
/**
13974
 * xmlCreateMemoryParserCtxt:
13975
 * @buffer:  a pointer to a char array
13976
 * @size:  the size of the array
13977
 *
13978
 * Create a parser context for an XML in-memory document.
13979
 *
13980
 * Returns the new parser context or NULL
13981
 */
13982
xmlParserCtxtPtr
13983
160
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13984
160
    xmlParserCtxtPtr ctxt;
13985
160
    xmlParserInputPtr input;
13986
160
    xmlParserInputBufferPtr buf;
13987
13988
160
    if (buffer == NULL)
13989
3
  return(NULL);
13990
157
    if (size <= 0)
13991
0
  return(NULL);
13992
13993
157
    ctxt = xmlNewParserCtxt();
13994
157
    if (ctxt == NULL)
13995
0
  return(NULL);
13996
13997
157
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13998
157
    if (buf == NULL) {
13999
0
  xmlFreeParserCtxt(ctxt);
14000
0
  return(NULL);
14001
0
    }
14002
14003
157
    input = xmlNewInputStream(ctxt);
14004
157
    if (input == NULL) {
14005
0
  xmlFreeParserInputBuffer(buf);
14006
0
  xmlFreeParserCtxt(ctxt);
14007
0
  return(NULL);
14008
0
    }
14009
14010
157
    input->filename = NULL;
14011
157
    input->buf = buf;
14012
157
    xmlBufResetInput(input->buf->buffer, input);
14013
14014
157
    inputPush(ctxt, input);
14015
157
    return(ctxt);
14016
157
}
14017
14018
#ifdef LIBXML_SAX1_ENABLED
14019
/**
14020
 * xmlSAXParseMemoryWithData:
14021
 * @sax:  the SAX handler block
14022
 * @buffer:  an pointer to a char array
14023
 * @size:  the size of the array
14024
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14025
 *             documents
14026
 * @data:  the userdata
14027
 *
14028
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14029
 *
14030
 * parse an XML in-memory block and use the given SAX function block
14031
 * to handle the parsing callback. If sax is NULL, fallback to the default
14032
 * DOM tree building routines.
14033
 *
14034
 * User data (void *) is stored within the parser context in the
14035
 * context's _private member, so it is available nearly everywhere in libxml
14036
 *
14037
 * Returns the resulting document tree
14038
 */
14039
14040
xmlDocPtr
14041
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14042
            int size, int recovery, void *data) {
14043
    xmlDocPtr ret;
14044
    xmlParserCtxtPtr ctxt;
14045
14046
    xmlInitParser();
14047
14048
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14049
    if (ctxt == NULL) return(NULL);
14050
    if (sax != NULL) {
14051
  if (ctxt->sax != NULL)
14052
      xmlFree(ctxt->sax);
14053
        ctxt->sax = sax;
14054
    }
14055
    xmlDetectSAX2(ctxt);
14056
    if (data!=NULL) {
14057
  ctxt->_private=data;
14058
    }
14059
14060
    ctxt->recovery = recovery;
14061
14062
    xmlParseDocument(ctxt);
14063
14064
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14065
    else {
14066
       ret = NULL;
14067
       xmlFreeDoc(ctxt->myDoc);
14068
       ctxt->myDoc = NULL;
14069
    }
14070
    if (sax != NULL)
14071
  ctxt->sax = NULL;
14072
    xmlFreeParserCtxt(ctxt);
14073
14074
    return(ret);
14075
}
14076
14077
/**
14078
 * xmlSAXParseMemory:
14079
 * @sax:  the SAX handler block
14080
 * @buffer:  an pointer to a char array
14081
 * @size:  the size of the array
14082
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14083
 *             documents
14084
 *
14085
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14086
 *
14087
 * parse an XML in-memory block and use the given SAX function block
14088
 * to handle the parsing callback. If sax is NULL, fallback to the default
14089
 * DOM tree building routines.
14090
 *
14091
 * Returns the resulting document tree
14092
 */
14093
xmlDocPtr
14094
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14095
            int size, int recovery) {
14096
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14097
}
14098
14099
/**
14100
 * xmlParseMemory:
14101
 * @buffer:  an pointer to a char array
14102
 * @size:  the size of the array
14103
 *
14104
 * DEPRECATED: Use xmlReadMemory.
14105
 *
14106
 * parse an XML in-memory block and build a tree.
14107
 *
14108
 * Returns the resulting document tree
14109
 */
14110
14111
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14112
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14113
}
14114
14115
/**
14116
 * xmlRecoverMemory:
14117
 * @buffer:  an pointer to a char array
14118
 * @size:  the size of the array
14119
 *
14120
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14121
 *
14122
 * parse an XML in-memory block and build a tree.
14123
 * In the case the document is not Well Formed, an attempt to
14124
 * build a tree is tried anyway
14125
 *
14126
 * Returns the resulting document tree or NULL in case of error
14127
 */
14128
14129
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14130
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14131
}
14132
14133
/**
14134
 * xmlSAXUserParseMemory:
14135
 * @sax:  a SAX handler
14136
 * @user_data:  The user data returned on SAX callbacks
14137
 * @buffer:  an in-memory XML document input
14138
 * @size:  the length of the XML document in bytes
14139
 *
14140
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14141
 *
14142
 * parse an XML in-memory buffer and call the given SAX handler routines.
14143
 *
14144
 * Returns 0 in case of success or a error number otherwise
14145
 */
14146
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14147
        const char *buffer, int size) {
14148
    int ret = 0;
14149
    xmlParserCtxtPtr ctxt;
14150
14151
    xmlInitParser();
14152
14153
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14154
    if (ctxt == NULL) return -1;
14155
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14156
        xmlFree(ctxt->sax);
14157
    ctxt->sax = sax;
14158
    xmlDetectSAX2(ctxt);
14159
14160
    if (user_data != NULL)
14161
  ctxt->userData = user_data;
14162
14163
    xmlParseDocument(ctxt);
14164
14165
    if (ctxt->wellFormed)
14166
  ret = 0;
14167
    else {
14168
        if (ctxt->errNo != 0)
14169
      ret = ctxt->errNo;
14170
  else
14171
      ret = -1;
14172
    }
14173
    if (sax != NULL)
14174
        ctxt->sax = NULL;
14175
    if (ctxt->myDoc != NULL) {
14176
        xmlFreeDoc(ctxt->myDoc);
14177
  ctxt->myDoc = NULL;
14178
    }
14179
    xmlFreeParserCtxt(ctxt);
14180
14181
    return ret;
14182
}
14183
#endif /* LIBXML_SAX1_ENABLED */
14184
14185
/**
14186
 * xmlCreateDocParserCtxt:
14187
 * @cur:  a pointer to an array of xmlChar
14188
 *
14189
 * Creates a parser context for an XML in-memory document.
14190
 *
14191
 * Returns the new parser context or NULL
14192
 */
14193
xmlParserCtxtPtr
14194
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14195
0
    int len;
14196
14197
0
    if (cur == NULL)
14198
0
  return(NULL);
14199
0
    len = xmlStrlen(cur);
14200
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14201
0
}
14202
14203
#ifdef LIBXML_SAX1_ENABLED
14204
/**
14205
 * xmlSAXParseDoc:
14206
 * @sax:  the SAX handler block
14207
 * @cur:  a pointer to an array of xmlChar
14208
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14209
 *             documents
14210
 *
14211
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14212
 *
14213
 * parse an XML in-memory document and build a tree.
14214
 * It use the given SAX function block to handle the parsing callback.
14215
 * If sax is NULL, fallback to the default DOM tree building routines.
14216
 *
14217
 * Returns the resulting document tree
14218
 */
14219
14220
xmlDocPtr
14221
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14222
    xmlDocPtr ret;
14223
    xmlParserCtxtPtr ctxt;
14224
    xmlSAXHandlerPtr oldsax = NULL;
14225
14226
    if (cur == NULL) return(NULL);
14227
14228
14229
    ctxt = xmlCreateDocParserCtxt(cur);
14230
    if (ctxt == NULL) return(NULL);
14231
    if (sax != NULL) {
14232
        oldsax = ctxt->sax;
14233
        ctxt->sax = sax;
14234
        ctxt->userData = NULL;
14235
    }
14236
    xmlDetectSAX2(ctxt);
14237
14238
    xmlParseDocument(ctxt);
14239
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14240
    else {
14241
       ret = NULL;
14242
       xmlFreeDoc(ctxt->myDoc);
14243
       ctxt->myDoc = NULL;
14244
    }
14245
    if (sax != NULL)
14246
  ctxt->sax = oldsax;
14247
    xmlFreeParserCtxt(ctxt);
14248
14249
    return(ret);
14250
}
14251
14252
/**
14253
 * xmlParseDoc:
14254
 * @cur:  a pointer to an array of xmlChar
14255
 *
14256
 * DEPRECATED: Use xmlReadDoc.
14257
 *
14258
 * parse an XML in-memory document and build a tree.
14259
 *
14260
 * Returns the resulting document tree
14261
 */
14262
14263
xmlDocPtr
14264
xmlParseDoc(const xmlChar *cur) {
14265
    return(xmlSAXParseDoc(NULL, cur, 0));
14266
}
14267
#endif /* LIBXML_SAX1_ENABLED */
14268
14269
#ifdef LIBXML_LEGACY_ENABLED
14270
/************************************************************************
14271
 *                  *
14272
 *  Specific function to keep track of entities references    *
14273
 *  and used by the XSLT debugger         *
14274
 *                  *
14275
 ************************************************************************/
14276
14277
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14278
14279
/**
14280
 * xmlAddEntityReference:
14281
 * @ent : A valid entity
14282
 * @firstNode : A valid first node for children of entity
14283
 * @lastNode : A valid last node of children entity
14284
 *
14285
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14286
 */
14287
static void
14288
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14289
                      xmlNodePtr lastNode)
14290
{
14291
    if (xmlEntityRefFunc != NULL) {
14292
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14293
    }
14294
}
14295
14296
14297
/**
14298
 * xmlSetEntityReferenceFunc:
14299
 * @func: A valid function
14300
 *
14301
 * Set the function to call call back when a xml reference has been made
14302
 */
14303
void
14304
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14305
{
14306
    xmlEntityRefFunc = func;
14307
}
14308
#endif /* LIBXML_LEGACY_ENABLED */
14309
14310
/************************************************************************
14311
 *                  *
14312
 *        Miscellaneous       *
14313
 *                  *
14314
 ************************************************************************/
14315
14316
static int xmlParserInitialized = 0;
14317
14318
/**
14319
 * xmlInitParser:
14320
 *
14321
 * Initialization function for the XML parser.
14322
 * This is not reentrant. Call once before processing in case of
14323
 * use in multithreaded programs.
14324
 */
14325
14326
void
14327
1.67M
xmlInitParser(void) {
14328
    /*
14329
     * Note that the initialization code must not make memory allocations.
14330
     */
14331
1.67M
    if (xmlParserInitialized != 0)
14332
1.67M
  return;
14333
14334
2
#ifdef LIBXML_THREAD_ENABLED
14335
2
    __xmlGlobalInitMutexLock();
14336
2
    if (xmlParserInitialized == 0) {
14337
2
#endif
14338
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14339
        if (xmlFree == free)
14340
            atexit(xmlCleanupParser);
14341
#endif
14342
14343
2
  xmlInitThreadsInternal();
14344
2
  xmlInitGlobalsInternal();
14345
2
  xmlInitMemoryInternal();
14346
2
        __xmlInitializeDict();
14347
2
  xmlInitEncodingInternal();
14348
2
  xmlRegisterDefaultInputCallbacks();
14349
2
#ifdef LIBXML_OUTPUT_ENABLED
14350
2
  xmlRegisterDefaultOutputCallbacks();
14351
2
#endif /* LIBXML_OUTPUT_ENABLED */
14352
2
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14353
2
  xmlInitXPathInternal();
14354
2
#endif
14355
2
  xmlParserInitialized = 1;
14356
2
#ifdef LIBXML_THREAD_ENABLED
14357
2
    }
14358
2
    __xmlGlobalInitMutexUnlock();
14359
2
#endif
14360
2
}
14361
14362
/**
14363
 * xmlCleanupParser:
14364
 *
14365
 * This function name is somewhat misleading. It does not clean up
14366
 * parser state, it cleans up memory allocated by the library itself.
14367
 * It is a cleanup function for the XML library. It tries to reclaim all
14368
 * related global memory allocated for the library processing.
14369
 * It doesn't deallocate any document related memory. One should
14370
 * call xmlCleanupParser() only when the process has finished using
14371
 * the library and all XML/HTML documents built with it.
14372
 * See also xmlInitParser() which has the opposite function of preparing
14373
 * the library for operations.
14374
 *
14375
 * WARNING: if your application is multithreaded or has plugin support
14376
 *          calling this may crash the application if another thread or
14377
 *          a plugin is still using libxml2. It's sometimes very hard to
14378
 *          guess if libxml2 is in use in the application, some libraries
14379
 *          or plugins may use it without notice. In case of doubt abstain
14380
 *          from calling this function or do it just before calling exit()
14381
 *          to avoid leak reports from valgrind !
14382
 */
14383
14384
void
14385
0
xmlCleanupParser(void) {
14386
0
    if (!xmlParserInitialized)
14387
0
  return;
14388
14389
0
    xmlCleanupCharEncodingHandlers();
14390
0
#ifdef LIBXML_CATALOG_ENABLED
14391
0
    xmlCatalogCleanup();
14392
0
#endif
14393
0
    xmlCleanupDictInternal();
14394
0
    xmlCleanupInputCallbacks();
14395
0
#ifdef LIBXML_OUTPUT_ENABLED
14396
0
    xmlCleanupOutputCallbacks();
14397
0
#endif
14398
#ifdef LIBXML_SCHEMAS_ENABLED
14399
    xmlSchemaCleanupTypes();
14400
    xmlRelaxNGCleanupTypes();
14401
#endif
14402
0
    xmlCleanupGlobalsInternal();
14403
0
    xmlCleanupThreadsInternal();
14404
0
    xmlCleanupMemoryInternal();
14405
0
    xmlParserInitialized = 0;
14406
0
}
14407
14408
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Destructor hook that runs the library cleanup, but only when the
 * default free() is installed as xmlFree.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void)
{
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14421
14422
/************************************************************************
14423
 *                  *
14424
 *  New set (2.6.0) of simpler and more flexible APIs   *
14425
 *                  *
14426
 ************************************************************************/
14427
14428
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement: it must be followed by a semicolon and can be
 * used safely as the body of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14439
14440
/**
14441
 * xmlCtxtReset:
14442
 * @ctxt: an XML parser context
14443
 *
14444
 * Reset a parser context
14445
 */
14446
void
14447
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14448
0
{
14449
0
    xmlParserInputPtr input;
14450
0
    xmlDictPtr dict;
14451
14452
0
    if (ctxt == NULL)
14453
0
        return;
14454
14455
0
    dict = ctxt->dict;
14456
14457
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14458
0
        xmlFreeInputStream(input);
14459
0
    }
14460
0
    ctxt->inputNr = 0;
14461
0
    ctxt->input = NULL;
14462
14463
0
    ctxt->spaceNr = 0;
14464
0
    if (ctxt->spaceTab != NULL) {
14465
0
  ctxt->spaceTab[0] = -1;
14466
0
  ctxt->space = &ctxt->spaceTab[0];
14467
0
    } else {
14468
0
        ctxt->space = NULL;
14469
0
    }
14470
14471
14472
0
    ctxt->nodeNr = 0;
14473
0
    ctxt->node = NULL;
14474
14475
0
    ctxt->nameNr = 0;
14476
0
    ctxt->name = NULL;
14477
14478
0
    ctxt->nsNr = 0;
14479
14480
0
    DICT_FREE(ctxt->version);
14481
0
    ctxt->version = NULL;
14482
0
    DICT_FREE(ctxt->encoding);
14483
0
    ctxt->encoding = NULL;
14484
0
    DICT_FREE(ctxt->directory);
14485
0
    ctxt->directory = NULL;
14486
0
    DICT_FREE(ctxt->extSubURI);
14487
0
    ctxt->extSubURI = NULL;
14488
0
    DICT_FREE(ctxt->extSubSystem);
14489
0
    ctxt->extSubSystem = NULL;
14490
0
    if (ctxt->myDoc != NULL)
14491
0
        xmlFreeDoc(ctxt->myDoc);
14492
0
    ctxt->myDoc = NULL;
14493
14494
0
    ctxt->standalone = -1;
14495
0
    ctxt->hasExternalSubset = 0;
14496
0
    ctxt->hasPErefs = 0;
14497
0
    ctxt->html = 0;
14498
0
    ctxt->external = 0;
14499
0
    ctxt->instate = XML_PARSER_START;
14500
0
    ctxt->token = 0;
14501
14502
0
    ctxt->wellFormed = 1;
14503
0
    ctxt->nsWellFormed = 1;
14504
0
    ctxt->disableSAX = 0;
14505
0
    ctxt->valid = 1;
14506
#if 0
14507
    ctxt->vctxt.userData = ctxt;
14508
    ctxt->vctxt.error = xmlParserValidityError;
14509
    ctxt->vctxt.warning = xmlParserValidityWarning;
14510
#endif
14511
0
    ctxt->record_info = 0;
14512
0
    ctxt->checkIndex = 0;
14513
0
    ctxt->endCheckState = 0;
14514
0
    ctxt->inSubset = 0;
14515
0
    ctxt->errNo = XML_ERR_OK;
14516
0
    ctxt->depth = 0;
14517
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14518
0
    ctxt->catalogs = NULL;
14519
0
    ctxt->sizeentities = 0;
14520
0
    ctxt->sizeentcopy = 0;
14521
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14522
14523
0
    if (ctxt->attsDefault != NULL) {
14524
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14525
0
        ctxt->attsDefault = NULL;
14526
0
    }
14527
0
    if (ctxt->attsSpecial != NULL) {
14528
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14529
0
        ctxt->attsSpecial = NULL;
14530
0
    }
14531
14532
0
#ifdef LIBXML_CATALOG_ENABLED
14533
0
    if (ctxt->catalogs != NULL)
14534
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14535
0
#endif
14536
0
    ctxt->nbErrors = 0;
14537
0
    ctxt->nbWarnings = 0;
14538
0
    if (ctxt->lastError.code != XML_ERR_OK)
14539
0
        xmlResetError(&ctxt->lastError);
14540
0
}
14541
14542
/**
14543
 * xmlCtxtResetPush:
14544
 * @ctxt: an XML parser context
14545
 * @chunk:  a pointer to an array of chars
14546
 * @size:  number of chars in the array
14547
 * @filename:  an optional file name or URI
14548
 * @encoding:  the document encoding, or NULL
14549
 *
14550
 * Reset a push parser context
14551
 *
14552
 * Returns 0 in case of success and 1 in case of error
14553
 */
14554
int
14555
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14556
                 int size, const char *filename, const char *encoding)
14557
0
{
14558
0
    xmlParserInputPtr inputStream;
14559
0
    xmlParserInputBufferPtr buf;
14560
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14561
14562
0
    if (ctxt == NULL)
14563
0
        return(1);
14564
14565
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14566
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14567
14568
0
    buf = xmlAllocParserInputBuffer(enc);
14569
0
    if (buf == NULL)
14570
0
        return(1);
14571
14572
0
    if (ctxt == NULL) {
14573
0
        xmlFreeParserInputBuffer(buf);
14574
0
        return(1);
14575
0
    }
14576
14577
0
    xmlCtxtReset(ctxt);
14578
14579
0
    if (filename == NULL) {
14580
0
        ctxt->directory = NULL;
14581
0
    } else {
14582
0
        ctxt->directory = xmlParserGetDirectory(filename);
14583
0
    }
14584
14585
0
    inputStream = xmlNewInputStream(ctxt);
14586
0
    if (inputStream == NULL) {
14587
0
        xmlFreeParserInputBuffer(buf);
14588
0
        return(1);
14589
0
    }
14590
14591
0
    if (filename == NULL)
14592
0
        inputStream->filename = NULL;
14593
0
    else
14594
0
        inputStream->filename = (char *)
14595
0
            xmlCanonicPath((const xmlChar *) filename);
14596
0
    inputStream->buf = buf;
14597
0
    xmlBufResetInput(buf->buffer, inputStream);
14598
14599
0
    inputPush(ctxt, inputStream);
14600
14601
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14602
0
        (ctxt->input->buf != NULL)) {
14603
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14604
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14605
14606
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14607
14608
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14609
#ifdef DEBUG_PUSH
14610
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14611
#endif
14612
0
    }
14613
14614
0
    if (encoding != NULL) {
14615
0
        xmlCharEncodingHandlerPtr hdlr;
14616
14617
0
        if (ctxt->encoding != NULL)
14618
0
      xmlFree((xmlChar *) ctxt->encoding);
14619
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14620
14621
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14622
0
        if (hdlr != NULL) {
14623
0
            xmlSwitchToEncoding(ctxt, hdlr);
14624
0
  } else {
14625
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14626
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14627
0
        }
14628
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14629
0
        xmlSwitchEncoding(ctxt, enc);
14630
0
    }
14631
14632
0
    return(0);
14633
0
}
14634
14635
14636
/**
14637
 * xmlCtxtUseOptionsInternal:
14638
 * @ctxt: an XML parser context
14639
 * @options:  a combination of xmlParserOption
14640
 * @encoding:  the user provided encoding to use
14641
 *
14642
 * Applies the options to the parser context
14643
 *
14644
 * Returns 0 in case of success, the set of unknown or unimplemented options
14645
 *         in case of error.
14646
 */
14647
static int
14648
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14649
157
{
14650
157
    if (ctxt == NULL)
14651
0
        return(-1);
14652
157
    if (encoding != NULL) {
14653
0
        if (ctxt->encoding != NULL)
14654
0
      xmlFree((xmlChar *) ctxt->encoding);
14655
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14656
0
    }
14657
157
    if (options & XML_PARSE_RECOVER) {
14658
157
        ctxt->recovery = 1;
14659
157
        options -= XML_PARSE_RECOVER;
14660
157
  ctxt->options |= XML_PARSE_RECOVER;
14661
157
    } else
14662
0
        ctxt->recovery = 0;
14663
157
    if (options & XML_PARSE_DTDLOAD) {
14664
0
        ctxt->loadsubset = XML_DETECT_IDS;
14665
0
        options -= XML_PARSE_DTDLOAD;
14666
0
  ctxt->options |= XML_PARSE_DTDLOAD;
14667
0
    } else
14668
157
        ctxt->loadsubset = 0;
14669
157
    if (options & XML_PARSE_DTDATTR) {
14670
0
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14671
0
        options -= XML_PARSE_DTDATTR;
14672
0
  ctxt->options |= XML_PARSE_DTDATTR;
14673
0
    }
14674
157
    if (options & XML_PARSE_NOENT) {
14675
0
        ctxt->replaceEntities = 1;
14676
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14677
0
        options -= XML_PARSE_NOENT;
14678
0
  ctxt->options |= XML_PARSE_NOENT;
14679
0
    } else
14680
157
        ctxt->replaceEntities = 0;
14681
157
    if (options & XML_PARSE_PEDANTIC) {
14682
0
        ctxt->pedantic = 1;
14683
0
        options -= XML_PARSE_PEDANTIC;
14684
0
  ctxt->options |= XML_PARSE_PEDANTIC;
14685
0
    } else
14686
157
        ctxt->pedantic = 0;
14687
157
    if (options & XML_PARSE_NOBLANKS) {
14688
0
        ctxt->keepBlanks = 0;
14689
0
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14690
0
        options -= XML_PARSE_NOBLANKS;
14691
0
  ctxt->options |= XML_PARSE_NOBLANKS;
14692
0
    } else
14693
157
        ctxt->keepBlanks = 1;
14694
157
    if (options & XML_PARSE_DTDVALID) {
14695
0
        ctxt->validate = 1;
14696
0
        if (options & XML_PARSE_NOWARNING)
14697
0
            ctxt->vctxt.warning = NULL;
14698
0
        if (options & XML_PARSE_NOERROR)
14699
0
            ctxt->vctxt.error = NULL;
14700
0
        options -= XML_PARSE_DTDVALID;
14701
0
  ctxt->options |= XML_PARSE_DTDVALID;
14702
0
    } else
14703
157
        ctxt->validate = 0;
14704
157
    if (options & XML_PARSE_NOWARNING) {
14705
0
        ctxt->sax->warning = NULL;
14706
0
        options -= XML_PARSE_NOWARNING;
14707
0
    }
14708
157
    if (options & XML_PARSE_NOERROR) {
14709
0
        ctxt->sax->error = NULL;
14710
0
        ctxt->sax->fatalError = NULL;
14711
0
        options -= XML_PARSE_NOERROR;
14712
0
    }
14713
#ifdef LIBXML_SAX1_ENABLED
14714
    if (options & XML_PARSE_SAX1) {
14715
        ctxt->sax->startElement = xmlSAX2StartElement;
14716
        ctxt->sax->endElement = xmlSAX2EndElement;
14717
        ctxt->sax->startElementNs = NULL;
14718
        ctxt->sax->endElementNs = NULL;
14719
        ctxt->sax->initialized = 1;
14720
        options -= XML_PARSE_SAX1;
14721
  ctxt->options |= XML_PARSE_SAX1;
14722
    }
14723
#endif /* LIBXML_SAX1_ENABLED */
14724
157
    if (options & XML_PARSE_NODICT) {
14725
0
        ctxt->dictNames = 0;
14726
0
        options -= XML_PARSE_NODICT;
14727
0
  ctxt->options |= XML_PARSE_NODICT;
14728
157
    } else {
14729
157
        ctxt->dictNames = 1;
14730
157
    }
14731
157
    if (options & XML_PARSE_NOCDATA) {
14732
0
        ctxt->sax->cdataBlock = NULL;
14733
0
        options -= XML_PARSE_NOCDATA;
14734
0
  ctxt->options |= XML_PARSE_NOCDATA;
14735
0
    }
14736
157
    if (options & XML_PARSE_NSCLEAN) {
14737
0
  ctxt->options |= XML_PARSE_NSCLEAN;
14738
0
        options -= XML_PARSE_NSCLEAN;
14739
0
    }
14740
157
    if (options & XML_PARSE_NONET) {
14741
0
  ctxt->options |= XML_PARSE_NONET;
14742
0
        options -= XML_PARSE_NONET;
14743
0
    }
14744
157
    if (options & XML_PARSE_COMPACT) {
14745
0
  ctxt->options |= XML_PARSE_COMPACT;
14746
0
        options -= XML_PARSE_COMPACT;
14747
0
    }
14748
157
    if (options & XML_PARSE_OLD10) {
14749
0
  ctxt->options |= XML_PARSE_OLD10;
14750
0
        options -= XML_PARSE_OLD10;
14751
0
    }
14752
157
    if (options & XML_PARSE_NOBASEFIX) {
14753
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
14754
0
        options -= XML_PARSE_NOBASEFIX;
14755
0
    }
14756
157
    if (options & XML_PARSE_HUGE) {
14757
0
  ctxt->options |= XML_PARSE_HUGE;
14758
0
        options -= XML_PARSE_HUGE;
14759
0
        if (ctxt->dict != NULL)
14760
0
            xmlDictSetLimit(ctxt->dict, 0);
14761
0
    }
14762
157
    if (options & XML_PARSE_OLDSAX) {
14763
0
  ctxt->options |= XML_PARSE_OLDSAX;
14764
0
        options -= XML_PARSE_OLDSAX;
14765
0
    }
14766
157
    if (options & XML_PARSE_IGNORE_ENC) {
14767
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14768
0
        options -= XML_PARSE_IGNORE_ENC;
14769
0
    }
14770
157
    if (options & XML_PARSE_BIG_LINES) {
14771
0
  ctxt->options |= XML_PARSE_BIG_LINES;
14772
0
        options -= XML_PARSE_BIG_LINES;
14773
0
    }
14774
157
    ctxt->linenumbers = 1;
14775
157
    return (options);
14776
157
}
14777
14778
/**
14779
 * xmlCtxtUseOptions:
14780
 * @ctxt: an XML parser context
14781
 * @options:  a combination of xmlParserOption
14782
 *
14783
 * Applies the options to the parser context
14784
 *
14785
 * Returns 0 in case of success, the set of unknown or unimplemented options
14786
 *         in case of error.
14787
 */
14788
int
14789
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14790
0
{
14791
0
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14792
0
}
14793
14794
/**
14795
 * xmlDoRead:
14796
 * @ctxt:  an XML parser context
14797
 * @URL:  the base URL to use for the document
14798
 * @encoding:  the document encoding, or NULL
14799
 * @options:  a combination of xmlParserOption
14800
 * @reuse:  keep the context for reuse
14801
 *
14802
 * Common front-end for the xmlRead functions
14803
 *
14804
 * Returns the resulting document tree or NULL
14805
 */
14806
static xmlDocPtr
14807
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14808
          int options, int reuse)
14809
157
{
14810
157
    xmlDocPtr ret;
14811
14812
157
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14813
157
    if (encoding != NULL) {
14814
0
        xmlCharEncodingHandlerPtr hdlr;
14815
14816
0
  hdlr = xmlFindCharEncodingHandler(encoding);
14817
0
  if (hdlr != NULL)
14818
0
      xmlSwitchToEncoding(ctxt, hdlr);
14819
0
    }
14820
157
    if ((URL != NULL) && (ctxt->input != NULL) &&
14821
157
        (ctxt->input->filename == NULL))
14822
0
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14823
157
    xmlParseDocument(ctxt);
14824
157
    if ((ctxt->wellFormed) || ctxt->recovery)
14825
157
        ret = ctxt->myDoc;
14826
0
    else {
14827
0
        ret = NULL;
14828
0
  if (ctxt->myDoc != NULL) {
14829
0
      xmlFreeDoc(ctxt->myDoc);
14830
0
  }
14831
0
    }
14832
157
    ctxt->myDoc = NULL;
14833
157
    if (!reuse) {
14834
157
  xmlFreeParserCtxt(ctxt);
14835
157
    }
14836
14837
157
    return (ret);
14838
157
}
14839
14840
/**
14841
 * xmlReadDoc:
14842
 * @cur:  a pointer to a zero terminated string
14843
 * @URL:  the base URL to use for the document
14844
 * @encoding:  the document encoding, or NULL
14845
 * @options:  a combination of xmlParserOption
14846
 *
14847
 * parse an XML in-memory document and build a tree.
14848
 *
14849
 * Returns the resulting document tree
14850
 */
14851
xmlDocPtr
14852
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14853
0
{
14854
0
    xmlParserCtxtPtr ctxt;
14855
14856
0
    if (cur == NULL)
14857
0
        return (NULL);
14858
0
    xmlInitParser();
14859
14860
0
    ctxt = xmlCreateDocParserCtxt(cur);
14861
0
    if (ctxt == NULL)
14862
0
        return (NULL);
14863
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14864
0
}
14865
14866
/**
14867
 * xmlReadFile:
14868
 * @filename:  a file or URL
14869
 * @encoding:  the document encoding, or NULL
14870
 * @options:  a combination of xmlParserOption
14871
 *
14872
 * parse an XML file from the filesystem or the network.
14873
 *
14874
 * Returns the resulting document tree
14875
 */
14876
xmlDocPtr
14877
xmlReadFile(const char *filename, const char *encoding, int options)
14878
0
{
14879
0
    xmlParserCtxtPtr ctxt;
14880
14881
0
    xmlInitParser();
14882
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
14883
0
    if (ctxt == NULL)
14884
0
        return (NULL);
14885
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14886
0
}
14887
14888
/**
14889
 * xmlReadMemory:
14890
 * @buffer:  a pointer to a char array
14891
 * @size:  the size of the array
14892
 * @URL:  the base URL to use for the document
14893
 * @encoding:  the document encoding, or NULL
14894
 * @options:  a combination of xmlParserOption
14895
 *
14896
 * parse an XML in-memory document and build a tree.
14897
 *
14898
 * Returns the resulting document tree
14899
 */
14900
xmlDocPtr
14901
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14902
160
{
14903
160
    xmlParserCtxtPtr ctxt;
14904
14905
160
    xmlInitParser();
14906
160
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14907
160
    if (ctxt == NULL)
14908
3
        return (NULL);
14909
157
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14910
160
}
14911
14912
/**
14913
 * xmlReadFd:
14914
 * @fd:  an open file descriptor
14915
 * @URL:  the base URL to use for the document
14916
 * @encoding:  the document encoding, or NULL
14917
 * @options:  a combination of xmlParserOption
14918
 *
14919
 * parse an XML from a file descriptor and build a tree.
14920
 * NOTE that the file descriptor will not be closed when the
14921
 *      reader is closed or reset.
14922
 *
14923
 * Returns the resulting document tree
14924
 */
14925
xmlDocPtr
14926
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14927
0
{
14928
0
    xmlParserCtxtPtr ctxt;
14929
0
    xmlParserInputBufferPtr input;
14930
0
    xmlParserInputPtr stream;
14931
14932
0
    if (fd < 0)
14933
0
        return (NULL);
14934
0
    xmlInitParser();
14935
14936
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14937
0
    if (input == NULL)
14938
0
        return (NULL);
14939
0
    input->closecallback = NULL;
14940
0
    ctxt = xmlNewParserCtxt();
14941
0
    if (ctxt == NULL) {
14942
0
        xmlFreeParserInputBuffer(input);
14943
0
        return (NULL);
14944
0
    }
14945
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14946
0
    if (stream == NULL) {
14947
0
        xmlFreeParserInputBuffer(input);
14948
0
  xmlFreeParserCtxt(ctxt);
14949
0
        return (NULL);
14950
0
    }
14951
0
    inputPush(ctxt, stream);
14952
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14953
0
}
14954
14955
/**
14956
 * xmlReadIO:
14957
 * @ioread:  an I/O read function
14958
 * @ioclose:  an I/O close function
14959
 * @ioctx:  an I/O handler
14960
 * @URL:  the base URL to use for the document
14961
 * @encoding:  the document encoding, or NULL
14962
 * @options:  a combination of xmlParserOption
14963
 *
14964
 * parse an XML document from I/O functions and source and build a tree.
14965
 *
14966
 * Returns the resulting document tree
14967
 */
14968
xmlDocPtr
14969
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14970
          void *ioctx, const char *URL, const char *encoding, int options)
14971
0
{
14972
0
    xmlParserCtxtPtr ctxt;
14973
0
    xmlParserInputBufferPtr input;
14974
0
    xmlParserInputPtr stream;
14975
14976
0
    if (ioread == NULL)
14977
0
        return (NULL);
14978
0
    xmlInitParser();
14979
14980
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14981
0
                                         XML_CHAR_ENCODING_NONE);
14982
0
    if (input == NULL) {
14983
0
        if (ioclose != NULL)
14984
0
            ioclose(ioctx);
14985
0
        return (NULL);
14986
0
    }
14987
0
    ctxt = xmlNewParserCtxt();
14988
0
    if (ctxt == NULL) {
14989
0
        xmlFreeParserInputBuffer(input);
14990
0
        return (NULL);
14991
0
    }
14992
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14993
0
    if (stream == NULL) {
14994
0
        xmlFreeParserInputBuffer(input);
14995
0
  xmlFreeParserCtxt(ctxt);
14996
0
        return (NULL);
14997
0
    }
14998
0
    inputPush(ctxt, stream);
14999
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15000
0
}
15001
15002
/**
15003
 * xmlCtxtReadDoc:
15004
 * @ctxt:  an XML parser context
15005
 * @cur:  a pointer to a zero terminated string
15006
 * @URL:  the base URL to use for the document
15007
 * @encoding:  the document encoding, or NULL
15008
 * @options:  a combination of xmlParserOption
15009
 *
15010
 * parse an XML in-memory document and build a tree.
15011
 * This reuses the existing @ctxt parser context
15012
 *
15013
 * Returns the resulting document tree
15014
 */
15015
xmlDocPtr
15016
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15017
               const char *URL, const char *encoding, int options)
15018
0
{
15019
0
    if (cur == NULL)
15020
0
        return (NULL);
15021
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15022
0
                              encoding, options));
15023
0
}
15024
15025
/**
15026
 * xmlCtxtReadFile:
15027
 * @ctxt:  an XML parser context
15028
 * @filename:  a file or URL
15029
 * @encoding:  the document encoding, or NULL
15030
 * @options:  a combination of xmlParserOption
15031
 *
15032
 * parse an XML file from the filesystem or the network.
15033
 * This reuses the existing @ctxt parser context
15034
 *
15035
 * Returns the resulting document tree
15036
 */
15037
xmlDocPtr
15038
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15039
                const char *encoding, int options)
15040
0
{
15041
0
    xmlParserInputPtr stream;
15042
15043
0
    if (filename == NULL)
15044
0
        return (NULL);
15045
0
    if (ctxt == NULL)
15046
0
        return (NULL);
15047
0
    xmlInitParser();
15048
15049
0
    xmlCtxtReset(ctxt);
15050
15051
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15052
0
    if (stream == NULL) {
15053
0
        return (NULL);
15054
0
    }
15055
0
    inputPush(ctxt, stream);
15056
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15057
0
}
15058
15059
/**
15060
 * xmlCtxtReadMemory:
15061
 * @ctxt:  an XML parser context
15062
 * @buffer:  a pointer to a char array
15063
 * @size:  the size of the array
15064
 * @URL:  the base URL to use for the document
15065
 * @encoding:  the document encoding, or NULL
15066
 * @options:  a combination of xmlParserOption
15067
 *
15068
 * parse an XML in-memory document and build a tree.
15069
 * This reuses the existing @ctxt parser context
15070
 *
15071
 * Returns the resulting document tree
15072
 */
15073
xmlDocPtr
15074
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15075
                  const char *URL, const char *encoding, int options)
15076
0
{
15077
0
    xmlParserInputBufferPtr input;
15078
0
    xmlParserInputPtr stream;
15079
15080
0
    if (ctxt == NULL)
15081
0
        return (NULL);
15082
0
    if (buffer == NULL)
15083
0
        return (NULL);
15084
0
    xmlInitParser();
15085
15086
0
    xmlCtxtReset(ctxt);
15087
15088
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15089
0
    if (input == NULL) {
15090
0
  return(NULL);
15091
0
    }
15092
15093
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15094
0
    if (stream == NULL) {
15095
0
  xmlFreeParserInputBuffer(input);
15096
0
  return(NULL);
15097
0
    }
15098
15099
0
    inputPush(ctxt, stream);
15100
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15101
0
}
15102
15103
/**
15104
 * xmlCtxtReadFd:
15105
 * @ctxt:  an XML parser context
15106
 * @fd:  an open file descriptor
15107
 * @URL:  the base URL to use for the document
15108
 * @encoding:  the document encoding, or NULL
15109
 * @options:  a combination of xmlParserOption
15110
 *
15111
 * parse an XML from a file descriptor and build a tree.
15112
 * This reuses the existing @ctxt parser context
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15120
              const char *URL, const char *encoding, int options)
15121
0
{
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    if (ctxt == NULL)
15128
0
        return (NULL);
15129
0
    xmlInitParser();
15130
15131
0
    xmlCtxtReset(ctxt);
15132
15133
15134
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15135
0
    if (input == NULL)
15136
0
        return (NULL);
15137
0
    input->closecallback = NULL;
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
        return (NULL);
15142
0
    }
15143
0
    inputPush(ctxt, stream);
15144
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15145
0
}
15146
15147
/**
15148
 * xmlCtxtReadIO:
15149
 * @ctxt:  an XML parser context
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 * This reuses the existing @ctxt parser context
15159
 *
15160
 * Returns the resulting document tree
15161
 */
15162
xmlDocPtr
15163
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15164
              xmlInputCloseCallback ioclose, void *ioctx,
15165
        const char *URL,
15166
              const char *encoding, int options)
15167
0
{
15168
0
    xmlParserInputBufferPtr input;
15169
0
    xmlParserInputPtr stream;
15170
15171
0
    if (ioread == NULL)
15172
0
        return (NULL);
15173
0
    if (ctxt == NULL)
15174
0
        return (NULL);
15175
0
    xmlInitParser();
15176
15177
0
    xmlCtxtReset(ctxt);
15178
15179
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15180
0
                                         XML_CHAR_ENCODING_NONE);
15181
0
    if (input == NULL) {
15182
0
        if (ioclose != NULL)
15183
0
            ioclose(ioctx);
15184
0
        return (NULL);
15185
0
    }
15186
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15187
0
    if (stream == NULL) {
15188
0
        xmlFreeParserInputBuffer(input);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15193
0
}
15194