Coverage Report

Created: 2022-05-03 06:10

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implanted either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/valid.h>
61
#include <libxml/entities.h>
62
#include <libxml/xmlerror.h>
63
#include <libxml/encoding.h>
64
#include <libxml/xmlIO.h>
65
#include <libxml/uri.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
#ifdef LIBXML_SCHEMAS_ENABLED
70
#include <libxml/xmlschemastypes.h>
71
#include <libxml/relaxng.h>
72
#endif
73
74
#include "buf.h"
75
#include "enc.h"
76
77
/*
 * Record holding two string pointers (prefix, URI) and two integers
 * (line, nsNr) for a start tag.
 * NOTE(review): presumably the element's namespace prefix/URI, its source
 * line and a namespace counter -- the code using this struct is outside
 * this excerpt, confirm there.
 */
struct _xmlStartTag {
78
    const xmlChar *prefix;
79
    const xmlChar *URI;
80
    int line;
81
    int nsNr;
82
};
83
84
static void
85
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
86
87
static xmlParserCtxtPtr
88
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
89
                    const xmlChar *base, xmlParserCtxtPtr pctx);
90
91
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
92
93
static int
94
xmlParseElementStart(xmlParserCtxtPtr ctxt);
95
96
static void
97
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
98
99
/************************************************************************
100
 *                  *
101
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
102
 *                  *
103
 ************************************************************************/
104
105
0
/*
 * Size threshold used by xmlParserEntityCheck(): a non-zero @size below
 * this value makes the check return 0 without further accounting.
 */
#define XML_PARSER_BIG_ENTITY 1000
106
/* NOTE(review): no use of this limit is visible in this part of the file. */
#define XML_PARSER_LOT_ENTITY 5000
107
108
/*
109
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
110
 *    replacement over the size in bytes of the input indicates that you have
111
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
112
 *    replacements per byte of input.
113
 */
114
72
#define XML_PARSER_NON_LINEAR 10
115
116
/*
117
 * xmlParserEntityCheck
118
 *
119
 * Function to check non-linear entity expansion behaviour.
120
 * This is here to detect and stop exponential (non-linear) entity expansion.
121
 * This is not a limitation of the parser but a safety
122
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
123
 * parser option.
 *
 * @ctxt:  the parser context; NULL makes the check return 0
 * @size:  a size, only examined when @replacement is 0
 * @ent:  the entity being referenced, may be NULL
 * @replacement:  a replacement size; when non-zero it is the only value
 *                compared against the amount of consumed input
 *
 * Returns 0 when the expansion is considered acceptable, 1 when an entity
 * loop was already the last error or when XML_ERR_ENTITY_LOOP has just
 * been raised by this function.
124
 */
125
static int
126
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
127
                     xmlEntityPtr ent, size_t replacement)
128
239k
{
129
239k
    size_t consumed = 0;
130
239k
    int i;
131
132
239k
    /* No context, or XML_PARSE_HUGE requested: nothing is checked. */
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
133
0
        return (0);
134
239k
    /* The last recorded error is already an entity loop: fail at once. */
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
135
0
        return (1);
136
137
    /*
138
     * This may look absurd but is needed to detect
139
     * entities problems
140
     */
141
239k
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
142
239k
  (ent->content != NULL) && (ent->checked == 0) &&
143
239k
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
144
0
  unsigned long oldnbent = ctxt->nbentities, diff;
145
0
  xmlChar *rep;
146
147
0
  /*
   * Set a non-zero value before expanding, so that the
   * (ent->checked == 0) test above cannot pass again for this entity.
   */
  ent->checked = 1;
148
149
0
        ++ctxt->depth;
150
0
  rep = xmlStringDecodeEntities(ctxt, ent->content,
151
0
          XML_SUBSTITUTE_REF, 0, 0, 0);
152
0
        --ctxt->depth;
153
0
  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
154
0
      /* Expansion failed or looped: truncate the content to "". */
      ent->content[0] = 0;
155
0
  }
156
157
0
        /* Growth of ctxt->nbentities across the expansion, plus one,
         * capped at INT_MAX / 2. */
        diff = ctxt->nbentities - oldnbent + 1;
158
0
        if (diff > INT_MAX / 2)
159
0
            diff = INT_MAX / 2;
160
0
  /* The count is stored doubled; bit 0 is set just below when the
   * expanded text contains a '<'. */
  ent->checked = diff * 2;
161
0
  if (rep != NULL) {
162
0
      if (xmlStrchr(rep, '<'))
163
0
    ent->checked |= 1;
164
0
      xmlFree(rep);
165
0
      rep = NULL;
166
0
  }
167
0
    }
168
169
    /*
170
     * Prevent entity exponential check, not just replacement while
171
     * parsing the DTD
172
     * The check is potentially costly so do that only once in a thousand
     * (in practice: when nbentities is above 10000 and a multiple of 1024)
173
     */
174
239k
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
175
239k
        (ctxt->nbentities % 1024 == 0)) {
176
144
  /* Sum consumed + (cur - base) over every entry of inputTab. */
  for (i = 0;i < ctxt->inputNr;i++) {
177
72
      consumed += ctxt->inputTab[i]->consumed +
178
72
                 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
179
72
  }
180
72
  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
181
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
182
0
      ctxt->instate = XML_PARSER_EOF;
183
0
      return (1);
184
0
  }
185
72
  /* Reset: the branches below recompute consumed from ctxt->input. */
  consumed = 0;
186
72
    }
187
188
189
190
239k
    /*
     * Three accounting modes follow, selected by the first of
     * @replacement, @size and @ent that is set.
     */
    if (replacement != 0) {
191
0
  if (replacement < XML_MAX_TEXT_LENGTH)
192
0
      return(0);
193
194
        /*
195
   * If the volume of entity copy reaches 10 times the
196
   * amount of parsed data and over the large text threshold
197
   * then that's very likely to be an abuse.
198
   */
199
0
        if (ctxt->input != NULL) {
200
0
      consumed = ctxt->input->consumed +
201
0
                 (ctxt->input->cur - ctxt->input->base);
202
0
  }
203
0
        consumed += ctxt->sizeentities;
204
205
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
206
0
      return(0);
207
239k
    } else if (size != 0) {
208
        /*
209
         * Do the check based on the replacement size of the entity
210
         */
211
0
        if (size < XML_PARSER_BIG_ENTITY)
212
0
      return(0);
213
214
        /*
215
         * A limit on the amount of text data reasonably used
216
         */
217
0
        if (ctxt->input != NULL) {
218
0
            consumed = ctxt->input->consumed +
219
0
                (ctxt->input->cur - ctxt->input->base);
220
0
        }
221
0
        consumed += ctxt->sizeentities;
222
223
0
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
224
0
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
225
0
            return (0);
226
239k
    } else if (ent != NULL) {
227
        /*
228
         * use the number of parsed entities in the replacement
         * (ent->checked holds that count doubled, see above)
229
         */
230
0
        size = ent->checked / 2;
231
232
        /*
233
         * The amount of data parsed counting entities size only once
234
         */
235
0
        if (ctxt->input != NULL) {
236
0
            consumed = ctxt->input->consumed +
237
0
                (ctxt->input->cur - ctxt->input->base);
238
0
        }
239
0
        consumed += ctxt->sizeentities;
240
241
        /*
242
         * Check the density of entities for the amount of data
243
   * knowing an entity reference will take at least 3 bytes
244
         */
245
0
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
246
0
            return (0);
247
239k
    } else {
248
        /*
249
         * strange we got no data for checking
         * Only fail when the last error is an undeclared entity
         * (error or warning) and more than 10000 entities were counted.
250
         */
251
239k
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
252
239k
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
253
239k
      (ctxt->nbentities <= 10000))
254
171k
      return (0);
255
239k
    }
256
68.1k
    /* Every acceptable case returned 0 above: report an entity loop. */
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
257
68.1k
    return (1);
258
239k
}
259
260
/**
261
 * xmlParserMaxDepth:
262
 *
263
 * Arbitrary depth limit (256 by default) for the XML documents that we
264
 * allow to be processed. This is not a limitation of the parser but a safety
265
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
266
 * parser option.
267
 */
268
unsigned int xmlParserMaxDepth = 256;
269
270
271
272
/* NOTE(review): the code testing SAX2 is outside this excerpt. */
#define SAX2 1
273
58.6M
/* Two buffer size constants; their users are outside this excerpt. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
274
35.3M
#define XML_PARSER_BUFFER_SIZE 100
275
15.1k
/* Fixed marker string, cast to xmlChar * through BAD_CAST. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
276
277
/**
278
 * XML_PARSER_CHUNK_SIZE
279
 *
280
 * When calling GROW, this is the minimal amount of data
281
 * the parser expects to have received. It is not a hard
282
 * limit but an optimization when reading strings like Names.
283
 * It is not strictly needed as long as the input's available characters
284
 * are followed by 0, which should be provided by the I/O level.
285
 */
286
34.5M
#define XML_PARSER_CHUNK_SIZE 100
287
288
/*
289
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
290
 */
291
292
static const char* const xmlW3CPIs[] = {
293
    "xml-stylesheet",
294
    "xml-model",
295
    NULL
296
};
297
298
299
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
300
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
301
                                              const xmlChar **str);
302
303
static xmlParserErrors
304
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
305
                xmlSAXHandlerPtr sax,
306
          void *user_data, int depth, const xmlChar *URL,
307
          const xmlChar *ID, xmlNodePtr *list);
308
309
static int
310
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
311
                          const char *encoding);
312
#ifdef LIBXML_LEGACY_ENABLED
313
static void
314
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
315
                      xmlNodePtr lastNode);
316
#endif /* LIBXML_LEGACY_ENABLED */
317
318
static xmlParserErrors
319
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
320
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
321
322
static int
323
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
324
325
/************************************************************************
326
 *                  *
327
 *    Some factorized error routines        *
328
 *                  *
329
 ************************************************************************/
330
331
/**
332
 * xmlErrAttributeDup:
333
 * @ctxt:  an XML parser context
334
 * @prefix:  the attribute prefix, or NULL for an attribute without prefix
335
 * @localname:  the attribute localname
336
 *
337
 * Handle a redefinition of attribute error: raise
 * XML_ERR_ATTRIBUTE_REDEFINED at XML_ERR_FATAL level, then clear
 * ctxt->wellFormed and, when ctxt->recovery is 0, set ctxt->disableSAX.
338
 */
339
static void
340
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
341
                   const xmlChar * localname)
342
312k
{
343
312k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
344
312k
        (ctxt->instate == XML_PARSER_EOF))
345
0
  return;
346
312k
    if (ctxt != NULL)
347
312k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
348
349
312k
    /* The message names the attribute as "local" or "prefix:local". */
    if (prefix == NULL)
350
311k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
351
311k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
352
311k
                        (const char *) localname, NULL, NULL, 0, 0,
353
311k
                        "Attribute %s redefined\n", localname);
354
1.29k
    else
355
1.29k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
356
1.29k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
357
1.29k
                        (const char *) prefix, (const char *) localname,
358
1.29k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
359
1.29k
                        localname);
360
312k
    if (ctxt != NULL) {
361
312k
  ctxt->wellFormed = 0;
362
312k
  /* Outside recovery mode, a fatal error also sets disableSAX. */
  if (ctxt->recovery == 0)
363
312k
      ctxt->disableSAX = 1;
364
312k
    }
365
312k
}
366
367
/**
368
 * xmlFatalErr:
369
 * @ctxt:  an XML parser context
370
 * @error:  the error number
371
 * @info:  extra information string, or NULL
372
 *
373
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * The error number is mapped to a fixed message; when @info is not NULL
 * it is appended after ": ". Afterwards ctxt->wellFormed is cleared and,
 * when ctxt->recovery is 0, ctxt->disableSAX is set.
374
 */
375
static void
376
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
377
194k
{
378
194k
    const char *errmsg;
379
380
194k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
381
194k
        (ctxt->instate == XML_PARSER_EOF))
382
910
  return;
383
193k
    /* Select the fixed message text for this error number. */
    switch (error) {
384
14.5k
        case XML_ERR_INVALID_HEX_CHARREF:
385
14.5k
            errmsg = "CharRef: invalid hexadecimal value";
386
14.5k
            break;
387
14.3k
        case XML_ERR_INVALID_DEC_CHARREF:
388
14.3k
            errmsg = "CharRef: invalid decimal value";
389
14.3k
            break;
390
0
        case XML_ERR_INVALID_CHARREF:
391
0
            errmsg = "CharRef: invalid value";
392
0
            break;
393
3.09k
        case XML_ERR_INTERNAL_ERROR:
394
3.09k
            errmsg = "internal error";
395
3.09k
            break;
396
0
        case XML_ERR_PEREF_AT_EOF:
397
0
            errmsg = "PEReference at end of document";
398
0
            break;
399
0
        case XML_ERR_PEREF_IN_PROLOG:
400
0
            errmsg = "PEReference in prolog";
401
0
            break;
402
0
        case XML_ERR_PEREF_IN_EPILOG:
403
0
            errmsg = "PEReference in epilog";
404
0
            break;
405
0
        case XML_ERR_PEREF_NO_NAME:
406
0
            errmsg = "PEReference: no name";
407
0
            break;
408
1.71k
        case XML_ERR_PEREF_SEMICOL_MISSING:
409
1.71k
            errmsg = "PEReference: expecting ';'";
410
1.71k
            break;
411
68.1k
        case XML_ERR_ENTITY_LOOP:
412
68.1k
            errmsg = "Detected an entity reference loop";
413
68.1k
            break;
414
0
        case XML_ERR_ENTITY_NOT_STARTED:
415
0
            errmsg = "EntityValue: \" or ' expected";
416
0
            break;
417
632
        case XML_ERR_ENTITY_PE_INTERNAL:
418
632
            errmsg = "PEReferences forbidden in internal subset";
419
632
            break;
420
217
        case XML_ERR_ENTITY_NOT_FINISHED:
421
217
            /* Same text as XML_ERR_ENTITY_NOT_STARTED above. */
            errmsg = "EntityValue: \" or ' expected";
422
217
            break;
423
5.04k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
424
5.04k
            errmsg = "AttValue: \" or ' expected";
425
5.04k
            break;
426
22.2k
        case XML_ERR_LT_IN_ATTRIBUTE:
427
22.2k
            errmsg = "Unescaped '<' not allowed in attributes values";
428
22.2k
            break;
429
1.93k
        case XML_ERR_LITERAL_NOT_STARTED:
430
1.93k
            errmsg = "SystemLiteral \" or ' expected";
431
1.93k
            break;
432
1.20k
        case XML_ERR_LITERAL_NOT_FINISHED:
433
1.20k
            errmsg = "Unfinished System or Public ID \" or ' expected";
434
1.20k
            break;
435
1.33k
        case XML_ERR_MISPLACED_CDATA_END:
436
1.33k
            errmsg = "Sequence ']]>' not allowed in content";
437
1.33k
            break;
438
1.09k
        case XML_ERR_URI_REQUIRED:
439
1.09k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
440
1.09k
            break;
441
839
        case XML_ERR_PUBID_REQUIRED:
442
839
            errmsg = "PUBLIC, the Public Identifier is missing";
443
839
            break;
444
2.86k
        case XML_ERR_HYPHEN_IN_COMMENT:
445
2.86k
            errmsg = "Comment must not contain '--' (double-hyphen)";
446
2.86k
            break;
447
1.75k
        case XML_ERR_PI_NOT_STARTED:
448
1.75k
            errmsg = "xmlParsePI : no target name";
449
1.75k
            break;
450
1.01k
        case XML_ERR_RESERVED_XML_NAME:
451
1.01k
            errmsg = "Invalid PI name";
452
1.01k
            break;
453
565
        case XML_ERR_NOTATION_NOT_STARTED:
454
565
            errmsg = "NOTATION: Name expected here";
455
565
            break;
456
1.31k
        case XML_ERR_NOTATION_NOT_FINISHED:
457
1.31k
            errmsg = "'>' required to close NOTATION declaration";
458
1.31k
            break;
459
1.83k
        case XML_ERR_VALUE_REQUIRED:
460
1.83k
            errmsg = "Entity value required";
461
1.83k
            break;
462
559
        case XML_ERR_URI_FRAGMENT:
463
559
            errmsg = "Fragment not allowed";
464
559
            break;
465
805
        case XML_ERR_ATTLIST_NOT_STARTED:
466
805
            errmsg = "'(' required to start ATTLIST enumeration";
467
805
            break;
468
371
        case XML_ERR_NMTOKEN_REQUIRED:
469
371
            errmsg = "NmToken expected in ATTLIST enumeration";
470
371
            break;
471
1.04k
        case XML_ERR_ATTLIST_NOT_FINISHED:
472
1.04k
            errmsg = "')' required to finish ATTLIST enumeration";
473
1.04k
            break;
474
628
        case XML_ERR_MIXED_NOT_STARTED:
475
628
            errmsg = "MixedContentDecl : '|' or ')*' expected";
476
628
            break;
477
0
        case XML_ERR_PCDATA_REQUIRED:
478
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
479
0
            break;
480
623
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
481
623
            errmsg = "ContentDecl : Name or '(' expected";
482
623
            break;
483
1.78k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
484
1.78k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
485
1.78k
            break;
486
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
487
0
            errmsg =
488
0
                "PEReference: forbidden within markup decl in internal subset";
489
0
            break;
490
13.7k
        case XML_ERR_GT_REQUIRED:
491
13.7k
            errmsg = "expected '>'";
492
13.7k
            break;
493
0
        case XML_ERR_CONDSEC_INVALID:
494
0
            errmsg = "XML conditional section '[' expected";
495
0
            break;
496
0
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
497
0
            errmsg = "Content error in the external subset";
498
0
            break;
499
0
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
500
0
            errmsg =
501
0
                "conditional section INCLUDE or IGNORE keyword expected";
502
0
            break;
503
0
        case XML_ERR_CONDSEC_NOT_FINISHED:
504
0
            errmsg = "XML conditional section not closed";
505
0
            break;
506
0
        case XML_ERR_XMLDECL_NOT_STARTED:
507
0
            errmsg = "Text declaration '<?xml' required";
508
0
            break;
509
1.05k
        case XML_ERR_XMLDECL_NOT_FINISHED:
510
1.05k
            errmsg = "parsing XML declaration: '?>' expected";
511
1.05k
            break;
512
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
513
0
            errmsg = "external parsed entities cannot be standalone";
514
0
            break;
515
13.2k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
516
13.2k
            errmsg = "EntityRef: expecting ';'";
517
13.2k
            break;
518
4.18k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
519
4.18k
            errmsg = "DOCTYPE improperly terminated";
520
4.18k
            break;
521
0
        case XML_ERR_LTSLASH_REQUIRED:
522
0
            errmsg = "EndTag: '</' not found";
523
0
            break;
524
13
        case XML_ERR_EQUAL_REQUIRED:
525
13
            errmsg = "expected '='";
526
13
            break;
527
162
        case XML_ERR_STRING_NOT_CLOSED:
528
162
            errmsg = "String not closed expecting \" or '";
529
162
            break;
530
1.26k
        case XML_ERR_STRING_NOT_STARTED:
531
1.26k
            errmsg = "String not started expecting ' or \"";
532
1.26k
            break;
533
7
        case XML_ERR_ENCODING_NAME:
534
7
            errmsg = "Invalid XML encoding name";
535
7
            break;
536
14
        case XML_ERR_STANDALONE_VALUE:
537
14
            errmsg = "standalone accepts only 'yes' or 'no'";
538
14
            break;
539
58
        case XML_ERR_DOCUMENT_EMPTY:
540
58
            errmsg = "Document is empty";
541
58
            break;
542
1.41k
        case XML_ERR_DOCUMENT_END:
543
1.41k
            errmsg = "Extra content at the end of the document";
544
1.41k
            break;
545
0
        case XML_ERR_NOT_WELL_BALANCED:
546
0
            errmsg = "chunk is not well balanced";
547
0
            break;
548
0
        case XML_ERR_EXTRA_CONTENT:
549
0
            errmsg = "extra content at the end of well balanced chunk";
550
0
            break;
551
2.90k
        case XML_ERR_VERSION_MISSING:
552
2.90k
            errmsg = "Malformed declaration expecting version";
553
2.90k
            break;
554
1.23k
        case XML_ERR_NAME_TOO_LONG:
555
1.23k
            errmsg = "Name too long use XML_PARSE_HUGE option";
556
1.23k
            break;
557
#if 0
558
        case:
559
            errmsg = "";
560
            break;
561
#endif
562
2.48k
        /* Any error number without an entry above. */
        default:
563
2.48k
            errmsg = "Unregistered error message";
564
193k
    }
565
193k
    if (ctxt != NULL)
566
193k
  ctxt->errNo = error;
567
193k
    /* Print "message" alone, or "message: info" when @info is given. */
    if (info == NULL) {
568
188k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
569
188k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
570
188k
                        errmsg);
571
188k
    } else {
572
4.33k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
573
4.33k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
574
4.33k
                        errmsg, info);
575
4.33k
    }
576
193k
    if (ctxt != NULL) {
577
193k
  ctxt->wellFormed = 0;
578
193k
  /* Outside recovery mode, a fatal error also sets disableSAX. */
  if (ctxt->recovery == 0)
579
193k
      ctxt->disableSAX = 1;
580
193k
    }
581
193k
}
582
583
/**
584
 * xmlFatalErrMsg:
585
 * @ctxt:  an XML parser context
586
 * @error:  the error number
587
 * @msg:  the error message; it is printed through a "%s" format, so it
 *        is not itself interpreted as a format string
588
 *
589
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * Afterwards ctxt->wellFormed is cleared and, when ctxt->recovery is 0,
 * ctxt->disableSAX is set.
590
 */
591
static void LIBXML_ATTR_FORMAT(3,0)
592
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
593
               const char *msg)
594
445k
{
595
445k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
596
445k
        (ctxt->instate == XML_PARSER_EOF))
597
8
  return;
598
445k
    if (ctxt != NULL)
599
445k
  ctxt->errNo = error;
600
445k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
601
445k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
602
445k
    if (ctxt != NULL) {
603
445k
  ctxt->wellFormed = 0;
604
445k
  /* Outside recovery mode, a fatal error also sets disableSAX. */
  if (ctxt->recovery == 0)
605
445k
      ctxt->disableSAX = 1;
606
445k
    }
607
445k
}
608
609
/**
610
 * xmlWarningMsg:
611
 * @ctxt:  an XML parser context
612
 * @error:  the error number
613
 * @msg:  the error message, used as the format string with @str1 and
 *        @str2 as its arguments
614
 * @str1:  extra data
615
 * @str2:  extra data
616
 *
617
 * Handle a warning. The report is raised at XML_ERR_WARNING level and
 * leaves ctxt->errNo, ctxt->wellFormed and ctxt->disableSAX untouched.
618
 */
619
static void LIBXML_ATTR_FORMAT(3,0)
620
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
621
              const char *msg, const xmlChar *str1, const xmlChar *str2)
622
4.96k
{
623
4.96k
    xmlStructuredErrorFunc schannel = NULL;
624
625
4.96k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
626
4.96k
        (ctxt->instate == XML_PARSER_EOF))
627
0
  return;
628
4.96k
    /* The structured channel is only taken from an initialized SAX2
     * handler block. */
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
629
4.96k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
630
4.96k
        schannel = ctxt->sax->serror;
631
4.96k
    if (ctxt != NULL) {
632
4.96k
        /* With a context: pass the SAX warning callback (if any) and
         * ctxt->userData. */
        __xmlRaiseError(schannel,
633
4.96k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
634
4.96k
                    ctxt->userData,
635
4.96k
                    ctxt, NULL, XML_FROM_PARSER, error,
636
4.96k
                    XML_ERR_WARNING, NULL, 0,
637
4.96k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
638
4.96k
        msg, (const char *) str1, (const char *) str2);
639
4.96k
    } else {
640
0
        /* Without a context: no callback and no user data are passed. */
        __xmlRaiseError(schannel, NULL, NULL,
641
0
                    ctxt, NULL, XML_FROM_PARSER, error,
642
0
                    XML_ERR_WARNING, NULL, 0,
643
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
644
0
        msg, (const char *) str1, (const char *) str2);
645
0
    }
646
4.96k
}
647
648
/**
649
 * xmlValidityError:
650
 * @ctxt:  an XML parser context
651
 * @error:  the error number
652
 * @msg:  the error message, used as the format string with @str1 and
 *        @str2 as its arguments
653
 * @str1:  extra data
 * @str2:  extra data
654
 *
655
 * Handle a validity error. The report is raised in the XML_FROM_DTD
 * domain at XML_ERR_ERROR level; with a non-NULL @ctxt, ctxt->errNo is
 * set and ctxt->valid is cleared.
656
 */
657
static void LIBXML_ATTR_FORMAT(3,0)
658
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
659
              const char *msg, const xmlChar *str1, const xmlChar *str2)
660
0
{
661
0
    xmlStructuredErrorFunc schannel = NULL;
662
663
0
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
664
0
        (ctxt->instate == XML_PARSER_EOF))
665
0
  return;
666
0
    if (ctxt != NULL) {
667
0
  ctxt->errNo = error;
668
0
  /* The structured channel is only taken from an initialized SAX2
   * handler block. */
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
669
0
      schannel = ctxt->sax->serror;
670
0
    }
671
0
    if (ctxt != NULL) {
672
0
        /* With a context: pass the validation context's error callback
         * and user data. */
        __xmlRaiseError(schannel,
673
0
                    ctxt->vctxt.error, ctxt->vctxt.userData,
674
0
                    ctxt, NULL, XML_FROM_DTD, error,
675
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
676
0
        (const char *) str2, NULL, 0, 0,
677
0
        msg, (const char *) str1, (const char *) str2);
678
0
  ctxt->valid = 0;
679
0
    } else {
680
0
        __xmlRaiseError(schannel, NULL, NULL,
681
0
                    ctxt, NULL, XML_FROM_DTD, error,
682
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
683
0
        (const char *) str2, NULL, 0, 0,
684
0
        msg, (const char *) str1, (const char *) str2);
685
0
    }
686
0
}
687
688
/**
689
 * xmlFatalErrMsgInt:
690
 * @ctxt:  an XML parser context
691
 * @error:  the error number
692
 * @msg:  the error message, used as the format string with @val as its
 *        argument
693
 * @val:  an integer value
694
 *
695
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * Afterwards ctxt->wellFormed is cleared and, when ctxt->recovery is 0,
 * ctxt->disableSAX is set.
696
 */
697
static void LIBXML_ATTR_FORMAT(3,0)
698
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
699
                  const char *msg, int val)
700
131k
{
701
131k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
702
131k
        (ctxt->instate == XML_PARSER_EOF))
703
0
  return;
704
131k
    if (ctxt != NULL)
705
131k
  ctxt->errNo = error;
706
131k
    __xmlRaiseError(NULL, NULL, NULL,
707
131k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
708
131k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
709
131k
    if (ctxt != NULL) {
710
131k
  ctxt->wellFormed = 0;
711
131k
  /* Outside recovery mode, a fatal error also sets disableSAX. */
  if (ctxt->recovery == 0)
712
131k
      ctxt->disableSAX = 1;
713
131k
    }
714
131k
}
715
716
/**
717
 * xmlFatalErrMsgStrIntStr:
718
 * @ctxt:  an XML parser context
719
 * @error:  the error number
720
 * @msg:  the error message, used as the format string with @str1, @val
 *        and @str2 (in that order) as its arguments
721
 * @str1:  a string info
722
 * @val:  an integer value
723
 * @str2:  a string info
724
 *
725
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * Afterwards ctxt->wellFormed is cleared and, when ctxt->recovery is 0,
 * ctxt->disableSAX is set.
726
 */
727
static void LIBXML_ATTR_FORMAT(3,0)
728
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
729
                  const char *msg, const xmlChar *str1, int val,
730
      const xmlChar *str2)
731
128k
{
732
128k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
733
128k
        (ctxt->instate == XML_PARSER_EOF))
734
0
  return;
735
128k
    if (ctxt != NULL)
736
128k
  ctxt->errNo = error;
737
128k
    __xmlRaiseError(NULL, NULL, NULL,
738
128k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
739
128k
                    NULL, 0, (const char *) str1, (const char *) str2,
740
128k
        NULL, val, 0, msg, str1, val, str2);
741
128k
    if (ctxt != NULL) {
742
128k
  ctxt->wellFormed = 0;
743
128k
  /* Outside recovery mode, a fatal error also sets disableSAX. */
  if (ctxt->recovery == 0)
744
128k
      ctxt->disableSAX = 1;
745
128k
    }
746
128k
}
747
748
/**
749
 * xmlFatalErrMsgStr:
750
 * @ctxt:  an XML parser context
751
 * @error:  the error number
752
 * @msg:  the error message, used as the format string with @val as its
 *        argument
753
 * @val:  a string value
754
 *
755
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints.
 * Afterwards ctxt->wellFormed is cleared and, when ctxt->recovery is 0,
 * ctxt->disableSAX is set.
756
 */
757
static void LIBXML_ATTR_FORMAT(3,0)
758
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
759
                  const char *msg, const xmlChar * val)
760
269k
{
761
269k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
762
269k
        (ctxt->instate == XML_PARSER_EOF))
763
0
  return;
764
269k
    if (ctxt != NULL)
765
269k
  ctxt->errNo = error;
766
269k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
767
269k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
768
269k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
769
269k
                    val);
770
269k
    if (ctxt != NULL) {
771
269k
  ctxt->wellFormed = 0;
772
269k
  /* Outside recovery mode, a fatal error also sets disableSAX. */
  if (ctxt->recovery == 0)
773
269k
      ctxt->disableSAX = 1;
774
269k
    }
775
269k
}
776
777
/**
778
 * xmlErrMsgStr:
779
 * @ctxt:  an XML parser context
780
 * @error:  the error number
781
 * @msg:  the error message, used as the format string with @val as its
 *        argument
782
 * @val:  a string value
783
 *
784
 * Handle a non fatal parser error: the report is raised at XML_ERR_ERROR
 * level and ctxt->errNo is set, but ctxt->wellFormed and
 * ctxt->disableSAX are left untouched.
785
 */
786
static void LIBXML_ATTR_FORMAT(3,0)
787
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
788
                  const char *msg, const xmlChar * val)
789
3.14k
{
790
3.14k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
791
3.14k
        (ctxt->instate == XML_PARSER_EOF))
792
0
  return;
793
3.14k
    if (ctxt != NULL)
794
3.14k
  ctxt->errNo = error;
795
3.14k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
796
3.14k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
797
3.14k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
798
3.14k
                    val);
799
3.14k
}
800
801
/**
802
 * xmlNsErr:
803
 * @ctxt:  an XML parser context
804
 * @error:  the error number
805
 * @msg:  the message, used as the format string with @info1, @info2 and
 *        @info3 as its arguments
806
 * @info1:  extra information string
807
 * @info2:  extra information string
 * @info3:  extra information string
808
 *
809
 * Handle a namespace error: the report is raised in the
 * XML_FROM_NAMESPACE domain at XML_ERR_ERROR level, ctxt->errNo is set
 * and ctxt->nsWellFormed is cleared. ctxt->wellFormed and
 * ctxt->disableSAX are left untouched.
810
 */
811
static void LIBXML_ATTR_FORMAT(3,0)
812
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
813
         const char *msg,
814
         const xmlChar * info1, const xmlChar * info2,
815
         const xmlChar * info3)
816
101k
{
817
101k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
818
101k
        (ctxt->instate == XML_PARSER_EOF))
819
1
  return;
820
101k
    if (ctxt != NULL)
821
101k
  ctxt->errNo = error;
822
101k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
823
101k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
824
101k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
825
101k
                    info1, info2, info3);
826
101k
    if (ctxt != NULL)
827
101k
  ctxt->nsWellFormed = 0;
828
101k
}
829
830
/**
831
 * xmlNsWarn:
832
 * @ctxt:  an XML parser context
833
 * @error:  the error number
834
 * @msg:  the message, used as the format string with @info1, @info2 and
 *        @info3 as its arguments
835
 * @info1:  extra information string
836
 * @info2:  extra information string
 * @info3:  extra information string
837
 *
838
 * Handle a namespace warning: the report is raised in the
 * XML_FROM_NAMESPACE domain at XML_ERR_WARNING level; no field of @ctxt
 * is modified by this function itself.
839
 */
840
static void LIBXML_ATTR_FORMAT(3,0)
841
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
842
         const char *msg,
843
         const xmlChar * info1, const xmlChar * info2,
844
         const xmlChar * info3)
845
18.5k
{
846
18.5k
    /* Report nothing when SAX is disabled and the parser state is EOF. */
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
847
18.5k
        (ctxt->instate == XML_PARSER_EOF))
848
0
  return;
849
18.5k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
850
18.5k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
851
18.5k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
852
18.5k
                    info1, info2, info3);
853
18.5k
}
854
855
/************************************************************************
856
 *                  *
857
 *    Library wide options          *
858
 *                  *
859
 ************************************************************************/
860
861
/**
862
  * xmlHasFeature:
863
  * @feature: the feature to be examined
864
  *
865
  * Examines if the library has been compiled with a given feature.
866
  *
867
  * Returns a non-zero value if the feature exist, otherwise zero.
868
  * Returns zero (0) if the feature does not exist or an unknown
869
  * unknown feature is requested, non-zero otherwise.
870
  */
871
int
872
xmlHasFeature(xmlFeature feature)
873
0
{
874
0
    switch (feature) {
875
0
  case XML_WITH_THREAD:
876
0
#ifdef LIBXML_THREAD_ENABLED
877
0
      return(1);
878
#else
879
      return(0);
880
#endif
881
0
        case XML_WITH_TREE:
882
0
#ifdef LIBXML_TREE_ENABLED
883
0
            return(1);
884
#else
885
            return(0);
886
#endif
887
0
        case XML_WITH_OUTPUT:
888
0
#ifdef LIBXML_OUTPUT_ENABLED
889
0
            return(1);
890
#else
891
            return(0);
892
#endif
893
0
        case XML_WITH_PUSH:
894
0
#ifdef LIBXML_PUSH_ENABLED
895
0
            return(1);
896
#else
897
            return(0);
898
#endif
899
0
        case XML_WITH_READER:
900
0
#ifdef LIBXML_READER_ENABLED
901
0
            return(1);
902
#else
903
            return(0);
904
#endif
905
0
        case XML_WITH_PATTERN:
906
0
#ifdef LIBXML_PATTERN_ENABLED
907
0
            return(1);
908
#else
909
            return(0);
910
#endif
911
0
        case XML_WITH_WRITER:
912
0
#ifdef LIBXML_WRITER_ENABLED
913
0
            return(1);
914
#else
915
            return(0);
916
#endif
917
0
        case XML_WITH_SAX1:
918
0
#ifdef LIBXML_SAX1_ENABLED
919
0
            return(1);
920
#else
921
            return(0);
922
#endif
923
0
        case XML_WITH_FTP:
924
#ifdef LIBXML_FTP_ENABLED
925
            return(1);
926
#else
927
0
            return(0);
928
0
#endif
929
0
        case XML_WITH_HTTP:
930
#ifdef LIBXML_HTTP_ENABLED
931
            return(1);
932
#else
933
0
            return(0);
934
0
#endif
935
0
        case XML_WITH_VALID:
936
0
#ifdef LIBXML_VALID_ENABLED
937
0
            return(1);
938
#else
939
            return(0);
940
#endif
941
0
        case XML_WITH_HTML:
942
0
#ifdef LIBXML_HTML_ENABLED
943
0
            return(1);
944
#else
945
            return(0);
946
#endif
947
0
        case XML_WITH_LEGACY:
948
#ifdef LIBXML_LEGACY_ENABLED
949
            return(1);
950
#else
951
0
            return(0);
952
0
#endif
953
0
        case XML_WITH_C14N:
954
0
#ifdef LIBXML_C14N_ENABLED
955
0
            return(1);
956
#else
957
            return(0);
958
#endif
959
0
        case XML_WITH_CATALOG:
960
0
#ifdef LIBXML_CATALOG_ENABLED
961
0
            return(1);
962
#else
963
            return(0);
964
#endif
965
0
        case XML_WITH_XPATH:
966
0
#ifdef LIBXML_XPATH_ENABLED
967
0
            return(1);
968
#else
969
            return(0);
970
#endif
971
0
        case XML_WITH_XPTR:
972
0
#ifdef LIBXML_XPTR_ENABLED
973
0
            return(1);
974
#else
975
            return(0);
976
#endif
977
0
        case XML_WITH_XINCLUDE:
978
0
#ifdef LIBXML_XINCLUDE_ENABLED
979
0
            return(1);
980
#else
981
            return(0);
982
#endif
983
0
        case XML_WITH_ICONV:
984
0
#ifdef LIBXML_ICONV_ENABLED
985
0
            return(1);
986
#else
987
            return(0);
988
#endif
989
0
        case XML_WITH_ISO8859X:
990
0
#ifdef LIBXML_ISO8859X_ENABLED
991
0
            return(1);
992
#else
993
            return(0);
994
#endif
995
0
        case XML_WITH_UNICODE:
996
0
#ifdef LIBXML_UNICODE_ENABLED
997
0
            return(1);
998
#else
999
            return(0);
1000
#endif
1001
0
        case XML_WITH_REGEXP:
1002
0
#ifdef LIBXML_REGEXP_ENABLED
1003
0
            return(1);
1004
#else
1005
            return(0);
1006
#endif
1007
0
        case XML_WITH_AUTOMATA:
1008
0
#ifdef LIBXML_AUTOMATA_ENABLED
1009
0
            return(1);
1010
#else
1011
            return(0);
1012
#endif
1013
0
        case XML_WITH_EXPR:
1014
#ifdef LIBXML_EXPR_ENABLED
1015
            return(1);
1016
#else
1017
0
            return(0);
1018
0
#endif
1019
0
        case XML_WITH_SCHEMAS:
1020
0
#ifdef LIBXML_SCHEMAS_ENABLED
1021
0
            return(1);
1022
#else
1023
            return(0);
1024
#endif
1025
0
        case XML_WITH_SCHEMATRON:
1026
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1027
0
            return(1);
1028
#else
1029
            return(0);
1030
#endif
1031
0
        case XML_WITH_MODULES:
1032
0
#ifdef LIBXML_MODULES_ENABLED
1033
0
            return(1);
1034
#else
1035
            return(0);
1036
#endif
1037
0
        case XML_WITH_DEBUG:
1038
#ifdef LIBXML_DEBUG_ENABLED
1039
            return(1);
1040
#else
1041
0
            return(0);
1042
0
#endif
1043
0
        case XML_WITH_DEBUG_MEM:
1044
#ifdef DEBUG_MEMORY_LOCATION
1045
            return(1);
1046
#else
1047
0
            return(0);
1048
0
#endif
1049
0
        case XML_WITH_DEBUG_RUN:
1050
#ifdef LIBXML_DEBUG_RUNTIME
1051
            return(1);
1052
#else
1053
0
            return(0);
1054
0
#endif
1055
0
        case XML_WITH_ZLIB:
1056
0
#ifdef LIBXML_ZLIB_ENABLED
1057
0
            return(1);
1058
#else
1059
            return(0);
1060
#endif
1061
0
        case XML_WITH_LZMA:
1062
0
#ifdef LIBXML_LZMA_ENABLED
1063
0
            return(1);
1064
#else
1065
            return(0);
1066
#endif
1067
0
        case XML_WITH_ICU:
1068
#ifdef LIBXML_ICU_ENABLED
1069
            return(1);
1070
#else
1071
0
            return(0);
1072
0
#endif
1073
0
        default:
1074
0
      break;
1075
0
     }
1076
0
     return(0);
1077
0
}
1078
1079
/************************************************************************
1080
 *                  *
1081
 *    SAX2 defaulted attributes handling      *
1082
 *                  *
1083
 ************************************************************************/
1084
1085
/**
1086
 * xmlDetectSAX2:
1087
 * @ctxt:  an XML parser context
1088
 *
1089
 * Do the SAX2 detection and specific initialization
1090
 */
1091
static void
1092
30.5k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1093
30.5k
    xmlSAXHandlerPtr sax;
1094
1095
    /* Avoid unused variable warning if features are disabled. */
1096
30.5k
    (void) sax;
1097
1098
30.5k
    if (ctxt == NULL) return;
1099
30.5k
    sax = ctxt->sax;
1100
30.5k
#ifdef LIBXML_SAX1_ENABLED
1101
30.5k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1102
30.5k
        ((sax->startElementNs != NULL) ||
1103
30.5k
         (sax->endElementNs != NULL) ||
1104
30.5k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1105
30.5k
        ctxt->sax2 = 1;
1106
#else
1107
    ctxt->sax2 = 1;
1108
#endif /* LIBXML_SAX1_ENABLED */
1109
1110
30.5k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1111
30.5k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1112
30.5k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1113
30.5k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1114
30.5k
    (ctxt->str_xml_ns == NULL)) {
1115
0
        xmlErrMemory(ctxt, NULL);
1116
0
    }
1117
30.5k
}
1118
1119
typedef struct _xmlDefAttrs xmlDefAttrs;
1120
typedef xmlDefAttrs *xmlDefAttrsPtr;
1121
struct _xmlDefAttrs {
1122
    int nbAttrs;  /* number of defaulted attributes on that element */
1123
    int maxAttrs;       /* the size of the array */
1124
#if __STDC_VERSION__ >= 199901L
1125
    /* Using a C99 flexible array member avoids UBSan errors. */
1126
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1127
#else
1128
    const xmlChar *values[5];
1129
#endif
1130
};
1131
1132
/**
1133
 * xmlAttrNormalizeSpace:
1134
 * @src: the source string
1135
 * @dst: the target string
1136
 *
1137
 * Normalize the space in non CDATA attribute values:
1138
 * If the attribute type is not CDATA, then the XML processor MUST further
1139
 * process the normalized attribute value by discarding any leading and
1140
 * trailing space (#x20) characters, and by replacing sequences of space
1141
 * (#x20) characters by a single space (#x20) character.
1142
 * Note that the size of dst need to be at least src, and if one doesn't need
1143
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1144
 * passing src as dst is just fine.
1145
 *
1146
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1147
 *         is needed.
1148
 */
1149
static xmlChar *
1150
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1151
15.5k
{
1152
15.5k
    if ((src == NULL) || (dst == NULL))
1153
0
        return(NULL);
1154
1155
18.4k
    while (*src == 0x20) src++;
1156
253k
    while (*src != 0) {
1157
237k
  if (*src == 0x20) {
1158
44.3k
      while (*src == 0x20) src++;
1159
1.95k
      if (*src != 0)
1160
902
    *dst++ = 0x20;
1161
235k
  } else {
1162
235k
      *dst++ = *src++;
1163
235k
  }
1164
237k
    }
1165
15.5k
    *dst = 0;
1166
15.5k
    if (dst == src)
1167
13.4k
       return(NULL);
1168
2.12k
    return(dst);
1169
15.5k
}
1170
1171
/**
1172
 * xmlAttrNormalizeSpace2:
1173
 * @src: the source string
1174
 *
1175
 * Normalize the space in non CDATA attribute values, a slightly more complex
1176
 * front end to avoid allocation problems when running on attribute values
1177
 * coming from the input.
1178
 *
1179
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1180
 *         is needed.
1181
 */
1182
static const xmlChar *
1183
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1184
4.46k
{
1185
4.46k
    int i;
1186
4.46k
    int remove_head = 0;
1187
4.46k
    int need_realloc = 0;
1188
4.46k
    const xmlChar *cur;
1189
1190
4.46k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1191
0
        return(NULL);
1192
4.46k
    i = *len;
1193
4.46k
    if (i <= 0)
1194
1.04k
        return(NULL);
1195
1196
3.42k
    cur = src;
1197
4.11k
    while (*cur == 0x20) {
1198
685
        cur++;
1199
685
  remove_head++;
1200
685
    }
1201
28.0k
    while (*cur != 0) {
1202
25.2k
  if (*cur == 0x20) {
1203
1.23k
      cur++;
1204
1.23k
      if ((*cur == 0x20) || (*cur == 0)) {
1205
610
          need_realloc = 1;
1206
610
    break;
1207
610
      }
1208
1.23k
  } else
1209
24.0k
      cur++;
1210
25.2k
    }
1211
3.42k
    if (need_realloc) {
1212
610
        xmlChar *ret;
1213
1214
610
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1215
610
  if (ret == NULL) {
1216
0
      xmlErrMemory(ctxt, NULL);
1217
0
      return(NULL);
1218
0
  }
1219
610
  xmlAttrNormalizeSpace(ret, ret);
1220
610
  *len = (int) strlen((const char *)ret);
1221
610
        return(ret);
1222
2.81k
    } else if (remove_head) {
1223
423
        *len -= remove_head;
1224
423
        memmove(src, src + remove_head, 1 + *len);
1225
423
  return(src);
1226
423
    }
1227
2.39k
    return(NULL);
1228
3.42k
}
1229
1230
/**
1231
 * xmlAddDefAttrs:
1232
 * @ctxt:  an XML parser context
1233
 * @fullname:  the element fullname
1234
 * @fullattr:  the attribute fullname
1235
 * @value:  the attribute value
1236
 *
1237
 * Add a defaulted attribute for an element
1238
 */
1239
static void
1240
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1241
               const xmlChar *fullname,
1242
               const xmlChar *fullattr,
1243
33.7k
               const xmlChar *value) {
1244
33.7k
    xmlDefAttrsPtr defaults;
1245
33.7k
    int len;
1246
33.7k
    const xmlChar *name;
1247
33.7k
    const xmlChar *prefix;
1248
1249
    /*
1250
     * Allows to detect attribute redefinitions
1251
     */
1252
33.7k
    if (ctxt->attsSpecial != NULL) {
1253
32.7k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1254
14.2k
      return;
1255
32.7k
    }
1256
1257
19.5k
    if (ctxt->attsDefault == NULL) {
1258
1.01k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1259
1.01k
  if (ctxt->attsDefault == NULL)
1260
0
      goto mem_error;
1261
1.01k
    }
1262
1263
    /*
1264
     * split the element name into prefix:localname , the string found
1265
     * are within the DTD and then not associated to namespace names.
1266
     */
1267
19.5k
    name = xmlSplitQName3(fullname, &len);
1268
19.5k
    if (name == NULL) {
1269
12.8k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1270
12.8k
  prefix = NULL;
1271
12.8k
    } else {
1272
6.69k
        name = xmlDictLookup(ctxt->dict, name, -1);
1273
6.69k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1274
6.69k
    }
1275
1276
    /*
1277
     * make sure there is some storage
1278
     */
1279
19.5k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1280
19.5k
    if (defaults == NULL) {
1281
2.75k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1282
2.75k
                     (4 * 5) * sizeof(const xmlChar *));
1283
2.75k
  if (defaults == NULL)
1284
0
      goto mem_error;
1285
2.75k
  defaults->nbAttrs = 0;
1286
2.75k
  defaults->maxAttrs = 4;
1287
2.75k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1288
2.75k
                          defaults, NULL) < 0) {
1289
0
      xmlFree(defaults);
1290
0
      goto mem_error;
1291
0
  }
1292
16.8k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1293
2.46k
        xmlDefAttrsPtr temp;
1294
1295
2.46k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1296
2.46k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1297
2.46k
  if (temp == NULL)
1298
0
      goto mem_error;
1299
2.46k
  defaults = temp;
1300
2.46k
  defaults->maxAttrs *= 2;
1301
2.46k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1302
2.46k
                          defaults, NULL) < 0) {
1303
0
      xmlFree(defaults);
1304
0
      goto mem_error;
1305
0
  }
1306
2.46k
    }
1307
1308
    /*
1309
     * Split the element name into prefix:localname , the string found
1310
     * are within the DTD and hen not associated to namespace names.
1311
     */
1312
19.5k
    name = xmlSplitQName3(fullattr, &len);
1313
19.5k
    if (name == NULL) {
1314
15.8k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1315
15.8k
  prefix = NULL;
1316
15.8k
    } else {
1317
3.66k
        name = xmlDictLookup(ctxt->dict, name, -1);
1318
3.66k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1319
3.66k
    }
1320
1321
19.5k
    defaults->values[5 * defaults->nbAttrs] = name;
1322
19.5k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1323
    /* intern the string and precompute the end */
1324
19.5k
    len = xmlStrlen(value);
1325
19.5k
    value = xmlDictLookup(ctxt->dict, value, len);
1326
19.5k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1327
19.5k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1328
19.5k
    if (ctxt->external)
1329
0
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1330
19.5k
    else
1331
19.5k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1332
19.5k
    defaults->nbAttrs++;
1333
1334
19.5k
    return;
1335
1336
0
mem_error:
1337
0
    xmlErrMemory(ctxt, NULL);
1338
0
    return;
1339
19.5k
}
1340
1341
/**
1342
 * xmlAddSpecialAttr:
1343
 * @ctxt:  an XML parser context
1344
 * @fullname:  the element fullname
1345
 * @fullattr:  the attribute fullname
1346
 * @type:  the attribute type
1347
 *
1348
 * Register this attribute type
1349
 */
1350
static void
1351
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1352
      const xmlChar *fullname,
1353
      const xmlChar *fullattr,
1354
      int type)
1355
35.1k
{
1356
35.1k
    if (ctxt->attsSpecial == NULL) {
1357
1.36k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1358
1.36k
  if (ctxt->attsSpecial == NULL)
1359
0
      goto mem_error;
1360
1.36k
    }
1361
1362
35.1k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1363
15.1k
        return;
1364
1365
20.0k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1366
20.0k
                     (void *) (ptrdiff_t) type);
1367
20.0k
    return;
1368
1369
0
mem_error:
1370
0
    xmlErrMemory(ctxt, NULL);
1371
0
    return;
1372
35.1k
}
1373
1374
/**
1375
 * xmlCleanSpecialAttrCallback:
1376
 *
1377
 * Removes CDATA attributes from the special attribute table
1378
 */
1379
static void
1380
xmlCleanSpecialAttrCallback(void *payload, void *data,
1381
                            const xmlChar *fullname, const xmlChar *fullattr,
1382
19.6k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1383
19.6k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1384
1385
19.6k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1386
10.5k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1387
10.5k
    }
1388
19.6k
}
1389
1390
/**
1391
 * xmlCleanSpecialAttr:
1392
 * @ctxt:  an XML parser context
1393
 *
1394
 * Trim the list of attributes defined to remove all those of type
1395
 * CDATA as they are not special. This call should be done when finishing
1396
 * to parse the DTD and before starting to parse the document root.
1397
 */
1398
static void
1399
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1400
3.55k
{
1401
3.55k
    if (ctxt->attsSpecial == NULL)
1402
2.21k
        return;
1403
1404
1.34k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1405
1406
1.34k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1407
36
        xmlHashFree(ctxt->attsSpecial, NULL);
1408
36
        ctxt->attsSpecial = NULL;
1409
36
    }
1410
1.34k
    return;
1411
3.55k
}
1412
1413
/**
1414
 * xmlCheckLanguageID:
1415
 * @lang:  pointer to the string value
1416
 *
1417
 * Checks that the value conforms to the LanguageID production:
1418
 *
1419
 * NOTE: this is somewhat deprecated, those productions were removed from
1420
 *       the XML Second edition.
1421
 *
1422
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1423
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1424
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1425
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1426
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1427
 * [38] Subcode ::= ([a-z] | [A-Z])+
1428
 *
1429
 * The current REC reference the successors of RFC 1766, currently 5646
1430
 *
1431
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1432
 * langtag       = language
1433
 *                 ["-" script]
1434
 *                 ["-" region]
1435
 *                 *("-" variant)
1436
 *                 *("-" extension)
1437
 *                 ["-" privateuse]
1438
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1439
 *                 ["-" extlang]       ; sometimes followed by
1440
 *                                     ; extended language subtags
1441
 *               / 4ALPHA              ; or reserved for future use
1442
 *               / 5*8ALPHA            ; or registered language subtag
1443
 *
1444
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1445
 *                 *2("-" 3ALPHA)      ; permanently reserved
1446
 *
1447
 * script        = 4ALPHA              ; ISO 15924 code
1448
 *
1449
 * region        = 2ALPHA              ; ISO 3166-1 code
1450
 *               / 3DIGIT              ; UN M.49 code
1451
 *
1452
 * variant       = 5*8alphanum         ; registered variants
1453
 *               / (DIGIT 3alphanum)
1454
 *
1455
 * extension     = singleton 1*("-" (2*8alphanum))
1456
 *
1457
 *                                     ; Single alphanumerics
1458
 *                                     ; "x" reserved for private use
1459
 * singleton     = DIGIT               ; 0 - 9
1460
 *               / %x41-57             ; A - W
1461
 *               / %x59-5A             ; Y - Z
1462
 *               / %x61-77             ; a - w
1463
 *               / %x79-7A             ; y - z
1464
 *
1465
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1466
 * The parser below doesn't try to cope with extension or privateuse
1467
 * that could be added but that's not interoperable anyway
1468
 *
1469
 * Returns 1 if correct 0 otherwise
1470
 **/
1471
int
1472
xmlCheckLanguageID(const xmlChar * lang)
1473
0
{
1474
0
    const xmlChar *cur = lang, *nxt;
1475
1476
0
    if (cur == NULL)
1477
0
        return (0);
1478
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1479
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1480
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1481
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1482
        /*
1483
         * Still allow IANA code and user code which were coming
1484
         * from the previous version of the XML-1.0 specification
1485
         * it's deprecated but we should not fail
1486
         */
1487
0
        cur += 2;
1488
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1489
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1490
0
            cur++;
1491
0
        return(cur[0] == 0);
1492
0
    }
1493
0
    nxt = cur;
1494
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1495
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1496
0
           nxt++;
1497
0
    if (nxt - cur >= 4) {
1498
        /*
1499
         * Reserved
1500
         */
1501
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1502
0
            return(0);
1503
0
        return(1);
1504
0
    }
1505
0
    if (nxt - cur < 2)
1506
0
        return(0);
1507
    /* we got an ISO 639 code */
1508
0
    if (nxt[0] == 0)
1509
0
        return(1);
1510
0
    if (nxt[0] != '-')
1511
0
        return(0);
1512
1513
0
    nxt++;
1514
0
    cur = nxt;
1515
    /* now we can have extlang or script or region or variant */
1516
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1517
0
        goto region_m49;
1518
1519
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1520
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1521
0
           nxt++;
1522
0
    if (nxt - cur == 4)
1523
0
        goto script;
1524
0
    if (nxt - cur == 2)
1525
0
        goto region;
1526
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1527
0
        goto variant;
1528
0
    if (nxt - cur != 3)
1529
0
        return(0);
1530
    /* we parsed an extlang */
1531
0
    if (nxt[0] == 0)
1532
0
        return(1);
1533
0
    if (nxt[0] != '-')
1534
0
        return(0);
1535
1536
0
    nxt++;
1537
0
    cur = nxt;
1538
    /* now we can have script or region or variant */
1539
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1540
0
        goto region_m49;
1541
1542
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1543
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1544
0
           nxt++;
1545
0
    if (nxt - cur == 2)
1546
0
        goto region;
1547
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1548
0
        goto variant;
1549
0
    if (nxt - cur != 4)
1550
0
        return(0);
1551
    /* we parsed a script */
1552
0
script:
1553
0
    if (nxt[0] == 0)
1554
0
        return(1);
1555
0
    if (nxt[0] != '-')
1556
0
        return(0);
1557
1558
0
    nxt++;
1559
0
    cur = nxt;
1560
    /* now we can have region or variant */
1561
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1562
0
        goto region_m49;
1563
1564
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1565
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1566
0
           nxt++;
1567
1568
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1569
0
        goto variant;
1570
0
    if (nxt - cur != 2)
1571
0
        return(0);
1572
    /* we parsed a region */
1573
0
region:
1574
0
    if (nxt[0] == 0)
1575
0
        return(1);
1576
0
    if (nxt[0] != '-')
1577
0
        return(0);
1578
1579
0
    nxt++;
1580
0
    cur = nxt;
1581
    /* now we can just have a variant */
1582
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1583
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1584
0
           nxt++;
1585
1586
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1587
0
        return(0);
1588
1589
    /* we parsed a variant */
1590
0
variant:
1591
0
    if (nxt[0] == 0)
1592
0
        return(1);
1593
0
    if (nxt[0] != '-')
1594
0
        return(0);
1595
    /* extensions and private use subtags not checked */
1596
0
    return (1);
1597
1598
0
region_m49:
1599
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1600
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1601
0
        nxt += 3;
1602
0
        goto region;
1603
0
    }
1604
0
    return(0);
1605
0
}
1606
1607
/************************************************************************
1608
 *                  *
1609
 *    Parser stacks related functions and macros    *
1610
 *                  *
1611
 ************************************************************************/
1612
1613
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1614
                                            const xmlChar ** str);
1615
1616
#ifdef SAX2
1617
/**
1618
 * nsPush:
1619
 * @ctxt:  an XML parser context
1620
 * @prefix:  the namespace prefix or NULL
1621
 * @URL:  the namespace name
1622
 *
1623
 * Pushes a new parser namespace on top of the ns stack
1624
 *
1625
 * Returns -1 in case of error, -2 if the namespace should be discarded
1626
 *     and the index in the stack otherwise.
1627
 */
1628
static int
1629
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1630
109k
{
1631
109k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1632
0
        int i;
1633
0
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1634
0
      if (ctxt->nsTab[i] == prefix) {
1635
    /* in scope */
1636
0
          if (ctxt->nsTab[i + 1] == URL)
1637
0
        return(-2);
1638
    /* out of scope keep it */
1639
0
    break;
1640
0
      }
1641
0
  }
1642
0
    }
1643
109k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1644
3.70k
  ctxt->nsMax = 10;
1645
3.70k
  ctxt->nsNr = 0;
1646
3.70k
  ctxt->nsTab = (const xmlChar **)
1647
3.70k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1648
3.70k
  if (ctxt->nsTab == NULL) {
1649
0
      xmlErrMemory(ctxt, NULL);
1650
0
      ctxt->nsMax = 0;
1651
0
            return (-1);
1652
0
  }
1653
105k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1654
2.17k
        const xmlChar ** tmp;
1655
2.17k
        ctxt->nsMax *= 2;
1656
2.17k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1657
2.17k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1658
2.17k
        if (tmp == NULL) {
1659
0
            xmlErrMemory(ctxt, NULL);
1660
0
      ctxt->nsMax /= 2;
1661
0
            return (-1);
1662
0
        }
1663
2.17k
  ctxt->nsTab = tmp;
1664
2.17k
    }
1665
109k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1666
109k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1667
109k
    return (ctxt->nsNr);
1668
109k
}
1669
/**
1670
 * nsPop:
1671
 * @ctxt: an XML parser context
1672
 * @nr:  the number to pop
1673
 *
1674
 * Pops the top @nr parser prefix/namespace from the ns stack
1675
 *
1676
 * Returns the number of namespaces removed
1677
 */
1678
static int
1679
nsPop(xmlParserCtxtPtr ctxt, int nr)
1680
44.3k
{
1681
44.3k
    int i;
1682
1683
44.3k
    if (ctxt->nsTab == NULL) return(0);
1684
44.3k
    if (ctxt->nsNr < nr) {
1685
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1686
0
        nr = ctxt->nsNr;
1687
0
    }
1688
44.3k
    if (ctxt->nsNr <= 0)
1689
0
        return (0);
1690
1691
138k
    for (i = 0;i < nr;i++) {
1692
93.8k
         ctxt->nsNr--;
1693
93.8k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1694
93.8k
    }
1695
44.3k
    return(nr);
1696
44.3k
}
1697
#endif
1698
1699
static int
1700
2.40k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1701
2.40k
    const xmlChar **atts;
1702
2.40k
    int *attallocs;
1703
2.40k
    int maxatts;
1704
1705
2.40k
    if (ctxt->atts == NULL) {
1706
2.11k
  maxatts = 55; /* allow for 10 attrs by default */
1707
2.11k
  atts = (const xmlChar **)
1708
2.11k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1709
2.11k
  if (atts == NULL) goto mem_error;
1710
2.11k
  ctxt->atts = atts;
1711
2.11k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1712
2.11k
  if (attallocs == NULL) goto mem_error;
1713
2.11k
  ctxt->attallocs = attallocs;
1714
2.11k
  ctxt->maxatts = maxatts;
1715
2.11k
    } else if (nr + 5 > ctxt->maxatts) {
1716
293
  maxatts = (nr + 5) * 2;
1717
293
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1718
293
             maxatts * sizeof(const xmlChar *));
1719
293
  if (atts == NULL) goto mem_error;
1720
293
  ctxt->atts = atts;
1721
293
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1722
293
                               (maxatts / 5) * sizeof(int));
1723
293
  if (attallocs == NULL) goto mem_error;
1724
293
  ctxt->attallocs = attallocs;
1725
293
  ctxt->maxatts = maxatts;
1726
293
    }
1727
2.40k
    return(ctxt->maxatts);
1728
0
mem_error:
1729
0
    xmlErrMemory(ctxt, NULL);
1730
0
    return(-1);
1731
2.40k
}
1732
1733
/**
1734
 * inputPush:
1735
 * @ctxt:  an XML parser context
1736
 * @value:  the parser input
1737
 *
1738
 * Pushes a new parser input on top of the input stack
1739
 *
1740
 * Returns -1 in case of error, the index in the stack otherwise
1741
 */
1742
int
1743
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1744
15.2k
{
1745
15.2k
    if ((ctxt == NULL) || (value == NULL))
1746
0
        return(-1);
1747
15.2k
    if (ctxt->inputNr >= ctxt->inputMax) {
1748
0
        ctxt->inputMax *= 2;
1749
0
        ctxt->inputTab =
1750
0
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1751
0
                                             ctxt->inputMax *
1752
0
                                             sizeof(ctxt->inputTab[0]));
1753
0
        if (ctxt->inputTab == NULL) {
1754
0
            xmlErrMemory(ctxt, NULL);
1755
0
      xmlFreeInputStream(value);
1756
0
      ctxt->inputMax /= 2;
1757
0
      value = NULL;
1758
0
            return (-1);
1759
0
        }
1760
0
    }
1761
15.2k
    ctxt->inputTab[ctxt->inputNr] = value;
1762
15.2k
    ctxt->input = value;
1763
15.2k
    return (ctxt->inputNr++);
1764
15.2k
}
1765
/**
1766
 * inputPop:
1767
 * @ctxt: an XML parser context
1768
 *
1769
 * Pops the top parser input from the input stack
1770
 *
1771
 * Returns the input just removed
1772
 */
1773
xmlParserInputPtr
1774
inputPop(xmlParserCtxtPtr ctxt)
1775
45.7k
{
1776
45.7k
    xmlParserInputPtr ret;
1777
1778
45.7k
    if (ctxt == NULL)
1779
0
        return(NULL);
1780
45.7k
    if (ctxt->inputNr <= 0)
1781
30.5k
        return (NULL);
1782
15.2k
    ctxt->inputNr--;
1783
15.2k
    if (ctxt->inputNr > 0)
1784
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1785
15.2k
    else
1786
15.2k
        ctxt->input = NULL;
1787
15.2k
    ret = ctxt->inputTab[ctxt->inputNr];
1788
15.2k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1789
15.2k
    return (ret);
1790
45.7k
}
1791
/**
1792
 * nodePush:
1793
 * @ctxt:  an XML parser context
1794
 * @value:  the element node
1795
 *
1796
 * Pushes a new element node on top of the node stack
1797
 *
1798
 * Returns -1 in case of error, the index in the stack otherwise
1799
 */
1800
int
1801
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1802
0
{
1803
0
    if (ctxt == NULL) return(0);
1804
0
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1805
0
        xmlNodePtr *tmp;
1806
1807
0
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1808
0
                                      ctxt->nodeMax * 2 *
1809
0
                                      sizeof(ctxt->nodeTab[0]));
1810
0
        if (tmp == NULL) {
1811
0
            xmlErrMemory(ctxt, NULL);
1812
0
            return (-1);
1813
0
        }
1814
0
        ctxt->nodeTab = tmp;
1815
0
  ctxt->nodeMax *= 2;
1816
0
    }
1817
0
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1818
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1819
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1820
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1821
0
        xmlParserMaxDepth);
1822
0
  xmlHaltParser(ctxt);
1823
0
  return(-1);
1824
0
    }
1825
0
    ctxt->nodeTab[ctxt->nodeNr] = value;
1826
0
    ctxt->node = value;
1827
0
    return (ctxt->nodeNr++);
1828
0
}
1829
1830
/**
1831
 * nodePop:
1832
 * @ctxt: an XML parser context
1833
 *
1834
 * Pops the top element node from the node stack
1835
 *
1836
 * Returns the node just removed
1837
 */
1838
xmlNodePtr
1839
nodePop(xmlParserCtxtPtr ctxt)
1840
116k
{
1841
116k
    xmlNodePtr ret;
1842
1843
116k
    if (ctxt == NULL) return(NULL);
1844
116k
    if (ctxt->nodeNr <= 0)
1845
116k
        return (NULL);
1846
0
    ctxt->nodeNr--;
1847
0
    if (ctxt->nodeNr > 0)
1848
0
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1849
0
    else
1850
0
        ctxt->node = NULL;
1851
0
    ret = ctxt->nodeTab[ctxt->nodeNr];
1852
0
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1853
0
    return (ret);
1854
116k
}
1855
1856
/**
1857
 * nameNsPush:
1858
 * @ctxt:  an XML parser context
1859
 * @value:  the element name
1860
 * @prefix:  the element prefix
1861
 * @URI:  the element namespace name
1862
 * @line:  the current line number for error messages
1863
 * @nsNr:  the number of namespaces pushed on the namespace table
1864
 *
1865
 * Pushes a new element name/prefix/URL on top of the name stack
1866
 *
1867
 * Returns -1 in case of error, the index in the stack otherwise
1868
 */
1869
static int
1870
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1871
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1872
222k
{
1873
222k
    xmlStartTag *tag;
1874
1875
222k
    if (ctxt->nameNr >= ctxt->nameMax) {
1876
2.87k
        const xmlChar * *tmp;
1877
2.87k
        xmlStartTag *tmp2;
1878
2.87k
        ctxt->nameMax *= 2;
1879
2.87k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1880
2.87k
                                    ctxt->nameMax *
1881
2.87k
                                    sizeof(ctxt->nameTab[0]));
1882
2.87k
        if (tmp == NULL) {
1883
0
      ctxt->nameMax /= 2;
1884
0
      goto mem_error;
1885
0
        }
1886
2.87k
  ctxt->nameTab = tmp;
1887
2.87k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1888
2.87k
                                    ctxt->nameMax *
1889
2.87k
                                    sizeof(ctxt->pushTab[0]));
1890
2.87k
        if (tmp2 == NULL) {
1891
0
      ctxt->nameMax /= 2;
1892
0
      goto mem_error;
1893
0
        }
1894
2.87k
  ctxt->pushTab = tmp2;
1895
219k
    } else if (ctxt->pushTab == NULL) {
1896
9.10k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1897
9.10k
                                            sizeof(ctxt->pushTab[0]));
1898
9.10k
        if (ctxt->pushTab == NULL)
1899
0
            goto mem_error;
1900
9.10k
    }
1901
222k
    ctxt->nameTab[ctxt->nameNr] = value;
1902
222k
    ctxt->name = value;
1903
222k
    tag = &ctxt->pushTab[ctxt->nameNr];
1904
222k
    tag->prefix = prefix;
1905
222k
    tag->URI = URI;
1906
222k
    tag->line = line;
1907
222k
    tag->nsNr = nsNr;
1908
222k
    return (ctxt->nameNr++);
1909
0
mem_error:
1910
0
    xmlErrMemory(ctxt, NULL);
1911
0
    return (-1);
1912
222k
}
1913
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the top entry of the name stack and makes the entry below it,
 * if there is one, the current element name.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1939
1940
/**
1941
 * namePush:
1942
 * @ctxt:  an XML parser context
1943
 * @value:  the element name
1944
 *
1945
 * Pushes a new element name on top of the name stack
1946
 *
1947
 * Returns -1 in case of error, the index in the stack otherwise
1948
 */
1949
int
1950
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1951
0
{
1952
0
    if (ctxt == NULL) return (-1);
1953
1954
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1955
0
        const xmlChar * *tmp;
1956
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1957
0
                                    ctxt->nameMax * 2 *
1958
0
                                    sizeof(ctxt->nameTab[0]));
1959
0
        if (tmp == NULL) {
1960
0
      goto mem_error;
1961
0
        }
1962
0
  ctxt->nameTab = tmp;
1963
0
        ctxt->nameMax *= 2;
1964
0
    }
1965
0
    ctxt->nameTab[ctxt->nameNr] = value;
1966
0
    ctxt->name = value;
1967
0
    return (ctxt->nameNr++);
1968
0
mem_error:
1969
0
    xmlErrMemory(ctxt, NULL);
1970
0
    return (-1);
1971
0
}
1972
/**
1973
 * namePop:
1974
 * @ctxt: an XML parser context
1975
 *
1976
 * Pops the top element name from the name stack
1977
 *
1978
 * Returns the name just removed
1979
 */
1980
const xmlChar *
1981
namePop(xmlParserCtxtPtr ctxt)
1982
141k
{
1983
141k
    const xmlChar *ret;
1984
1985
141k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1986
0
        return (NULL);
1987
141k
    ctxt->nameNr--;
1988
141k
    if (ctxt->nameNr > 0)
1989
138k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1990
3.49k
    else
1991
3.49k
        ctxt->name = NULL;
1992
141k
    ret = ctxt->nameTab[ctxt->nameNr];
1993
141k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1994
141k
    return (ret);
1995
141k
}
1996
1997
258k
/*
 * spacePush:
 * Pushes @val on top of the ctxt->spaceTab stack, doubling the table
 * when it is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of allocation failure, the index in the stack
 * otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int doubled = ctxt->spaceMax * 2;
        int *grown;

        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   doubled * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            /* the old table and its recorded capacity stay in place */
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = grown;
        ctxt->spaceMax = doubled;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
2015
2016
177k
/*
 * spacePop:
 * Pops the top value of the ctxt->spaceTab stack, resets the vacated
 * slot to -1 and re-points ctxt->space at the new top entry (or at
 * slot 0 once the stack is empty).
 *
 * Returns the popped value, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2028
2029
/*
2030
 * Macros for accessing the content. Those should be used only by the parser,
2031
 * and not exported.
2032
 *
2033
 * Dirty macros, i.e. one often need to make assumption on the context to
2034
 * use them
2035
 *
2036
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2037
 *           To be used with extreme caution since operations consuming
2038
 *           characters may move the input buffer to a different location !
2039
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
2040
 *           This should be used internally by the parser
2041
 *           only to compare to ASCII values otherwise it would break when
2042
 *           running with UTF-8 encoding.
2043
 *   RAW     same as CUR but in the input buffer, bypass any token
2044
 *           extraction that may have been done
2045
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2046
 *           to compare on ASCII based substring.
2047
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2048
 *           strings without newlines within the parser.
2049
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2050
 *           defined char within the parser.
2051
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2052
 *
2053
 *   NEXT    Skip to the next character, this does the proper decoding
2054
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2055
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2056
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2057
 *           to the number of xmlChars used for the encoding [0-5].
2058
 *   CUR_SCHAR  same but operate on a string instead of the context
2059
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2060
 *            the index
2061
 *   GROW, SHRINK  handling of input buffers
2062
 */
2063
2064
6.83M
/*
 * Raw accessors on the current input of the parser context. All of them
 * expect a variable named `ctxt` to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1..cn.
 * The comparison short-circuits, so bytes after the first mismatch are
 * never read. Arguments are parenthesized so expressions can be passed.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance cur and col by val without looking at the skipped
 * bytes (so no newline handling), then refill the input if the new
 * current byte is 0.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance by val bytes one at a time, keeping line/col in
 * sync with any newline crossed, then refill the input if the new
 * current byte is 0. The bound is parenthesized so that an expression
 * argument is compared as a whole against the loop counter.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SHRINK: outside of progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie behind cur and fewer than 2 * INPUT_CHUNK
 * bytes remain ahead of it. The expansion already ends with a ';'.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2110
2111
4.24k
/*
 * xmlSHRINK:
 * Out-of-line body of the SHRINK macro: shrinks the current input and,
 * if the current byte is 0 afterwards, asks for INPUT_CHUNK more bytes.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;

    xmlParserInputShrink(in);
    if (*in->cur != 0)
        return;
    xmlParserInputGrow(in, INPUT_CHUNK);
}
2116
2117
405M
#define GROW if ((ctxt->progressive == 0) &&       \
2118
405M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))  \
2119
405M
  xmlGROW (ctxt);
2120
2121
750k
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2122
750k
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2123
750k
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2124
2125
750k
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2126
750k
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2127
750k
         ((ctxt->input->buf) &&
2128
12
          (ctxt->input->buf->readcallback != xmlInputReadCallbackNop)) &&
2129
750k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2130
12
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2131
12
        xmlHaltParser(ctxt);
2132
12
  return;
2133
12
    }
2134
750k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2135
750k
    if ((ctxt->input->cur > ctxt->input->end) ||
2136
750k
        (ctxt->input->cur < ctxt->input->base)) {
2137
0
        xmlHaltParser(ctxt);
2138
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2139
0
  return;
2140
0
    }
2141
750k
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2142
32.8k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2143
750k
}
2144
2145
2.45M
/* Skip blanks at the current position; expects `ctxt` in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance cur and col by one byte, then refill the input if the
 * new current byte is 0. No newline handling is done.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/col according to whether it is a newline.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Decode the current character; l receives its length in bytes. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A length l of 1 stores a single byte; anything else goes
 * through xmlCopyCharMultiByte(). Every argument is parenthesized so
 * that expressions are evaluated as a unit. The expansion has no
 * trailing ';' and is an if/else, so use it as a full statement.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))

/* Total number of bytes consumed so far on the current input. */
#define CUR_CONSUMED \
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2172
2173
/**
2174
 * xmlSkipBlankChars:
2175
 * @ctxt:  the XML parser context
2176
 *
2177
 * skip all blanks character found at that point in the input streams.
2178
 * It pops up finished entities in the process if allowable at that point.
2179
 *
2180
 * Returns the number of space chars skipped
2181
 */
2182
2183
int
2184
2.45M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2185
2.45M
    int res = 0;
2186
2187
    /*
2188
     * It's Okay to use CUR/NEXT here since all the blanks are on
2189
     * the ASCII range.
2190
     */
2191
2.45M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2192
2.45M
        (ctxt->instate == XML_PARSER_START)) {
2193
1.70M
  const xmlChar *cur;
2194
  /*
2195
   * if we are in the document content, go really fast
2196
   */
2197
1.70M
  cur = ctxt->input->cur;
2198
1.70M
  while (IS_BLANK_CH(*cur)) {
2199
560k
      if (*cur == '\n') {
2200
6.03k
    ctxt->input->line++; ctxt->input->col = 1;
2201
554k
      } else {
2202
554k
    ctxt->input->col++;
2203
554k
      }
2204
560k
      cur++;
2205
560k
      if (res < INT_MAX)
2206
560k
    res++;
2207
560k
      if (*cur == 0) {
2208
296
    ctxt->input->cur = cur;
2209
296
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2210
296
    cur = ctxt->input->cur;
2211
296
      }
2212
560k
  }
2213
1.70M
  ctxt->input->cur = cur;
2214
1.70M
    } else {
2215
744k
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2216
2217
1.81M
  while (1) {
2218
1.81M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2219
1.06M
    NEXT;
2220
1.06M
      } else if (CUR == '%') {
2221
                /*
2222
                 * Need to handle support of entities branching here
2223
                 */
2224
270k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2225
270k
                    break;
2226
0
          xmlParsePEReference(ctxt);
2227
474k
            } else if (CUR == 0) {
2228
1.98k
                if (ctxt->inputNr <= 1)
2229
1.98k
                    break;
2230
0
                xmlPopInput(ctxt);
2231
472k
            } else {
2232
472k
                break;
2233
472k
            }
2234
2235
            /*
2236
             * Also increase the counter when entering or exiting a PERef.
2237
             * The spec says: "When a parameter-entity reference is recognized
2238
             * in the DTD and included, its replacement text MUST be enlarged
2239
             * by the attachment of one leading and one following space (#x20)
2240
             * character."
2241
             */
2242
1.06M
      if (res < INT_MAX)
2243
1.06M
    res++;
2244
1.06M
        }
2245
744k
    }
2246
2.45M
    return(res);
2247
2.45M
}
2248
2249
/************************************************************************
2250
 *                  *
2251
 *    Commodity functions to handle entities      *
2252
 *                  *
2253
 ************************************************************************/
2254
2255
/**
2256
 * xmlPopInput:
2257
 * @ctxt:  an XML parser context
2258
 *
2259
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2260
 *          pop it and return the next char.
2261
 *
2262
 * Returns the current xmlChar in the parser context
2263
 */
2264
xmlChar
2265
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2266
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2267
0
    if (xmlParserDebugEntities)
2268
0
  xmlGenericError(xmlGenericErrorContext,
2269
0
    "Popping input %d\n", ctxt->inputNr);
2270
0
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2271
0
        (ctxt->instate != XML_PARSER_EOF))
2272
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2273
0
                    "Unfinished entity outside the DTD");
2274
0
    xmlFreeInputStream(inputPop(ctxt));
2275
0
    if (*ctxt->input->cur == 0)
2276
0
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2277
0
    return(CUR);
2278
0
}
2279
2280
/**
2281
 * xmlPushInput:
2282
 * @ctxt:  an XML parser context
2283
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2284
 *
2285
 * xmlPushInput: switch to a new input stream which is stacked on top
2286
 *               of the previous one(s).
2287
 * Returns -1 in case of error or the index in the input stack
2288
 */
2289
int
2290
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2291
0
    int ret;
2292
0
    if (input == NULL) return(-1);
2293
2294
0
    if (xmlParserDebugEntities) {
2295
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2296
0
      xmlGenericError(xmlGenericErrorContext,
2297
0
        "%s(%d): ", ctxt->input->filename,
2298
0
        ctxt->input->line);
2299
0
  xmlGenericError(xmlGenericErrorContext,
2300
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2301
0
    }
2302
0
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2303
0
        (ctxt->inputNr > 1024)) {
2304
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2305
0
        while (ctxt->inputNr > 1)
2306
0
            xmlFreeInputStream(inputPop(ctxt));
2307
0
  return(-1);
2308
0
    }
2309
0
    ret = inputPush(ctxt, input);
2310
0
    if (ctxt->instate == XML_PARSER_EOF)
2311
0
        return(-1);
2312
0
    GROW;
2313
0
    return(ret);
2314
0
}
2315
2316
/**
2317
 * xmlParseCharRef:
2318
 * @ctxt:  an XML parser context
2319
 *
2320
 * parse Reference declarations
2321
 *
2322
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2323
 *                  '&#x' [0-9a-fA-F]+ ';'
2324
 *
2325
 * [ WFC: Legal Character ]
2326
 * Characters referred to using character references must match the
2327
 * production for Char.
2328
 *
2329
 * Returns the value parsed (as an int), 0 in case of error
2330
 */
2331
int
2332
54.2k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2333
54.2k
    int val = 0;
2334
54.2k
    int count = 0;
2335
2336
    /*
2337
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2338
     */
2339
54.2k
    if ((RAW == '&') && (NXT(1) == '#') &&
2340
54.2k
        (NXT(2) == 'x')) {
2341
22.6k
  SKIP(3);
2342
22.6k
  GROW;
2343
141k
  while (RAW != ';') { /* loop blocked by count */
2344
131k
      if (count++ > 20) {
2345
5.78k
    count = 0;
2346
5.78k
    GROW;
2347
5.78k
                if (ctxt->instate == XML_PARSER_EOF)
2348
0
                    return(0);
2349
5.78k
      }
2350
131k
      if ((RAW >= '0') && (RAW <= '9'))
2351
81.7k
          val = val * 16 + (CUR - '0');
2352
50.2k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2353
6.58k
          val = val * 16 + (CUR - 'a') + 10;
2354
43.7k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2355
30.2k
          val = val * 16 + (CUR - 'A') + 10;
2356
13.4k
      else {
2357
13.4k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2358
13.4k
    val = 0;
2359
13.4k
    break;
2360
13.4k
      }
2361
118k
      if (val > 0x110000)
2362
66.9k
          val = 0x110000;
2363
2364
118k
      NEXT;
2365
118k
      count++;
2366
118k
  }
2367
22.6k
  if (RAW == ';') {
2368
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2369
9.19k
      ctxt->input->col++;
2370
9.19k
      ctxt->input->cur++;
2371
9.19k
  }
2372
31.5k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2373
31.5k
  SKIP(2);
2374
31.5k
  GROW;
2375
110k
  while (RAW != ';') { /* loop blocked by count */
2376
89.3k
      if (count++ > 20) {
2377
1.24k
    count = 0;
2378
1.24k
    GROW;
2379
1.24k
                if (ctxt->instate == XML_PARSER_EOF)
2380
0
                    return(0);
2381
1.24k
      }
2382
89.3k
      if ((RAW >= '0') && (RAW <= '9'))
2383
78.4k
          val = val * 10 + (CUR - '0');
2384
10.8k
      else {
2385
10.8k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2386
10.8k
    val = 0;
2387
10.8k
    break;
2388
10.8k
      }
2389
78.4k
      if (val > 0x110000)
2390
15.1k
          val = 0x110000;
2391
2392
78.4k
      NEXT;
2393
78.4k
      count++;
2394
78.4k
  }
2395
31.5k
  if (RAW == ';') {
2396
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2397
20.6k
      ctxt->input->col++;
2398
20.6k
      ctxt->input->cur++;
2399
20.6k
  }
2400
31.5k
    } else {
2401
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2402
0
    }
2403
2404
    /*
2405
     * [ WFC: Legal Character ]
2406
     * Characters referred to using character references must match the
2407
     * production for Char.
2408
     */
2409
54.2k
    if (val >= 0x110000) {
2410
1.07k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2411
1.07k
                "xmlParseCharRef: character reference out of bounds\n",
2412
1.07k
          val);
2413
53.1k
    } else if (IS_CHAR(val)) {
2414
22.1k
        return(val);
2415
30.9k
    } else {
2416
30.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417
30.9k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2418
30.9k
                    val);
2419
30.9k
    }
2420
32.0k
    return(0);
2421
54.2k
}
2422
2423
/**
2424
 * xmlParseStringCharRef:
2425
 * @ctxt:  an XML parser context
2426
 * @str:  a pointer to an index in the string
2427
 *
2428
 * parse Reference declarations, variant parsing from a string rather
2429
 * than an an input flow.
2430
 *
2431
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2432
 *                  '&#x' [0-9a-fA-F]+ ';'
2433
 *
2434
 * [ WFC: Legal Character ]
2435
 * Characters referred to using character references must match the
2436
 * production for Char.
2437
 *
2438
 * Returns the value parsed (as an int), 0 in case of error, str will be
2439
 *         updated to the current value of the index
2440
 */
2441
static int
2442
20.8k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2443
20.8k
    const xmlChar *ptr;
2444
20.8k
    xmlChar cur;
2445
20.8k
    int val = 0;
2446
2447
20.8k
    if ((str == NULL) || (*str == NULL)) return(0);
2448
20.8k
    ptr = *str;
2449
20.8k
    cur = *ptr;
2450
20.8k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2451
4.59k
  ptr += 3;
2452
4.59k
  cur = *ptr;
2453
14.1k
  while (cur != ';') { /* Non input consuming loop */
2454
10.5k
      if ((cur >= '0') && (cur <= '9'))
2455
3.13k
          val = val * 16 + (cur - '0');
2456
7.42k
      else if ((cur >= 'a') && (cur <= 'f'))
2457
1.60k
          val = val * 16 + (cur - 'a') + 10;
2458
5.81k
      else if ((cur >= 'A') && (cur <= 'F'))
2459
4.79k
          val = val * 16 + (cur - 'A') + 10;
2460
1.02k
      else {
2461
1.02k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2462
1.02k
    val = 0;
2463
1.02k
    break;
2464
1.02k
      }
2465
9.52k
      if (val > 0x110000)
2466
1.47k
          val = 0x110000;
2467
2468
9.52k
      ptr++;
2469
9.52k
      cur = *ptr;
2470
9.52k
  }
2471
4.59k
  if (cur == ';')
2472
3.56k
      ptr++;
2473
16.2k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2474
16.2k
  ptr += 2;
2475
16.2k
  cur = *ptr;
2476
80.3k
  while (cur != ';') { /* Non input consuming loops */
2477
67.6k
      if ((cur >= '0') && (cur <= '9'))
2478
64.1k
          val = val * 10 + (cur - '0');
2479
3.49k
      else {
2480
3.49k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2481
3.49k
    val = 0;
2482
3.49k
    break;
2483
3.49k
      }
2484
64.1k
      if (val > 0x110000)
2485
14.3k
          val = 0x110000;
2486
2487
64.1k
      ptr++;
2488
64.1k
      cur = *ptr;
2489
64.1k
  }
2490
16.2k
  if (cur == ';')
2491
12.7k
      ptr++;
2492
16.2k
    } else {
2493
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2494
0
  return(0);
2495
0
    }
2496
20.8k
    *str = ptr;
2497
2498
    /*
2499
     * [ WFC: Legal Character ]
2500
     * Characters referred to using character references must match the
2501
     * production for Char.
2502
     */
2503
20.8k
    if (val >= 0x110000) {
2504
255
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2505
255
                "xmlParseStringCharRef: character reference out of bounds\n",
2506
255
                val);
2507
20.5k
    } else if (IS_CHAR(val)) {
2508
14.0k
        return(val);
2509
14.0k
    } else {
2510
6.50k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511
6.50k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2512
6.50k
        val);
2513
6.50k
    }
2514
6.76k
    return(0);
2515
20.8k
}
2516
2517
/**
2518
 * xmlParserHandlePEReference:
2519
 * @ctxt:  the parser context
2520
 *
2521
 * [69] PEReference ::= '%' Name ';'
2522
 *
2523
 * [ WFC: No Recursion ]
2524
 * A parsed entity must not contain a recursive
2525
 * reference to itself, either directly or indirectly.
2526
 *
2527
 * [ WFC: Entity Declared ]
2528
 * In a document without any DTD, a document with only an internal DTD
2529
 * subset which contains no parameter entity references, or a document
2530
 * with "standalone='yes'", ...  ... The declaration of a parameter
2531
 * entity must precede any reference to it...
2532
 *
2533
 * [ VC: Entity Declared ]
2534
 * In a document with an external subset or external parameter entities
2535
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2536
 * must precede any reference to it...
2537
 *
2538
 * [ WFC: In DTD ]
2539
 * Parameter-entity references may only appear in the DTD.
2540
 * NOTE: misleading but this is handled.
2541
 *
2542
 * A PEReference may have been detected in the current input stream
2543
 * the handling is done accordingly to
2544
 *      http://www.w3.org/TR/REC-xml#entproc
2545
 * i.e.
2546
 *   - Included in literal in entity values
2547
 *   - Included as Parameter Entity reference within DTDs
2548
 */
2549
void
2550
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2551
0
    switch(ctxt->instate) {
2552
0
  case XML_PARSER_CDATA_SECTION:
2553
0
      return;
2554
0
        case XML_PARSER_COMMENT:
2555
0
      return;
2556
0
  case XML_PARSER_START_TAG:
2557
0
      return;
2558
0
  case XML_PARSER_END_TAG:
2559
0
      return;
2560
0
        case XML_PARSER_EOF:
2561
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2562
0
      return;
2563
0
        case XML_PARSER_PROLOG:
2564
0
  case XML_PARSER_START:
2565
0
  case XML_PARSER_MISC:
2566
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2567
0
      return;
2568
0
  case XML_PARSER_ENTITY_DECL:
2569
0
        case XML_PARSER_CONTENT:
2570
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2571
0
        case XML_PARSER_PI:
2572
0
  case XML_PARSER_SYSTEM_LITERAL:
2573
0
  case XML_PARSER_PUBLIC_LITERAL:
2574
      /* we just ignore it there */
2575
0
      return;
2576
0
        case XML_PARSER_EPILOG:
2577
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2578
0
      return;
2579
0
  case XML_PARSER_ENTITY_VALUE:
2580
      /*
2581
       * NOTE: in the case of entity values, we don't do the
2582
       *       substitution here since we need the literal
2583
       *       entity value to be able to save the internal
2584
       *       subset of the document.
2585
       *       This will be handled by xmlStringDecodeEntities
2586
       */
2587
0
      return;
2588
0
        case XML_PARSER_DTD:
2589
      /*
2590
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2591
       * In the internal DTD subset, parameter-entity references
2592
       * can occur only where markup declarations can occur, not
2593
       * within markup declarations.
2594
       * In that case this is handled in xmlParseMarkupDecl
2595
       */
2596
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2597
0
    return;
2598
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2599
0
    return;
2600
0
            break;
2601
0
        case XML_PARSER_IGNORE:
2602
0
            return;
2603
0
    }
2604
2605
0
    xmlParsePEReference(ctxt);
2606
0
}
2607
2608
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current size plus n. n is parenthesized so
 * that an expression argument is added as a whole. If the computed size
 * wrapped below the current size, or if the reallocation fails, control
 * jumps to mem_error with buffer and buffer##_size left untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2622
2623
/**
2624
 * xmlStringLenDecodeEntities:
2625
 * @ctxt:  the parser context
2626
 * @str:  the input string
2627
 * @len: the string length
2628
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2629
 * @end:  an end marker xmlChar, 0 if none
2630
 * @end2:  an end marker xmlChar, 0 if none
2631
 * @end3:  an end marker xmlChar, 0 if none
2632
 *
2633
 * Takes a entity string content and process to do the adequate substitutions.
2634
 *
2635
 * [67] Reference ::= EntityRef | CharRef
2636
 *
2637
 * [69] PEReference ::= '%' Name ';'
2638
 *
2639
 * Returns A newly allocated string with the substitution done. The caller
2640
 *      must deallocate it !
2641
 */
2642
xmlChar *
2643
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2644
12.7k
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2645
12.7k
    xmlChar *buffer = NULL;
2646
12.7k
    size_t buffer_size = 0;
2647
12.7k
    size_t nbchars = 0;
2648
2649
12.7k
    xmlChar *current = NULL;
2650
12.7k
    xmlChar *rep = NULL;
2651
12.7k
    const xmlChar *last;
2652
12.7k
    xmlEntityPtr ent;
2653
12.7k
    int c,l;
2654
2655
12.7k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2656
0
  return(NULL);
2657
12.7k
    last = str + len;
2658
2659
12.7k
    if (((ctxt->depth > 40) &&
2660
12.7k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2661
12.7k
  (ctxt->depth > 1024)) {
2662
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2663
0
  return(NULL);
2664
0
    }
2665
2666
    /*
2667
     * allocate a translation buffer.
2668
     */
2669
12.7k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2670
12.7k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2671
12.7k
    if (buffer == NULL) goto mem_error;
2672
2673
    /*
2674
     * OK loop until we reach one of the ending char or a size limit.
2675
     * we are operating on already parsed values.
2676
     */
2677
12.7k
    if (str < last)
2678
11.9k
  c = CUR_SCHAR(str, l);
2679
790
    else
2680
790
        c = 0;
2681
35.1M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2682
35.1M
           (c != end2) && (c != end3) &&
2683
35.1M
           (ctxt->instate != XML_PARSER_EOF)) {
2684
2685
35.1M
  if (c == 0) break;
2686
35.1M
        if ((c == '&') && (str[1] == '#')) {
2687
20.8k
      int val = xmlParseStringCharRef(ctxt, &str);
2688
20.8k
      if (val == 0)
2689
6.76k
                goto int_error;
2690
14.0k
      COPY_BUF(0,buffer,nbchars,val);
2691
14.0k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2692
594
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2693
594
      }
2694
35.1M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2695
0
      if (xmlParserDebugEntities)
2696
0
    xmlGenericError(xmlGenericErrorContext,
2697
0
      "String decoding Entity Reference: %.30s\n",
2698
0
      str);
2699
0
      ent = xmlParseStringEntityRef(ctxt, &str);
2700
0
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2701
0
      if (ent != NULL)
2702
0
          ctxt->nbentities += ent->checked / 2;
2703
0
      if ((ent != NULL) &&
2704
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2705
0
    if (ent->content != NULL) {
2706
0
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2707
0
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2708
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2709
0
        }
2710
0
    } else {
2711
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2712
0
          "predefined entity has no content\n");
2713
0
                    goto int_error;
2714
0
    }
2715
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
2716
0
    ctxt->depth++;
2717
0
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2718
0
                            0, 0, 0);
2719
0
    ctxt->depth--;
2720
0
    if (rep == NULL) {
2721
0
                    ent->content[0] = 0;
2722
0
                    goto int_error;
2723
0
                }
2724
2725
0
                current = rep;
2726
0
                while (*current != 0) { /* non input consuming loop */
2727
0
                    buffer[nbchars++] = *current++;
2728
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2729
0
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2730
0
                            goto int_error;
2731
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2732
0
                    }
2733
0
                }
2734
0
                xmlFree(rep);
2735
0
                rep = NULL;
2736
0
      } else if (ent != NULL) {
2737
0
    int i = xmlStrlen(ent->name);
2738
0
    const xmlChar *cur = ent->name;
2739
2740
0
    buffer[nbchars++] = '&';
2741
0
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2742
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2743
0
    }
2744
0
    for (;i > 0;i--)
2745
0
        buffer[nbchars++] = *cur++;
2746
0
    buffer[nbchars++] = ';';
2747
0
      }
2748
35.1M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2749
0
      if (xmlParserDebugEntities)
2750
0
    xmlGenericError(xmlGenericErrorContext,
2751
0
      "String decoding PE Reference: %.30s\n", str);
2752
0
      ent = xmlParseStringPEReference(ctxt, &str);
2753
0
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2754
0
      if (ent != NULL)
2755
0
          ctxt->nbentities += ent->checked / 2;
2756
0
      if (ent != NULL) {
2757
0
                if (ent->content == NULL) {
2758
        /*
2759
         * Note: external parsed entities will not be loaded,
2760
         * it is not required for a non-validating parser to
2761
         * complete external PEReferences coming from the
2762
         * internal subset
2763
         */
2764
0
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2765
0
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2766
0
      (ctxt->validate != 0)) {
2767
0
      xmlLoadEntityContent(ctxt, ent);
2768
0
        } else {
2769
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2770
0
      "not validating will not read content for PE entity %s\n",
2771
0
                          ent->name, NULL);
2772
0
        }
2773
0
    }
2774
0
    ctxt->depth++;
2775
0
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2776
0
                            0, 0, 0);
2777
0
    ctxt->depth--;
2778
0
    if (rep == NULL) {
2779
0
                    if (ent->content != NULL)
2780
0
                        ent->content[0] = 0;
2781
0
                    goto int_error;
2782
0
                }
2783
0
                current = rep;
2784
0
                while (*current != 0) { /* non input consuming loop */
2785
0
                    buffer[nbchars++] = *current++;
2786
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2787
0
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2788
0
                            goto int_error;
2789
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2790
0
                    }
2791
0
                }
2792
0
                xmlFree(rep);
2793
0
                rep = NULL;
2794
0
      }
2795
35.1M
  } else {
2796
35.1M
      COPY_BUF(l,buffer,nbchars,c);
2797
35.1M
      str += l;
2798
35.1M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2799
4.99k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2800
4.99k
      }
2801
35.1M
  }
2802
35.1M
  if (str < last)
2803
35.1M
      c = CUR_SCHAR(str, l);
2804
5.21k
  else
2805
5.21k
      c = 0;
2806
35.1M
    }
2807
6.00k
    buffer[nbchars] = 0;
2808
6.00k
    return(buffer);
2809
2810
0
mem_error:
2811
0
    xmlErrMemory(ctxt, NULL);
2812
6.76k
int_error:
2813
6.76k
    if (rep != NULL)
2814
0
        xmlFree(rep);
2815
6.76k
    if (buffer != NULL)
2816
6.76k
        xmlFree(buffer);
2817
6.76k
    return(NULL);
2818
0
}
2819
2820
/**
2821
 * xmlStringDecodeEntities:
2822
 * @ctxt:  the parser context
2823
 * @str:  the input string
2824
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2825
 * @end:  an end marker xmlChar, 0 if none
2826
 * @end2:  an end marker xmlChar, 0 if none
2827
 * @end3:  an end marker xmlChar, 0 if none
2828
 *
2829
 * Takes a entity string content and process to do the adequate substitutions.
2830
 *
2831
 * [67] Reference ::= EntityRef | CharRef
2832
 *
2833
 * [69] PEReference ::= '%' Name ';'
2834
 *
2835
 * Returns A newly allocated string with the substitution done. The caller
2836
 *      must deallocate it !
2837
 */
2838
xmlChar *
2839
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2840
12.7k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2841
12.7k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2842
12.7k
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2843
12.7k
           end, end2, end3));
2844
12.7k
}
2845
2846
/************************************************************************
2847
 *                  *
2848
 *    Commodity functions, cleanup needed ?     *
2849
 *                  *
2850
 ************************************************************************/
2851
2852
/**
2853
 * areBlanks:
2854
 * @ctxt:  an XML parser context
2855
 * @str:  a xmlChar *
2856
 * @len:  the size of @str
2857
 * @blank_chars: we know the chars are blanks
2858
 *
2859
 * Is this a sequence of blank chars that one can ignore ?
2860
 *
2861
 * Returns 1 if ignorable 0 otherwise.
2862
 */
2863
2864
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2865
11.6k
                     int blank_chars) {
2866
11.6k
    int i, ret;
2867
11.6k
    xmlNodePtr lastChild;
2868
2869
    /*
2870
     * Don't spend time trying to differentiate them, the same callback is
2871
     * used !
2872
     */
2873
11.6k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2874
0
  return(0);
2875
2876
    /*
2877
     * Check for xml:space value.
2878
     */
2879
11.6k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2880
11.6k
        (*(ctxt->space) == -2))
2881
6.49k
  return(0);
2882
2883
    /*
2884
     * Check that the string is made of blanks
2885
     */
2886
5.13k
    if (blank_chars == 0) {
2887
10.3k
  for (i = 0;i < len;i++)
2888
8.68k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2889
4.36k
    }
2890
2891
    /*
2892
     * Look if the element is mixed content in the DTD if available
2893
     */
2894
2.46k
    if (ctxt->node == NULL) return(0);
2895
0
    if (ctxt->myDoc != NULL) {
2896
0
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2897
0
        if (ret == 0) return(1);
2898
0
        if (ret == 1) return(0);
2899
0
    }
2900
2901
    /*
2902
     * Otherwise, heuristic :-\
2903
     */
2904
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2905
0
    if ((ctxt->node->children == NULL) &&
2906
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2907
2908
0
    lastChild = xmlGetLastChild(ctxt->node);
2909
0
    if (lastChild == NULL) {
2910
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2911
0
            (ctxt->node->content != NULL)) return(0);
2912
0
    } else if (xmlNodeIsText(lastChild))
2913
0
        return(0);
2914
0
    else if ((ctxt->node->children != NULL) &&
2915
0
             (xmlNodeIsText(ctxt->node->children)))
2916
0
        return(0);
2917
0
    return(1);
2918
0
}
2919
2920
/************************************************************************
2921
 *                  *
2922
 *    Extra stuff for namespace support     *
2923
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2924
 *                  *
2925
 ************************************************************************/
2926
2927
/**
2928
 * xmlSplitQName:
2929
 * @ctxt:  an XML parser context
2930
 * @name:  an XML parser context
2931
 * @prefix:  a xmlChar **
2932
 *
2933
 * parse an UTF8 encoded XML qualified name string
2934
 *
2935
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2936
 *
2937
 * [NS 6] Prefix ::= NCName
2938
 *
2939
 * [NS 7] LocalPart ::= NCName
2940
 *
2941
 * Returns the local part, and prefix is updated
2942
 *   to get the Prefix if any.
2943
 */
2944
2945
xmlChar *
2946
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2947
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2948
0
    xmlChar *buffer = NULL;
2949
0
    int len = 0;
2950
0
    int max = XML_MAX_NAMELEN;
2951
0
    xmlChar *ret = NULL;
2952
0
    const xmlChar *cur = name;
2953
0
    int c;
2954
2955
0
    if (prefix == NULL) return(NULL);
2956
0
    *prefix = NULL;
2957
2958
0
    if (cur == NULL) return(NULL);
2959
2960
#ifndef XML_XML_NAMESPACE
2961
    /* xml: prefix is not really a namespace */
2962
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2963
        (cur[2] == 'l') && (cur[3] == ':'))
2964
  return(xmlStrdup(name));
2965
#endif
2966
2967
    /* nasty but well=formed */
2968
0
    if (cur[0] == ':')
2969
0
  return(xmlStrdup(name));
2970
2971
0
    c = *cur++;
2972
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2973
0
  buf[len++] = c;
2974
0
  c = *cur++;
2975
0
    }
2976
0
    if (len >= max) {
2977
  /*
2978
   * Okay someone managed to make a huge name, so he's ready to pay
2979
   * for the processing speed.
2980
   */
2981
0
  max = len * 2;
2982
2983
0
  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2984
0
  if (buffer == NULL) {
2985
0
      xmlErrMemory(ctxt, NULL);
2986
0
      return(NULL);
2987
0
  }
2988
0
  memcpy(buffer, buf, len);
2989
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2990
0
      if (len + 10 > max) {
2991
0
          xmlChar *tmp;
2992
2993
0
    max *= 2;
2994
0
    tmp = (xmlChar *) xmlRealloc(buffer,
2995
0
            max * sizeof(xmlChar));
2996
0
    if (tmp == NULL) {
2997
0
        xmlFree(buffer);
2998
0
        xmlErrMemory(ctxt, NULL);
2999
0
        return(NULL);
3000
0
    }
3001
0
    buffer = tmp;
3002
0
      }
3003
0
      buffer[len++] = c;
3004
0
      c = *cur++;
3005
0
  }
3006
0
  buffer[len] = 0;
3007
0
    }
3008
3009
0
    if ((c == ':') && (*cur == 0)) {
3010
0
        if (buffer != NULL)
3011
0
      xmlFree(buffer);
3012
0
  *prefix = NULL;
3013
0
  return(xmlStrdup(name));
3014
0
    }
3015
3016
0
    if (buffer == NULL)
3017
0
  ret = xmlStrndup(buf, len);
3018
0
    else {
3019
0
  ret = buffer;
3020
0
  buffer = NULL;
3021
0
  max = XML_MAX_NAMELEN;
3022
0
    }
3023
3024
3025
0
    if (c == ':') {
3026
0
  c = *cur;
3027
0
        *prefix = ret;
3028
0
  if (c == 0) {
3029
0
      return(xmlStrndup(BAD_CAST "", 0));
3030
0
  }
3031
0
  len = 0;
3032
3033
  /*
3034
   * Check that the first character is proper to start
3035
   * a new name
3036
   */
3037
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3038
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3039
0
        (c == '_') || (c == ':'))) {
3040
0
      int l;
3041
0
      int first = CUR_SCHAR(cur, l);
3042
3043
0
      if (!IS_LETTER(first) && (first != '_')) {
3044
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3045
0
          "Name %s is not XML Namespace compliant\n",
3046
0
          name);
3047
0
      }
3048
0
  }
3049
0
  cur++;
3050
3051
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3052
0
      buf[len++] = c;
3053
0
      c = *cur++;
3054
0
  }
3055
0
  if (len >= max) {
3056
      /*
3057
       * Okay someone managed to make a huge name, so he's ready to pay
3058
       * for the processing speed.
3059
       */
3060
0
      max = len * 2;
3061
3062
0
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3063
0
      if (buffer == NULL) {
3064
0
          xmlErrMemory(ctxt, NULL);
3065
0
    return(NULL);
3066
0
      }
3067
0
      memcpy(buffer, buf, len);
3068
0
      while (c != 0) { /* tested bigname2.xml */
3069
0
    if (len + 10 > max) {
3070
0
        xmlChar *tmp;
3071
3072
0
        max *= 2;
3073
0
        tmp = (xmlChar *) xmlRealloc(buffer,
3074
0
                max * sizeof(xmlChar));
3075
0
        if (tmp == NULL) {
3076
0
      xmlErrMemory(ctxt, NULL);
3077
0
      xmlFree(buffer);
3078
0
      return(NULL);
3079
0
        }
3080
0
        buffer = tmp;
3081
0
    }
3082
0
    buffer[len++] = c;
3083
0
    c = *cur++;
3084
0
      }
3085
0
      buffer[len] = 0;
3086
0
  }
3087
3088
0
  if (buffer == NULL)
3089
0
      ret = xmlStrndup(buf, len);
3090
0
  else {
3091
0
      ret = buffer;
3092
0
  }
3093
0
    }
3094
3095
0
    return(ret);
3096
0
}
3097
3098
/************************************************************************
3099
 *                  *
3100
 *      The parser itself       *
3101
 *  Relates to http://www.w3.org/TR/REC-xml       *
3102
 *                  *
3103
 ************************************************************************/
3104
3105
/************************************************************************
3106
 *                  *
3107
 *  Routines to parse Name, NCName and NmToken      *
3108
 *                  *
3109
 ************************************************************************/
3110
#ifdef DEBUG
/*
 * Call counters, only compiled in when DEBUG is defined. Each one is
 * meant to be incremented on entry to the name parsing routine it is
 * named after.
 */
3111
static unsigned long nbParseName = 0;
3112
static unsigned long nbParseNmToken = 0;
3113
static unsigned long nbParseNCName = 0;
3114
static unsigned long nbParseNCNameComplex = 0;
3115
static unsigned long nbParseNameComplex = 0;
3116
static unsigned long nbParseStringName = 0;
3117
#endif
3118
3119
/*
3120
 * The two following functions are related to the change of accepted
3121
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3122
 * They correspond to the modified production [4] and the new production [4a]
3123
 * changes in that revision. Also note that the macros used for the
3124
 * productions Letter, Digit, CombiningChar and Extender are not needed
3125
 * anymore.
3126
 * We still keep compatibility to pre-revision5 parsing semantic if the
3127
 * new XML_PARSE_OLD10 option is given to the parser.
3128
 */
3129
static int
3130
153k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3131
153k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3132
        /*
3133
   * Use the new checks of production [4] [4a] amd [5] of the
3134
   * Update 5 of XML-1.0
3135
   */
3136
153k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3137
153k
      (((c >= 'a') && (c <= 'z')) ||
3138
152k
       ((c >= 'A') && (c <= 'Z')) ||
3139
152k
       (c == '_') || (c == ':') ||
3140
152k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3141
152k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3142
152k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3143
152k
       ((c >= 0x370) && (c <= 0x37D)) ||
3144
152k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3145
152k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3146
152k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3147
152k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148
152k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3149
152k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3150
152k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151
152k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3152
62.9k
      return(1);
3153
153k
    } else {
3154
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3155
0
      return(1);
3156
0
    }
3157
90.0k
    return(0);
3158
153k
}
3159
3160
static int
3161
24.1M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3162
24.1M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3163
        /*
3164
   * Use the new checks of production [4] [4a] amd [5] of the
3165
   * Update 5 of XML-1.0
3166
   */
3167
24.1M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3168
24.1M
      (((c >= 'a') && (c <= 'z')) ||
3169
24.1M
       ((c >= 'A') && (c <= 'Z')) ||
3170
24.1M
       ((c >= '0') && (c <= '9')) || /* !start */
3171
24.1M
       (c == '_') || (c == ':') ||
3172
24.1M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3173
24.1M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3174
24.1M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3175
24.1M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3176
24.1M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3177
24.1M
       ((c >= 0x370) && (c <= 0x37D)) ||
3178
24.1M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179
24.1M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3180
24.1M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3181
24.1M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3182
24.1M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3183
24.1M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3184
24.1M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3185
24.1M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3186
24.1M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3187
24.0M
       return(1);
3188
24.1M
    } else {
3189
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3190
0
            (c == '.') || (c == '-') ||
3191
0
      (c == '_') || (c == ':') ||
3192
0
      (IS_COMBINING(c)) ||
3193
0
      (IS_EXTENDER(c)))
3194
0
      return(1);
3195
0
    }
3196
53.0k
    return(0);
3197
24.1M
}
3198
3199
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3200
                                          int *len, int *alloc, int normalize);
3201
3202
static const xmlChar *
3203
250k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3204
250k
    int len = 0, l;
3205
250k
    int c;
3206
250k
    int count = 0;
3207
3208
#ifdef DEBUG
3209
    nbParseNameComplex++;
3210
#endif
3211
3212
    /*
3213
     * Handler for more complex cases
3214
     */
3215
250k
    GROW;
3216
250k
    if (ctxt->instate == XML_PARSER_EOF)
3217
0
        return(NULL);
3218
250k
    c = CUR_CHAR(l);
3219
250k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3220
        /*
3221
   * Use the new checks of production [4] [4a] amd [5] of the
3222
   * Update 5 of XML-1.0
3223
   */
3224
250k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3225
250k
      (!(((c >= 'a') && (c <= 'z')) ||
3226
242k
         ((c >= 'A') && (c <= 'Z')) ||
3227
242k
         (c == '_') || (c == ':') ||
3228
242k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3229
242k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3230
242k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3231
242k
         ((c >= 0x370) && (c <= 0x37D)) ||
3232
242k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3233
242k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3234
242k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3235
242k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3236
242k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3237
242k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3238
242k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3239
242k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3240
184k
      return(NULL);
3241
184k
  }
3242
66.8k
  len += l;
3243
66.8k
  NEXTL(l);
3244
66.8k
  c = CUR_CHAR(l);
3245
20.6M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3246
20.6M
         (((c >= 'a') && (c <= 'z')) ||
3247
20.6M
          ((c >= 'A') && (c <= 'Z')) ||
3248
20.6M
          ((c >= '0') && (c <= '9')) || /* !start */
3249
20.6M
          (c == '_') || (c == ':') ||
3250
20.6M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3251
20.6M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3252
20.6M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3253
20.6M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3254
20.6M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3255
20.6M
          ((c >= 0x370) && (c <= 0x37D)) ||
3256
20.6M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3257
20.6M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3258
20.6M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3259
20.6M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3260
20.6M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
20.6M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
20.6M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
20.6M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
20.6M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3265
20.6M
    )) {
3266
20.5M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3267
190k
    count = 0;
3268
190k
    GROW;
3269
190k
                if (ctxt->instate == XML_PARSER_EOF)
3270
1
                    return(NULL);
3271
190k
      }
3272
20.5M
      len += l;
3273
20.5M
      NEXTL(l);
3274
20.5M
      c = CUR_CHAR(l);
3275
20.5M
  }
3276
66.8k
    } else {
3277
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3278
0
      (!IS_LETTER(c) && (c != '_') &&
3279
0
       (c != ':'))) {
3280
0
      return(NULL);
3281
0
  }
3282
0
  len += l;
3283
0
  NEXTL(l);
3284
0
  c = CUR_CHAR(l);
3285
3286
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3287
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3288
0
    (c == '.') || (c == '-') ||
3289
0
    (c == '_') || (c == ':') ||
3290
0
    (IS_COMBINING(c)) ||
3291
0
    (IS_EXTENDER(c)))) {
3292
0
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3293
0
    count = 0;
3294
0
    GROW;
3295
0
                if (ctxt->instate == XML_PARSER_EOF)
3296
0
                    return(NULL);
3297
0
      }
3298
0
      len += l;
3299
0
      NEXTL(l);
3300
0
      c = CUR_CHAR(l);
3301
0
  }
3302
0
    }
3303
66.8k
    if ((len > XML_MAX_NAME_LENGTH) &&
3304
66.8k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3305
477
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3306
477
        return(NULL);
3307
477
    }
3308
66.3k
    if (ctxt->input->cur - ctxt->input->base < len) {
3309
        /*
3310
         * There were a couple of bugs where PERefs lead to to a change
3311
         * of the buffer. Check the buffer size to avoid passing an invalid
3312
         * pointer to xmlDictLookup.
3313
         */
3314
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3315
0
                    "unexpected change of input buffer");
3316
0
        return (NULL);
3317
0
    }
3318
66.3k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3319
434
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3320
65.8k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3321
66.3k
}
3322
3323
/**
3324
 * xmlParseName:
3325
 * @ctxt:  an XML parser context
3326
 *
3327
 * parse an XML name.
3328
 *
3329
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3330
 *                  CombiningChar | Extender
3331
 *
3332
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3333
 *
3334
 * [6] Names ::= Name (#x20 Name)*
3335
 *
3336
 * Returns the Name parsed or NULL
3337
 */
3338
3339
const xmlChar *
3340
634k
xmlParseName(xmlParserCtxtPtr ctxt) {
3341
634k
    const xmlChar *in;
3342
634k
    const xmlChar *ret;
3343
634k
    int count = 0;
3344
3345
634k
    GROW;
3346
3347
#ifdef DEBUG
3348
    nbParseName++;
3349
#endif
3350
3351
    /*
3352
     * Accelerator for simple ASCII names
3353
     */
3354
634k
    in = ctxt->input->cur;
3355
634k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3356
634k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3357
634k
  (*in == '_') || (*in == ':')) {
3358
411k
  in++;
3359
2.15M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3360
2.15M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3361
2.15M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3362
2.15M
         (*in == '_') || (*in == '-') ||
3363
2.15M
         (*in == ':') || (*in == '.'))
3364
1.74M
      in++;
3365
411k
  if ((*in > 0) && (*in < 0x80)) {
3366
383k
      count = in - ctxt->input->cur;
3367
383k
            if ((count > XML_MAX_NAME_LENGTH) &&
3368
383k
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3369
13
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3370
13
                return(NULL);
3371
13
            }
3372
383k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3373
383k
      ctxt->input->cur = in;
3374
383k
      ctxt->input->col += count;
3375
383k
      if (ret == NULL)
3376
0
          xmlErrMemory(ctxt, NULL);
3377
383k
      return(ret);
3378
383k
  }
3379
411k
    }
3380
    /* accelerator for special cases */
3381
250k
    return(xmlParseNameComplex(ctxt));
3382
634k
}
3383
3384
static const xmlChar *
3385
165k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3386
165k
    int len = 0, l;
3387
165k
    int c;
3388
165k
    int count = 0;
3389
165k
    size_t startPosition = 0;
3390
3391
#ifdef DEBUG
3392
    nbParseNCNameComplex++;
3393
#endif
3394
3395
    /*
3396
     * Handler for more complex cases
3397
     */
3398
165k
    GROW;
3399
165k
    startPosition = CUR_PTR - BASE_PTR;
3400
165k
    c = CUR_CHAR(l);
3401
165k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3402
165k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3403
125k
  return(NULL);
3404
125k
    }
3405
3406
10.1M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3407
10.1M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3408
10.1M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3409
95.5k
            if ((len > XML_MAX_NAME_LENGTH) &&
3410
95.5k
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3411
160
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3412
160
                return(NULL);
3413
160
            }
3414
95.3k
      count = 0;
3415
95.3k
      GROW;
3416
95.3k
            if (ctxt->instate == XML_PARSER_EOF)
3417
5
                return(NULL);
3418
95.3k
  }
3419
10.1M
  len += l;
3420
10.1M
  NEXTL(l);
3421
10.1M
  c = CUR_CHAR(l);
3422
10.1M
  if (c == 0) {
3423
1.21k
      count = 0;
3424
      /*
3425
       * when shrinking to extend the buffer we really need to preserve
3426
       * the part of the name we already parsed. Hence rolling back
3427
       * by current length.
3428
       */
3429
1.21k
      ctxt->input->cur -= l;
3430
1.21k
      GROW;
3431
1.21k
            if (ctxt->instate == XML_PARSER_EOF)
3432
4
                return(NULL);
3433
1.21k
      ctxt->input->cur += l;
3434
1.21k
      c = CUR_CHAR(l);
3435
1.21k
  }
3436
10.1M
    }
3437
39.6k
    if ((len > XML_MAX_NAME_LENGTH) &&
3438
39.6k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3439
75
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3440
75
        return(NULL);
3441
75
    }
3442
39.5k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3443
39.6k
}
3444
3445
/**
3446
 * xmlParseNCName:
3447
 * @ctxt:  an XML parser context
3448
 * @len:  length of the string parsed
3449
 *
3450
 * parse an XML name.
3451
 *
3452
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3453
 *                      CombiningChar | Extender
3454
 *
3455
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3456
 *
3457
 * Returns the Name parsed or NULL
3458
 */
3459
3460
static const xmlChar *
3461
855k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3462
855k
    const xmlChar *in, *e;
3463
855k
    const xmlChar *ret;
3464
855k
    int count = 0;
3465
3466
#ifdef DEBUG
3467
    nbParseNCName++;
3468
#endif
3469
3470
    /*
3471
     * Accelerator for simple ASCII names
3472
     */
3473
855k
    in = ctxt->input->cur;
3474
855k
    e = ctxt->input->end;
3475
855k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476
855k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3477
855k
   (*in == '_')) && (in < e)) {
3478
700k
  in++;
3479
2.54M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3480
2.54M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3481
2.54M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3482
2.54M
          (*in == '_') || (*in == '-') ||
3483
2.54M
          (*in == '.')) && (in < e))
3484
1.84M
      in++;
3485
700k
  if (in >= e)
3486
10
      goto complex;
3487
700k
  if ((*in > 0) && (*in < 0x80)) {
3488
689k
      count = in - ctxt->input->cur;
3489
689k
            if ((count > XML_MAX_NAME_LENGTH) &&
3490
689k
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3491
10
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3492
10
                return(NULL);
3493
10
            }
3494
689k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3495
689k
      ctxt->input->cur = in;
3496
689k
      ctxt->input->col += count;
3497
689k
      if (ret == NULL) {
3498
0
          xmlErrMemory(ctxt, NULL);
3499
0
      }
3500
689k
      return(ret);
3501
689k
  }
3502
700k
    }
3503
165k
complex:
3504
165k
    return(xmlParseNCNameComplex(ctxt));
3505
855k
}
3506
3507
/**
3508
 * xmlParseNameAndCompare:
3509
 * @ctxt:  an XML parser context
3510
 *
3511
 * parse an XML name and compares for match
3512
 * (specialized for endtag parsing)
3513
 *
3514
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3515
 * and the name for mismatch
3516
 */
3517
3518
static const xmlChar *
3519
6.91k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3520
6.91k
    register const xmlChar *cmp = other;
3521
6.91k
    register const xmlChar *in;
3522
6.91k
    const xmlChar *ret;
3523
3524
6.91k
    GROW;
3525
6.91k
    if (ctxt->instate == XML_PARSER_EOF)
3526
0
        return(NULL);
3527
3528
6.91k
    in = ctxt->input->cur;
3529
16.7k
    while (*in != 0 && *in == *cmp) {
3530
9.83k
  ++in;
3531
9.83k
  ++cmp;
3532
9.83k
    }
3533
6.91k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3534
  /* success */
3535
1.74k
  ctxt->input->col += in - ctxt->input->cur;
3536
1.74k
  ctxt->input->cur = in;
3537
1.74k
  return (const xmlChar*) 1;
3538
1.74k
    }
3539
    /* failure (or end of input buffer), check with full function */
3540
5.17k
    ret = xmlParseName (ctxt);
3541
    /* strings coming from the dictionary direct compare possible */
3542
5.17k
    if (ret == other) {
3543
816
  return (const xmlChar*) 1;
3544
816
    }
3545
4.35k
    return ret;
3546
5.17k
}
3547
3548
/**
3549
 * xmlParseStringName:
3550
 * @ctxt:  an XML parser context
3551
 * @str:  a pointer to the string pointer (IN/OUT)
3552
 *
3553
 * parse an XML name.
3554
 *
3555
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3556
 *                  CombiningChar | Extender
3557
 *
3558
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3559
 *
3560
 * [6] Names ::= Name (#x20 Name)*
3561
 *
3562
 * Returns the Name parsed or NULL. The @str pointer
3563
 * is updated to the current location in the string.
3564
 */
3565
3566
static xmlChar *
3567
10.0k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3568
10.0k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3569
10.0k
    const xmlChar *cur = *str;
3570
10.0k
    int len = 0, l;
3571
10.0k
    int c;
3572
3573
#ifdef DEBUG
3574
    nbParseStringName++;
3575
#endif
3576
3577
10.0k
    c = CUR_SCHAR(cur, l);
3578
10.0k
    if (!xmlIsNameStartChar(ctxt, c)) {
3579
2.38k
  return(NULL);
3580
2.38k
    }
3581
3582
7.63k
    COPY_BUF(l,buf,len,c);
3583
7.63k
    cur += l;
3584
7.63k
    c = CUR_SCHAR(cur, l);
3585
146k
    while (xmlIsNameChar(ctxt, c)) {
3586
142k
  COPY_BUF(l,buf,len,c);
3587
142k
  cur += l;
3588
142k
  c = CUR_SCHAR(cur, l);
3589
142k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3590
      /*
3591
       * Okay someone managed to make a huge name, so he's ready to pay
3592
       * for the processing speed.
3593
       */
3594
3.18k
      xmlChar *buffer;
3595
3.18k
      int max = len * 2;
3596
3597
3.18k
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3598
3.18k
      if (buffer == NULL) {
3599
0
          xmlErrMemory(ctxt, NULL);
3600
0
    return(NULL);
3601
0
      }
3602
3.18k
      memcpy(buffer, buf, len);
3603
9.99M
      while (xmlIsNameChar(ctxt, c)) {
3604
9.98M
    if (len + 10 > max) {
3605
10.5k
        xmlChar *tmp;
3606
3607
10.5k
                    if ((len > XML_MAX_NAME_LENGTH) &&
3608
10.5k
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3609
370
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3610
370
      xmlFree(buffer);
3611
370
                        return(NULL);
3612
370
                    }
3613
10.1k
        max *= 2;
3614
10.1k
        tmp = (xmlChar *) xmlRealloc(buffer,
3615
10.1k
                                  max * sizeof(xmlChar));
3616
10.1k
        if (tmp == NULL) {
3617
0
      xmlErrMemory(ctxt, NULL);
3618
0
      xmlFree(buffer);
3619
0
      return(NULL);
3620
0
        }
3621
10.1k
        buffer = tmp;
3622
10.1k
    }
3623
9.98M
    COPY_BUF(l,buffer,len,c);
3624
9.98M
    cur += l;
3625
9.98M
    c = CUR_SCHAR(cur, l);
3626
9.98M
      }
3627
2.81k
      buffer[len] = 0;
3628
2.81k
      *str = cur;
3629
2.81k
      return(buffer);
3630
3.18k
  }
3631
142k
    }
3632
4.44k
    if ((len > XML_MAX_NAME_LENGTH) &&
3633
4.44k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3634
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3635
0
        return(NULL);
3636
0
    }
3637
4.44k
    *str = cur;
3638
4.44k
    return(xmlStrndup(buf, len));
3639
4.44k
}
3640
3641
/**
3642
 * xmlParseNmtoken:
3643
 * @ctxt:  an XML parser context
3644
 *
3645
 * parse an XML Nmtoken.
3646
 *
3647
 * [7] Nmtoken ::= (NameChar)+
3648
 *
3649
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3650
 *
3651
 * Returns the Nmtoken parsed or NULL
3652
 */
3653
3654
xmlChar *
3655
25.0k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3656
25.0k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3657
25.0k
    int len = 0, l;
3658
25.0k
    int c;
3659
25.0k
    int count = 0;
3660
3661
#ifdef DEBUG
3662
    nbParseNmToken++;
3663
#endif
3664
3665
25.0k
    GROW;
3666
25.0k
    if (ctxt->instate == XML_PARSER_EOF)
3667
0
        return(NULL);
3668
25.0k
    c = CUR_CHAR(l);
3669
3670
142k
    while (xmlIsNameChar(ctxt, c)) {
3671
119k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3672
0
      count = 0;
3673
0
      GROW;
3674
0
  }
3675
119k
  COPY_BUF(l,buf,len,c);
3676
119k
  NEXTL(l);
3677
119k
  c = CUR_CHAR(l);
3678
119k
  if (c == 0) {
3679
70
      count = 0;
3680
70
      GROW;
3681
70
      if (ctxt->instate == XML_PARSER_EOF)
3682
0
    return(NULL);
3683
70
            c = CUR_CHAR(l);
3684
70
  }
3685
119k
  if (len >= XML_MAX_NAMELEN) {
3686
      /*
3687
       * Okay someone managed to make a huge token, so he's ready to pay
3688
       * for the processing speed.
3689
       */
3690
2.09k
      xmlChar *buffer;
3691
2.09k
      int max = len * 2;
3692
3693
2.09k
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3694
2.09k
      if (buffer == NULL) {
3695
0
          xmlErrMemory(ctxt, NULL);
3696
0
    return(NULL);
3697
0
      }
3698
2.09k
      memcpy(buffer, buf, len);
3699
3.69M
      while (xmlIsNameChar(ctxt, c)) {
3700
3.68M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3701
36.1k
        count = 0;
3702
36.1k
        GROW;
3703
36.1k
                    if (ctxt->instate == XML_PARSER_EOF) {
3704
0
                        xmlFree(buffer);
3705
0
                        return(NULL);
3706
0
                    }
3707
36.1k
    }
3708
3.68M
    if (len + 10 > max) {
3709
4.25k
        xmlChar *tmp;
3710
3711
4.25k
                    if ((max > XML_MAX_NAME_LENGTH) &&
3712
4.25k
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3713
131
                        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3714
131
                        xmlFree(buffer);
3715
131
                        return(NULL);
3716
131
                    }
3717
4.12k
        max *= 2;
3718
4.12k
        tmp = (xmlChar *) xmlRealloc(buffer,
3719
4.12k
                                  max * sizeof(xmlChar));
3720
4.12k
        if (tmp == NULL) {
3721
0
      xmlErrMemory(ctxt, NULL);
3722
0
      xmlFree(buffer);
3723
0
      return(NULL);
3724
0
        }
3725
4.12k
        buffer = tmp;
3726
4.12k
    }
3727
3.68M
    COPY_BUF(l,buffer,len,c);
3728
3.68M
    NEXTL(l);
3729
3.68M
    c = CUR_CHAR(l);
3730
3.68M
      }
3731
1.96k
      buffer[len] = 0;
3732
1.96k
      return(buffer);
3733
2.09k
  }
3734
119k
    }
3735
22.9k
    if (len == 0)
3736
14.8k
        return(NULL);
3737
8.16k
    if ((len > XML_MAX_NAME_LENGTH) &&
3738
8.16k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3739
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3740
0
        return(NULL);
3741
0
    }
3742
8.16k
    return(xmlStrndup(buf, len));
3743
8.16k
}
3744
3745
/**
3746
 * xmlParseEntityValue:
3747
 * @ctxt:  an XML parser context
3748
 * @orig:  if non-NULL store a copy of the original entity value
3749
 *
3750
 * parse a value for ENTITY declarations
3751
 *
3752
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3753
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3754
 *
3755
 * Returns the EntityValue parsed with reference substituted or NULL
3756
 */
3757
3758
xmlChar *
3759
20.2k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3760
20.2k
    xmlChar *buf = NULL;
3761
20.2k
    int len = 0;
3762
20.2k
    int size = XML_PARSER_BUFFER_SIZE;
3763
20.2k
    int c, l;
3764
20.2k
    xmlChar stop;
3765
20.2k
    xmlChar *ret = NULL;
3766
20.2k
    const xmlChar *cur = NULL;
3767
20.2k
    xmlParserInputPtr input;
3768
3769
20.2k
    if (RAW == '"') stop = '"';
3770
6.41k
    else if (RAW == '\'') stop = '\'';
3771
0
    else {
3772
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3773
0
  return(NULL);
3774
0
    }
3775
20.2k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3776
20.2k
    if (buf == NULL) {
3777
0
  xmlErrMemory(ctxt, NULL);
3778
0
  return(NULL);
3779
0
    }
3780
3781
    /*
3782
     * The content of the entity definition is copied in a buffer.
3783
     */
3784
3785
20.2k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3786
20.2k
    input = ctxt->input;
3787
20.2k
    GROW;
3788
20.2k
    if (ctxt->instate == XML_PARSER_EOF)
3789
0
        goto error;
3790
20.2k
    NEXT;
3791
20.2k
    c = CUR_CHAR(l);
3792
    /*
3793
     * NOTE: 4.4.5 Included in Literal
3794
     * When a parameter entity reference appears in a literal entity
3795
     * value, ... a single or double quote character in the replacement
3796
     * text is always treated as a normal data character and will not
3797
     * terminate the literal.
3798
     * In practice it means we stop the loop only when back at parsing
3799
     * the initial entity and the quote is found
3800
     */
3801
65.0M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3802
65.0M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3803
65.0M
  if (len + 5 >= size) {
3804
22.0k
      xmlChar *tmp;
3805
3806
22.0k
      size *= 2;
3807
22.0k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3808
22.0k
      if (tmp == NULL) {
3809
0
    xmlErrMemory(ctxt, NULL);
3810
0
                goto error;
3811
0
      }
3812
22.0k
      buf = tmp;
3813
22.0k
  }
3814
65.0M
  COPY_BUF(l,buf,len,c);
3815
65.0M
  NEXTL(l);
3816
3817
65.0M
  GROW;
3818
65.0M
  c = CUR_CHAR(l);
3819
65.0M
  if (c == 0) {
3820
167
      GROW;
3821
167
      c = CUR_CHAR(l);
3822
167
  }
3823
65.0M
    }
3824
20.2k
    buf[len] = 0;
3825
20.2k
    if (ctxt->instate == XML_PARSER_EOF)
3826
1
        goto error;
3827
20.2k
    if (c != stop) {
3828
217
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3829
217
        goto error;
3830
217
    }
3831
20.0k
    NEXT;
3832
3833
    /*
3834
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3835
     * reference constructs. Note Charref will be handled in
3836
     * xmlStringDecodeEntities()
3837
     */
3838
20.0k
    cur = buf;
3839
126M
    while (*cur != 0) { /* non input consuming */
3840
126M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3841
10.0k
      xmlChar *name;
3842
10.0k
      xmlChar tmp = *cur;
3843
10.0k
            int nameOk = 0;
3844
3845
10.0k
      cur++;
3846
10.0k
      name = xmlParseStringName(ctxt, &cur);
3847
10.0k
            if (name != NULL) {
3848
7.26k
                nameOk = 1;
3849
7.26k
                xmlFree(name);
3850
7.26k
            }
3851
10.0k
            if ((nameOk == 0) || (*cur != ';')) {
3852
6.62k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3853
6.62k
      "EntityValue: '%c' forbidden except for entities references\n",
3854
6.62k
                            tmp);
3855
6.62k
                goto error;
3856
6.62k
      }
3857
3.39k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3858
3.39k
    (ctxt->inputNr == 1)) {
3859
632
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3860
632
                goto error;
3861
632
      }
3862
2.76k
      if (*cur == 0)
3863
0
          break;
3864
2.76k
  }
3865
126M
  cur++;
3866
126M
    }
3867
3868
    /*
3869
     * Then PEReference entities are substituted.
3870
     *
3871
     * NOTE: 4.4.7 Bypassed
3872
     * When a general entity reference appears in the EntityValue in
3873
     * an entity declaration, it is bypassed and left as is.
3874
     * so XML_SUBSTITUTE_REF is not set here.
3875
     */
3876
12.7k
    ++ctxt->depth;
3877
12.7k
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3878
12.7k
                                  0, 0, 0);
3879
12.7k
    --ctxt->depth;
3880
12.7k
    if (orig != NULL) {
3881
12.7k
        *orig = buf;
3882
12.7k
        buf = NULL;
3883
12.7k
    }
3884
3885
20.2k
error:
3886
20.2k
    if (buf != NULL)
3887
7.47k
        xmlFree(buf);
3888
20.2k
    return(ret);
3889
12.7k
}
3890
3891
/**
3892
 * xmlParseAttValueComplex:
3893
 * @ctxt:  an XML parser context
3894
 * @len:   the resulting attribute len
3895
 * @normalize:  whether to apply the inner normalization
3896
 *
3897
 * parse a value for an attribute, this is the fallback function
3898
 * of xmlParseAttValue() when the attribute parsing requires handling
3899
 * of non-ASCII characters, or normalization compaction.
3900
 *
3901
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3902
 */
3903
static xmlChar *
3904
89.5k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3905
89.5k
    xmlChar limit = 0;
3906
89.5k
    xmlChar *buf = NULL;
3907
89.5k
    xmlChar *rep = NULL;
3908
89.5k
    size_t len = 0;
3909
89.5k
    size_t buf_size = 0;
3910
89.5k
    int c, l, in_space = 0;
3911
89.5k
    xmlChar *current = NULL;
3912
89.5k
    xmlEntityPtr ent;
3913
3914
89.5k
    if (NXT(0) == '"') {
3915
83.8k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3916
83.8k
  limit = '"';
3917
83.8k
        NEXT;
3918
83.8k
    } else if (NXT(0) == '\'') {
3919
5.64k
  limit = '\'';
3920
5.64k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3921
5.64k
        NEXT;
3922
5.64k
    } else {
3923
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3924
0
  return(NULL);
3925
0
    }
3926
3927
    /*
3928
     * allocate a translation buffer.
3929
     */
3930
89.5k
    buf_size = XML_PARSER_BUFFER_SIZE;
3931
89.5k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3932
89.5k
    if (buf == NULL) goto mem_error;
3933
3934
    /*
3935
     * OK loop until we reach one of the ending char or a size limit.
3936
     */
3937
89.5k
    c = CUR_CHAR(l);
3938
330M
    while (((NXT(0) != limit) && /* checked */
3939
330M
            (IS_CHAR(c)) && (c != '<')) &&
3940
330M
            (ctxt->instate != XML_PARSER_EOF)) {
3941
        /*
3942
         * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3943
         * special option is given
3944
         */
3945
330M
        if ((len > XML_MAX_TEXT_LENGTH) &&
3946
330M
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3947
1
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3948
1
                           "AttValue length too long\n");
3949
1
            goto mem_error;
3950
1
        }
3951
330M
  if (c == '&') {
3952
162k
      in_space = 0;
3953
162k
      if (NXT(1) == '#') {
3954
31.9k
    int val = xmlParseCharRef(ctxt);
3955
3956
31.9k
    if (val == '&') {
3957
8.80k
        if (ctxt->replaceEntities) {
3958
0
      if (len + 10 > buf_size) {
3959
0
          growBuffer(buf, 10);
3960
0
      }
3961
0
      buf[len++] = '&';
3962
8.80k
        } else {
3963
      /*
3964
       * The reparsing will be done in xmlStringGetNodeList()
3965
       * called by the attribute() function in SAX.c
3966
       */
3967
8.80k
      if (len + 10 > buf_size) {
3968
608
          growBuffer(buf, 10);
3969
608
      }
3970
8.80k
      buf[len++] = '&';
3971
8.80k
      buf[len++] = '#';
3972
8.80k
      buf[len++] = '3';
3973
8.80k
      buf[len++] = '8';
3974
8.80k
      buf[len++] = ';';
3975
8.80k
        }
3976
23.0k
    } else if (val != 0) {
3977
4.93k
        if (len + 10 > buf_size) {
3978
518
      growBuffer(buf, 10);
3979
518
        }
3980
4.93k
        len += xmlCopyChar(0, &buf[len], val);
3981
4.93k
    }
3982
130k
      } else {
3983
130k
    ent = xmlParseEntityRef(ctxt);
3984
130k
    ctxt->nbentities++;
3985
130k
    if (ent != NULL)
3986
14.0k
        ctxt->nbentities += ent->owner;
3987
130k
    if ((ent != NULL) &&
3988
130k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3989
14.0k
        if (len + 10 > buf_size) {
3990
424
      growBuffer(buf, 10);
3991
424
        }
3992
14.0k
        if ((ctxt->replaceEntities == 0) &&
3993
14.0k
            (ent->content[0] == '&')) {
3994
10.6k
      buf[len++] = '&';
3995
10.6k
      buf[len++] = '#';
3996
10.6k
      buf[len++] = '3';
3997
10.6k
      buf[len++] = '8';
3998
10.6k
      buf[len++] = ';';
3999
10.6k
        } else {
4000
3.41k
      buf[len++] = ent->content[0];
4001
3.41k
        }
4002
116k
    } else if ((ent != NULL) &&
4003
116k
               (ctxt->replaceEntities != 0)) {
4004
0
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4005
0
      ++ctxt->depth;
4006
0
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4007
0
                  XML_SUBSTITUTE_REF,
4008
0
                  0, 0, 0);
4009
0
      --ctxt->depth;
4010
0
      if (rep != NULL) {
4011
0
          current = rep;
4012
0
          while (*current != 0) { /* non input consuming */
4013
0
                                if ((*current == 0xD) || (*current == 0xA) ||
4014
0
                                    (*current == 0x9)) {
4015
0
                                    buf[len++] = 0x20;
4016
0
                                    current++;
4017
0
                                } else
4018
0
                                    buf[len++] = *current++;
4019
0
        if (len + 10 > buf_size) {
4020
0
            growBuffer(buf, 10);
4021
0
        }
4022
0
          }
4023
0
          xmlFree(rep);
4024
0
          rep = NULL;
4025
0
      }
4026
0
        } else {
4027
0
      if (len + 10 > buf_size) {
4028
0
          growBuffer(buf, 10);
4029
0
      }
4030
0
      if (ent->content != NULL)
4031
0
          buf[len++] = ent->content[0];
4032
0
        }
4033
116k
    } else if (ent != NULL) {
4034
0
        int i = xmlStrlen(ent->name);
4035
0
        const xmlChar *cur = ent->name;
4036
4037
        /*
4038
         * This may look absurd but is needed to detect
4039
         * entities problems
4040
         */
4041
0
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4042
0
      (ent->content != NULL) && (ent->checked == 0)) {
4043
0
      unsigned long oldnbent = ctxt->nbentities, diff;
4044
4045
0
      ++ctxt->depth;
4046
0
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4047
0
              XML_SUBSTITUTE_REF, 0, 0, 0);
4048
0
      --ctxt->depth;
4049
4050
0
                        diff = ctxt->nbentities - oldnbent + 1;
4051
0
                        if (diff > INT_MAX / 2)
4052
0
                            diff = INT_MAX / 2;
4053
0
                        ent->checked = diff * 2;
4054
0
      if (rep != NULL) {
4055
0
          if (xmlStrchr(rep, '<'))
4056
0
              ent->checked |= 1;
4057
0
          xmlFree(rep);
4058
0
          rep = NULL;
4059
0
      } else {
4060
0
                            ent->content[0] = 0;
4061
0
                        }
4062
0
        }
4063
4064
        /*
4065
         * Just output the reference
4066
         */
4067
0
        buf[len++] = '&';
4068
0
        while (len + i + 10 > buf_size) {
4069
0
      growBuffer(buf, i + 10);
4070
0
        }
4071
0
        for (;i > 0;i--)
4072
0
      buf[len++] = *cur++;
4073
0
        buf[len++] = ';';
4074
0
    }
4075
130k
      }
4076
330M
  } else {
4077
330M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4078
157k
          if ((len != 0) || (!normalize)) {
4079
154k
        if ((!normalize) || (!in_space)) {
4080
152k
      COPY_BUF(l,buf,len,0x20);
4081
152k
      while (len + 10 > buf_size) {
4082
554
          growBuffer(buf, 10);
4083
554
      }
4084
152k
        }
4085
154k
        in_space = 1;
4086
154k
    }
4087
330M
      } else {
4088
330M
          in_space = 0;
4089
330M
    COPY_BUF(l,buf,len,c);
4090
330M
    if (len + 10 > buf_size) {
4091
56.3k
        growBuffer(buf, 10);
4092
56.3k
    }
4093
330M
      }
4094
330M
      NEXTL(l);
4095
330M
  }
4096
330M
  GROW;
4097
330M
  c = CUR_CHAR(l);
4098
330M
    }
4099
89.5k
    if (ctxt->instate == XML_PARSER_EOF)
4100
1
        goto error;
4101
4102
89.5k
    if ((in_space) && (normalize)) {
4103
2.24k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4104
1.02k
    }
4105
89.5k
    buf[len] = 0;
4106
89.5k
    if (RAW == '<') {
4107
22.2k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4108
67.2k
    } else if (RAW != limit) {
4109
5.54k
  if ((c != 0) && (!IS_CHAR(c))) {
4110
3.15k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4111
3.15k
         "invalid character in attribute value\n");
4112
3.15k
  } else {
4113
2.39k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4114
2.39k
         "AttValue: ' expected\n");
4115
2.39k
        }
4116
5.54k
    } else
4117
61.7k
  NEXT;
4118
4119
    /*
4120
     * There we potentially risk an overflow, don't allow attribute value of
4121
     * length more than INT_MAX it is a very reasonable assumption !
4122
     */
4123
89.5k
    if (len >= INT_MAX) {
4124
0
        xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4125
0
                       "AttValue length too long\n");
4126
0
        goto mem_error;
4127
0
    }
4128
4129
89.5k
    if (attlen != NULL) *attlen = (int) len;
4130
89.5k
    return(buf);
4131
4132
1
mem_error:
4133
1
    xmlErrMemory(ctxt, NULL);
4134
2
error:
4135
2
    if (buf != NULL)
4136
2
        xmlFree(buf);
4137
2
    if (rep != NULL)
4138
0
        xmlFree(rep);
4139
2
    return(NULL);
4140
1
}
4141
4142
/**
4143
 * xmlParseAttValue:
4144
 * @ctxt:  an XML parser context
4145
 *
4146
 * parse a value for an attribute
4147
 * Note: the parser won't do substitution of entities here, this
4148
 * will be handled later in xmlStringGetNodeList
4149
 *
4150
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4151
 *                   "'" ([^<&'] | Reference)* "'"
4152
 *
4153
 * 3.3.3 Attribute-Value Normalization:
4154
 * Before the value of an attribute is passed to the application or
4155
 * checked for validity, the XML processor must normalize it as follows:
4156
 * - a character reference is processed by appending the referenced
4157
 *   character to the attribute value
4158
 * - an entity reference is processed by recursively processing the
4159
 *   replacement text of the entity
4160
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4161
 *   appending #x20 to the normalized value, except that only a single
4162
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4163
 *   parsed entity or the literal entity value of an internal parsed entity
4164
 * - other characters are processed by appending them to the normalized value
4165
 * If the declared value is not CDATA, then the XML processor must further
4166
 * process the normalized attribute value by discarding any leading and
4167
 * trailing space (#x20) characters, and by replacing sequences of space
4168
 * (#x20) characters by a single space (#x20) character.
4169
 * All attributes for which no declaration has been read should be treated
4170
 * by a non-validating parser as if declared CDATA.
4171
 *
4172
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4173
 */
4174
4175
4176
xmlChar *
4177
35.9k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4178
35.9k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4179
35.9k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4180
35.9k
}
4181
4182
/**
4183
 * xmlParseSystemLiteral:
4184
 * @ctxt:  an XML parser context
4185
 *
4186
 * parse an XML Literal
4187
 *
4188
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4189
 *
4190
 * Returns the SystemLiteral parsed or NULL
4191
 */
4192
4193
xmlChar *
4194
4.52k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4195
4.52k
    xmlChar *buf = NULL;
4196
4.52k
    int len = 0;
4197
4.52k
    int size = XML_PARSER_BUFFER_SIZE;
4198
4.52k
    int cur, l;
4199
4.52k
    xmlChar stop;
4200
4.52k
    int state = ctxt->instate;
4201
4.52k
    int count = 0;
4202
4203
4.52k
    SHRINK;
4204
4.52k
    if (RAW == '"') {
4205
1.91k
        NEXT;
4206
1.91k
  stop = '"';
4207
2.61k
    } else if (RAW == '\'') {
4208
1.51k
        NEXT;
4209
1.51k
  stop = '\'';
4210
1.51k
    } else {
4211
1.09k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4212
1.09k
  return(NULL);
4213
1.09k
    }
4214
4215
3.43k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4216
3.43k
    if (buf == NULL) {
4217
0
        xmlErrMemory(ctxt, NULL);
4218
0
  return(NULL);
4219
0
    }
4220
3.43k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4221
3.43k
    cur = CUR_CHAR(l);
4222
1.76M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4223
1.76M
  if (len + 5 >= size) {
4224
3.00k
      xmlChar *tmp;
4225
4226
3.00k
            if ((size > XML_MAX_NAME_LENGTH) &&
4227
3.00k
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4228
2
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4229
2
                xmlFree(buf);
4230
2
    ctxt->instate = (xmlParserInputState) state;
4231
2
                return(NULL);
4232
2
            }
4233
2.99k
      size *= 2;
4234
2.99k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4235
2.99k
      if (tmp == NULL) {
4236
0
          xmlFree(buf);
4237
0
    xmlErrMemory(ctxt, NULL);
4238
0
    ctxt->instate = (xmlParserInputState) state;
4239
0
    return(NULL);
4240
0
      }
4241
2.99k
      buf = tmp;
4242
2.99k
  }
4243
1.76M
  count++;
4244
1.76M
  if (count > 50) {
4245
33.8k
      SHRINK;
4246
33.8k
      GROW;
4247
33.8k
      count = 0;
4248
33.8k
            if (ctxt->instate == XML_PARSER_EOF) {
4249
0
          xmlFree(buf);
4250
0
    return(NULL);
4251
0
            }
4252
33.8k
  }
4253
1.76M
  COPY_BUF(l,buf,len,cur);
4254
1.76M
  NEXTL(l);
4255
1.76M
  cur = CUR_CHAR(l);
4256
1.76M
  if (cur == 0) {
4257
170
      GROW;
4258
170
      SHRINK;
4259
170
      cur = CUR_CHAR(l);
4260
170
  }
4261
1.76M
    }
4262
3.43k
    buf[len] = 0;
4263
3.43k
    ctxt->instate = (xmlParserInputState) state;
4264
3.43k
    if (!IS_CHAR(cur)) {
4265
226
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4266
3.20k
    } else {
4267
3.20k
  NEXT;
4268
3.20k
    }
4269
3.43k
    return(buf);
4270
3.43k
}
4271
4272
/**
4273
 * xmlParsePubidLiteral:
4274
 * @ctxt:  an XML parser context
4275
 *
4276
 * parse an XML public literal
4277
 *
4278
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4279
 *
4280
 * Returns the PubidLiteral parsed or NULL.
4281
 */
4282
4283
xmlChar *
4284
3.94k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4285
3.94k
    xmlChar *buf = NULL;
4286
3.94k
    int len = 0;
4287
3.94k
    int size = XML_PARSER_BUFFER_SIZE;
4288
3.94k
    xmlChar cur;
4289
3.94k
    xmlChar stop;
4290
3.94k
    int count = 0;
4291
3.94k
    xmlParserInputState oldstate = ctxt->instate;
4292
4293
3.94k
    SHRINK;
4294
3.94k
    if (RAW == '"') {
4295
1.63k
        NEXT;
4296
1.63k
  stop = '"';
4297
2.30k
    } else if (RAW == '\'') {
4298
1.46k
        NEXT;
4299
1.46k
  stop = '\'';
4300
1.46k
    } else {
4301
838
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4302
838
  return(NULL);
4303
838
    }
4304
3.10k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4305
3.10k
    if (buf == NULL) {
4306
0
  xmlErrMemory(ctxt, NULL);
4307
0
  return(NULL);
4308
0
    }
4309
3.10k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4310
3.10k
    cur = CUR;
4311
200k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4312
196k
  if (len + 1 >= size) {
4313
210
      xmlChar *tmp;
4314
4315
210
            if ((size > XML_MAX_NAME_LENGTH) &&
4316
210
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4317
1
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4318
1
                xmlFree(buf);
4319
1
                return(NULL);
4320
1
            }
4321
209
      size *= 2;
4322
209
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4323
209
      if (tmp == NULL) {
4324
0
    xmlErrMemory(ctxt, NULL);
4325
0
    xmlFree(buf);
4326
0
    return(NULL);
4327
0
      }
4328
209
      buf = tmp;
4329
209
  }
4330
196k
  buf[len++] = cur;
4331
196k
  count++;
4332
196k
  if (count > 50) {
4333
3.70k
      SHRINK;
4334
3.70k
      GROW;
4335
3.70k
      count = 0;
4336
3.70k
            if (ctxt->instate == XML_PARSER_EOF) {
4337
0
    xmlFree(buf);
4338
0
    return(NULL);
4339
0
            }
4340
3.70k
  }
4341
196k
  NEXT;
4342
196k
  cur = CUR;
4343
196k
  if (cur == 0) {
4344
51
      GROW;
4345
51
      SHRINK;
4346
51
      cur = CUR;
4347
51
  }
4348
196k
    }
4349
3.10k
    buf[len] = 0;
4350
3.10k
    if (cur != stop) {
4351
974
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4352
2.13k
    } else {
4353
2.13k
  NEXT;
4354
2.13k
    }
4355
3.10k
    ctxt->instate = oldstate;
4356
3.10k
    return(buf);
4357
3.10k
}
4358
4359
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4360
4361
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. An entry equal to its own index marks a byte
 * accepted by that test; a zero entry marks a byte that is not: the
 * control characters other than 0x9 (CR/LF are handled separately),
 * '&', '<', ']' and every non-ASCII byte. Bytes 0x80-0xFF are not
 * listed and are therefore zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x9) is kept */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is cleared */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is cleared */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is cleared */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly 0x00 */
};
4398
4399
/**
4400
 * xmlParseCharData:
4401
 * @ctxt:  an XML parser context
4402
 * @cdata:  int indicating whether we are within a CDATA section
4403
 *
4404
 * parse a CharData section.
4405
 * if we are within a CDATA section ']]>' marks an end of section.
4406
 *
4407
 * The right angle bracket (>) may be represented using the string "&gt;",
4408
 * and must, for compatibility, be escaped using "&gt;" or a character
4409
 * reference when it appears in the string "]]>" in content, when that
4410
 * string is not marking the end of a CDATA section.
4411
 *
4412
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4413
 */
4414
4415
void
4416
234k
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
4417
234k
    const xmlChar *in;
4418
234k
    int nbchar = 0;
4419
234k
    int line = ctxt->input->line;
4420
234k
    int col = ctxt->input->col;
4421
234k
    int ccol;
4422
4423
234k
    SHRINK;
4424
234k
    GROW;
4425
    /*
4426
     * Accelerated common case where input don't need to be
4427
     * modified before passing it to the handler.
4428
     */
4429
234k
    if (!cdata) {
4430
234k
  in = ctxt->input->cur;
4431
235k
  do {
4432
239k
get_more_space:
4433
244k
      while (*in == 0x20) { in++; ctxt->input->col++; }
4434
239k
      if (*in == 0xA) {
4435
5.15k
    do {
4436
5.15k
        ctxt->input->line++; ctxt->input->col = 1;
4437
5.15k
        in++;
4438
5.15k
    } while (*in == 0xA);
4439
3.95k
    goto get_more_space;
4440
3.95k
      }
4441
235k
      if (*in == '<') {
4442
2.28k
    nbchar = in - ctxt->input->cur;
4443
2.28k
    if (nbchar > 0) {
4444
2.28k
        const xmlChar *tmp = ctxt->input->cur;
4445
2.28k
        ctxt->input->cur = in;
4446
4447
2.28k
        if ((ctxt->sax != NULL) &&
4448
2.28k
            (ctxt->sax->ignorableWhitespace !=
4449
2.28k
             ctxt->sax->characters)) {
4450
2.28k
      if (areBlanks(ctxt, tmp, nbchar, 1)) {
4451
0
          if (ctxt->sax->ignorableWhitespace != NULL)
4452
0
        ctxt->sax->ignorableWhitespace(ctxt->userData,
4453
0
                   tmp, nbchar);
4454
2.28k
      } else {
4455
2.28k
          if (ctxt->sax->characters != NULL)
4456
2.28k
        ctxt->sax->characters(ctxt->userData,
4457
2.28k
                  tmp, nbchar);
4458
2.28k
          if (*ctxt->space == -1)
4459
768
              *ctxt->space = -2;
4460
2.28k
      }
4461
2.28k
        } else if ((ctxt->sax != NULL) &&
4462
0
                   (ctxt->sax->characters != NULL)) {
4463
0
      ctxt->sax->characters(ctxt->userData,
4464
0
                tmp, nbchar);
4465
0
        }
4466
2.28k
    }
4467
2.28k
    return;
4468
2.28k
      }
4469
4470
240k
get_more:
4471
240k
            ccol = ctxt->input->col;
4472
1.95M
      while (test_char_data[*in]) {
4473
1.71M
    in++;
4474
1.71M
    ccol++;
4475
1.71M
      }
4476
240k
      ctxt->input->col = ccol;
4477
240k
      if (*in == 0xA) {
4478
5.46k
    do {
4479
5.46k
        ctxt->input->line++; ctxt->input->col = 1;
4480
5.46k
        in++;
4481
5.46k
    } while (*in == 0xA);
4482
3.60k
    goto get_more;
4483
3.60k
      }
4484
236k
      if (*in == ']') {
4485
3.95k
    if ((in[1] == ']') && (in[2] == '>')) {
4486
581
        xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4487
581
        ctxt->input->cur = in + 1;
4488
581
        return;
4489
581
    }
4490
3.37k
    in++;
4491
3.37k
    ctxt->input->col++;
4492
3.37k
    goto get_more;
4493
3.95k
      }
4494
232k
      nbchar = in - ctxt->input->cur;
4495
232k
      if (nbchar > 0) {
4496
135k
    if ((ctxt->sax != NULL) &&
4497
135k
        (ctxt->sax->ignorableWhitespace !=
4498
135k
         ctxt->sax->characters) &&
4499
135k
        (IS_BLANK_CH(*ctxt->input->cur))) {
4500
7.59k
        const xmlChar *tmp = ctxt->input->cur;
4501
7.59k
        ctxt->input->cur = in;
4502
4503
7.59k
        if (areBlanks(ctxt, tmp, nbchar, 0)) {
4504
0
            if (ctxt->sax->ignorableWhitespace != NULL)
4505
0
          ctxt->sax->ignorableWhitespace(ctxt->userData,
4506
0
                 tmp, nbchar);
4507
7.59k
        } else {
4508
7.59k
            if (ctxt->sax->characters != NULL)
4509
7.59k
          ctxt->sax->characters(ctxt->userData,
4510
7.59k
              tmp, nbchar);
4511
7.59k
      if (*ctxt->space == -1)
4512
3.53k
          *ctxt->space = -2;
4513
7.59k
        }
4514
7.59k
                    line = ctxt->input->line;
4515
7.59k
                    col = ctxt->input->col;
4516
128k
    } else if (ctxt->sax != NULL) {
4517
128k
        if (ctxt->sax->characters != NULL)
4518
128k
      ctxt->sax->characters(ctxt->userData,
4519
128k
                ctxt->input->cur, nbchar);
4520
128k
                    line = ctxt->input->line;
4521
128k
                    col = ctxt->input->col;
4522
128k
    }
4523
                /* something really bad happened in the SAX callback */
4524
135k
                if (ctxt->instate != XML_PARSER_CONTENT)
4525
0
                    return;
4526
135k
      }
4527
232k
      ctxt->input->cur = in;
4528
232k
      if (*in == 0xD) {
4529
8.69k
    in++;
4530
8.69k
    if (*in == 0xA) {
4531
949
        ctxt->input->cur = in;
4532
949
        in++;
4533
949
        ctxt->input->line++; ctxt->input->col = 1;
4534
949
        continue; /* while */
4535
949
    }
4536
7.74k
    in--;
4537
7.74k
      }
4538
231k
      if (*in == '<') {
4539
67.1k
    return;
4540
67.1k
      }
4541
164k
      if (*in == '&') {
4542
32.9k
    return;
4543
32.9k
      }
4544
131k
      SHRINK;
4545
131k
      GROW;
4546
131k
            if (ctxt->instate == XML_PARSER_EOF)
4547
0
    return;
4548
131k
      in = ctxt->input->cur;
4549
132k
  } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4550
131k
  nbchar = 0;
4551
131k
    }
4552
131k
    ctxt->input->line = line;
4553
131k
    ctxt->input->col = col;
4554
131k
    xmlParseCharDataComplex(ctxt, cdata);
4555
131k
}
4556
4557
/**
4558
 * xmlParseCharDataComplex:
4559
 * @ctxt:  an XML parser context
4560
 * @cdata:  int indicating whether we are within a CDATA section
4561
 *
4562
 * parse a CharData section.this is the fallback function
4563
 * of xmlParseCharData() when the parsing requires handling
4564
 * of non-ASCII characters.
4565
 */
4566
static void
4567
131k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4568
131k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4569
131k
    int nbchar = 0;
4570
131k
    int cur, l;
4571
131k
    int count = 0;
4572
4573
131k
    SHRINK;
4574
131k
    GROW;
4575
131k
    cur = CUR_CHAR(l);
4576
58.7M
    while ((cur != '<') && /* checked */
4577
58.7M
           (cur != '&') &&
4578
58.7M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4579
58.6M
  if ((cur == ']') && (NXT(1) == ']') &&
4580
58.6M
      (NXT(2) == '>')) {
4581
753
      if (cdata) break;
4582
753
      else {
4583
753
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4584
753
      }
4585
753
  }
4586
58.6M
  COPY_BUF(l,buf,nbchar,cur);
4587
  /* move current position before possible calling of ctxt->sax->characters */
4588
58.6M
  NEXTL(l);
4589
58.6M
  cur = CUR_CHAR(l);
4590
58.6M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4591
451k
      buf[nbchar] = 0;
4592
4593
      /*
4594
       * OK the segment is to be consumed as chars.
4595
       */
4596
451k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4597
519
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4598
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4599
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4600
0
                                     buf, nbchar);
4601
519
    } else {
4602
519
        if (ctxt->sax->characters != NULL)
4603
519
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4604
519
        if ((ctxt->sax->characters !=
4605
519
             ctxt->sax->ignorableWhitespace) &&
4606
519
      (*ctxt->space == -1))
4607
86
      *ctxt->space = -2;
4608
519
    }
4609
519
      }
4610
451k
      nbchar = 0;
4611
            /* something really bad happened in the SAX callback */
4612
451k
            if (ctxt->instate != XML_PARSER_CONTENT)
4613
0
                return;
4614
451k
  }
4615
58.6M
  count++;
4616
58.6M
  if (count > 50) {
4617
1.13M
      SHRINK;
4618
1.13M
      GROW;
4619
1.13M
      count = 0;
4620
1.13M
            if (ctxt->instate == XML_PARSER_EOF)
4621
0
    return;
4622
1.13M
  }
4623
58.6M
    }
4624
131k
    if (nbchar != 0) {
4625
57.1k
        buf[nbchar] = 0;
4626
  /*
4627
   * OK the segment is to be consumed as chars.
4628
   */
4629
57.1k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4630
1.23k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4631
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4632
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4633
1.23k
      } else {
4634
1.23k
    if (ctxt->sax->characters != NULL)
4635
1.23k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4636
1.23k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4637
1.23k
        (*ctxt->space == -1))
4638
598
        *ctxt->space = -2;
4639
1.23k
      }
4640
1.23k
  }
4641
57.1k
    }
4642
131k
    if ((cur != 0) && (!IS_CHAR(cur))) {
4643
  /* Generate the error and skip the offending character */
4644
80.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4645
80.2k
                          "PCDATA invalid Char value %d\n",
4646
80.2k
                    cur);
4647
80.2k
  NEXTL(l);
4648
80.2k
    }
4649
131k
}
4650
4651
/**
4652
 * xmlParseExternalID:
4653
 * @ctxt:  an XML parser context
4654
 * @publicID:  a xmlChar** receiving PubidLiteral
4655
 * @strict: indicate whether we should restrict parsing to only
4656
 *          production [75], see NOTE below
4657
 *
4658
 * Parse an External ID or a Public ID
4659
 *
4660
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4661
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4662
 *
4663
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4664
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4665
 *
4666
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4667
 *
4668
 * Returns the function returns SystemLiteral and in the second
4669
 *                case publicID receives PubidLiteral, is strict is off
4670
 *                it is possible to return NULL and have publicID set.
4671
 */
4672
4673
xmlChar *
4674
11.7k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4675
11.7k
    xmlChar *URI = NULL;
4676
4677
11.7k
    SHRINK;
4678
4679
11.7k
    *publicID = NULL;
4680
11.7k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4681
1.83k
        SKIP(6);
4682
1.83k
  if (SKIP_BLANKS == 0) {
4683
801
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4684
801
                     "Space required after 'SYSTEM'\n");
4685
801
  }
4686
1.83k
  URI = xmlParseSystemLiteral(ctxt);
4687
1.83k
  if (URI == NULL) {
4688
231
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4689
231
        }
4690
9.92k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4691
3.94k
        SKIP(6);
4692
3.94k
  if (SKIP_BLANKS == 0) {
4693
1.56k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4694
1.56k
        "Space required after 'PUBLIC'\n");
4695
1.56k
  }
4696
3.94k
  *publicID = xmlParsePubidLiteral(ctxt);
4697
3.94k
  if (*publicID == NULL) {
4698
839
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4699
839
  }
4700
3.94k
  if (strict) {
4701
      /*
4702
       * We don't handle [83] so "S SystemLiteral" is required.
4703
       */
4704
2.40k
      if (SKIP_BLANKS == 0) {
4705
1.08k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4706
1.08k
      "Space required after the Public Identifier\n");
4707
1.08k
      }
4708
2.40k
  } else {
4709
      /*
4710
       * We handle [83] so we return immediately, if
4711
       * "S SystemLiteral" is not detected. We skip blanks if no
4712
             * system literal was found, but this is harmless since we must
4713
             * be at the end of a NotationDecl.
4714
       */
4715
1.53k
      if (SKIP_BLANKS == 0) return(NULL);
4716
546
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4717
546
  }
4718
2.69k
  URI = xmlParseSystemLiteral(ctxt);
4719
2.69k
  if (URI == NULL) {
4720
867
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4721
867
        }
4722
2.69k
    }
4723
10.5k
    return(URI);
4724
11.7k
}
4725
4726
/**
4727
 * xmlParseCommentComplex:
4728
 * @ctxt:  an XML parser context
4729
 * @buf:  the already parsed part of the buffer
4730
 * @len:  number of bytes in the buffer
4731
 * @size:  allocated size of the buffer
4732
 *
4733
 * Skip an XML (SGML) comment <!-- .... -->
4734
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4735
 *  must not occur within comments. "
4736
 * This is the slow routine in case the accelerator for ascii didn't work
4737
 *
4738
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4739
 */
4740
static void
4741
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4742
8.92k
                       size_t len, size_t size) {
4743
8.92k
    int q, ql;
4744
8.92k
    int r, rl;
4745
8.92k
    int cur, l;
4746
8.92k
    size_t count = 0;
4747
8.92k
    int inputid;
4748
4749
8.92k
    inputid = ctxt->input->id;
4750
4751
8.92k
    if (buf == NULL) {
4752
8.92k
        len = 0;
4753
8.92k
  size = XML_PARSER_BUFFER_SIZE;
4754
8.92k
  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4755
8.92k
  if (buf == NULL) {
4756
0
      xmlErrMemory(ctxt, NULL);
4757
0
      return;
4758
0
  }
4759
8.92k
    }
4760
8.92k
    GROW; /* Assure there's enough input data */
4761
8.92k
    q = CUR_CHAR(ql);
4762
8.92k
    if (q == 0)
4763
140
        goto not_terminated;
4764
8.78k
    if (!IS_CHAR(q)) {
4765
2.28k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4766
2.28k
                          "xmlParseComment: invalid xmlChar value %d\n",
4767
2.28k
                    q);
4768
2.28k
  xmlFree (buf);
4769
2.28k
  return;
4770
2.28k
    }
4771
6.50k
    NEXTL(ql);
4772
6.50k
    r = CUR_CHAR(rl);
4773
6.50k
    if (r == 0)
4774
42
        goto not_terminated;
4775
6.46k
    if (!IS_CHAR(r)) {
4776
1.31k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4777
1.31k
                          "xmlParseComment: invalid xmlChar value %d\n",
4778
1.31k
                    q);
4779
1.31k
  xmlFree (buf);
4780
1.31k
  return;
4781
1.31k
    }
4782
5.15k
    NEXTL(rl);
4783
5.15k
    cur = CUR_CHAR(l);
4784
5.15k
    if (cur == 0)
4785
30
        goto not_terminated;
4786
47.3M
    while (IS_CHAR(cur) && /* checked */
4787
47.3M
           ((cur != '>') ||
4788
47.3M
      (r != '-') || (q != '-'))) {
4789
47.3M
  if ((r == '-') && (q == '-')) {
4790
2.86k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4791
2.86k
  }
4792
47.3M
        if ((len > XML_MAX_TEXT_LENGTH) &&
4793
47.3M
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4794
1
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4795
1
                         "Comment too big found", NULL);
4796
1
            xmlFree (buf);
4797
1
            return;
4798
1
        }
4799
47.3M
  if (len + 5 >= size) {
4800
5.08k
      xmlChar *new_buf;
4801
5.08k
            size_t new_size;
4802
4803
5.08k
      new_size = size * 2;
4804
5.08k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4805
5.08k
      if (new_buf == NULL) {
4806
0
    xmlFree (buf);
4807
0
    xmlErrMemory(ctxt, NULL);
4808
0
    return;
4809
0
      }
4810
5.08k
      buf = new_buf;
4811
5.08k
            size = new_size;
4812
5.08k
  }
4813
47.3M
  COPY_BUF(ql,buf,len,q);
4814
47.3M
  q = r;
4815
47.3M
  ql = rl;
4816
47.3M
  r = cur;
4817
47.3M
  rl = l;
4818
4819
47.3M
  count++;
4820
47.3M
  if (count > 50) {
4821
927k
      SHRINK;
4822
927k
      GROW;
4823
927k
      count = 0;
4824
927k
            if (ctxt->instate == XML_PARSER_EOF) {
4825
0
    xmlFree(buf);
4826
0
    return;
4827
0
            }
4828
927k
  }
4829
47.3M
  NEXTL(l);
4830
47.3M
  cur = CUR_CHAR(l);
4831
47.3M
  if (cur == 0) {
4832
367
      SHRINK;
4833
367
      GROW;
4834
367
      cur = CUR_CHAR(l);
4835
367
  }
4836
47.3M
    }
4837
5.12k
    buf[len] = 0;
4838
5.12k
    if (cur == 0) {
4839
367
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4840
367
                       "Comment not terminated \n<!--%.50s\n", buf);
4841
4.75k
    } else if (!IS_CHAR(cur)) {
4842
2.16k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4843
2.16k
                          "xmlParseComment: invalid xmlChar value %d\n",
4844
2.16k
                    cur);
4845
2.59k
    } else {
4846
2.59k
  if (inputid != ctxt->input->id) {
4847
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4848
0
               "Comment doesn't start and stop in the same"
4849
0
                           " entity\n");
4850
0
  }
4851
2.59k
        NEXT;
4852
2.59k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4853
2.59k
      (!ctxt->disableSAX))
4854
0
      ctxt->sax->comment(ctxt->userData, buf);
4855
2.59k
    }
4856
5.12k
    xmlFree(buf);
4857
5.12k
    return;
4858
212
not_terminated:
4859
212
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4860
212
       "Comment not terminated\n", NULL);
4861
212
    xmlFree(buf);
4862
212
    return;
4863
5.12k
}
4864
4865
/**
4866
 * xmlParseComment:
4867
 * @ctxt:  an XML parser context
4868
 *
4869
 * Skip an XML (SGML) comment <!-- .... -->
4870
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4871
 *  must not occur within comments. "
4872
 *
4873
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4874
 */
4875
void
4876
12.1k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4877
12.1k
    xmlChar *buf = NULL;
4878
12.1k
    size_t size = XML_PARSER_BUFFER_SIZE;
4879
12.1k
    size_t len = 0;
4880
12.1k
    xmlParserInputState state;
4881
12.1k
    const xmlChar *in;
4882
12.1k
    size_t nbchar = 0;
4883
12.1k
    int ccol;
4884
12.1k
    int inputid;
4885
4886
    /*
4887
     * Check that there is a comment right here.
4888
     */
4889
12.1k
    if ((RAW != '<') || (NXT(1) != '!') ||
4890
12.1k
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4891
12.1k
    state = ctxt->instate;
4892
12.1k
    ctxt->instate = XML_PARSER_COMMENT;
4893
12.1k
    inputid = ctxt->input->id;
4894
12.1k
    SKIP(4);
4895
12.1k
    SHRINK;
4896
12.1k
    GROW;
4897
4898
    /*
4899
     * Accelerated common case where input don't need to be
4900
     * modified before passing it to the handler.
4901
     */
4902
12.1k
    in = ctxt->input->cur;
4903
12.1k
    do {
4904
12.1k
  if (*in == 0xA) {
4905
860
      do {
4906
860
    ctxt->input->line++; ctxt->input->col = 1;
4907
860
    in++;
4908
860
      } while (*in == 0xA);
4909
456
  }
4910
18.9k
get_more:
4911
18.9k
        ccol = ctxt->input->col;
4912
76.9k
  while (((*in > '-') && (*in <= 0x7F)) ||
4913
76.9k
         ((*in >= 0x20) && (*in < '-')) ||
4914
76.9k
         (*in == 0x09)) {
4915
58.0k
        in++;
4916
58.0k
        ccol++;
4917
58.0k
  }
4918
18.9k
  ctxt->input->col = ccol;
4919
18.9k
  if (*in == 0xA) {
4920
952
      do {
4921
952
    ctxt->input->line++; ctxt->input->col = 1;
4922
952
    in++;
4923
952
      } while (*in == 0xA);
4924
594
      goto get_more;
4925
594
  }
4926
18.3k
  nbchar = in - ctxt->input->cur;
4927
  /*
4928
   * save current set of data
4929
   */
4930
18.3k
  if (nbchar > 0) {
4931
10.5k
      if ((ctxt->sax != NULL) &&
4932
10.5k
    (ctxt->sax->comment != NULL)) {
4933
0
    if (buf == NULL) {
4934
0
        if ((*in == '-') && (in[1] == '-'))
4935
0
            size = nbchar + 1;
4936
0
        else
4937
0
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4938
0
        buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4939
0
        if (buf == NULL) {
4940
0
            xmlErrMemory(ctxt, NULL);
4941
0
      ctxt->instate = state;
4942
0
      return;
4943
0
        }
4944
0
        len = 0;
4945
0
    } else if (len + nbchar + 1 >= size) {
4946
0
        xmlChar *new_buf;
4947
0
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4948
0
        new_buf = (xmlChar *) xmlRealloc(buf,
4949
0
                                         size * sizeof(xmlChar));
4950
0
        if (new_buf == NULL) {
4951
0
            xmlFree (buf);
4952
0
      xmlErrMemory(ctxt, NULL);
4953
0
      ctxt->instate = state;
4954
0
      return;
4955
0
        }
4956
0
        buf = new_buf;
4957
0
    }
4958
0
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4959
0
    len += nbchar;
4960
0
    buf[len] = 0;
4961
0
      }
4962
10.5k
  }
4963
18.3k
        if ((len > XML_MAX_TEXT_LENGTH) &&
4964
18.3k
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4965
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4966
0
                         "Comment too big found", NULL);
4967
0
            xmlFree (buf);
4968
0
            return;
4969
0
        }
4970
18.3k
  ctxt->input->cur = in;
4971
18.3k
  if (*in == 0xA) {
4972
0
      in++;
4973
0
      ctxt->input->line++; ctxt->input->col = 1;
4974
0
  }
4975
18.3k
  if (*in == 0xD) {
4976
1.68k
      in++;
4977
1.68k
      if (*in == 0xA) {
4978
397
    ctxt->input->cur = in;
4979
397
    in++;
4980
397
    ctxt->input->line++; ctxt->input->col = 1;
4981
397
    goto get_more;
4982
397
      }
4983
1.28k
      in--;
4984
1.28k
  }
4985
17.9k
  SHRINK;
4986
17.9k
  GROW;
4987
17.9k
        if (ctxt->instate == XML_PARSER_EOF) {
4988
0
            xmlFree(buf);
4989
0
            return;
4990
0
        }
4991
17.9k
  in = ctxt->input->cur;
4992
17.9k
  if (*in == '-') {
4993
9.02k
      if (in[1] == '-') {
4994
6.60k
          if (in[2] == '>') {
4995
3.23k
        if (ctxt->input->id != inputid) {
4996
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4997
0
                     "comment doesn't start and stop in the"
4998
0
                                       " same entity\n");
4999
0
        }
5000
3.23k
        SKIP(3);
5001
3.23k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5002
3.23k
            (!ctxt->disableSAX)) {
5003
0
      if (buf != NULL)
5004
0
          ctxt->sax->comment(ctxt->userData, buf);
5005
0
      else
5006
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5007
0
        }
5008
3.23k
        if (buf != NULL)
5009
0
            xmlFree(buf);
5010
3.23k
        if (ctxt->instate != XML_PARSER_EOF)
5011
3.23k
      ctxt->instate = state;
5012
3.23k
        return;
5013
3.23k
    }
5014
3.37k
    if (buf != NULL) {
5015
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5016
0
                          "Double hyphen within comment: "
5017
0
                                      "<!--%.50s\n",
5018
0
              buf);
5019
0
    } else
5020
3.37k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5021
3.37k
                          "Double hyphen within comment\n", NULL);
5022
3.37k
                if (ctxt->instate == XML_PARSER_EOF) {
5023
0
                    xmlFree(buf);
5024
0
                    return;
5025
0
                }
5026
3.37k
    in++;
5027
3.37k
    ctxt->input->col++;
5028
3.37k
      }
5029
5.78k
      in++;
5030
5.78k
      ctxt->input->col++;
5031
5.78k
      goto get_more;
5032
9.02k
  }
5033
17.9k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5034
8.92k
    xmlParseCommentComplex(ctxt, buf, len, size);
5035
8.92k
    ctxt->instate = state;
5036
8.92k
    return;
5037
12.1k
}
5038
5039
5040
/**
5041
 * xmlParsePITarget:
5042
 * @ctxt:  an XML parser context
5043
 *
5044
 * parse the name of a PI
5045
 *
5046
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5047
 *
5048
 * Returns the PITarget name or NULL
5049
 */
5050
5051
const xmlChar *
5052
15.7k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5053
15.7k
    const xmlChar *name;
5054
5055
15.7k
    name = xmlParseName(ctxt);
5056
15.7k
    if ((name != NULL) &&
5057
15.7k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5058
15.7k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5059
15.7k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5060
4.02k
  int i;
5061
4.02k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5062
4.02k
      (name[2] == 'l') && (name[3] == 0)) {
5063
2.25k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5064
2.25k
     "XML declaration allowed only at the start of the document\n");
5065
2.25k
      return(name);
5066
2.25k
  } else if (name[3] == 0) {
5067
1.01k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5068
1.01k
      return(name);
5069
1.01k
  }
5070
1.97k
  for (i = 0;;i++) {
5071
1.97k
      if (xmlW3CPIs[i] == NULL) break;
5072
1.51k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5073
290
          return(name);
5074
1.51k
  }
5075
466
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5076
466
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5077
466
          NULL, NULL);
5078
466
    }
5079
12.2k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5080
1.03k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5081
1.03k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5082
1.03k
    }
5083
12.2k
    return(name);
5084
15.7k
}
5085
5086
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted value is added to the catalogs of the parsing context so
 * that it can be used if a resolution is needed further down in the
 * document. A malformed value produces a XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *valueStart;
    xmlChar *url = NULL;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;

    /*
     * '=' sign.
     * NOTE(review): a missing '=' returns silently whereas every other
     * syntax problem emits the warning; kept as is, confirm the intent.
     */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* opening quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* value up to the matching quote */
    valueStart = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    url = xmlStrndup(valueStart, p - valueStart);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
5147
5148
/**
5149
 * xmlParsePI:
5150
 * @ctxt:  an XML parser context
5151
 *
5152
 * parse an XML Processing Instruction.
5153
 *
5154
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5155
 *
5156
 * The processing is transferred to SAX once parsed.
5157
 */
5158
5159
void
5160
15.7k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5161
15.7k
    xmlChar *buf = NULL;
5162
15.7k
    size_t len = 0;
5163
15.7k
    size_t size = XML_PARSER_BUFFER_SIZE;
5164
15.7k
    int cur, l;
5165
15.7k
    const xmlChar *target;
5166
15.7k
    xmlParserInputState state;
5167
15.7k
    int count = 0;
5168
5169
15.7k
    if ((RAW == '<') && (NXT(1) == '?')) {
5170
15.7k
  int inputid = ctxt->input->id;
5171
15.7k
  state = ctxt->instate;
5172
15.7k
        ctxt->instate = XML_PARSER_PI;
5173
  /*
5174
   * this is a Processing Instruction.
5175
   */
5176
15.7k
  SKIP(2);
5177
15.7k
  SHRINK;
5178
5179
  /*
5180
   * Parse the target name and check for special support like
5181
   * namespace.
5182
   */
5183
15.7k
        target = xmlParsePITarget(ctxt);
5184
15.7k
  if (target != NULL) {
5185
14.0k
      if ((RAW == '?') && (NXT(1) == '>')) {
5186
2.94k
    if (inputid != ctxt->input->id) {
5187
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5188
0
                             "PI declaration doesn't start and stop in"
5189
0
                                   " the same entity\n");
5190
0
    }
5191
2.94k
    SKIP(2);
5192
5193
    /*
5194
     * SAX: PI detected.
5195
     */
5196
2.94k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5197
2.94k
        (ctxt->sax->processingInstruction != NULL))
5198
0
        ctxt->sax->processingInstruction(ctxt->userData,
5199
0
                                         target, NULL);
5200
2.94k
    if (ctxt->instate != XML_PARSER_EOF)
5201
2.94k
        ctxt->instate = state;
5202
2.94k
    return;
5203
2.94k
      }
5204
11.0k
      buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5205
11.0k
      if (buf == NULL) {
5206
0
    xmlErrMemory(ctxt, NULL);
5207
0
    ctxt->instate = state;
5208
0
    return;
5209
0
      }
5210
11.0k
      if (SKIP_BLANKS == 0) {
5211
4.89k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5212
4.89k
        "ParsePI: PI %s space expected\n", target);
5213
4.89k
      }
5214
11.0k
      cur = CUR_CHAR(l);
5215
64.6M
      while (IS_CHAR(cur) && /* checked */
5216
64.6M
       ((cur != '?') || (NXT(1) != '>'))) {
5217
64.6M
    if (len + 5 >= size) {
5218
5.27k
        xmlChar *tmp;
5219
5.27k
                    size_t new_size = size * 2;
5220
5.27k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5221
5.27k
        if (tmp == NULL) {
5222
0
      xmlErrMemory(ctxt, NULL);
5223
0
      xmlFree(buf);
5224
0
      ctxt->instate = state;
5225
0
      return;
5226
0
        }
5227
5.27k
        buf = tmp;
5228
5.27k
                    size = new_size;
5229
5.27k
    }
5230
64.6M
    count++;
5231
64.6M
    if (count > 50) {
5232
1.26M
        SHRINK;
5233
1.26M
        GROW;
5234
1.26M
                    if (ctxt->instate == XML_PARSER_EOF) {
5235
0
                        xmlFree(buf);
5236
0
                        return;
5237
0
                    }
5238
1.26M
        count = 0;
5239
1.26M
                    if ((len > XML_MAX_TEXT_LENGTH) &&
5240
1.26M
                        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5241
1
                        xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5242
1
                                          "PI %s too big found", target);
5243
1
                        xmlFree(buf);
5244
1
                        ctxt->instate = state;
5245
1
                        return;
5246
1
                    }
5247
1.26M
    }
5248
64.6M
    COPY_BUF(l,buf,len,cur);
5249
64.6M
    NEXTL(l);
5250
64.6M
    cur = CUR_CHAR(l);
5251
64.6M
    if (cur == 0) {
5252
432
        SHRINK;
5253
432
        GROW;
5254
432
        cur = CUR_CHAR(l);
5255
432
    }
5256
64.6M
      }
5257
11.0k
            if ((len > XML_MAX_TEXT_LENGTH) &&
5258
11.0k
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5259
1
                xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5260
1
                                  "PI %s too big found", target);
5261
1
                xmlFree(buf);
5262
1
                ctxt->instate = state;
5263
1
                return;
5264
1
            }
5265
11.0k
      buf[len] = 0;
5266
11.0k
      if (cur != '?') {
5267
3.45k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5268
3.45k
          "ParsePI: PI %s never end ...\n", target);
5269
7.61k
      } else {
5270
7.61k
    if (inputid != ctxt->input->id) {
5271
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5272
0
                             "PI declaration doesn't start and stop in"
5273
0
                                   " the same entity\n");
5274
0
    }
5275
7.61k
    SKIP(2);
5276
5277
7.61k
#ifdef LIBXML_CATALOG_ENABLED
5278
7.61k
    if (((state == XML_PARSER_MISC) ||
5279
7.61k
               (state == XML_PARSER_START)) &&
5280
7.61k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5281
2.54k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5282
2.54k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5283
2.54k
      (allow == XML_CATA_ALLOW_ALL))
5284
2.54k
      xmlParseCatalogPI(ctxt, buf);
5285
2.54k
    }
5286
7.61k
#endif
5287
5288
5289
    /*
5290
     * SAX: PI detected.
5291
     */
5292
7.61k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5293
7.61k
        (ctxt->sax->processingInstruction != NULL))
5294
0
        ctxt->sax->processingInstruction(ctxt->userData,
5295
0
                                         target, buf);
5296
7.61k
      }
5297
11.0k
      xmlFree(buf);
5298
11.0k
  } else {
5299
1.75k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5300
1.75k
  }
5301
12.8k
  if (ctxt->instate != XML_PARSER_EOF)
5302
12.8k
      ctxt->instate = state;
5303
12.8k
    }
5304
15.7k
}
5305
5306
/**
5307
 * xmlParseNotationDecl:
5308
 * @ctxt:  an XML parser context
5309
 *
5310
 * parse a notation declaration
5311
 *
5312
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5313
 *
5314
 * Hence there are actually 3 choices:
5315
 *     'PUBLIC' S PubidLiteral
5316
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5317
 * and 'SYSTEM' S SystemLiteral
5318
 *
5319
 * See the NOTE on xmlParseExternalID().
5320
 */
5321
5322
void
5323
3.30k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5324
3.30k
    const xmlChar *name;
5325
3.30k
    xmlChar *Pubid;
5326
3.30k
    xmlChar *Systemid;
5327
5328
3.30k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5329
3.23k
  int inputid = ctxt->input->id;
5330
3.23k
  SHRINK;
5331
3.23k
  SKIP(10);
5332
3.23k
  if (SKIP_BLANKS == 0) {
5333
261
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5334
261
         "Space required after '<!NOTATION'\n");
5335
261
      return;
5336
261
  }
5337
5338
2.97k
        name = xmlParseName(ctxt);
5339
2.97k
  if (name == NULL) {
5340
518
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5341
518
      return;
5342
518
  }
5343
2.45k
  if (xmlStrchr(name, ':') != NULL) {
5344
356
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5345
356
         "colons are forbidden from notation names '%s'\n",
5346
356
         name, NULL, NULL);
5347
356
  }
5348
2.45k
  if (SKIP_BLANKS == 0) {
5349
640
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5350
640
         "Space required after the NOTATION name'\n");
5351
640
      return;
5352
640
  }
5353
5354
  /*
5355
   * Parse the IDs.
5356
   */
5357
1.81k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5358
1.81k
  SKIP_BLANKS;
5359
5360
1.81k
  if (RAW == '>') {
5361
582
      if (inputid != ctxt->input->id) {
5362
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5363
0
                         "Notation declaration doesn't start and stop"
5364
0
                               " in the same entity\n");
5365
0
      }
5366
582
      NEXT;
5367
582
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5368
582
    (ctxt->sax->notationDecl != NULL))
5369
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5370
1.23k
  } else {
5371
1.23k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5372
1.23k
  }
5373
1.81k
  if (Systemid != NULL) xmlFree(Systemid);
5374
1.81k
  if (Pubid != NULL) xmlFree(Pubid);
5375
1.81k
    }
5376
3.30k
}
5377
5378
/**
5379
 * xmlParseEntityDecl:
5380
 * @ctxt:  an XML parser context
5381
 *
5382
 * parse <!ENTITY declarations
5383
 *
5384
 * [70] EntityDecl ::= GEDecl | PEDecl
5385
 *
5386
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5387
 *
5388
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5389
 *
5390
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5391
 *
5392
 * [74] PEDef ::= EntityValue | ExternalID
5393
 *
5394
 * [76] NDataDecl ::= S 'NDATA' S Name
5395
 *
5396
 * [ VC: Notation Declared ]
5397
 * The Name must match the declared name of a notation.
5398
 */
5399
5400
void
5401
26.5k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5402
26.5k
    const xmlChar *name = NULL;
5403
26.5k
    xmlChar *value = NULL;
5404
26.5k
    xmlChar *URI = NULL, *literal = NULL;
5405
26.5k
    const xmlChar *ndata = NULL;
5406
26.5k
    int isParameter = 0;
5407
26.5k
    xmlChar *orig = NULL;
5408
5409
    /* GROW; done in the caller */
5410
26.5k
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5411
26.4k
  int inputid = ctxt->input->id;
5412
26.4k
  SHRINK;
5413
26.4k
  SKIP(8);
5414
26.4k
  if (SKIP_BLANKS == 0) {
5415
8.82k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5416
8.82k
         "Space required after '<!ENTITY'\n");
5417
8.82k
  }
5418
5419
26.4k
  if (RAW == '%') {
5420
7.56k
      NEXT;
5421
7.56k
      if (SKIP_BLANKS == 0) {
5422
5.13k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
5.13k
             "Space required after '%%'\n");
5424
5.13k
      }
5425
7.56k
      isParameter = 1;
5426
7.56k
  }
5427
5428
26.4k
        name = xmlParseName(ctxt);
5429
26.4k
  if (name == NULL) {
5430
706
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5431
706
                     "xmlParseEntityDecl: no name\n");
5432
706
            return;
5433
706
  }
5434
25.7k
  if (xmlStrchr(name, ':') != NULL) {
5435
5.22k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5436
5.22k
         "colons are forbidden from entities names '%s'\n",
5437
5.22k
         name, NULL, NULL);
5438
5.22k
  }
5439
25.7k
  if (SKIP_BLANKS == 0) {
5440
9.67k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5441
9.67k
         "Space required after the entity name\n");
5442
9.67k
  }
5443
5444
25.7k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5445
  /*
5446
   * handle the various case of definitions...
5447
   */
5448
25.7k
  if (isParameter) {
5449
7.54k
      if ((RAW == '"') || (RAW == '\'')) {
5450
5.63k
          value = xmlParseEntityValue(ctxt, &orig);
5451
5.63k
    if (value) {
5452
1.39k
        if ((ctxt->sax != NULL) &&
5453
1.39k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5454
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5455
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5456
0
            NULL, NULL, value);
5457
1.39k
    }
5458
5.63k
      } else {
5459
1.91k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5460
1.91k
    if ((URI == NULL) && (literal == NULL)) {
5461
237
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5462
237
    }
5463
1.91k
    if (URI) {
5464
1.51k
        xmlURIPtr uri;
5465
5466
1.51k
        uri = xmlParseURI((const char *) URI);
5467
1.51k
        if (uri == NULL) {
5468
1.16k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5469
1.16k
             "Invalid URI: %s\n", URI);
5470
      /*
5471
       * This really ought to be a well formedness error
5472
       * but the XML Core WG decided otherwise c.f. issue
5473
       * E26 of the XML erratas.
5474
       */
5475
1.16k
        } else {
5476
355
      if (uri->fragment != NULL) {
5477
          /*
5478
           * Okay this is foolish to block those but not
5479
           * invalid URIs.
5480
           */
5481
198
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5482
198
      } else {
5483
157
          if ((ctxt->sax != NULL) &&
5484
157
        (!ctxt->disableSAX) &&
5485
157
        (ctxt->sax->entityDecl != NULL))
5486
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5487
0
              XML_EXTERNAL_PARAMETER_ENTITY,
5488
0
              literal, URI, NULL);
5489
157
      }
5490
355
      xmlFreeURI(uri);
5491
355
        }
5492
1.51k
    }
5493
1.91k
      }
5494
18.1k
  } else {
5495
18.1k
      if ((RAW == '"') || (RAW == '\'')) {
5496
14.6k
          value = xmlParseEntityValue(ctxt, &orig);
5497
14.6k
    if ((ctxt->sax != NULL) &&
5498
14.6k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5499
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5500
0
        XML_INTERNAL_GENERAL_ENTITY,
5501
0
        NULL, NULL, value);
5502
    /*
5503
     * For expat compatibility in SAX mode.
5504
     */
5505
14.6k
    if ((ctxt->myDoc == NULL) ||
5506
14.6k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5507
14.6k
        if (ctxt->myDoc == NULL) {
5508
1.12k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5509
1.12k
      if (ctxt->myDoc == NULL) {
5510
0
          xmlErrMemory(ctxt, "New Doc failed");
5511
0
          return;
5512
0
      }
5513
1.12k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5514
1.12k
        }
5515
14.6k
        if (ctxt->myDoc->intSubset == NULL)
5516
1.12k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5517
1.12k
              BAD_CAST "fake", NULL, NULL);
5518
5519
14.6k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5520
14.6k
                    NULL, NULL, value);
5521
14.6k
    }
5522
14.6k
      } else {
5523
3.57k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5524
3.57k
    if ((URI == NULL) && (literal == NULL)) {
5525
1.59k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5526
1.59k
    }
5527
3.57k
    if (URI) {
5528
1.32k
        xmlURIPtr uri;
5529
5530
1.32k
        uri = xmlParseURI((const char *)URI);
5531
1.32k
        if (uri == NULL) {
5532
535
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5533
535
             "Invalid URI: %s\n", URI);
5534
      /*
5535
       * This really ought to be a well formedness error
5536
       * but the XML Core WG decided otherwise c.f. issue
5537
       * E26 of the XML erratas.
5538
       */
5539
793
        } else {
5540
793
      if (uri->fragment != NULL) {
5541
          /*
5542
           * Okay this is foolish to block those but not
5543
           * invalid URIs.
5544
           */
5545
361
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5546
361
      }
5547
793
      xmlFreeURI(uri);
5548
793
        }
5549
1.32k
    }
5550
3.57k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5551
675
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5552
675
           "Space required before 'NDATA'\n");
5553
675
    }
5554
3.57k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5555
592
        SKIP(5);
5556
592
        if (SKIP_BLANKS == 0) {
5557
430
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5558
430
               "Space required after 'NDATA'\n");
5559
430
        }
5560
592
        ndata = xmlParseName(ctxt);
5561
592
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5562
592
            (ctxt->sax->unparsedEntityDecl != NULL))
5563
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5564
0
            literal, URI, ndata);
5565
2.98k
    } else {
5566
2.98k
        if ((ctxt->sax != NULL) &&
5567
2.98k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5568
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5569
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5570
0
            literal, URI, NULL);
5571
        /*
5572
         * For expat compatibility in SAX mode.
5573
         * assuming the entity replacement was asked for
5574
         */
5575
2.98k
        if ((ctxt->replaceEntities != 0) &&
5576
2.98k
      ((ctxt->myDoc == NULL) ||
5577
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5578
0
      if (ctxt->myDoc == NULL) {
5579
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5580
0
          if (ctxt->myDoc == NULL) {
5581
0
              xmlErrMemory(ctxt, "New Doc failed");
5582
0
        return;
5583
0
          }
5584
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5585
0
      }
5586
5587
0
      if (ctxt->myDoc->intSubset == NULL)
5588
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5589
0
            BAD_CAST "fake", NULL, NULL);
5590
0
      xmlSAX2EntityDecl(ctxt, name,
5591
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5592
0
                  literal, URI, NULL);
5593
0
        }
5594
2.98k
    }
5595
3.57k
      }
5596
18.1k
  }
5597
25.7k
  if (ctxt->instate == XML_PARSER_EOF)
5598
1
      goto done;
5599
25.7k
  SKIP_BLANKS;
5600
25.7k
  if (RAW != '>') {
5601
897
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5602
897
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5603
897
      xmlHaltParser(ctxt);
5604
24.8k
  } else {
5605
24.8k
      if (inputid != ctxt->input->id) {
5606
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5607
0
                         "Entity declaration doesn't start and stop in"
5608
0
                               " the same entity\n");
5609
0
      }
5610
24.8k
      NEXT;
5611
24.8k
  }
5612
25.7k
  if (orig != NULL) {
5613
      /*
5614
       * Ugly mechanism to save the raw entity value.
5615
       */
5616
12.7k
      xmlEntityPtr cur = NULL;
5617
5618
12.7k
      if (isParameter) {
5619
2.49k
          if ((ctxt->sax != NULL) &&
5620
2.49k
        (ctxt->sax->getParameterEntity != NULL))
5621
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5622
10.2k
      } else {
5623
10.2k
          if ((ctxt->sax != NULL) &&
5624
10.2k
        (ctxt->sax->getEntity != NULL))
5625
0
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5626
10.2k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5627
0
        cur = xmlSAX2GetEntity(ctxt, name);
5628
0
    }
5629
10.2k
      }
5630
12.7k
            if ((cur != NULL) && (cur->orig == NULL)) {
5631
0
    cur->orig = orig;
5632
0
                orig = NULL;
5633
0
      }
5634
12.7k
  }
5635
5636
25.7k
done:
5637
25.7k
  if (value != NULL) xmlFree(value);
5638
25.7k
  if (URI != NULL) xmlFree(URI);
5639
25.7k
  if (literal != NULL) xmlFree(literal);
5640
25.7k
        if (orig != NULL) xmlFree(orig);
5641
25.7k
    }
5642
26.5k
}
5643
5644
/**
5645
 * xmlParseDefaultDecl:
5646
 * @ctxt:  an XML parser context
5647
 * @value:  Receive a possible fixed default value for the attribute
5648
 *
5649
 * Parse an attribute default declaration
5650
 *
5651
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5652
 *
5653
 * [ VC: Required Attribute ]
5654
 * if the default declaration is the keyword #REQUIRED, then the
5655
 * attribute must be specified for all elements of the type in the
5656
 * attribute-list declaration.
5657
 *
5658
 * [ VC: Attribute Default Legal ]
5659
 * The declared default value must meet the lexical constraints of
5660
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5661
 *
5662
 * [ VC: Fixed Attribute Default ]
5663
 * if an attribute has a default value declared with the #FIXED
5664
 * keyword, instances of that attribute must match the default value.
5665
 *
5666
 * [ WFC: No < in Attribute Values ]
5667
 * handled in xmlParseAttValue()
5668
 *
5669
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5670
 *          or XML_ATTRIBUTE_FIXED.
5671
 */
5672
5673
int
5674
36.6k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5675
36.6k
    int val;
5676
36.6k
    xmlChar *ret;
5677
5678
36.6k
    *value = NULL;
5679
36.6k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5680
423
  SKIP(9);
5681
423
  return(XML_ATTRIBUTE_REQUIRED);
5682
423
    }
5683
36.2k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5684
269
  SKIP(8);
5685
269
  return(XML_ATTRIBUTE_IMPLIED);
5686
269
    }
5687
35.9k
    val = XML_ATTRIBUTE_NONE;
5688
35.9k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5689
315
  SKIP(6);
5690
315
  val = XML_ATTRIBUTE_FIXED;
5691
315
  if (SKIP_BLANKS == 0) {
5692
79
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5693
79
         "Space required after '#FIXED'\n");
5694
79
  }
5695
315
    }
5696
35.9k
    ret = xmlParseAttValue(ctxt);
5697
35.9k
    ctxt->instate = XML_PARSER_DTD;
5698
35.9k
    if (ret == NULL) {
5699
1.23k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5700
1.23k
           "Attribute default value declaration error\n");
5701
1.23k
    } else
5702
34.7k
        *value = ret;
5703
35.9k
    return(val);
5704
36.2k
}
5705
5706
/**
5707
 * xmlParseNotationType:
5708
 * @ctxt:  an XML parser context
5709
 *
5710
 * parse a Notation attribute type.
5711
 *
5712
 * Note: the leading 'NOTATION' S part has already been parsed...
5713
 *
5714
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5715
 *
5716
 * [ VC: Notation Attributes ]
5717
 * Values of this type must match one of the notation names included
5718
 * in the declaration; all notation names in the declaration must be declared.
5719
 *
5720
 * Returns: the notation attribute tree built while parsing
5721
 */
5722
5723
xmlEnumerationPtr
5724
3.87k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5725
3.87k
    const xmlChar *name;
5726
3.87k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5727
5728
3.87k
    if (RAW != '(') {
5729
47
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5730
47
  return(NULL);
5731
47
    }
5732
3.82k
    SHRINK;
5733
4.56k
    do {
5734
4.56k
        NEXT;
5735
4.56k
  SKIP_BLANKS;
5736
4.56k
        name = xmlParseName(ctxt);
5737
4.56k
  if (name == NULL) {
5738
861
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5739
861
         "Name expected in NOTATION declaration\n");
5740
861
            xmlFreeEnumeration(ret);
5741
861
      return(NULL);
5742
861
  }
5743
3.70k
  tmp = ret;
5744
5.39k
  while (tmp != NULL) {
5745
1.69k
      if (xmlStrEqual(name, tmp->name)) {
5746
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5747
0
    "standalone: attribute notation value token %s duplicated\n",
5748
0
         name, NULL);
5749
0
    if (!xmlDictOwns(ctxt->dict, name))
5750
0
        xmlFree((xmlChar *) name);
5751
0
    break;
5752
0
      }
5753
1.69k
      tmp = tmp->next;
5754
1.69k
  }
5755
3.70k
  if (tmp == NULL) {
5756
3.70k
      cur = xmlCreateEnumeration(name);
5757
3.70k
      if (cur == NULL) {
5758
0
                xmlFreeEnumeration(ret);
5759
0
                return(NULL);
5760
0
            }
5761
3.70k
      if (last == NULL) ret = last = cur;
5762
569
      else {
5763
569
    last->next = cur;
5764
569
    last = cur;
5765
569
      }
5766
3.70k
  }
5767
3.70k
  SKIP_BLANKS;
5768
3.70k
    } while (RAW == '|');
5769
2.96k
    if (RAW != ')') {
5770
83
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5771
83
        xmlFreeEnumeration(ret);
5772
83
  return(NULL);
5773
83
    }
5774
2.88k
    NEXT;
5775
2.88k
    return(ret);
5776
2.96k
}
5777
5778
/**
5779
 * xmlParseEnumerationType:
5780
 * @ctxt:  an XML parser context
5781
 *
5782
 * parse an Enumeration attribute type.
5783
 *
5784
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5785
 *
5786
 * [ VC: Enumeration ]
5787
 * Values of this type must match one of the Nmtoken tokens in
5788
 * the declaration
5789
 *
5790
 * Returns: the enumeration attribute tree built while parsing
5791
 */
5792
5793
xmlEnumerationPtr
5794
5.88k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5795
5.88k
    xmlChar *name;
5796
5.88k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5797
5798
5.88k
    if (RAW != '(') {
5799
805
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5800
805
  return(NULL);
5801
805
    }
5802
5.07k
    SHRINK;
5803
6.74k
    do {
5804
6.74k
        NEXT;
5805
6.74k
  SKIP_BLANKS;
5806
6.74k
        name = xmlParseNmtoken(ctxt);
5807
6.74k
  if (name == NULL) {
5808
371
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5809
371
      return(ret);
5810
371
  }
5811
6.37k
  tmp = ret;
5812
9.43k
  while (tmp != NULL) {
5813
3.05k
      if (xmlStrEqual(name, tmp->name)) {
5814
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5815
0
    "standalone: attribute enumeration value token %s duplicated\n",
5816
0
         name, NULL);
5817
0
    if (!xmlDictOwns(ctxt->dict, name))
5818
0
        xmlFree(name);
5819
0
    break;
5820
0
      }
5821
3.05k
      tmp = tmp->next;
5822
3.05k
  }
5823
6.37k
  if (tmp == NULL) {
5824
6.37k
      cur = xmlCreateEnumeration(name);
5825
6.37k
      if (!xmlDictOwns(ctxt->dict, name))
5826
6.37k
    xmlFree(name);
5827
6.37k
      if (cur == NULL) {
5828
0
                xmlFreeEnumeration(ret);
5829
0
                return(NULL);
5830
0
            }
5831
6.37k
      if (last == NULL) ret = last = cur;
5832
1.31k
      else {
5833
1.31k
    last->next = cur;
5834
1.31k
    last = cur;
5835
1.31k
      }
5836
6.37k
  }
5837
6.37k
  SKIP_BLANKS;
5838
6.37k
    } while (RAW == '|');
5839
4.70k
    if (RAW != ')') {
5840
1.04k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5841
1.04k
  return(ret);
5842
1.04k
    }
5843
3.66k
    NEXT;
5844
3.66k
    return(ret);
5845
4.70k
}
5846
5847
/**
5848
 * xmlParseEnumeratedType:
5849
 * @ctxt:  an XML parser context
5850
 * @tree:  the enumeration tree built while parsing
5851
 *
5852
 * parse an Enumerated attribute type.
5853
 *
5854
 * [57] EnumeratedType ::= NotationType | Enumeration
5855
 *
5856
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5857
 *
5858
 *
5859
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5860
 */
5861
5862
int
5863
9.82k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5864
9.82k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5865
3.94k
  SKIP(8);
5866
3.94k
  if (SKIP_BLANKS == 0) {
5867
74
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5868
74
         "Space required after 'NOTATION'\n");
5869
74
      return(0);
5870
74
  }
5871
3.87k
  *tree = xmlParseNotationType(ctxt);
5872
3.87k
  if (*tree == NULL) return(0);
5873
2.88k
  return(XML_ATTRIBUTE_NOTATION);
5874
3.87k
    }
5875
5.88k
    *tree = xmlParseEnumerationType(ctxt);
5876
5.88k
    if (*tree == NULL) return(0);
5877
5.06k
    return(XML_ATTRIBUTE_ENUMERATION);
5878
5.88k
}
5879
5880
/**
5881
 * xmlParseAttributeType:
5882
 * @ctxt:  an XML parser context
5883
 * @tree:  the enumeration tree built while parsing
5884
 *
5885
 * parse the attribute type of an attribute definition
5886
 *
5887
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5888
 *
5889
 * [55] StringType ::= 'CDATA'
5890
 *
5891
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5892
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5893
 *
5894
 * Validity constraints for attribute values syntax are checked in
5895
 * xmlValidateAttributeValue()
5896
 *
5897
 * [ VC: ID ]
5898
 * Values of type ID must match the Name production. A name must not
5899
 * appear more than once in an XML document as a value of this type;
5900
 * i.e., ID values must uniquely identify the elements which bear them.
5901
 *
5902
 * [ VC: One ID per Element Type ]
5903
 * No element type may have more than one ID attribute specified.
5904
 *
5905
 * [ VC: ID Attribute Default ]
5906
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5907
 *
5908
 * [ VC: IDREF ]
5909
 * Values of type IDREF must match the Name production, and values
5910
 * of type IDREFS must match Names; each IDREF Name must match the value
5911
 * of an ID attribute on some element in the XML document; i.e. IDREF
5912
 * values must match the value of some ID attribute.
5913
 *
5914
 * [ VC: Entity Name ]
5915
 * Values of type ENTITY must match the Name production, values
5916
 * of type ENTITIES must match Names; each Entity Name must match the
5917
 * name of an unparsed entity declared in the DTD.
5918
 *
5919
 * [ VC: Name Token ]
5920
 * Values of type NMTOKEN must match the Nmtoken production; values
5921
 * of type NMTOKENS must match Nmtokens.
5922
 *
5923
 * Returns the attribute type
5924
 */
5925
int
5926
40.1k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5927
40.1k
    SHRINK;
5928
40.1k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5929
20.2k
  SKIP(5);
5930
20.2k
  return(XML_ATTRIBUTE_CDATA);
5931
20.2k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5932
53
  SKIP(6);
5933
53
  return(XML_ATTRIBUTE_IDREFS);
5934
19.8k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5935
32
  SKIP(5);
5936
32
  return(XML_ATTRIBUTE_IDREF);
5937
19.7k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5938
9.21k
        SKIP(2);
5939
9.21k
  return(XML_ATTRIBUTE_ID);
5940
10.5k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5941
440
  SKIP(6);
5942
440
  return(XML_ATTRIBUTE_ENTITY);
5943
10.1k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5944
135
  SKIP(8);
5945
135
  return(XML_ATTRIBUTE_ENTITIES);
5946
9.97k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5947
73
  SKIP(8);
5948
73
  return(XML_ATTRIBUTE_NMTOKENS);
5949
9.90k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5950
78
  SKIP(7);
5951
78
  return(XML_ATTRIBUTE_NMTOKEN);
5952
78
     }
5953
9.82k
     return(xmlParseEnumeratedType(ctxt, tree));
5954
40.1k
}
5955
5956
/**
5957
 * xmlParseAttributeListDecl:
5958
 * @ctxt:  an XML parser context
5959
 *
5960
 * parse the Attribute list declaration for an element
5961
 *
5962
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5963
 *
5964
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5965
 *
5966
 */
5967
void
5968
9.03k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5969
9.03k
    const xmlChar *elemName;
5970
9.03k
    const xmlChar *attrName;
5971
9.03k
    xmlEnumerationPtr tree;
5972
5973
9.03k
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5974
8.95k
  int inputid = ctxt->input->id;
5975
5976
8.95k
  SKIP(9);
5977
8.95k
  if (SKIP_BLANKS == 0) {
5978
5.92k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5979
5.92k
                     "Space required after '<!ATTLIST'\n");
5980
5.92k
  }
5981
8.95k
        elemName = xmlParseName(ctxt);
5982
8.95k
  if (elemName == NULL) {
5983
293
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5984
293
         "ATTLIST: no name for Element\n");
5985
293
      return;
5986
293
  }
5987
8.65k
  SKIP_BLANKS;
5988
8.65k
  GROW;
5989
43.8k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5990
42.1k
      int type;
5991
42.1k
      int def;
5992
42.1k
      xmlChar *defaultValue = NULL;
5993
5994
42.1k
      GROW;
5995
42.1k
            tree = NULL;
5996
42.1k
      attrName = xmlParseName(ctxt);
5997
42.1k
      if (attrName == NULL) {
5998
890
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5999
890
             "ATTLIST: no name for Attribute\n");
6000
890
    break;
6001
890
      }
6002
41.2k
      GROW;
6003
41.2k
      if (SKIP_BLANKS == 0) {
6004
1.15k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6005
1.15k
            "Space required after the attribute name\n");
6006
1.15k
    break;
6007
1.15k
      }
6008
6009
40.1k
      type = xmlParseAttributeType(ctxt, &tree);
6010
40.1k
      if (type <= 0) {
6011
1.88k
          break;
6012
1.88k
      }
6013
6014
38.2k
      GROW;
6015
38.2k
      if (SKIP_BLANKS == 0) {
6016
1.58k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6017
1.58k
             "Space required after the attribute type\n");
6018
1.58k
          if (tree != NULL)
6019
1.40k
        xmlFreeEnumeration(tree);
6020
1.58k
    break;
6021
1.58k
      }
6022
6023
36.6k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6024
36.6k
      if (def <= 0) {
6025
0
                if (defaultValue != NULL)
6026
0
        xmlFree(defaultValue);
6027
0
          if (tree != NULL)
6028
0
        xmlFreeEnumeration(tree);
6029
0
          break;
6030
0
      }
6031
36.6k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6032
14.9k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6033
6034
36.6k
      GROW;
6035
36.6k
            if (RAW != '>') {
6036
35.6k
    if (SKIP_BLANKS == 0) {
6037
1.49k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6038
1.49k
      "Space required after the attribute default value\n");
6039
1.49k
        if (defaultValue != NULL)
6040
975
      xmlFree(defaultValue);
6041
1.49k
        if (tree != NULL)
6042
738
      xmlFreeEnumeration(tree);
6043
1.49k
        break;
6044
1.49k
    }
6045
35.6k
      }
6046
35.1k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6047
35.1k
    (ctxt->sax->attributeDecl != NULL))
6048
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6049
0
                          type, def, defaultValue, tree);
6050
35.1k
      else if (tree != NULL)
6051
5.79k
    xmlFreeEnumeration(tree);
6052
6053
35.1k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6054
35.1k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6055
35.1k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6056
33.7k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6057
33.7k
      }
6058
35.1k
      if (ctxt->sax2) {
6059
35.1k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6060
35.1k
      }
6061
35.1k
      if (defaultValue != NULL)
6062
33.7k
          xmlFree(defaultValue);
6063
35.1k
      GROW;
6064
35.1k
  }
6065
8.65k
  if (RAW == '>') {
6066
1.83k
      if (inputid != ctxt->input->id) {
6067
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6068
0
                               "Attribute list declaration doesn't start and"
6069
0
                               " stop in the same entity\n");
6070
0
      }
6071
1.83k
      NEXT;
6072
1.83k
  }
6073
8.65k
    }
6074
9.03k
}
6075
6076
/**
6077
 * xmlParseElementMixedContentDecl:
6078
 * @ctxt:  an XML parser context
6079
 * @inputchk:  the input used for the current entity, needed for boundary checks
6080
 *
6081
 * parse the declaration for a Mixed Element content
6082
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6083
 *
6084
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6085
 *                '(' S? '#PCDATA' S? ')'
6086
 *
6087
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6088
 *
6089
 * [ VC: No Duplicate Types ]
6090
 * The same name must not appear more than once in a single
6091
 * mixed-content declaration.
6092
 *
6093
 * returns: the list of the xmlElementContentPtr describing the element choices
6094
 */
6095
xmlElementContentPtr
6096
1.85k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6097
1.85k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6098
1.85k
    const xmlChar *elem = NULL;
6099
6100
1.85k
    GROW;
6101
1.85k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6102
1.85k
  SKIP(7);
6103
1.85k
  SKIP_BLANKS;
6104
1.85k
  SHRINK;
6105
1.85k
  if (RAW == ')') {
6106
784
      if (ctxt->input->id != inputchk) {
6107
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6108
0
                               "Element content declaration doesn't start and"
6109
0
                               " stop in the same entity\n");
6110
0
      }
6111
784
      NEXT;
6112
784
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6113
784
      if (ret == NULL)
6114
0
          return(NULL);
6115
784
      if (RAW == '*') {
6116
534
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6117
534
    NEXT;
6118
534
      }
6119
784
      return(ret);
6120
784
  }
6121
1.07k
  if ((RAW == '(') || (RAW == '|')) {
6122
649
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6123
649
      if (ret == NULL) return(NULL);
6124
649
  }
6125
28.3k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6126
27.6k
      NEXT;
6127
27.6k
      if (elem == NULL) {
6128
648
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6129
648
    if (ret == NULL) {
6130
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6131
0
                    return(NULL);
6132
0
                }
6133
648
    ret->c1 = cur;
6134
648
    if (cur != NULL)
6135
648
        cur->parent = ret;
6136
648
    cur = ret;
6137
26.9k
      } else {
6138
26.9k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6139
26.9k
    if (n == NULL) {
6140
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6141
0
                    return(NULL);
6142
0
                }
6143
26.9k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6144
26.9k
    if (n->c1 != NULL)
6145
26.9k
        n->c1->parent = n;
6146
26.9k
          cur->c2 = n;
6147
26.9k
    if (n != NULL)
6148
26.9k
        n->parent = cur;
6149
26.9k
    cur = n;
6150
26.9k
      }
6151
27.6k
      SKIP_BLANKS;
6152
27.6k
      elem = xmlParseName(ctxt);
6153
27.6k
      if (elem == NULL) {
6154
336
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6155
336
      "xmlParseElementMixedContentDecl : Name expected\n");
6156
336
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6157
336
    return(NULL);
6158
336
      }
6159
27.3k
      SKIP_BLANKS;
6160
27.3k
      GROW;
6161
27.3k
  }
6162
735
  if ((RAW == ')') && (NXT(1) == '*')) {
6163
107
      if (elem != NULL) {
6164
107
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6165
107
                                   XML_ELEMENT_CONTENT_ELEMENT);
6166
107
    if (cur->c2 != NULL)
6167
107
        cur->c2->parent = cur;
6168
107
            }
6169
107
            if (ret != NULL)
6170
107
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6171
107
      if (ctxt->input->id != inputchk) {
6172
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6173
0
                               "Element content declaration doesn't start and"
6174
0
                               " stop in the same entity\n");
6175
0
      }
6176
107
      SKIP(2);
6177
628
  } else {
6178
628
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6179
628
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6180
628
      return(NULL);
6181
628
  }
6182
6183
735
    } else {
6184
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6185
0
    }
6186
107
    return(ret);
6187
1.85k
}
6188
6189
/**
6190
 * xmlParseElementChildrenContentDeclPriv:
6191
 * @ctxt:  an XML parser context
6192
 * @inputchk:  the input used for the current entity, needed for boundary checks
6193
 * @depth: the level of recursion
6194
 *
6195
 * parse the declaration for an element children content model
6196
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6197
 *
6198
 *
6199
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6200
 *
6201
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6202
 *
6203
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6204
 *
6205
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6206
 *
6207
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6208
 * TODO Parameter-entity replacement text must be properly nested
6209
 *  with parenthesized groups. That is to say, if either of the
6210
 *  opening or closing parentheses in a choice, seq, or Mixed
6211
 *  construct is contained in the replacement text for a parameter
6212
 *  entity, both must be contained in the same replacement text. For
6213
 *  interoperability, if a parameter-entity reference appears in a
6214
 *  choice, seq, or Mixed construct, its replacement text should not
6215
 *  be empty, and neither the first nor last non-blank character of
6216
 *  the replacement text should be a connector (| or ,).
6217
 *
6218
 * Returns the tree of xmlElementContentPtr describing the element
6219
 *          hierarchy.
6220
 */
6221
static xmlElementContentPtr
6222
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6223
26.9k
                                       int depth) {
6224
26.9k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6225
26.9k
    const xmlChar *elem;
6226
26.9k
    xmlChar type = 0;
6227
6228
26.9k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6229
26.9k
        (depth >  2048)) {
6230
4
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6231
4
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6232
4
                          depth);
6233
4
  return(NULL);
6234
4
    }
6235
26.9k
    SKIP_BLANKS;
6236
26.9k
    GROW;
6237
26.9k
    if (RAW == '(') {
6238
13.6k
  int inputid = ctxt->input->id;
6239
6240
        /* Recurse on first child */
6241
13.6k
  NEXT;
6242
13.6k
  SKIP_BLANKS;
6243
13.6k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6244
13.6k
                                                           depth + 1);
6245
13.6k
        if (cur == NULL)
6246
6.31k
            return(NULL);
6247
7.38k
  SKIP_BLANKS;
6248
7.38k
  GROW;
6249
13.2k
    } else {
6250
13.2k
  elem = xmlParseName(ctxt);
6251
13.2k
  if (elem == NULL) {
6252
491
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6253
491
      return(NULL);
6254
491
  }
6255
12.7k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6256
12.7k
  if (cur == NULL) {
6257
0
      xmlErrMemory(ctxt, NULL);
6258
0
      return(NULL);
6259
0
  }
6260
12.7k
  GROW;
6261
12.7k
  if (RAW == '?') {
6262
1.46k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6263
1.46k
      NEXT;
6264
11.3k
  } else if (RAW == '*') {
6265
7.96k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6266
7.96k
      NEXT;
6267
7.96k
  } else if (RAW == '+') {
6268
267
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6269
267
      NEXT;
6270
3.09k
  } else {
6271
3.09k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6272
3.09k
  }
6273
12.7k
  GROW;
6274
12.7k
    }
6275
20.1k
    SKIP_BLANKS;
6276
20.1k
    SHRINK;
6277
34.5k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6278
        /*
6279
   * Each loop we parse one separator and one element.
6280
   */
6281
23.0k
        if (RAW == ',') {
6282
3.52k
      if (type == 0) type = CUR;
6283
6284
      /*
6285
       * Detect "Name | Name , Name" error
6286
       */
6287
2.07k
      else if (type != CUR) {
6288
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6289
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6290
1
                      type);
6291
1
    if ((last != NULL) && (last != ret))
6292
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6293
1
    if (ret != NULL)
6294
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6295
1
    return(NULL);
6296
1
      }
6297
3.52k
      NEXT;
6298
6299
3.52k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6300
3.52k
      if (op == NULL) {
6301
0
    if ((last != NULL) && (last != ret))
6302
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6303
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6304
0
    return(NULL);
6305
0
      }
6306
3.52k
      if (last == NULL) {
6307
1.45k
    op->c1 = ret;
6308
1.45k
    if (ret != NULL)
6309
1.45k
        ret->parent = op;
6310
1.45k
    ret = cur = op;
6311
2.07k
      } else {
6312
2.07k
          cur->c2 = op;
6313
2.07k
    if (op != NULL)
6314
2.07k
        op->parent = cur;
6315
2.07k
    op->c1 = last;
6316
2.07k
    if (last != NULL)
6317
2.07k
        last->parent = op;
6318
2.07k
    cur =op;
6319
2.07k
    last = NULL;
6320
2.07k
      }
6321
19.5k
  } else if (RAW == '|') {
6322
17.7k
      if (type == 0) type = CUR;
6323
6324
      /*
6325
       * Detect "Name , Name | Name" error
6326
       */
6327
6.41k
      else if (type != CUR) {
6328
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6329
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6330
1
          type);
6331
1
    if ((last != NULL) && (last != ret))
6332
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6333
1
    if (ret != NULL)
6334
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6335
1
    return(NULL);
6336
1
      }
6337
17.7k
      NEXT;
6338
6339
17.7k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6340
17.7k
      if (op == NULL) {
6341
0
    if ((last != NULL) && (last != ret))
6342
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6343
0
    if (ret != NULL)
6344
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6345
0
    return(NULL);
6346
0
      }
6347
17.7k
      if (last == NULL) {
6348
11.3k
    op->c1 = ret;
6349
11.3k
    if (ret != NULL)
6350
11.3k
        ret->parent = op;
6351
11.3k
    ret = cur = op;
6352
11.3k
      } else {
6353
6.41k
          cur->c2 = op;
6354
6.41k
    if (op != NULL)
6355
6.41k
        op->parent = cur;
6356
6.41k
    op->c1 = last;
6357
6.41k
    if (last != NULL)
6358
6.41k
        last->parent = op;
6359
6.41k
    cur =op;
6360
6.41k
    last = NULL;
6361
6.41k
      }
6362
17.7k
  } else {
6363
1.78k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6364
1.78k
      if ((last != NULL) && (last != ret))
6365
1.09k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6366
1.78k
      if (ret != NULL)
6367
1.78k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6368
1.78k
      return(NULL);
6369
1.78k
  }
6370
21.3k
  GROW;
6371
21.3k
  SKIP_BLANKS;
6372
21.3k
  GROW;
6373
21.3k
  if (RAW == '(') {
6374
10.5k
      int inputid = ctxt->input->id;
6375
      /* Recurse on second child */
6376
10.5k
      NEXT;
6377
10.5k
      SKIP_BLANKS;
6378
10.5k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6379
10.5k
                                                          depth + 1);
6380
10.5k
            if (last == NULL) {
6381
6.80k
    if (ret != NULL)
6382
6.80k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6383
6.80k
    return(NULL);
6384
6.80k
            }
6385
3.73k
      SKIP_BLANKS;
6386
10.7k
  } else {
6387
10.7k
      elem = xmlParseName(ctxt);
6388
10.7k
      if (elem == NULL) {
6389
132
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6390
132
    if (ret != NULL)
6391
132
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
132
    return(NULL);
6393
132
      }
6394
10.6k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6395
10.6k
      if (last == NULL) {
6396
0
    if (ret != NULL)
6397
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6398
0
    return(NULL);
6399
0
      }
6400
10.6k
      if (RAW == '?') {
6401
525
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6402
525
    NEXT;
6403
10.1k
      } else if (RAW == '*') {
6404
2.97k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6405
2.97k
    NEXT;
6406
7.12k
      } else if (RAW == '+') {
6407
181
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6408
181
    NEXT;
6409
6.94k
      } else {
6410
6.94k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6411
6.94k
      }
6412
10.6k
  }
6413
14.3k
  SKIP_BLANKS;
6414
14.3k
  GROW;
6415
14.3k
    }
6416
11.4k
    if ((cur != NULL) && (last != NULL)) {
6417
4.78k
        cur->c2 = last;
6418
4.78k
  if (last != NULL)
6419
4.78k
      last->parent = cur;
6420
4.78k
    }
6421
11.4k
    if (ctxt->input->id != inputchk) {
6422
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6423
0
                       "Element content declaration doesn't start and stop in"
6424
0
                       " the same entity\n");
6425
0
    }
6426
11.4k
    NEXT;
6427
11.4k
    if (RAW == '?') {
6428
1.08k
  if (ret != NULL) {
6429
1.08k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6430
1.08k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6431
563
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6432
524
      else
6433
524
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6434
1.08k
  }
6435
1.08k
  NEXT;
6436
10.3k
    } else if (RAW == '*') {
6437
1.21k
  if (ret != NULL) {
6438
1.21k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6439
1.21k
      cur = ret;
6440
      /*
6441
       * Some normalization:
6442
       * (a | b* | c?)* == (a | b | c)*
6443
       */
6444
6.54k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6445
5.32k
    if ((cur->c1 != NULL) &&
6446
5.32k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6447
5.32k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6448
1.66k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6449
5.32k
    if ((cur->c2 != NULL) &&
6450
5.32k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6451
5.32k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6452
831
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6453
5.32k
    cur = cur->c2;
6454
5.32k
      }
6455
1.21k
  }
6456
1.21k
  NEXT;
6457
9.14k
    } else if (RAW == '+') {
6458
3.46k
  if (ret != NULL) {
6459
3.46k
      int found = 0;
6460
6461
3.46k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6462
3.46k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6463
1.30k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6464
2.15k
      else
6465
2.15k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6466
      /*
6467
       * Some normalization:
6468
       * (a | b*)+ == (a | b)*
6469
       * (a | b?)+ == (a | b)*
6470
       */
6471
6.63k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6472
3.17k
    if ((cur->c1 != NULL) &&
6473
3.17k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6474
3.17k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6475
1.90k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6476
1.90k
        found = 1;
6477
1.90k
    }
6478
3.17k
    if ((cur->c2 != NULL) &&
6479
3.17k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6480
3.17k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6481
1.17k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6482
1.17k
        found = 1;
6483
1.17k
    }
6484
3.17k
    cur = cur->c2;
6485
3.17k
      }
6486
3.46k
      if (found)
6487
1.76k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6488
3.46k
  }
6489
3.46k
  NEXT;
6490
3.46k
    }
6491
11.4k
    return(ret);
6492
20.1k
}
6493
6494
/**
6495
 * xmlParseElementChildrenContentDecl:
6496
 * @ctxt:  an XML parser context
6497
 * @inputchk:  the input used for the current entity, needed for boundary checks
6498
 *
6499
 * parse the declaration for a Mixed Element content
6500
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6501
 *
6502
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6503
 *
6504
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6505
 *
6506
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6507
 *
6508
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6509
 *
6510
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6511
 * TODO Parameter-entity replacement text must be properly nested
6512
 *  with parenthesized groups. That is to say, if either of the
6513
 *  opening or closing parentheses in a choice, seq, or Mixed
6514
 *  construct is contained in the replacement text for a parameter
6515
 *  entity, both must be contained in the same replacement text. For
6516
 *  interoperability, if a parameter-entity reference appears in a
6517
 *  choice, seq, or Mixed construct, its replacement text should not
6518
 *  be empty, and neither the first nor last non-blank character of
6519
 *  the replacement text should be a connector (| or ,).
6520
 *
6521
 * Returns the tree of xmlElementContentPtr describing the element
6522
 *          hierarchy.
6523
 */
6524
xmlElementContentPtr
6525
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6526
    /* stub left for API/ABI compat */
6527
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6528
0
}
6529
6530
/**
6531
 * xmlParseElementContentDecl:
6532
 * @ctxt:  an XML parser context
6533
 * @name:  the name of the element being defined.
6534
 * @result:  the Element Content pointer will be stored here if any
6535
 *
6536
 * parse the declaration for an Element content either Mixed or Children,
6537
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6538
 *
6539
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6540
 *
6541
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6542
 */
6543
6544
int
6545
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6546
4.59k
                           xmlElementContentPtr *result) {
6547
6548
4.59k
    xmlElementContentPtr tree = NULL;
6549
4.59k
    int inputid = ctxt->input->id;
6550
4.59k
    int res;
6551
6552
4.59k
    *result = NULL;
6553
6554
4.59k
    if (RAW != '(') {
6555
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6556
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6557
0
  return(-1);
6558
0
    }
6559
4.59k
    NEXT;
6560
4.59k
    GROW;
6561
4.59k
    if (ctxt->instate == XML_PARSER_EOF)
6562
0
        return(-1);
6563
4.59k
    SKIP_BLANKS;
6564
4.59k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6565
1.85k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6566
1.85k
  res = XML_ELEMENT_TYPE_MIXED;
6567
2.74k
    } else {
6568
2.74k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6569
2.74k
  res = XML_ELEMENT_TYPE_ELEMENT;
6570
2.74k
    }
6571
4.59k
    SKIP_BLANKS;
6572
4.59k
    *result = tree;
6573
4.59k
    return(res);
6574
4.59k
}
6575
6576
/**
6577
 * xmlParseElementDecl:
6578
 * @ctxt:  an XML parser context
6579
 *
6580
 * parse an Element declaration.
6581
 *
6582
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6583
 *
6584
 * [ VC: Unique Element Type Declaration ]
6585
 * No element type may be declared more than once
6586
 *
6587
 * Returns the type of the element, or -1 in case of error
6588
 */
6589
int
6590
6.06k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6591
6.06k
    const xmlChar *name;
6592
6.06k
    int ret = -1;
6593
6.06k
    xmlElementContentPtr content  = NULL;
6594
6595
    /* GROW; done in the caller */
6596
6.06k
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6597
6.01k
  int inputid = ctxt->input->id;
6598
6599
6.01k
  SKIP(9);
6600
6.01k
  if (SKIP_BLANKS == 0) {
6601
174
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6602
174
               "Space required after 'ELEMENT'\n");
6603
174
      return(-1);
6604
174
  }
6605
5.84k
        name = xmlParseName(ctxt);
6606
5.84k
  if (name == NULL) {
6607
218
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6608
218
         "xmlParseElementDecl: no name for Element\n");
6609
218
      return(-1);
6610
218
  }
6611
5.62k
  if (SKIP_BLANKS == 0) {
6612
2.50k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6613
2.50k
         "Space required after the element name\n");
6614
2.50k
  }
6615
5.62k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6616
347
      SKIP(5);
6617
      /*
6618
       * Element must always be empty.
6619
       */
6620
347
      ret = XML_ELEMENT_TYPE_EMPTY;
6621
5.27k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6622
5.27k
             (NXT(2) == 'Y')) {
6623
229
      SKIP(3);
6624
      /*
6625
       * Element is a generic container.
6626
       */
6627
229
      ret = XML_ELEMENT_TYPE_ANY;
6628
5.04k
  } else if (RAW == '(') {
6629
4.59k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6630
4.59k
  } else {
6631
      /*
6632
       * [ WFC: PEs in Internal Subset ] error handling.
6633
       */
6634
450
      if ((RAW == '%') && (ctxt->external == 0) &&
6635
450
          (ctxt->inputNr == 1)) {
6636
160
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6637
160
    "PEReference: forbidden within markup decl in internal subset\n");
6638
290
      } else {
6639
290
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6640
290
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6641
290
            }
6642
450
      return(-1);
6643
450
  }
6644
6645
5.17k
  SKIP_BLANKS;
6646
6647
5.17k
  if (RAW != '>') {
6648
3.34k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6649
3.34k
      if (content != NULL) {
6650
205
    xmlFreeDocElementContent(ctxt->myDoc, content);
6651
205
      }
6652
3.34k
  } else {
6653
1.83k
      if (inputid != ctxt->input->id) {
6654
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6655
0
                               "Element declaration doesn't start and stop in"
6656
0
                               " the same entity\n");
6657
0
      }
6658
6659
1.83k
      NEXT;
6660
1.83k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6661
1.83k
    (ctxt->sax->elementDecl != NULL)) {
6662
0
    if (content != NULL)
6663
0
        content->parent = NULL;
6664
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6665
0
                           content);
6666
0
    if ((content != NULL) && (content->parent == NULL)) {
6667
        /*
6668
         * this is a trick: if xmlAddElementDecl is called,
6669
         * instead of copying the full tree it is plugged directly
6670
         * if called from the parser. Avoid duplicating the
6671
         * interfaces or change the API/ABI
6672
         */
6673
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6674
0
    }
6675
1.83k
      } else if (content != NULL) {
6676
1.01k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6677
1.01k
      }
6678
1.83k
  }
6679
5.17k
    }
6680
5.22k
    return(ret);
6681
6.06k
}
6682
6683
/**
6684
 * xmlParseConditionalSections
6685
 * @ctxt:  an XML parser context
6686
 *
6687
 * [61] conditionalSect ::= includeSect | ignoreSect
6688
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6689
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6690
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6691
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6692
 */
6693
6694
static void
6695
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6696
0
    int *inputIds = NULL;
6697
0
    size_t inputIdsSize = 0;
6698
0
    size_t depth = 0;
6699
6700
0
    while (ctxt->instate != XML_PARSER_EOF) {
6701
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6702
0
            int id = ctxt->input->id;
6703
6704
0
            SKIP(3);
6705
0
            SKIP_BLANKS;
6706
6707
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6708
0
                SKIP(7);
6709
0
                SKIP_BLANKS;
6710
0
                if (RAW != '[') {
6711
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6712
0
                    xmlHaltParser(ctxt);
6713
0
                    goto error;
6714
0
                }
6715
0
                if (ctxt->input->id != id) {
6716
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6717
0
                                   "All markup of the conditional section is"
6718
0
                                   " not in the same entity\n");
6719
0
                }
6720
0
                NEXT;
6721
6722
0
                if (inputIdsSize <= depth) {
6723
0
                    int *tmp;
6724
6725
0
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6726
0
                    tmp = (int *) xmlRealloc(inputIds,
6727
0
                            inputIdsSize * sizeof(int));
6728
0
                    if (tmp == NULL) {
6729
0
                        xmlErrMemory(ctxt, NULL);
6730
0
                        goto error;
6731
0
                    }
6732
0
                    inputIds = tmp;
6733
0
                }
6734
0
                inputIds[depth] = id;
6735
0
                depth++;
6736
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6737
0
                int state;
6738
0
                xmlParserInputState instate;
6739
0
                size_t ignoreDepth = 0;
6740
6741
0
                SKIP(6);
6742
0
                SKIP_BLANKS;
6743
0
                if (RAW != '[') {
6744
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6745
0
                    xmlHaltParser(ctxt);
6746
0
                    goto error;
6747
0
                }
6748
0
                if (ctxt->input->id != id) {
6749
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6750
0
                                   "All markup of the conditional section is"
6751
0
                                   " not in the same entity\n");
6752
0
                }
6753
0
                NEXT;
6754
6755
                /*
6756
                 * Parse up to the end of the conditional section but disable
6757
                 * SAX event generating DTD building in the meantime
6758
                 */
6759
0
                state = ctxt->disableSAX;
6760
0
                instate = ctxt->instate;
6761
0
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6762
0
                ctxt->instate = XML_PARSER_IGNORE;
6763
6764
0
                while (RAW != 0) {
6765
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6766
0
                        SKIP(3);
6767
0
                        ignoreDepth++;
6768
                        /* Check for integer overflow */
6769
0
                        if (ignoreDepth == 0) {
6770
0
                            xmlErrMemory(ctxt, NULL);
6771
0
                            goto error;
6772
0
                        }
6773
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6774
0
                               (NXT(2) == '>')) {
6775
0
                        if (ignoreDepth == 0)
6776
0
                            break;
6777
0
                        SKIP(3);
6778
0
                        ignoreDepth--;
6779
0
                    } else {
6780
0
                        NEXT;
6781
0
                    }
6782
0
                }
6783
6784
0
                ctxt->disableSAX = state;
6785
0
                ctxt->instate = instate;
6786
6787
0
    if (RAW == 0) {
6788
0
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6789
0
                    goto error;
6790
0
    }
6791
0
                if (ctxt->input->id != id) {
6792
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6793
0
                                   "All markup of the conditional section is"
6794
0
                                   " not in the same entity\n");
6795
0
                }
6796
0
                SKIP(3);
6797
0
            } else {
6798
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6799
0
                xmlHaltParser(ctxt);
6800
0
                goto error;
6801
0
            }
6802
0
        } else if ((depth > 0) &&
6803
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6804
0
            depth--;
6805
0
            if (ctxt->input->id != inputIds[depth]) {
6806
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6807
0
                               "All markup of the conditional section is not"
6808
0
                               " in the same entity\n");
6809
0
            }
6810
0
            SKIP(3);
6811
0
        } else {
6812
0
            int id = ctxt->input->id;
6813
0
            unsigned long cons = CUR_CONSUMED;
6814
6815
0
            xmlParseMarkupDecl(ctxt);
6816
6817
0
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6818
0
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6819
0
                xmlHaltParser(ctxt);
6820
0
                goto error;
6821
0
            }
6822
0
        }
6823
6824
0
        if (depth == 0)
6825
0
            break;
6826
6827
0
        SKIP_BLANKS;
6828
0
        GROW;
6829
0
    }
6830
6831
0
error:
6832
0
    xmlFree(inputIds);
6833
0
}
6834
6835
/**
6836
 * xmlParseMarkupDecl:
6837
 * @ctxt:  an XML parser context
6838
 *
6839
 * parse Markup declarations
6840
 *
6841
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6842
 *                     NotationDecl | PI | Comment
6843
 *
6844
 * [ VC: Proper Declaration/PE Nesting ]
6845
 * Parameter-entity replacement text must be properly nested with
6846
 * markup declarations. That is to say, if either the first character
6847
 * or the last character of a markup declaration (markupdecl above) is
6848
 * contained in the replacement text for a parameter-entity reference,
6849
 * both must be contained in the same replacement text.
6850
 *
6851
 * [ WFC: PEs in Internal Subset ]
6852
 * In the internal DTD subset, parameter-entity references can occur
6853
 * only where markup declarations can occur, not within markup declarations.
6854
 * (This does not apply to references that occur in external parameter
6855
 * entities or to the external subset.)
6856
 */
6857
void
6858
313k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6859
313k
    GROW;
6860
313k
    if (CUR == '<') {
6861
50.3k
        if (NXT(1) == '!') {
6862
48.2k
      switch (NXT(2)) {
6863
32.5k
          case 'E':
6864
32.5k
        if (NXT(3) == 'L')
6865
6.06k
      xmlParseElementDecl(ctxt);
6866
26.5k
        else if (NXT(3) == 'N')
6867
26.5k
      xmlParseEntityDecl(ctxt);
6868
32.5k
        break;
6869
9.03k
          case 'A':
6870
9.03k
        xmlParseAttributeListDecl(ctxt);
6871
9.03k
        break;
6872
3.30k
          case 'N':
6873
3.30k
        xmlParseNotationDecl(ctxt);
6874
3.30k
        break;
6875
3.21k
          case '-':
6876
3.21k
        xmlParseComment(ctxt);
6877
3.21k
        break;
6878
76
    default:
6879
        /* there is an error but it will be detected later */
6880
76
        break;
6881
48.2k
      }
6882
48.2k
  } else if (NXT(1) == '?') {
6883
1.15k
      xmlParsePI(ctxt);
6884
1.15k
  }
6885
50.3k
    }
6886
6887
    /*
6888
     * detect requirement to exit there and act accordingly
6889
     * and avoid having instate overridden later on
6890
     */
6891
313k
    if (ctxt->instate == XML_PARSER_EOF)
6892
898
        return;
6893
6894
313k
    ctxt->instate = XML_PARSER_DTD;
6895
313k
}
6896
6897
/**
6898
 * xmlParseTextDecl:
6899
 * @ctxt:  an XML parser context
6900
 *
6901
 * parse an XML declaration header for external entities
6902
 *
6903
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6904
 */
6905
6906
void
6907
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6908
0
    xmlChar *version;
6909
0
    const xmlChar *encoding;
6910
0
    int oldstate;
6911
6912
    /*
6913
     * We know that '<?xml' is here.
6914
     */
6915
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6916
0
  SKIP(5);
6917
0
    } else {
6918
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6919
0
  return;
6920
0
    }
6921
6922
    /* Avoid expansion of parameter entities when skipping blanks. */
6923
0
    oldstate = ctxt->instate;
6924
0
    ctxt->instate = XML_PARSER_START;
6925
6926
0
    if (SKIP_BLANKS == 0) {
6927
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6928
0
           "Space needed after '<?xml'\n");
6929
0
    }
6930
6931
    /*
6932
     * We may have the VersionInfo here.
6933
     */
6934
0
    version = xmlParseVersionInfo(ctxt);
6935
0
    if (version == NULL)
6936
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6937
0
    else {
6938
0
  if (SKIP_BLANKS == 0) {
6939
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6940
0
               "Space needed here\n");
6941
0
  }
6942
0
    }
6943
0
    ctxt->input->version = version;
6944
6945
    /*
6946
     * We must have the encoding declaration
6947
     */
6948
0
    encoding = xmlParseEncodingDecl(ctxt);
6949
0
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6950
  /*
6951
   * The XML REC instructs us to stop parsing right here
6952
   */
6953
0
        ctxt->instate = oldstate;
6954
0
        return;
6955
0
    }
6956
0
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6957
0
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6958
0
           "Missing encoding in text declaration\n");
6959
0
    }
6960
6961
0
    SKIP_BLANKS;
6962
0
    if ((RAW == '?') && (NXT(1) == '>')) {
6963
0
        SKIP(2);
6964
0
    } else if (RAW == '>') {
6965
        /* Deprecated old WD ... */
6966
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6967
0
  NEXT;
6968
0
    } else {
6969
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6970
0
  MOVETO_ENDTAG(CUR_PTR);
6971
0
  NEXT;
6972
0
    }
6973
6974
0
    ctxt->instate = oldstate;
6975
0
}
6976
6977
/**
6978
 * xmlParseExternalSubset:
6979
 * @ctxt:  an XML parser context
6980
 * @ExternalID: the external identifier
6981
 * @SystemID: the system identifier (or URL)
6982
 *
6983
 * parse Markup declarations from an external subset
6984
 *
6985
 * [30] extSubset ::= textDecl? extSubsetDecl
6986
 *
6987
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6988
 */
6989
void
6990
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6991
0
                       const xmlChar *SystemID) {
6992
0
    xmlDetectSAX2(ctxt);
6993
0
    GROW;
6994
6995
0
    if ((ctxt->encoding == NULL) &&
6996
0
        (ctxt->input->end - ctxt->input->cur >= 4)) {
6997
0
        xmlChar start[4];
6998
0
  xmlCharEncoding enc;
6999
7000
0
  start[0] = RAW;
7001
0
  start[1] = NXT(1);
7002
0
  start[2] = NXT(2);
7003
0
  start[3] = NXT(3);
7004
0
  enc = xmlDetectCharEncoding(start, 4);
7005
0
  if (enc != XML_CHAR_ENCODING_NONE)
7006
0
      xmlSwitchEncoding(ctxt, enc);
7007
0
    }
7008
7009
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7010
0
  xmlParseTextDecl(ctxt);
7011
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7012
      /*
7013
       * The XML REC instructs us to stop parsing right here
7014
       */
7015
0
      xmlHaltParser(ctxt);
7016
0
      return;
7017
0
  }
7018
0
    }
7019
0
    if (ctxt->myDoc == NULL) {
7020
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7021
0
  if (ctxt->myDoc == NULL) {
7022
0
      xmlErrMemory(ctxt, "New Doc failed");
7023
0
      return;
7024
0
  }
7025
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7026
0
    }
7027
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7028
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7029
7030
0
    ctxt->instate = XML_PARSER_DTD;
7031
0
    ctxt->external = 1;
7032
0
    SKIP_BLANKS;
7033
0
    while (((RAW == '<') && (NXT(1) == '?')) ||
7034
0
           ((RAW == '<') && (NXT(1) == '!')) ||
7035
0
     (RAW == '%')) {
7036
0
  int id = ctxt->input->id;
7037
0
  unsigned long cons = CUR_CONSUMED;
7038
7039
0
  GROW;
7040
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7041
0
      xmlParseConditionalSections(ctxt);
7042
0
  } else
7043
0
      xmlParseMarkupDecl(ctxt);
7044
0
        SKIP_BLANKS;
7045
7046
0
  if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7047
0
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7048
0
      break;
7049
0
  }
7050
0
    }
7051
7052
0
    if (RAW != 0) {
7053
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7054
0
    }
7055
7056
0
}
7057
7058
/**
7059
 * xmlParseReference:
7060
 * @ctxt:  an XML parser context
7061
 *
7062
 * parse and handle entity references in content, depending on the SAX
7063
 * interface, this may end-up in a call to character() if this is a
7064
 * CharRef, a predefined entity, if there is no reference() callback.
7065
 * or if the parser was asked to switch to that mode.
7066
 *
7067
 * [67] Reference ::= EntityRef | CharRef
7068
 */
7069
void
7070
77.7k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7071
77.7k
    xmlEntityPtr ent;
7072
77.7k
    xmlChar *val;
7073
77.7k
    int was_checked;
7074
77.7k
    xmlNodePtr list = NULL;
7075
77.7k
    xmlParserErrors ret = XML_ERR_OK;
7076
7077
7078
77.7k
    if (RAW != '&')
7079
0
        return;
7080
7081
    /*
7082
     * Simple case of a CharRef
7083
     */
7084
77.7k
    if (NXT(1) == '#') {
7085
22.2k
  int i = 0;
7086
22.2k
  xmlChar out[16];
7087
22.2k
  int hex = NXT(2);
7088
22.2k
  int value = xmlParseCharRef(ctxt);
7089
7090
22.2k
  if (value == 0)
7091
13.8k
      return;
7092
8.45k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7093
      /*
7094
       * So we are using non-UTF-8 buffers
7095
       * Check that the char fit on 8bits, if not
7096
       * generate a CharRef.
7097
       */
7098
3.55k
      if (value <= 0xFF) {
7099
1.45k
    out[0] = value;
7100
1.45k
    out[1] = 0;
7101
1.45k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7102
1.45k
        (!ctxt->disableSAX))
7103
0
        ctxt->sax->characters(ctxt->userData, out, 1);
7104
2.09k
      } else {
7105
2.09k
    if ((hex == 'x') || (hex == 'X'))
7106
560
        snprintf((char *)out, sizeof(out), "#x%X", value);
7107
1.53k
    else
7108
1.53k
        snprintf((char *)out, sizeof(out), "#%d", value);
7109
2.09k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7110
2.09k
        (!ctxt->disableSAX))
7111
0
        ctxt->sax->reference(ctxt->userData, out);
7112
2.09k
      }
7113
4.90k
  } else {
7114
      /*
7115
       * Just encode the value in UTF-8
7116
       */
7117
4.90k
      COPY_BUF(0 ,out, i, value);
7118
4.90k
      out[i] = 0;
7119
4.90k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7120
4.90k
    (!ctxt->disableSAX))
7121
258
    ctxt->sax->characters(ctxt->userData, out, i);
7122
4.90k
  }
7123
8.45k
  return;
7124
22.2k
    }
7125
7126
    /*
7127
     * We are seeing an entity reference
7128
     */
7129
55.4k
    ent = xmlParseEntityRef(ctxt);
7130
55.4k
    if (ent == NULL) return;
7131
7.09k
    if (!ctxt->wellFormed)
7132
6.49k
  return;
7133
596
    was_checked = ent->checked;
7134
7135
    /* special case of predefined entities */
7136
596
    if ((ent->name == NULL) ||
7137
596
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7138
596
  val = ent->content;
7139
596
  if (val == NULL) return;
7140
  /*
7141
   * inline the entity.
7142
   */
7143
596
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7144
596
      (!ctxt->disableSAX))
7145
596
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7146
596
  return;
7147
596
    }
7148
7149
    /*
7150
     * The first reference to the entity trigger a parsing phase
7151
     * where the ent->children is filled with the result from
7152
     * the parsing.
7153
     * Note: external parsed entities will not be loaded, it is not
7154
     * required for a non-validating parser, unless the parsing option
7155
     * of validating, or substituting entities were given. Doing so is
7156
     * far more secure as the parser will only process data coming from
7157
     * the document entity by default.
7158
     */
7159
0
    if (((ent->checked == 0) ||
7160
0
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7161
0
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7162
0
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7163
0
  unsigned long oldnbent = ctxt->nbentities, diff;
7164
7165
  /*
7166
   * This is a bit hackish but this seems the best
7167
   * way to make sure both SAX and DOM entity support
7168
   * behaves okay.
7169
   */
7170
0
  void *user_data;
7171
0
  if (ctxt->userData == ctxt)
7172
0
      user_data = NULL;
7173
0
  else
7174
0
      user_data = ctxt->userData;
7175
7176
  /*
7177
   * Check that this entity is well formed
7178
   * 4.3.2: An internal general parsed entity is well-formed
7179
   * if its replacement text matches the production labeled
7180
   * content.
7181
   */
7182
0
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7183
0
      ctxt->depth++;
7184
0
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7185
0
                                                user_data, &list);
7186
0
      ctxt->depth--;
7187
7188
0
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7189
0
      ctxt->depth++;
7190
0
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7191
0
                                     user_data, ctxt->depth, ent->URI,
7192
0
             ent->ExternalID, &list);
7193
0
      ctxt->depth--;
7194
0
  } else {
7195
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7196
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7197
0
       "invalid entity type found\n", NULL);
7198
0
  }
7199
7200
  /*
7201
   * Store the number of entities needing parsing for this entity
7202
   * content and do checkings
7203
   */
7204
0
        diff = ctxt->nbentities - oldnbent + 1;
7205
0
        if (diff > INT_MAX / 2)
7206
0
            diff = INT_MAX / 2;
7207
0
        ent->checked = diff * 2;
7208
0
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7209
0
      ent->checked |= 1;
7210
0
  if (ret == XML_ERR_ENTITY_LOOP) {
7211
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7212
0
            xmlHaltParser(ctxt);
7213
0
      xmlFreeNodeList(list);
7214
0
      return;
7215
0
  }
7216
0
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7217
0
      xmlFreeNodeList(list);
7218
0
      return;
7219
0
  }
7220
7221
0
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7222
0
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7223
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7224
0
    (ent->children == NULL)) {
7225
0
    ent->children = list;
7226
                /*
7227
                 * Prune it directly in the generated document
7228
                 * except for single text nodes.
7229
                 */
7230
0
                if ((ctxt->replaceEntities == 0) ||
7231
0
                    (ctxt->parseMode == XML_PARSE_READER) ||
7232
0
                    ((list->type == XML_TEXT_NODE) &&
7233
0
                     (list->next == NULL))) {
7234
0
                    ent->owner = 1;
7235
0
                    while (list != NULL) {
7236
0
                        list->parent = (xmlNodePtr) ent;
7237
0
                        xmlSetTreeDoc(list, ent->doc);
7238
0
                        if (list->next == NULL)
7239
0
                            ent->last = list;
7240
0
                        list = list->next;
7241
0
                    }
7242
0
                    list = NULL;
7243
0
                } else {
7244
0
                    ent->owner = 0;
7245
0
                    while (list != NULL) {
7246
0
                        list->parent = (xmlNodePtr) ctxt->node;
7247
0
                        list->doc = ctxt->myDoc;
7248
0
                        if (list->next == NULL)
7249
0
                            ent->last = list;
7250
0
                        list = list->next;
7251
0
                    }
7252
0
                    list = ent->children;
7253
#ifdef LIBXML_LEGACY_ENABLED
7254
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7255
                        xmlAddEntityReference(ent, list, NULL);
7256
#endif /* LIBXML_LEGACY_ENABLED */
7257
0
                }
7258
0
      } else {
7259
0
    xmlFreeNodeList(list);
7260
0
    list = NULL;
7261
0
      }
7262
0
  } else if ((ret != XML_ERR_OK) &&
7263
0
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7264
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7265
0
         "Entity '%s' failed to parse\n", ent->name);
7266
0
            if (ent->content != NULL)
7267
0
                ent->content[0] = 0;
7268
0
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7269
0
  } else if (list != NULL) {
7270
0
      xmlFreeNodeList(list);
7271
0
      list = NULL;
7272
0
  }
7273
0
  if (ent->checked == 0)
7274
0
      ent->checked = 2;
7275
7276
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7277
0
        was_checked = 0;
7278
0
    } else if (ent->checked != 1) {
7279
0
  ctxt->nbentities += ent->checked / 2;
7280
0
    }
7281
7282
    /*
7283
     * Now that the entity content has been gathered
7284
     * provide it to the application, this can take different forms based
7285
     * on the parsing modes.
7286
     */
7287
0
    if (ent->children == NULL) {
7288
  /*
7289
   * Probably running in SAX mode and the callbacks don't
7290
   * build the entity content. So unless we already went
7291
   * though parsing for first checking go though the entity
7292
   * content to generate callbacks associated to the entity
7293
   */
7294
0
  if (was_checked != 0) {
7295
0
      void *user_data;
7296
      /*
7297
       * This is a bit hackish but this seems the best
7298
       * way to make sure both SAX and DOM entity support
7299
       * behaves okay.
7300
       */
7301
0
      if (ctxt->userData == ctxt)
7302
0
    user_data = NULL;
7303
0
      else
7304
0
    user_data = ctxt->userData;
7305
7306
0
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7307
0
    ctxt->depth++;
7308
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7309
0
           ent->content, user_data, NULL);
7310
0
    ctxt->depth--;
7311
0
      } else if (ent->etype ==
7312
0
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7313
0
    ctxt->depth++;
7314
0
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7315
0
         ctxt->sax, user_data, ctxt->depth,
7316
0
         ent->URI, ent->ExternalID, NULL);
7317
0
    ctxt->depth--;
7318
0
      } else {
7319
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7320
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7321
0
           "invalid entity type found\n", NULL);
7322
0
      }
7323
0
      if (ret == XML_ERR_ENTITY_LOOP) {
7324
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7325
0
    return;
7326
0
      }
7327
0
  }
7328
0
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7329
0
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7330
      /*
7331
       * Entity reference callback comes second, it's somewhat
7332
       * superfluous but a compatibility to historical behaviour
7333
       */
7334
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7335
0
  }
7336
0
  return;
7337
0
    }
7338
7339
    /*
7340
     * If we didn't get any children for the entity being built
7341
     */
7342
0
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7343
0
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7344
  /*
7345
   * Create a node.
7346
   */
7347
0
  ctxt->sax->reference(ctxt->userData, ent->name);
7348
0
  return;
7349
0
    }
7350
7351
0
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7352
  /*
7353
   * There is a problem on the handling of _private for entities
7354
   * (bug 155816): Should we copy the content of the field from
7355
   * the entity (possibly overwriting some value set by the user
7356
   * when a copy is created), should we leave it alone, or should
7357
   * we try to take care of different situations?  The problem
7358
   * is exacerbated by the usage of this field by the xmlReader.
7359
   * To fix this bug, we look at _private on the created node
7360
   * and, if it's NULL, we copy in whatever was in the entity.
7361
   * If it's not NULL we leave it alone.  This is somewhat of a
7362
   * hack - maybe we should have further tests to determine
7363
   * what to do.
7364
   */
7365
0
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7366
      /*
7367
       * Seems we are generating the DOM content, do
7368
       * a simple tree copy for all references except the first
7369
       * In the first occurrence list contains the replacement.
7370
       */
7371
0
      if (((list == NULL) && (ent->owner == 0)) ||
7372
0
    (ctxt->parseMode == XML_PARSE_READER)) {
7373
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7374
7375
    /*
7376
     * We are copying here, make sure there is no abuse
7377
     */
7378
0
    ctxt->sizeentcopy += ent->length + 5;
7379
0
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7380
0
        return;
7381
7382
    /*
7383
     * when operating on a reader, the entities definitions
7384
     * are always owning the entities subtree.
7385
    if (ctxt->parseMode == XML_PARSE_READER)
7386
        ent->owner = 1;
7387
     */
7388
7389
0
    cur = ent->children;
7390
0
    while (cur != NULL) {
7391
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7392
0
        if (nw != NULL) {
7393
0
      if (nw->_private == NULL)
7394
0
          nw->_private = cur->_private;
7395
0
      if (firstChild == NULL){
7396
0
          firstChild = nw;
7397
0
      }
7398
0
      nw = xmlAddChild(ctxt->node, nw);
7399
0
        }
7400
0
        if (cur == ent->last) {
7401
      /*
7402
       * needed to detect some strange empty
7403
       * node cases in the reader tests
7404
       */
7405
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7406
0
          (nw != NULL) &&
7407
0
          (nw->type == XML_ELEMENT_NODE) &&
7408
0
          (nw->children == NULL))
7409
0
          nw->extra = 1;
7410
7411
0
      break;
7412
0
        }
7413
0
        cur = cur->next;
7414
0
    }
7415
#ifdef LIBXML_LEGACY_ENABLED
7416
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7417
      xmlAddEntityReference(ent, firstChild, nw);
7418
#endif /* LIBXML_LEGACY_ENABLED */
7419
0
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7420
0
    xmlNodePtr nw = NULL, cur, next, last,
7421
0
         firstChild = NULL;
7422
7423
    /*
7424
     * We are copying here, make sure there is no abuse
7425
     */
7426
0
    ctxt->sizeentcopy += ent->length + 5;
7427
0
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7428
0
        return;
7429
7430
    /*
7431
     * Copy the entity child list and make it the new
7432
     * entity child list. The goal is to make sure any
7433
     * ID or REF referenced will be the one from the
7434
     * document content and not the entity copy.
7435
     */
7436
0
    cur = ent->children;
7437
0
    ent->children = NULL;
7438
0
    last = ent->last;
7439
0
    ent->last = NULL;
7440
0
    while (cur != NULL) {
7441
0
        next = cur->next;
7442
0
        cur->next = NULL;
7443
0
        cur->parent = NULL;
7444
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7445
0
        if (nw != NULL) {
7446
0
      if (nw->_private == NULL)
7447
0
          nw->_private = cur->_private;
7448
0
      if (firstChild == NULL){
7449
0
          firstChild = cur;
7450
0
      }
7451
0
      xmlAddChild((xmlNodePtr) ent, nw);
7452
0
      xmlAddChild(ctxt->node, cur);
7453
0
        }
7454
0
        if (cur == last)
7455
0
      break;
7456
0
        cur = next;
7457
0
    }
7458
0
    if (ent->owner == 0)
7459
0
        ent->owner = 1;
7460
#ifdef LIBXML_LEGACY_ENABLED
7461
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7462
      xmlAddEntityReference(ent, firstChild, nw);
7463
#endif /* LIBXML_LEGACY_ENABLED */
7464
0
      } else {
7465
0
    const xmlChar *nbktext;
7466
7467
    /*
7468
     * the name change is to avoid coalescing of the
7469
     * node with a possible previous text one which
7470
     * would make ent->children a dangling pointer
7471
     */
7472
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7473
0
          -1);
7474
0
    if (ent->children->type == XML_TEXT_NODE)
7475
0
        ent->children->name = nbktext;
7476
0
    if ((ent->last != ent->children) &&
7477
0
        (ent->last->type == XML_TEXT_NODE))
7478
0
        ent->last->name = nbktext;
7479
0
    xmlAddChildList(ctxt->node, ent->children);
7480
0
      }
7481
7482
      /*
7483
       * This is to avoid a nasty side effect, see
7484
       * characters() in SAX.c
7485
       */
7486
0
      ctxt->nodemem = 0;
7487
0
      ctxt->nodelen = 0;
7488
0
      return;
7489
0
  }
7490
0
    }
7491
0
}
7492
7493
/**
7494
 * xmlParseEntityRef:
7495
 * @ctxt:  an XML parser context
7496
 *
7497
 * parse ENTITY references declarations
7498
 *
7499
 * [68] EntityRef ::= '&' Name ';'
7500
 *
7501
 * [ WFC: Entity Declared ]
7502
 * In a document without any DTD, a document with only an internal DTD
7503
 * subset which contains no parameter entity references, or a document
7504
 * with "standalone='yes'", the Name given in the entity reference
7505
 * must match that in an entity declaration, except that well-formed
7506
 * documents need not declare any of the following entities: amp, lt,
7507
 * gt, apos, quot.  The declaration of a parameter entity must precede
7508
 * any reference to it.  Similarly, the declaration of a general entity
7509
 * must precede any reference to it which appears in a default value in an
7510
 * attribute-list declaration. Note that if entities are declared in the
7511
 * external subset or in external parameter entities, a non-validating
7512
 * processor is not obligated to read and process their declarations;
7513
 * for such documents, the rule that an entity must be declared is a
7514
 * well-formedness constraint only if standalone='yes'.
7515
 *
7516
 * [ WFC: Parsed Entity ]
7517
 * An entity reference must not contain the name of an unparsed entity
7518
 *
7519
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7520
 */
7521
xmlEntityPtr
7522
186k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7523
186k
    const xmlChar *name;
7524
186k
    xmlEntityPtr ent = NULL;
7525
7526
186k
    GROW;
7527
186k
    if (ctxt->instate == XML_PARSER_EOF)
7528
0
        return(NULL);
7529
7530
186k
    if (RAW != '&')
7531
0
        return(NULL);
7532
186k
    NEXT;
7533
186k
    name = xmlParseName(ctxt);
7534
186k
    if (name == NULL) {
7535
130k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7536
130k
           "xmlParseEntityRef: no name\n");
7537
130k
        return(NULL);
7538
130k
    }
7539
55.6k
    if (RAW != ';') {
7540
13.2k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7541
13.2k
  return(NULL);
7542
13.2k
    }
7543
42.3k
    NEXT;
7544
7545
    /*
7546
     * Predefined entities override any extra definition
7547
     */
7548
42.3k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7549
42.3k
        ent = xmlGetPredefinedEntity(name);
7550
42.3k
        if (ent != NULL)
7551
21.1k
            return(ent);
7552
42.3k
    }
7553
7554
    /*
7555
     * Increase the number of entity references parsed
7556
     */
7557
21.2k
    ctxt->nbentities++;
7558
7559
    /*
7560
     * Ask first SAX for entity resolution, otherwise try the
7561
     * entities which may have stored in the parser context.
7562
     */
7563
21.2k
    if (ctxt->sax != NULL) {
7564
21.2k
  if (ctxt->sax->getEntity != NULL)
7565
0
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7566
21.2k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7567
21.2k
      (ctxt->options & XML_PARSE_OLDSAX))
7568
0
      ent = xmlGetPredefinedEntity(name);
7569
21.2k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7570
21.2k
      (ctxt->userData==ctxt)) {
7571
0
      ent = xmlSAX2GetEntity(ctxt, name);
7572
0
  }
7573
21.2k
    }
7574
21.2k
    if (ctxt->instate == XML_PARSER_EOF)
7575
0
  return(NULL);
7576
    /*
7577
     * [ WFC: Entity Declared ]
7578
     * In a document without any DTD, a document with only an
7579
     * internal DTD subset which contains no parameter entity
7580
     * references, or a document with "standalone='yes'", the
7581
     * Name given in the entity reference must match that in an
7582
     * entity declaration, except that well-formed documents
7583
     * need not declare any of the following entities: amp, lt,
7584
     * gt, apos, quot.
7585
     * The declaration of a parameter entity must precede any
7586
     * reference to it.
7587
     * Similarly, the declaration of a general entity must
7588
     * precede any reference to it which appears in a default
7589
     * value in an attribute-list declaration. Note that if
7590
     * entities are declared in the external subset or in
7591
     * external parameter entities, a non-validating processor
7592
     * is not obligated to read and process their declarations;
7593
     * for such documents, the rule that an entity must be
7594
     * declared is a well-formedness constraint only if
7595
     * standalone='yes'.
7596
     */
7597
21.2k
    if (ent == NULL) {
7598
21.2k
  if ((ctxt->standalone == 1) ||
7599
21.2k
      ((ctxt->hasExternalSubset == 0) &&
7600
21.0k
       (ctxt->hasPErefs == 0))) {
7601
19.8k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7602
19.8k
         "Entity '%s' not defined\n", name);
7603
19.8k
  } else {
7604
1.44k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7605
1.44k
         "Entity '%s' not defined\n", name);
7606
1.44k
      if ((ctxt->inSubset == 0) &&
7607
1.44k
    (ctxt->sax != NULL) &&
7608
1.44k
    (ctxt->sax->reference != NULL)) {
7609
0
    ctxt->sax->reference(ctxt->userData, name);
7610
0
      }
7611
1.44k
  }
7612
21.2k
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7613
21.2k
  ctxt->valid = 0;
7614
21.2k
    }
7615
7616
    /*
7617
     * [ WFC: Parsed Entity ]
7618
     * An entity reference must not contain the name of an
7619
     * unparsed entity
7620
     */
7621
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7622
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7623
0
     "Entity reference to unparsed entity %s\n", name);
7624
0
    }
7625
7626
    /*
7627
     * [ WFC: No External Entity References ]
7628
     * Attribute values cannot contain direct or indirect
7629
     * entity references to external entities.
7630
     */
7631
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7632
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7633
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7634
0
       "Attribute references external entity '%s'\n", name);
7635
0
    }
7636
    /*
7637
     * [ WFC: No < in Attribute Values ]
7638
     * The replacement text of any entity referred to directly or
7639
     * indirectly in an attribute value (other than "&lt;") must
7640
     * not contain a <.
7641
     */
7642
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7643
0
       (ent != NULL) && 
7644
0
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7645
0
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7646
0
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7647
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7648
0
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7649
0
        }
7650
0
    }
7651
7652
    /*
7653
     * Internal check, no parameter entities here ...
7654
     */
7655
0
    else {
7656
0
  switch (ent->etype) {
7657
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7658
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7659
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7660
0
       "Attempt to reference the parameter entity '%s'\n",
7661
0
            name);
7662
0
      break;
7663
0
      default:
7664
0
      break;
7665
0
  }
7666
0
    }
7667
7668
    /*
7669
     * [ WFC: No Recursion ]
7670
     * A parsed entity must not contain a recursive reference
7671
     * to itself, either directly or indirectly.
7672
     * Done somewhere else
7673
     */
7674
21.2k
    return(ent);
7675
21.2k
}
7676
7677
/**
7678
 * xmlParseStringEntityRef:
7679
 * @ctxt:  an XML parser context
7680
 * @str:  a pointer to an index in the string
7681
 *
7682
 * parse ENTITY references declarations, but this version parses it from
7683
 * a string value.
7684
 *
7685
 * [68] EntityRef ::= '&' Name ';'
7686
 *
7687
 * [ WFC: Entity Declared ]
7688
 * In a document without any DTD, a document with only an internal DTD
7689
 * subset which contains no parameter entity references, or a document
7690
 * with "standalone='yes'", the Name given in the entity reference
7691
 * must match that in an entity declaration, except that well-formed
7692
 * documents need not declare any of the following entities: amp, lt,
7693
 * gt, apos, quot.  The declaration of a parameter entity must precede
7694
 * any reference to it.  Similarly, the declaration of a general entity
7695
 * must precede any reference to it which appears in a default value in an
7696
 * attribute-list declaration. Note that if entities are declared in the
7697
 * external subset or in external parameter entities, a non-validating
7698
 * processor is not obligated to read and process their declarations;
7699
 * for such documents, the rule that an entity must be declared is a
7700
 * well-formedness constraint only if standalone='yes'.
7701
 *
7702
 * [ WFC: Parsed Entity ]
7703
 * An entity reference must not contain the name of an unparsed entity
7704
 *
7705
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7706
 * is updated to the current location in the string.
7707
 */
7708
static xmlEntityPtr
7709
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7710
0
    xmlChar *name;
7711
0
    const xmlChar *ptr;
7712
0
    xmlChar cur;
7713
0
    xmlEntityPtr ent = NULL;
7714
7715
0
    if ((str == NULL) || (*str == NULL))
7716
0
        return(NULL);
7717
0
    ptr = *str;
7718
0
    cur = *ptr;
7719
0
    if (cur != '&')
7720
0
  return(NULL);
7721
7722
0
    ptr++;
7723
0
    name = xmlParseStringName(ctxt, &ptr);
7724
0
    if (name == NULL) {
7725
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7726
0
           "xmlParseStringEntityRef: no name\n");
7727
0
  *str = ptr;
7728
0
  return(NULL);
7729
0
    }
7730
0
    if (*ptr != ';') {
7731
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7732
0
        xmlFree(name);
7733
0
  *str = ptr;
7734
0
  return(NULL);
7735
0
    }
7736
0
    ptr++;
7737
7738
7739
    /*
7740
     * Predefined entities override any extra definition
7741
     */
7742
0
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7743
0
        ent = xmlGetPredefinedEntity(name);
7744
0
        if (ent != NULL) {
7745
0
            xmlFree(name);
7746
0
            *str = ptr;
7747
0
            return(ent);
7748
0
        }
7749
0
    }
7750
7751
    /*
7752
     * Increase the number of entity references parsed
7753
     */
7754
0
    ctxt->nbentities++;
7755
7756
    /*
7757
     * Ask first SAX for entity resolution, otherwise try the
7758
     * entities which may have stored in the parser context.
7759
     */
7760
0
    if (ctxt->sax != NULL) {
7761
0
  if (ctxt->sax->getEntity != NULL)
7762
0
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7763
0
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7764
0
      ent = xmlGetPredefinedEntity(name);
7765
0
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7766
0
      ent = xmlSAX2GetEntity(ctxt, name);
7767
0
  }
7768
0
    }
7769
0
    if (ctxt->instate == XML_PARSER_EOF) {
7770
0
  xmlFree(name);
7771
0
  return(NULL);
7772
0
    }
7773
7774
    /*
7775
     * [ WFC: Entity Declared ]
7776
     * In a document without any DTD, a document with only an
7777
     * internal DTD subset which contains no parameter entity
7778
     * references, or a document with "standalone='yes'", the
7779
     * Name given in the entity reference must match that in an
7780
     * entity declaration, except that well-formed documents
7781
     * need not declare any of the following entities: amp, lt,
7782
     * gt, apos, quot.
7783
     * The declaration of a parameter entity must precede any
7784
     * reference to it.
7785
     * Similarly, the declaration of a general entity must
7786
     * precede any reference to it which appears in a default
7787
     * value in an attribute-list declaration. Note that if
7788
     * entities are declared in the external subset or in
7789
     * external parameter entities, a non-validating processor
7790
     * is not obligated to read and process their declarations;
7791
     * for such documents, the rule that an entity must be
7792
     * declared is a well-formedness constraint only if
7793
     * standalone='yes'.
7794
     */
7795
0
    if (ent == NULL) {
7796
0
  if ((ctxt->standalone == 1) ||
7797
0
      ((ctxt->hasExternalSubset == 0) &&
7798
0
       (ctxt->hasPErefs == 0))) {
7799
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7800
0
         "Entity '%s' not defined\n", name);
7801
0
  } else {
7802
0
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7803
0
        "Entity '%s' not defined\n",
7804
0
        name);
7805
0
  }
7806
0
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7807
  /* TODO ? check regressions ctxt->valid = 0; */
7808
0
    }
7809
7810
    /*
7811
     * [ WFC: Parsed Entity ]
7812
     * An entity reference must not contain the name of an
7813
     * unparsed entity
7814
     */
7815
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7816
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7817
0
     "Entity reference to unparsed entity %s\n", name);
7818
0
    }
7819
7820
    /*
7821
     * [ WFC: No External Entity References ]
7822
     * Attribute values cannot contain direct or indirect
7823
     * entity references to external entities.
7824
     */
7825
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7826
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7827
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7828
0
   "Attribute references external entity '%s'\n", name);
7829
0
    }
7830
    /*
7831
     * [ WFC: No < in Attribute Values ]
7832
     * The replacement text of any entity referred to directly or
7833
     * indirectly in an attribute value (other than "&lt;") must
7834
     * not contain a <.
7835
     */
7836
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7837
0
       (ent != NULL) && (ent->content != NULL) &&
7838
0
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7839
0
       (xmlStrchr(ent->content, '<'))) {
7840
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7841
0
     "'<' in entity '%s' is not allowed in attributes values\n",
7842
0
        name);
7843
0
    }
7844
7845
    /*
7846
     * Internal check, no parameter entities here ...
7847
     */
7848
0
    else {
7849
0
  switch (ent->etype) {
7850
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7851
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7852
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7853
0
       "Attempt to reference the parameter entity '%s'\n",
7854
0
          name);
7855
0
      break;
7856
0
      default:
7857
0
      break;
7858
0
  }
7859
0
    }
7860
7861
    /*
7862
     * [ WFC: No Recursion ]
7863
     * A parsed entity must not contain a recursive reference
7864
     * to itself, either directly or indirectly.
7865
     * Done somewhere else
7866
     */
7867
7868
0
    xmlFree(name);
7869
0
    *str = ptr;
7870
0
    return(ent);
7871
0
}
7872
7873
/**
7874
 * xmlParsePEReference:
7875
 * @ctxt:  an XML parser context
7876
 *
7877
 * parse PEReference declarations
7878
 * The entity content is handled directly by pushing its content as
7879
 * a new input stream.
7880
 *
7881
 * [69] PEReference ::= '%' Name ';'
7882
 *
7883
 * [ WFC: No Recursion ]
7884
 * A parsed entity must not contain a recursive
7885
 * reference to itself, either directly or indirectly.
7886
 *
7887
 * [ WFC: Entity Declared ]
7888
 * In a document without any DTD, a document with only an internal DTD
7889
 * subset which contains no parameter entity references, or a document
7890
 * with "standalone='yes'", ...  ... The declaration of a parameter
7891
 * entity must precede any reference to it...
7892
 *
7893
 * [ VC: Entity Declared ]
7894
 * In a document with an external subset or external parameter entities
7895
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7896
 * must precede any reference to it...
7897
 *
7898
 * [ WFC: In DTD ]
7899
 * Parameter-entity references may only appear in the DTD.
7900
 * NOTE: misleading but this is handled.
7901
 */
7902
void
7903
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7904
313k
{
7905
313k
    const xmlChar *name;
7906
313k
    xmlEntityPtr entity = NULL;
7907
313k
    xmlParserInputPtr input;
7908
7909
313k
    if (RAW != '%')
7910
51.4k
        return;
7911
262k
    NEXT;
7912
262k
    name = xmlParseName(ctxt);
7913
262k
    if (name == NULL) {
7914
42.1k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7915
42.1k
  return;
7916
42.1k
    }
7917
220k
    if (xmlParserDebugEntities)
7918
0
  xmlGenericError(xmlGenericErrorContext,
7919
0
    "PEReference: %s\n", name);
7920
220k
    if (RAW != ';') {
7921
1.71k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7922
1.71k
        return;
7923
1.71k
    }
7924
7925
218k
    NEXT;
7926
7927
    /*
7928
     * Increase the number of entity references parsed
7929
     */
7930
218k
    ctxt->nbentities++;
7931
7932
    /*
7933
     * Request the entity from SAX
7934
     */
7935
218k
    if ((ctxt->sax != NULL) &&
7936
218k
  (ctxt->sax->getParameterEntity != NULL))
7937
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7938
218k
    if (ctxt->instate == XML_PARSER_EOF)
7939
0
  return;
7940
218k
    if (entity == NULL) {
7941
  /*
7942
   * [ WFC: Entity Declared ]
7943
   * In a document without any DTD, a document with only an
7944
   * internal DTD subset which contains no parameter entity
7945
   * references, or a document with "standalone='yes'", ...
7946
   * ... The declaration of a parameter entity must precede
7947
   * any reference to it...
7948
   */
7949
218k
  if ((ctxt->standalone == 1) ||
7950
218k
      ((ctxt->hasExternalSubset == 0) &&
7951
216k
       (ctxt->hasPErefs == 0))) {
7952
216k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7953
216k
            "PEReference: %%%s; not found\n",
7954
216k
            name);
7955
216k
  } else {
7956
      /*
7957
       * [ VC: Entity Declared ]
7958
       * In a document with an external subset or external
7959
       * parameter entities with "standalone='no'", ...
7960
       * ... The declaration of a parameter entity must
7961
       * precede any reference to it...
7962
       */
7963
1.94k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7964
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7965
0
                                 "PEReference: %%%s; not found\n",
7966
0
                                 name, NULL);
7967
0
            } else
7968
1.94k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7969
1.94k
                              "PEReference: %%%s; not found\n",
7970
1.94k
                              name, NULL);
7971
1.94k
            ctxt->valid = 0;
7972
1.94k
  }
7973
218k
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
7974
218k
    } else {
7975
  /*
7976
   * Internal checking in case the entity quest barfed
7977
   */
7978
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7979
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7980
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7981
0
      "Internal: %%%s; is not a parameter entity\n",
7982
0
        name, NULL);
7983
0
  } else {
7984
0
            xmlChar start[4];
7985
0
            xmlCharEncoding enc;
7986
7987
0
      if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7988
0
          return;
7989
7990
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7991
0
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7992
0
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7993
0
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7994
0
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7995
0
    (ctxt->replaceEntities == 0) &&
7996
0
    (ctxt->validate == 0))
7997
0
    return;
7998
7999
0
      input = xmlNewEntityInputStream(ctxt, entity);
8000
0
      if (xmlPushInput(ctxt, input) < 0) {
8001
0
                xmlFreeInputStream(input);
8002
0
    return;
8003
0
            }
8004
8005
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8006
                /*
8007
                 * Get the 4 first bytes and decode the charset
8008
                 * if enc != XML_CHAR_ENCODING_NONE
8009
                 * plug some encoding conversion routines.
8010
                 * Note that, since we may have some non-UTF8
8011
                 * encoding (like UTF16, bug 135229), the 'length'
8012
                 * is not known, but we can calculate based upon
8013
                 * the amount of data in the buffer.
8014
                 */
8015
0
                GROW
8016
0
                if (ctxt->instate == XML_PARSER_EOF)
8017
0
                    return;
8018
0
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8019
0
                    start[0] = RAW;
8020
0
                    start[1] = NXT(1);
8021
0
                    start[2] = NXT(2);
8022
0
                    start[3] = NXT(3);
8023
0
                    enc = xmlDetectCharEncoding(start, 4);
8024
0
                    if (enc != XML_CHAR_ENCODING_NONE) {
8025
0
                        xmlSwitchEncoding(ctxt, enc);
8026
0
                    }
8027
0
                }
8028
8029
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8030
0
                    (IS_BLANK_CH(NXT(5)))) {
8031
0
                    xmlParseTextDecl(ctxt);
8032
0
                }
8033
0
            }
8034
0
  }
8035
0
    }
8036
218k
    ctxt->hasPErefs = 1;
8037
218k
}
8038
8039
/**
8040
 * xmlLoadEntityContent:
8041
 * @ctxt:  an XML parser context
8042
 * @entity: an unloaded system entity
8043
 *
8044
 * Load the original content of the given system entity from the
8045
 * ExternalID/SystemID given. This is to be used for Included in Literal
8046
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8047
 *
8048
 * Returns 0 in case of success and -1 in case of failure
8049
 */
8050
static int
8051
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8052
0
    xmlParserInputPtr input;
8053
0
    xmlBufferPtr buf;
8054
0
    int l, c;
8055
0
    int count = 0;
8056
8057
0
    if ((ctxt == NULL) || (entity == NULL) ||
8058
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8059
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8060
0
  (entity->content != NULL)) {
8061
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8062
0
              "xmlLoadEntityContent parameter error");
8063
0
        return(-1);
8064
0
    }
8065
8066
0
    if (xmlParserDebugEntities)
8067
0
  xmlGenericError(xmlGenericErrorContext,
8068
0
    "Reading %s entity content input\n", entity->name);
8069
8070
0
    buf = xmlBufferCreate();
8071
0
    if (buf == NULL) {
8072
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8073
0
              "xmlLoadEntityContent parameter error");
8074
0
        return(-1);
8075
0
    }
8076
0
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8077
8078
0
    input = xmlNewEntityInputStream(ctxt, entity);
8079
0
    if (input == NULL) {
8080
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8081
0
              "xmlLoadEntityContent input error");
8082
0
  xmlBufferFree(buf);
8083
0
        return(-1);
8084
0
    }
8085
8086
    /*
8087
     * Push the entity as the current input, read char by char
8088
     * saving to the buffer until the end of the entity or an error
8089
     */
8090
0
    if (xmlPushInput(ctxt, input) < 0) {
8091
0
        xmlBufferFree(buf);
8092
0
  return(-1);
8093
0
    }
8094
8095
0
    GROW;
8096
0
    c = CUR_CHAR(l);
8097
0
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8098
0
           (IS_CHAR(c))) {
8099
0
        xmlBufferAdd(buf, ctxt->input->cur, l);
8100
0
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8101
0
      count = 0;
8102
0
      GROW;
8103
0
            if (ctxt->instate == XML_PARSER_EOF) {
8104
0
                xmlBufferFree(buf);
8105
0
                return(-1);
8106
0
            }
8107
0
  }
8108
0
  NEXTL(l);
8109
0
  c = CUR_CHAR(l);
8110
0
  if (c == 0) {
8111
0
      count = 0;
8112
0
      GROW;
8113
0
            if (ctxt->instate == XML_PARSER_EOF) {
8114
0
                xmlBufferFree(buf);
8115
0
                return(-1);
8116
0
            }
8117
0
      c = CUR_CHAR(l);
8118
0
  }
8119
0
    }
8120
8121
0
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8122
0
        xmlPopInput(ctxt);
8123
0
    } else if (!IS_CHAR(c)) {
8124
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8125
0
                          "xmlLoadEntityContent: invalid char value %d\n",
8126
0
                    c);
8127
0
  xmlBufferFree(buf);
8128
0
  return(-1);
8129
0
    }
8130
0
    entity->content = buf->content;
8131
0
    buf->content = NULL;
8132
0
    xmlBufferFree(buf);
8133
8134
0
    return(0);
8135
0
}
8136
8137
/**
8138
 * xmlParseStringPEReference:
8139
 * @ctxt:  an XML parser context
8140
 * @str:  a pointer to an index in the string
8141
 *
8142
 * parse PEReference declarations
8143
 *
8144
 * [69] PEReference ::= '%' Name ';'
8145
 *
8146
 * [ WFC: No Recursion ]
8147
 * A parsed entity must not contain a recursive
8148
 * reference to itself, either directly or indirectly.
8149
 *
8150
 * [ WFC: Entity Declared ]
8151
 * In a document without any DTD, a document with only an internal DTD
8152
 * subset which contains no parameter entity references, or a document
8153
 * with "standalone='yes'", ...  ... The declaration of a parameter
8154
 * entity must precede any reference to it...
8155
 *
8156
 * [ VC: Entity Declared ]
8157
 * In a document with an external subset or external parameter entities
8158
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8159
 * must precede any reference to it...
8160
 *
8161
 * [ WFC: In DTD ]
8162
 * Parameter-entity references may only appear in the DTD.
8163
 * NOTE: misleading but this is handled.
8164
 *
8165
 * Returns the string of the entity content.
8166
 *         str is updated to the current value of the index
8167
 */
8168
static xmlEntityPtr
8169
0
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8170
0
    const xmlChar *ptr;
8171
0
    xmlChar cur;
8172
0
    xmlChar *name;
8173
0
    xmlEntityPtr entity = NULL;
8174
8175
0
    if ((str == NULL) || (*str == NULL)) return(NULL);
8176
0
    ptr = *str;
8177
0
    cur = *ptr;
8178
0
    if (cur != '%')
8179
0
        return(NULL);
8180
0
    ptr++;
8181
0
    name = xmlParseStringName(ctxt, &ptr);
8182
0
    if (name == NULL) {
8183
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8184
0
           "xmlParseStringPEReference: no name\n");
8185
0
  *str = ptr;
8186
0
  return(NULL);
8187
0
    }
8188
0
    cur = *ptr;
8189
0
    if (cur != ';') {
8190
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8191
0
  xmlFree(name);
8192
0
  *str = ptr;
8193
0
  return(NULL);
8194
0
    }
8195
0
    ptr++;
8196
8197
    /*
8198
     * Increase the number of entity references parsed
8199
     */
8200
0
    ctxt->nbentities++;
8201
8202
    /*
8203
     * Request the entity from SAX
8204
     */
8205
0
    if ((ctxt->sax != NULL) &&
8206
0
  (ctxt->sax->getParameterEntity != NULL))
8207
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8208
0
    if (ctxt->instate == XML_PARSER_EOF) {
8209
0
  xmlFree(name);
8210
0
  *str = ptr;
8211
0
  return(NULL);
8212
0
    }
8213
0
    if (entity == NULL) {
8214
  /*
8215
   * [ WFC: Entity Declared ]
8216
   * In a document without any DTD, a document with only an
8217
   * internal DTD subset which contains no parameter entity
8218
   * references, or a document with "standalone='yes'", ...
8219
   * ... The declaration of a parameter entity must precede
8220
   * any reference to it...
8221
   */
8222
0
  if ((ctxt->standalone == 1) ||
8223
0
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8224
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8225
0
     "PEReference: %%%s; not found\n", name);
8226
0
  } else {
8227
      /*
8228
       * [ VC: Entity Declared ]
8229
       * In a document with an external subset or external
8230
       * parameter entities with "standalone='no'", ...
8231
       * ... The declaration of a parameter entity must
8232
       * precede any reference to it...
8233
       */
8234
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8235
0
        "PEReference: %%%s; not found\n",
8236
0
        name, NULL);
8237
0
      ctxt->valid = 0;
8238
0
  }
8239
0
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8240
0
    } else {
8241
  /*
8242
   * Internal checking in case the entity quest barfed
8243
   */
8244
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8245
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8246
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8247
0
        "%%%s; is not a parameter entity\n",
8248
0
        name, NULL);
8249
0
  }
8250
0
    }
8251
0
    ctxt->hasPErefs = 1;
8252
0
    xmlFree(name);
8253
0
    *str = ptr;
8254
0
    return(entity);
8255
0
}
8256
8257
/**
8258
 * xmlParseDocTypeDecl:
8259
 * @ctxt:  an XML parser context
8260
 *
8261
 * parse a DOCTYPE declaration
8262
 *
8263
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8264
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8265
 *
8266
 * [ VC: Root Element Type ]
8267
 * The Name in the document type declaration must match the element
8268
 * type of the root element.
8269
 */
8270
8271
void
8272
4.45k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8273
4.45k
    const xmlChar *name = NULL;
8274
4.45k
    xmlChar *ExternalID = NULL;
8275
4.45k
    xmlChar *URI = NULL;
8276
8277
    /*
8278
     * We know that '<!DOCTYPE' has been detected.
8279
     */
8280
4.45k
    SKIP(9);
8281
8282
4.45k
    SKIP_BLANKS;
8283
8284
    /*
8285
     * Parse the DOCTYPE name.
8286
     */
8287
4.45k
    name = xmlParseName(ctxt);
8288
4.45k
    if (name == NULL) {
8289
2.20k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8290
2.20k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8291
2.20k
    }
8292
4.45k
    ctxt->intSubName = name;
8293
8294
4.45k
    SKIP_BLANKS;
8295
8296
    /*
8297
     * Check for SystemID and ExternalID
8298
     */
8299
4.45k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8300
8301
4.45k
    if ((URI != NULL) || (ExternalID != NULL)) {
8302
189
        ctxt->hasExternalSubset = 1;
8303
189
    }
8304
4.45k
    ctxt->extSubURI = URI;
8305
4.45k
    ctxt->extSubSystem = ExternalID;
8306
8307
4.45k
    SKIP_BLANKS;
8308
8309
    /*
8310
     * Create and update the internal subset.
8311
     */
8312
4.45k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8313
4.45k
  (!ctxt->disableSAX))
8314
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8315
4.45k
    if (ctxt->instate == XML_PARSER_EOF)
8316
0
  return;
8317
8318
    /*
8319
     * Is there any internal subset declarations ?
8320
     * they are handled separately in xmlParseInternalSubset()
8321
     */
8322
4.45k
    if (RAW == '[')
8323
3.14k
  return;
8324
8325
    /*
8326
     * We should be at the end of the DOCTYPE declaration.
8327
     */
8328
1.31k
    if (RAW != '>') {
8329
1.29k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8330
1.29k
    }
8331
1.31k
    NEXT;
8332
1.31k
}
8333
8334
/**
8335
 * xmlParseInternalSubset:
8336
 * @ctxt:  an XML parser context
8337
 *
8338
 * parse the internal subset declaration
8339
 *
8340
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8341
 */
8342
8343
static void
8344
4.16k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8345
    /*
8346
     * Is there any DTD definition ?
8347
     */
8348
4.16k
    if (RAW == '[') {
8349
4.16k
        int baseInputNr = ctxt->inputNr;
8350
4.16k
        ctxt->instate = XML_PARSER_DTD;
8351
4.16k
        NEXT;
8352
  /*
8353
   * Parse the succession of Markup declarations and
8354
   * PEReferences.
8355
   * Subsequence (markupdecl | PEReference | S)*
8356
   */
8357
315k
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8358
315k
               (ctxt->instate != XML_PARSER_EOF)) {
8359
313k
      int id = ctxt->input->id;
8360
313k
      unsigned long cons = CUR_CONSUMED;
8361
8362
313k
      SKIP_BLANKS;
8363
313k
      xmlParseMarkupDecl(ctxt);
8364
313k
      xmlParsePEReference(ctxt);
8365
8366
            /*
8367
             * Conditional sections are allowed from external entities included
8368
             * by PE References in the internal subset.
8369
             */
8370
313k
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8371
313k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8372
0
                xmlParseConditionalSections(ctxt);
8373
0
            }
8374
8375
313k
      if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8376
2.90k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8377
2.90k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8378
2.90k
                if (ctxt->inputNr > baseInputNr)
8379
0
                    xmlPopInput(ctxt);
8380
2.90k
                else
8381
2.90k
        break;
8382
2.90k
      }
8383
313k
  }
8384
4.16k
  if (RAW == ']') {
8385
359
      NEXT;
8386
359
      SKIP_BLANKS;
8387
359
  }
8388
4.16k
    }
8389
8390
    /*
8391
     * We should be at the end of the DOCTYPE declaration.
8392
     */
8393
4.16k
    if (RAW != '>') {
8394
3.79k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8395
3.79k
  return;
8396
3.79k
    }
8397
369
    NEXT;
8398
369
}
8399
8400
#ifdef LIBXML_SAX1_ENABLED
8401
/**
8402
 * xmlParseAttribute:
8403
 * @ctxt:  an XML parser context
8404
 * @value:  a xmlChar ** used to store the value of the attribute
8405
 *
8406
 * parse an attribute
8407
 *
8408
 * [41] Attribute ::= Name Eq AttValue
8409
 *
8410
 * [ WFC: No External Entity References ]
8411
 * Attribute values cannot contain direct or indirect entity references
8412
 * to external entities.
8413
 *
8414
 * [ WFC: No < in Attribute Values ]
8415
 * The replacement text of any entity referred to directly or indirectly in
8416
 * an attribute value (other than "&lt;") must not contain a <.
8417
 *
8418
 * [ VC: Attribute Value Type ]
8419
 * The attribute must have been declared; the value must be of the type
8420
 * declared for it.
8421
 *
8422
 * [25] Eq ::= S? '=' S?
8423
 *
8424
 * With namespace:
8425
 *
8426
 * [NS 11] Attribute ::= QName Eq AttValue
8427
 *
8428
 * Also the case QName == xmlns:??? is handled independently as a namespace
8429
 * definition.
8430
 *
8431
 * Returns the attribute name, and the value in *value.
8432
 */
8433
8434
const xmlChar *
8435
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8436
0
    const xmlChar *name;
8437
0
    xmlChar *val;
8438
8439
0
    *value = NULL;
8440
0
    GROW;
8441
0
    name = xmlParseName(ctxt);
8442
0
    if (name == NULL) {
8443
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8444
0
                 "error parsing attribute name\n");
8445
0
        return(NULL);
8446
0
    }
8447
8448
    /*
8449
     * read the value
8450
     */
8451
0
    SKIP_BLANKS;
8452
0
    if (RAW == '=') {
8453
0
        NEXT;
8454
0
  SKIP_BLANKS;
8455
0
  val = xmlParseAttValue(ctxt);
8456
0
  ctxt->instate = XML_PARSER_CONTENT;
8457
0
    } else {
8458
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8459
0
         "Specification mandates value for attribute %s\n", name);
8460
0
  return(NULL);
8461
0
    }
8462
8463
    /*
8464
     * Check that xml:lang conforms to the specification
8465
     * No more registered as an error, just generate a warning now
8466
     * since this was deprecated in XML second edition
8467
     */
8468
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8469
0
  if (!xmlCheckLanguageID(val)) {
8470
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8471
0
              "Malformed value for xml:lang : %s\n",
8472
0
        val, NULL);
8473
0
  }
8474
0
    }
8475
8476
    /*
8477
     * Check that xml:space conforms to the specification
8478
     */
8479
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8480
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8481
0
      *(ctxt->space) = 0;
8482
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8483
0
      *(ctxt->space) = 1;
8484
0
  else {
8485
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8486
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8487
0
                                 val, NULL);
8488
0
  }
8489
0
    }
8490
8491
0
    *value = val;
8492
0
    return(name);
8493
0
}
8494
8495
/**
8496
 * xmlParseStartTag:
8497
 * @ctxt:  an XML parser context
8498
 *
8499
 * parse a start of tag either for rule element or
8500
 * EmptyElement. In both case we don't parse the tag closing chars.
8501
 *
8502
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8503
 *
8504
 * [ WFC: Unique Att Spec ]
8505
 * No attribute name may appear more than once in the same start-tag or
8506
 * empty-element tag.
8507
 *
8508
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8509
 *
8510
 * [ WFC: Unique Att Spec ]
8511
 * No attribute name may appear more than once in the same start-tag or
8512
 * empty-element tag.
8513
 *
8514
 * With namespace:
8515
 *
8516
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8517
 *
8518
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8519
 *
8520
 * Returns the element name parsed
8521
 */
8522
8523
const xmlChar *
8524
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8525
0
    const xmlChar *name;
8526
0
    const xmlChar *attname;
8527
0
    xmlChar *attvalue;
8528
0
    const xmlChar **atts = ctxt->atts;
8529
0
    int nbatts = 0;
8530
0
    int maxatts = ctxt->maxatts;
8531
0
    int i;
8532
8533
0
    if (RAW != '<') return(NULL);
8534
0
    NEXT1;
8535
8536
0
    name = xmlParseName(ctxt);
8537
0
    if (name == NULL) {
8538
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8539
0
       "xmlParseStartTag: invalid element name\n");
8540
0
        return(NULL);
8541
0
    }
8542
8543
    /*
8544
     * Now parse the attributes, it ends up with the ending
8545
     *
8546
     * (S Attribute)* S?
8547
     */
8548
0
    SKIP_BLANKS;
8549
0
    GROW;
8550
8551
0
    while (((RAW != '>') &&
8552
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8553
0
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8554
0
        int id = ctxt->input->id;
8555
0
  unsigned long cons = CUR_CONSUMED;
8556
8557
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8558
0
        if ((attname != NULL) && (attvalue != NULL)) {
8559
      /*
8560
       * [ WFC: Unique Att Spec ]
8561
       * No attribute name may appear more than once in the same
8562
       * start-tag or empty-element tag.
8563
       */
8564
0
      for (i = 0; i < nbatts;i += 2) {
8565
0
          if (xmlStrEqual(atts[i], attname)) {
8566
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8567
0
        xmlFree(attvalue);
8568
0
        goto failed;
8569
0
    }
8570
0
      }
8571
      /*
8572
       * Add the pair to atts
8573
       */
8574
0
      if (atts == NULL) {
8575
0
          maxatts = 22; /* allow for 10 attrs by default */
8576
0
          atts = (const xmlChar **)
8577
0
           xmlMalloc(maxatts * sizeof(xmlChar *));
8578
0
    if (atts == NULL) {
8579
0
        xmlErrMemory(ctxt, NULL);
8580
0
        if (attvalue != NULL)
8581
0
      xmlFree(attvalue);
8582
0
        goto failed;
8583
0
    }
8584
0
    ctxt->atts = atts;
8585
0
    ctxt->maxatts = maxatts;
8586
0
      } else if (nbatts + 4 > maxatts) {
8587
0
          const xmlChar **n;
8588
8589
0
          maxatts *= 2;
8590
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8591
0
               maxatts * sizeof(const xmlChar *));
8592
0
    if (n == NULL) {
8593
0
        xmlErrMemory(ctxt, NULL);
8594
0
        if (attvalue != NULL)
8595
0
      xmlFree(attvalue);
8596
0
        goto failed;
8597
0
    }
8598
0
    atts = n;
8599
0
    ctxt->atts = atts;
8600
0
    ctxt->maxatts = maxatts;
8601
0
      }
8602
0
      atts[nbatts++] = attname;
8603
0
      atts[nbatts++] = attvalue;
8604
0
      atts[nbatts] = NULL;
8605
0
      atts[nbatts + 1] = NULL;
8606
0
  } else {
8607
0
      if (attvalue != NULL)
8608
0
    xmlFree(attvalue);
8609
0
  }
8610
8611
0
failed:
8612
8613
0
  GROW
8614
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8615
0
      break;
8616
0
  if (SKIP_BLANKS == 0) {
8617
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8618
0
         "attributes construct error\n");
8619
0
  }
8620
0
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8621
0
            (attname == NULL) && (attvalue == NULL)) {
8622
0
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8623
0
         "xmlParseStartTag: problem parsing attributes\n");
8624
0
      break;
8625
0
  }
8626
0
  SHRINK;
8627
0
        GROW;
8628
0
    }
8629
8630
    /*
8631
     * SAX: Start of Element !
8632
     */
8633
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8634
0
  (!ctxt->disableSAX)) {
8635
0
  if (nbatts > 0)
8636
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8637
0
  else
8638
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8639
0
    }
8640
8641
0
    if (atts != NULL) {
8642
        /* Free only the content strings */
8643
0
        for (i = 1;i < nbatts;i+=2)
8644
0
      if (atts[i] != NULL)
8645
0
         xmlFree((xmlChar *) atts[i]);
8646
0
    }
8647
0
    return(name);
8648
0
}
8649
8650
/**
8651
 * xmlParseEndTag1:
8652
 * @ctxt:  an XML parser context
8653
 * @line:  line of the start tag
8654
 * @nsNr:  number of namespaces on the start tag
8655
 *
8656
 * parse an end of tag
8657
 *
8658
 * [42] ETag ::= '</' Name S? '>'
8659
 *
8660
 * With namespace
8661
 *
8662
 * [NS 9] ETag ::= '</' QName S? '>'
8663
 */
8664
8665
static void
8666
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8667
0
    const xmlChar *name;
8668
8669
0
    GROW;
8670
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8671
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8672
0
           "xmlParseEndTag: '</' not found\n");
8673
0
  return;
8674
0
    }
8675
0
    SKIP(2);
8676
8677
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8678
8679
    /*
8680
     * We should definitely be at the ending "S? '>'" part
8681
     */
8682
0
    GROW;
8683
0
    SKIP_BLANKS;
8684
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8685
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8686
0
    } else
8687
0
  NEXT1;
8688
8689
    /*
8690
     * [ WFC: Element Type Match ]
8691
     * The Name in an element's end-tag must match the element type in the
8692
     * start-tag.
8693
     *
8694
     */
8695
0
    if (name != (xmlChar*)1) {
8696
0
        if (name == NULL) name = BAD_CAST "unparsable";
8697
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8698
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8699
0
                    ctxt->name, line, name);
8700
0
    }
8701
8702
    /*
8703
     * SAX: End of Tag
8704
     */
8705
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8706
0
  (!ctxt->disableSAX))
8707
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8708
8709
0
    namePop(ctxt);
8710
0
    spacePop(ctxt);
8711
0
    return;
8712
0
}
8713
8714
/**
8715
 * xmlParseEndTag:
8716
 * @ctxt:  an XML parser context
8717
 *
8718
 * parse an end of tag
8719
 *
8720
 * [42] ETag ::= '</' Name S? '>'
8721
 *
8722
 * With namespace
8723
 *
8724
 * [NS 9] ETag ::= '</' QName S? '>'
8725
 */
8726
8727
void
8728
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8729
0
    xmlParseEndTag1(ctxt, 0);
8730
0
}
8731
#endif /* LIBXML_SAX1_ENABLED */
8732
8733
/************************************************************************
8734
 *                  *
8735
 *          SAX 2 specific operations       *
8736
 *                  *
8737
 ************************************************************************/
8738
8739
/*
8740
 * xmlGetNamespace:
8741
 * @ctxt:  an XML parser context
8742
 * @prefix:  the prefix to lookup
8743
 *
8744
 * Lookup the namespace name for the @prefix (which ca be NULL)
8745
 * The prefix must come from the @ctxt->dict dictionary
8746
 *
8747
 * Returns the namespace name or NULL if not bound
8748
 */
8749
static const xmlChar *
8750
248k
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8751
248k
    int i;
8752
8753
248k
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8754
2.02M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8755
1.89M
        if (ctxt->nsTab[i] == prefix) {
8756
117k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8757
12.6k
          return(NULL);
8758
104k
      return(ctxt->nsTab[i + 1]);
8759
117k
  }
8760
128k
    return(NULL);
8761
245k
}
8762
8763
/**
8764
 * xmlParseQName:
8765
 * @ctxt:  an XML parser context
8766
 * @prefix:  pointer to store the prefix part
8767
 *
8768
 * parse an XML Namespace QName
8769
 *
8770
 * [6]  QName  ::= (Prefix ':')? LocalPart
8771
 * [7]  Prefix  ::= NCName
8772
 * [8]  LocalPart  ::= NCName
8773
 *
8774
 * Returns the Name parsed or NULL
8775
 */
8776
8777
static const xmlChar *
8778
771k
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8779
771k
    const xmlChar *l, *p;
8780
8781
771k
    GROW;
8782
8783
771k
    l = xmlParseNCName(ctxt);
8784
771k
    if (l == NULL) {
8785
107k
        if (CUR == ':') {
8786
12.9k
      l = xmlParseName(ctxt);
8787
12.9k
      if (l != NULL) {
8788
12.4k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8789
12.4k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8790
12.4k
    *prefix = NULL;
8791
12.4k
    return(l);
8792
12.4k
      }
8793
12.9k
  }
8794
95.1k
        return(NULL);
8795
107k
    }
8796
664k
    if (CUR == ':') {
8797
83.6k
        NEXT;
8798
83.6k
  p = l;
8799
83.6k
  l = xmlParseNCName(ctxt);
8800
83.6k
  if (l == NULL) {
8801
18.3k
      xmlChar *tmp;
8802
8803
18.3k
            if (ctxt->instate == XML_PARSER_EOF)
8804
2
                return(NULL);
8805
18.3k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8806
18.3k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8807
18.3k
      l = xmlParseNmtoken(ctxt);
8808
18.3k
      if (l == NULL) {
8809
14.5k
                if (ctxt->instate == XML_PARSER_EOF)
8810
0
                    return(NULL);
8811
14.5k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8812
14.5k
            } else {
8813
3.75k
    tmp = xmlBuildQName(l, p, NULL, 0);
8814
3.75k
    xmlFree((char *)l);
8815
3.75k
      }
8816
18.3k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8817
18.3k
      if (tmp != NULL) xmlFree(tmp);
8818
18.3k
      *prefix = NULL;
8819
18.3k
      return(p);
8820
18.3k
  }
8821
65.2k
  if (CUR == ':') {
8822
4.34k
      xmlChar *tmp;
8823
8824
4.34k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8825
4.34k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8826
4.34k
      NEXT;
8827
4.34k
      tmp = (xmlChar *) xmlParseName(ctxt);
8828
4.34k
      if (tmp != NULL) {
8829
2.69k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8830
2.69k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8831
2.69k
    if (tmp != NULL) xmlFree(tmp);
8832
2.69k
    *prefix = p;
8833
2.69k
    return(l);
8834
2.69k
      }
8835
1.64k
            if (ctxt->instate == XML_PARSER_EOF)
8836
0
                return(NULL);
8837
1.64k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8838
1.64k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8839
1.64k
      if (tmp != NULL) xmlFree(tmp);
8840
1.64k
      *prefix = p;
8841
1.64k
      return(l);
8842
1.64k
  }
8843
60.9k
  *prefix = p;
8844
60.9k
    } else
8845
580k
        *prefix = NULL;
8846
641k
    return(l);
8847
664k
}
8848
8849
/**
8850
 * xmlParseQNameAndCompare:
8851
 * @ctxt:  an XML parser context
8852
 * @name:  the localname
8853
 * @prefix:  the prefix, if any.
8854
 *
8855
 * parse an XML name and compares for match
8856
 * (specialized for endtag parsing)
8857
 *
8858
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8859
 * and the name for mismatch
8860
 */
8861
8862
static const xmlChar *
8863
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8864
7.16k
                        xmlChar const *prefix) {
8865
7.16k
    const xmlChar *cmp;
8866
7.16k
    const xmlChar *in;
8867
7.16k
    const xmlChar *ret;
8868
7.16k
    const xmlChar *prefix2;
8869
8870
7.16k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8871
8872
7.16k
    GROW;
8873
7.16k
    in = ctxt->input->cur;
8874
8875
7.16k
    cmp = prefix;
8876
15.1k
    while (*in != 0 && *in == *cmp) {
8877
7.95k
  ++in;
8878
7.95k
  ++cmp;
8879
7.95k
    }
8880
7.16k
    if ((*cmp == 0) && (*in == ':')) {
8881
5.03k
        in++;
8882
5.03k
  cmp = name;
8883
9.90k
  while (*in != 0 && *in == *cmp) {
8884
4.87k
      ++in;
8885
4.87k
      ++cmp;
8886
4.87k
  }
8887
5.03k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8888
      /* success */
8889
3.33k
            ctxt->input->col += in - ctxt->input->cur;
8890
3.33k
      ctxt->input->cur = in;
8891
3.33k
      return((const xmlChar*) 1);
8892
3.33k
  }
8893
5.03k
    }
8894
    /*
8895
     * all strings coms from the dictionary, equality can be done directly
8896
     */
8897
3.83k
    ret = xmlParseQName (ctxt, &prefix2);
8898
3.83k
    if ((ret == name) && (prefix == prefix2))
8899
962
  return((const xmlChar*) 1);
8900
2.87k
    return ret;
8901
3.83k
}
8902
8903
/**
8904
 * xmlParseAttValueInternal:
8905
 * @ctxt:  an XML parser context
8906
 * @len:  attribute len result
8907
 * @alloc:  whether the attribute was reallocated as a new string
8908
 * @normalize:  if 1 then further non-CDATA normalization must be done
8909
 *
8910
 * parse a value for an attribute.
8911
 * NOTE: if no normalization is needed, the routine will return pointers
8912
 *       directly from the data buffer.
8913
 *
8914
 * 3.3.3 Attribute-Value Normalization:
8915
 * Before the value of an attribute is passed to the application or
8916
 * checked for validity, the XML processor must normalize it as follows:
8917
 * - a character reference is processed by appending the referenced
8918
 *   character to the attribute value
8919
 * - an entity reference is processed by recursively processing the
8920
 *   replacement text of the entity
8921
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8922
 *   appending #x20 to the normalized value, except that only a single
8923
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8924
 *   parsed entity or the literal entity value of an internal parsed entity
8925
 * - other characters are processed by appending them to the normalized value
8926
 * If the declared value is not CDATA, then the XML processor must further
8927
 * process the normalized attribute value by discarding any leading and
8928
 * trailing space (#x20) characters, and by replacing sequences of space
8929
 * (#x20) characters by a single space (#x20) character.
8930
 * All attributes for which no declaration has been read should be treated
8931
 * by a non-validating parser as if declared CDATA.
8932
 *
8933
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8934
 *     caller if it was copied, this can be detected by val[*len] == 0.
8935
 */
8936
8937
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8938
18
    const xmlChar *oldbase = ctxt->input->base;\
8939
18
    GROW;\
8940
18
    if (ctxt->instate == XML_PARSER_EOF)\
8941
18
        return(NULL);\
8942
18
    if (oldbase != ctxt->input->base) {\
8943
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
8944
0
        start = start + delta;\
8945
0
        in = in + delta;\
8946
0
    }\
8947
18
    end = ctxt->input->end;
8948
8949
static xmlChar *
8950
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8951
                         int normalize)
8952
470k
{
8953
470k
    xmlChar limit = 0;
8954
470k
    const xmlChar *in = NULL, *start, *end, *last;
8955
470k
    xmlChar *ret = NULL;
8956
470k
    int line, col;
8957
8958
470k
    GROW;
8959
470k
    in = (xmlChar *) CUR_PTR;
8960
470k
    line = ctxt->input->line;
8961
470k
    col = ctxt->input->col;
8962
470k
    if (*in != '"' && *in != '\'') {
8963
5.04k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8964
5.04k
        return (NULL);
8965
5.04k
    }
8966
465k
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8967
8968
    /*
8969
     * try to handle in this routine the most common case where no
8970
     * allocation of a new string is required and where content is
8971
     * pure ASCII.
8972
     */
8973
465k
    limit = *in++;
8974
465k
    col++;
8975
465k
    end = ctxt->input->end;
8976
465k
    start = in;
8977
465k
    if (in >= end) {
8978
2
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8979
2
    }
8980
465k
    if (normalize) {
8981
        /*
8982
   * Skip any leading spaces
8983
   */
8984
10.1k
  while ((in < end) && (*in != limit) &&
8985
10.1k
         ((*in == 0x20) || (*in == 0x9) ||
8986
8.66k
          (*in == 0xA) || (*in == 0xD))) {
8987
3.53k
      if (*in == 0xA) {
8988
254
          line++; col = 1;
8989
3.28k
      } else {
8990
3.28k
          col++;
8991
3.28k
      }
8992
3.53k
      in++;
8993
3.53k
      start = in;
8994
3.53k
      if (in >= end) {
8995
3
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8996
3
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8997
3
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8998
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8999
0
                                   "AttValue length too long\n");
9000
0
                    return(NULL);
9001
0
                }
9002
3
      }
9003
3.53k
  }
9004
17.0k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9005
17.0k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9006
10.9k
      col++;
9007
10.9k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9008
10.4k
      if (in >= end) {
9009
2
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9010
2
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9011
2
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9012
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9013
0
                                   "AttValue length too long\n");
9014
0
                    return(NULL);
9015
0
                }
9016
2
      }
9017
10.4k
  }
9018
6.62k
  last = in;
9019
  /*
9020
   * skip the trailing blanks
9021
   */
9022
7.67k
  while ((last[-1] == 0x20) && (last > start)) last--;
9023
8.32k
  while ((in < end) && (*in != limit) &&
9024
8.32k
         ((*in == 0x20) || (*in == 0x9) ||
9025
6.15k
          (*in == 0xA) || (*in == 0xD))) {
9026
1.69k
      if (*in == 0xA) {
9027
338
          line++, col = 1;
9028
1.35k
      } else {
9029
1.35k
          col++;
9030
1.35k
      }
9031
1.69k
      in++;
9032
1.69k
      if (in >= end) {
9033
6
    const xmlChar *oldbase = ctxt->input->base;
9034
6
    GROW;
9035
6
                if (ctxt->instate == XML_PARSER_EOF)
9036
0
                    return(NULL);
9037
6
    if (oldbase != ctxt->input->base) {
9038
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9039
0
        start = start + delta;
9040
0
        in = in + delta;
9041
0
        last = last + delta;
9042
0
    }
9043
6
    end = ctxt->input->end;
9044
6
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9045
6
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9046
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9047
0
                                   "AttValue length too long\n");
9048
0
                    return(NULL);
9049
0
                }
9050
6
      }
9051
1.69k
  }
9052
6.62k
        if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9053
6.62k
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9054
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9055
0
                           "AttValue length too long\n");
9056
0
            return(NULL);
9057
0
        }
9058
6.62k
  if (*in != limit) goto need_complex;
9059
459k
    } else {
9060
1.22M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9061
1.22M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9062
761k
      in++;
9063
761k
      col++;
9064
761k
      if (in >= end) {
9065
11
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9066
11
                if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9067
11
                    ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9068
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9069
0
                                   "AttValue length too long\n");
9070
0
                    return(NULL);
9071
0
                }
9072
11
      }
9073
761k
  }
9074
459k
  last = in;
9075
459k
        if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9076
459k
            ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9077
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9078
0
                           "AttValue length too long\n");
9079
0
            return(NULL);
9080
0
        }
9081
459k
  if (*in != limit) goto need_complex;
9082
459k
    }
9083
376k
    in++;
9084
376k
    col++;
9085
376k
    if (len != NULL) {
9086
354k
        *len = last - start;
9087
354k
        ret = (xmlChar *) start;
9088
354k
    } else {
9089
21.6k
        if (alloc) *alloc = 1;
9090
21.6k
        ret = xmlStrndup(start, last - start);
9091
21.6k
    }
9092
376k
    CUR_PTR = in;
9093
376k
    ctxt->input->line = line;
9094
376k
    ctxt->input->col = col;
9095
376k
    if (alloc) *alloc = 0;
9096
376k
    return ret;
9097
89.5k
need_complex:
9098
89.5k
    if (alloc) *alloc = 1;
9099
89.5k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9100
465k
}
9101
9102
/**
9103
 * xmlParseAttribute2:
9104
 * @ctxt:  an XML parser context
9105
 * @pref:  the element prefix
9106
 * @elem:  the element name
9107
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9108
 * @value:  a xmlChar ** used to store the value of the attribute
9109
 * @len:  an int * to save the length of the attribute
9110
 * @alloc:  an int * to indicate if the attribute was allocated
9111
 *
9112
 * parse an attribute in the new SAX2 framework.
9113
 *
9114
 * Returns the attribute name, and the value in *value, .
9115
 */
9116
9117
static const xmlChar *
9118
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9119
                   const xmlChar * pref, const xmlChar * elem,
9120
                   const xmlChar ** prefix, xmlChar ** value,
9121
                   int *len, int *alloc)
9122
509k
{
9123
509k
    const xmlChar *name;
9124
509k
    xmlChar *val, *internal_val = NULL;
9125
509k
    int normalize = 0;
9126
9127
509k
    *value = NULL;
9128
509k
    GROW;
9129
509k
    name = xmlParseQName(ctxt, prefix);
9130
509k
    if (name == NULL) {
9131
59.0k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9132
59.0k
                       "error parsing attribute name\n");
9133
59.0k
        return (NULL);
9134
59.0k
    }
9135
9136
    /*
9137
     * get the type if needed
9138
     */
9139
450k
    if (ctxt->attsSpecial != NULL) {
9140
53.9k
        int type;
9141
9142
53.9k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9143
53.9k
                                                 pref, elem, *prefix, name);
9144
53.9k
        if (type != 0)
9145
6.86k
            normalize = 1;
9146
53.9k
    }
9147
9148
    /*
9149
     * read the value
9150
     */
9151
450k
    SKIP_BLANKS;
9152
450k
    if (RAW == '=') {
9153
434k
        NEXT;
9154
434k
        SKIP_BLANKS;
9155
434k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9156
434k
  if (normalize) {
9157
      /*
9158
       * Sometimes a second normalisation pass for spaces is needed
9159
       * but that only happens if charrefs or entities references
9160
       * have been used in the attribute value, i.e. the attribute
9161
       * value have been extracted in an allocated string already.
9162
       */
9163
6.70k
      if (*alloc) {
9164
4.46k
          const xmlChar *val2;
9165
9166
4.46k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9167
4.46k
    if ((val2 != NULL) && (val2 != val)) {
9168
610
        xmlFree(val);
9169
610
        val = (xmlChar *) val2;
9170
610
    }
9171
4.46k
      }
9172
6.70k
  }
9173
434k
        ctxt->instate = XML_PARSER_CONTENT;
9174
434k
    } else {
9175
15.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9176
15.9k
                          "Specification mandates value for attribute %s\n",
9177
15.9k
                          name);
9178
15.9k
        return (NULL);
9179
15.9k
    }
9180
9181
434k
    if (*prefix == ctxt->str_xml) {
9182
        /*
9183
         * Check that xml:lang conforms to the specification
9184
         * No more registered as an error, just generate a warning now
9185
         * since this was deprecated in XML second edition
9186
         */
9187
2.54k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9188
0
            internal_val = xmlStrndup(val, *len);
9189
0
            if (!xmlCheckLanguageID(internal_val)) {
9190
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9191
0
                              "Malformed value for xml:lang : %s\n",
9192
0
                              internal_val, NULL);
9193
0
            }
9194
0
        }
9195
9196
        /*
9197
         * Check that xml:space conforms to the specification
9198
         */
9199
2.54k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9200
1.27k
            internal_val = xmlStrndup(val, *len);
9201
1.27k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9202
210
                *(ctxt->space) = 0;
9203
1.06k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9204
331
                *(ctxt->space) = 1;
9205
738
            else {
9206
738
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9207
738
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9208
738
                              internal_val, NULL);
9209
738
            }
9210
1.27k
        }
9211
2.54k
        if (internal_val) {
9212
874
            xmlFree(internal_val);
9213
874
        }
9214
2.54k
    }
9215
9216
434k
    *value = val;
9217
434k
    return (name);
9218
450k
}
9219
/**
9220
 * xmlParseStartTag2:
9221
 * @ctxt:  an XML parser context
9222
 *
9223
 * parse a start of tag either for rule element or
9224
 * EmptyElement. In both case we don't parse the tag closing chars.
9225
 * This routine is called when running SAX2 parsing
9226
 *
9227
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9228
 *
9229
 * [ WFC: Unique Att Spec ]
9230
 * No attribute name may appear more than once in the same start-tag or
9231
 * empty-element tag.
9232
 *
9233
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9234
 *
9235
 * [ WFC: Unique Att Spec ]
9236
 * No attribute name may appear more than once in the same start-tag or
9237
 * empty-element tag.
9238
 *
9239
 * With namespace:
9240
 *
9241
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9242
 *
9243
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9244
 *
9245
 * Returns the element name parsed
9246
 */
9247
9248
static const xmlChar *
9249
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9250
258k
                  const xmlChar **URI, int *tlen) {
9251
258k
    const xmlChar *localname;
9252
258k
    const xmlChar *prefix;
9253
258k
    const xmlChar *attname;
9254
258k
    const xmlChar *aprefix;
9255
258k
    const xmlChar *nsname;
9256
258k
    xmlChar *attvalue;
9257
258k
    const xmlChar **atts = ctxt->atts;
9258
258k
    int maxatts = ctxt->maxatts;
9259
258k
    int nratts, nbatts, nbdef, inputid;
9260
258k
    int i, j, nbNs, attval;
9261
258k
    unsigned long cur;
9262
258k
    int nsNr = ctxt->nsNr;
9263
9264
258k
    if (RAW != '<') return(NULL);
9265
258k
    NEXT1;
9266
9267
    /*
9268
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9269
     *       point since the attribute values may be stored as pointers to
9270
     *       the buffer and calling SHRINK would destroy them !
9271
     *       The Shrinking is only possible once the full set of attribute
9272
     *       callbacks have been done.
9273
     */
9274
258k
    SHRINK;
9275
258k
    cur = ctxt->input->cur - ctxt->input->base;
9276
258k
    inputid = ctxt->input->id;
9277
258k
    nbatts = 0;
9278
258k
    nratts = 0;
9279
258k
    nbdef = 0;
9280
258k
    nbNs = 0;
9281
258k
    attval = 0;
9282
    /* Forget any namespaces added during an earlier parse of this element. */
9283
258k
    ctxt->nsNr = nsNr;
9284
9285
258k
    localname = xmlParseQName(ctxt, &prefix);
9286
258k
    if (localname == NULL) {
9287
35.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9288
35.7k
           "StartTag: invalid element name\n");
9289
35.7k
        return(NULL);
9290
35.7k
    }
9291
222k
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9292
9293
    /*
9294
     * Now parse the attributes, it ends up with the ending
9295
     *
9296
     * (S Attribute)* S?
9297
     */
9298
222k
    SKIP_BLANKS;
9299
222k
    GROW;
9300
9301
580k
    while (((RAW != '>') &&
9302
580k
     ((RAW != '/') || (NXT(1) != '>')) &&
9303
580k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9304
509k
  int id = ctxt->input->id;
9305
509k
  unsigned long cons = CUR_CONSUMED;
9306
509k
  int len = -1, alloc = 0;
9307
9308
509k
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9309
509k
                               &aprefix, &attvalue, &len, &alloc);
9310
509k
        if ((attname == NULL) || (attvalue == NULL))
9311
78.7k
            goto next_attr;
9312
431k
  if (len < 0) len = xmlStrlen(attvalue);
9313
9314
431k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9315
65.9k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9316
65.9k
            xmlURIPtr uri;
9317
9318
65.9k
            if (URL == NULL) {
9319
1
                xmlErrMemory(ctxt, "dictionary allocation failure");
9320
1
                if ((attvalue != NULL) && (alloc != 0))
9321
1
                    xmlFree(attvalue);
9322
1
                localname = NULL;
9323
1
                goto done;
9324
1
            }
9325
65.9k
            if (*URL != 0) {
9326
56.5k
                uri = xmlParseURI((const char *) URL);
9327
56.5k
                if (uri == NULL) {
9328
28.6k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9329
28.6k
                             "xmlns: '%s' is not a valid URI\n",
9330
28.6k
                                       URL, NULL, NULL);
9331
28.6k
                } else {
9332
27.9k
                    if (uri->scheme == NULL) {
9333
18.5k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9334
18.5k
                                  "xmlns: URI %s is not absolute\n",
9335
18.5k
                                  URL, NULL, NULL);
9336
18.5k
                    }
9337
27.9k
                    xmlFreeURI(uri);
9338
27.9k
                }
9339
56.5k
                if (URL == ctxt->str_xml_ns) {
9340
67
                    if (attname != ctxt->str_xml) {
9341
67
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9342
67
                     "xml namespace URI cannot be the default namespace\n",
9343
67
                                 NULL, NULL, NULL);
9344
67
                    }
9345
67
                    goto next_attr;
9346
67
                }
9347
56.4k
                if ((len == 29) &&
9348
56.4k
                    (xmlStrEqual(URL,
9349
444
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9350
69
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9351
69
                         "reuse of the xmlns namespace name is forbidden\n",
9352
69
                             NULL, NULL, NULL);
9353
69
                    goto next_attr;
9354
69
                }
9355
56.4k
            }
9356
            /*
9357
             * check that it's not a defined namespace
9358
             */
9359
90.3k
            for (j = 1;j <= nbNs;j++)
9360
27.4k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9361
2.82k
                    break;
9362
65.7k
            if (j <= nbNs)
9363
2.82k
                xmlErrAttributeDup(ctxt, NULL, attname);
9364
62.9k
            else
9365
62.9k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9366
9367
365k
        } else if (aprefix == ctxt->str_xmlns) {
9368
38.9k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9369
38.9k
            xmlURIPtr uri;
9370
9371
38.9k
            if (attname == ctxt->str_xml) {
9372
988
                if (URL != ctxt->str_xml_ns) {
9373
791
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9374
791
                             "xml namespace prefix mapped to wrong URI\n",
9375
791
                             NULL, NULL, NULL);
9376
791
                }
9377
                /*
9378
                 * Do not keep a namespace definition node
9379
                 */
9380
988
                goto next_attr;
9381
988
            }
9382
37.9k
            if (URL == ctxt->str_xml_ns) {
9383
66
                if (attname != ctxt->str_xml) {
9384
66
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9385
66
                             "xml namespace URI mapped to wrong prefix\n",
9386
66
                             NULL, NULL, NULL);
9387
66
                }
9388
66
                goto next_attr;
9389
66
            }
9390
37.8k
            if (attname == ctxt->str_xmlns) {
9391
392
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9392
392
                         "redefinition of the xmlns prefix is forbidden\n",
9393
392
                         NULL, NULL, NULL);
9394
392
                goto next_attr;
9395
392
            }
9396
37.4k
            if ((len == 29) &&
9397
37.4k
                (xmlStrEqual(URL,
9398
482
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9399
203
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9400
203
                         "reuse of the xmlns namespace name is forbidden\n",
9401
203
                         NULL, NULL, NULL);
9402
203
                goto next_attr;
9403
203
            }
9404
37.2k
            if ((URL == NULL) || (URL[0] == 0)) {
9405
271
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9406
271
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9407
271
                              attname, NULL, NULL);
9408
271
                goto next_attr;
9409
36.9k
            } else {
9410
36.9k
                uri = xmlParseURI((const char *) URL);
9411
36.9k
                if (uri == NULL) {
9412
5.86k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9413
5.86k
                         "xmlns:%s: '%s' is not a valid URI\n",
9414
5.86k
                                       attname, URL, NULL);
9415
31.1k
                } else {
9416
31.1k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9417
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9418
0
                                  "xmlns:%s: URI %s is not absolute\n",
9419
0
                                  attname, URL, NULL);
9420
0
                    }
9421
31.1k
                    xmlFreeURI(uri);
9422
31.1k
                }
9423
36.9k
            }
9424
9425
            /*
9426
             * check that it's not a defined namespace
9427
             */
9428
127k
            for (j = 1;j <= nbNs;j++)
9429
90.9k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9430
615
                    break;
9431
36.9k
            if (j <= nbNs)
9432
615
                xmlErrAttributeDup(ctxt, aprefix, attname);
9433
36.3k
            else
9434
36.3k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9435
9436
326k
        } else {
9437
            /*
9438
             * Add the pair to atts
9439
             */
9440
326k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9441
2.21k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9442
0
                    goto next_attr;
9443
0
                }
9444
2.21k
                maxatts = ctxt->maxatts;
9445
2.21k
                atts = ctxt->atts;
9446
2.21k
            }
9447
326k
            ctxt->attallocs[nratts++] = alloc;
9448
326k
            atts[nbatts++] = attname;
9449
326k
            atts[nbatts++] = aprefix;
9450
            /*
9451
             * The namespace URI field is used temporarily to point at the
9452
             * base of the current input buffer for non-alloced attributes.
9453
             * When the input buffer is reallocated, all the pointers become
9454
             * invalid, but they can be reconstructed later.
9455
             */
9456
326k
            if (alloc)
9457
45.3k
                atts[nbatts++] = NULL;
9458
280k
            else
9459
280k
                atts[nbatts++] = ctxt->input->base;
9460
326k
            atts[nbatts++] = attvalue;
9461
326k
            attvalue += len;
9462
326k
            atts[nbatts++] = attvalue;
9463
            /*
9464
             * tag if some deallocation is needed
9465
             */
9466
326k
            if (alloc != 0) attval = 1;
9467
326k
            attvalue = NULL; /* moved into atts */
9468
326k
        }
9469
9470
509k
next_attr:
9471
509k
        if ((attvalue != NULL) && (alloc != 0)) {
9472
31.1k
            xmlFree(attvalue);
9473
31.1k
            attvalue = NULL;
9474
31.1k
        }
9475
9476
509k
  GROW
9477
509k
        if (ctxt->instate == XML_PARSER_EOF)
9478
6
            break;
9479
509k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9480
38.9k
      break;
9481
470k
  if (SKIP_BLANKS == 0) {
9482
113k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9483
113k
         "attributes construct error\n");
9484
113k
      break;
9485
113k
  }
9486
357k
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9487
357k
            (attname == NULL) && (attvalue == NULL)) {
9488
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9489
0
           "xmlParseStartTag: problem parsing attributes\n");
9490
0
      break;
9491
0
  }
9492
357k
        GROW;
9493
357k
    }
9494
9495
222k
    if (ctxt->input->id != inputid) {
9496
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9497
0
                    "Unexpected change of input\n");
9498
0
        localname = NULL;
9499
0
        goto done;
9500
0
    }
9501
9502
    /* Reconstruct attribute value pointers. */
9503
548k
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9504
326k
        if (atts[i+2] != NULL) {
9505
            /*
9506
             * Arithmetic on dangling pointers is technically undefined
9507
             * behavior, but well...
9508
             */
9509
280k
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9510
280k
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9511
280k
            atts[i+3] += offset;  /* value */
9512
280k
            atts[i+4] += offset;  /* valuend */
9513
280k
        }
9514
326k
    }
9515
9516
    /*
9517
     * The attributes defaulting
9518
     */
9519
222k
    if (ctxt->attsDefault != NULL) {
9520
26.9k
        xmlDefAttrsPtr defaults;
9521
9522
26.9k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9523
26.9k
  if (defaults != NULL) {
9524
39.0k
      for (i = 0;i < defaults->nbAttrs;i++) {
9525
27.8k
          attname = defaults->values[5 * i];
9526
27.8k
    aprefix = defaults->values[5 * i + 1];
9527
9528
                /*
9529
     * special work for namespaces defaulted defs
9530
     */
9531
27.8k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9532
        /*
9533
         * check that it's not a defined namespace
9534
         */
9535
6.86k
        for (j = 1;j <= nbNs;j++)
9536
2.10k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9537
1.02k
          break;
9538
5.79k
              if (j <= nbNs) continue;
9539
9540
4.76k
        nsname = xmlGetNamespace(ctxt, NULL);
9541
4.76k
        if (nsname != defaults->values[5 * i + 2]) {
9542
3.84k
      if (nsPush(ctxt, NULL,
9543
3.84k
                 defaults->values[5 * i + 2]) > 0)
9544
3.84k
          nbNs++;
9545
3.84k
        }
9546
22.0k
    } else if (aprefix == ctxt->str_xmlns) {
9547
        /*
9548
         * check that it's not a defined namespace
9549
         */
9550
17.4k
        for (j = 1;j <= nbNs;j++)
9551
9.66k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9552
1.55k
          break;
9553
9.29k
              if (j <= nbNs) continue;
9554
9555
7.74k
        nsname = xmlGetNamespace(ctxt, attname);
9556
7.74k
        if (nsname != defaults->values[2]) {
9557
6.23k
      if (nsPush(ctxt, attname,
9558
6.23k
                 defaults->values[5 * i + 2]) > 0)
9559
6.23k
          nbNs++;
9560
6.23k
        }
9561
12.7k
    } else {
9562
        /*
9563
         * check that it's not a defined attribute
9564
         */
9565
124k
        for (j = 0;j < nbatts;j+=5) {
9566
112k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9567
642
          break;
9568
112k
        }
9569
12.7k
        if (j < nbatts) continue;
9570
9571
12.0k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9572
193
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9573
0
                            localname = NULL;
9574
0
                            goto done;
9575
0
      }
9576
193
      maxatts = ctxt->maxatts;
9577
193
      atts = ctxt->atts;
9578
193
        }
9579
12.0k
        atts[nbatts++] = attname;
9580
12.0k
        atts[nbatts++] = aprefix;
9581
12.0k
        if (aprefix == NULL)
9582
6.86k
      atts[nbatts++] = NULL;
9583
5.23k
        else
9584
5.23k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9585
12.0k
        atts[nbatts++] = defaults->values[5 * i + 2];
9586
12.0k
        atts[nbatts++] = defaults->values[5 * i + 3];
9587
12.0k
        if ((ctxt->standalone == 1) &&
9588
12.0k
            (defaults->values[5 * i + 4] != NULL)) {
9589
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9590
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9591
0
                                   attname, localname);
9592
0
        }
9593
12.0k
        nbdef++;
9594
12.0k
    }
9595
27.8k
      }
9596
11.2k
  }
9597
26.9k
    }
9598
9599
    /*
9600
     * The attributes checkings
9601
     */
9602
560k
    for (i = 0; i < nbatts;i += 5) {
9603
        /*
9604
  * The default namespace does not apply to attribute names.
9605
  */
9606
338k
  if (atts[i + 1] != NULL) {
9607
8.42k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9608
8.42k
      if (nsname == NULL) {
9609
5.54k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9610
5.54k
        "Namespace prefix %s for %s on %s is not defined\n",
9611
5.54k
        atts[i + 1], atts[i], localname);
9612
5.54k
      }
9613
8.42k
      atts[i + 2] = nsname;
9614
8.42k
  } else
9615
329k
      nsname = NULL;
9616
  /*
9617
   * [ WFC: Unique Att Spec ]
9618
   * No attribute name may appear more than once in the same
9619
   * start-tag or empty-element tag.
9620
   * As extended by the Namespace in XML REC.
9621
   */
9622
6.70M
        for (j = 0; j < i;j += 5) {
9623
6.67M
      if (atts[i] == atts[j]) {
9624
311k
          if (atts[i+1] == atts[j+1]) {
9625
309k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9626
309k
        break;
9627
309k
    }
9628
2.23k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9629
209
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9630
209
           "Namespaced Attribute %s in '%s' redefined\n",
9631
209
           atts[i], nsname, NULL);
9632
209
        break;
9633
209
    }
9634
2.23k
      }
9635
6.67M
  }
9636
338k
    }
9637
9638
222k
    nsname = xmlGetNamespace(ctxt, prefix);
9639
222k
    if ((prefix != NULL) && (nsname == NULL)) {
9640
17.8k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9641
17.8k
           "Namespace prefix %s on %s is not defined\n",
9642
17.8k
     prefix, localname, NULL);
9643
17.8k
    }
9644
222k
    *pref = prefix;
9645
222k
    *URI = nsname;
9646
9647
    /*
9648
     * SAX: Start of Element !
9649
     */
9650
222k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9651
222k
  (!ctxt->disableSAX)) {
9652
17.6k
  if (nbNs > 0)
9653
7.35k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9654
7.35k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9655
7.35k
        nbatts / 5, nbdef, atts);
9656
10.2k
  else
9657
10.2k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9658
10.2k
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9659
17.6k
    }
9660
9661
222k
done:
9662
    /*
9663
     * Free up attribute allocated strings if needed
9664
     */
9665
222k
    if (attval != 0) {
9666
326k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9667
317k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9668
45.3k
          xmlFree((xmlChar *) atts[i]);
9669
8.90k
    }
9670
9671
222k
    return(localname);
9672
222k
}
9673
9674
/**
9675
 * xmlParseEndTag2:
9676
 * @ctxt:  an XML parser context
9677
 * @line:  line of the start tag
9678
 * @nsNr:  number of namespaces on the start tag
9679
 *
9680
 * parse an end of tag
9681
 *
9682
 * [42] ETag ::= '</' Name S? '>'
9683
 *
9684
 * With namespace
9685
 *
9686
 * [NS 9] ETag ::= '</' QName S? '>'
9687
 */
9688
9689
static void
9690
14.0k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9691
14.0k
    const xmlChar *name;
9692
9693
14.0k
    GROW;
9694
14.0k
    if ((RAW != '<') || (NXT(1) != '/')) {
9695
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9696
0
  return;
9697
0
    }
9698
14.0k
    SKIP(2);
9699
9700
14.0k
    if (tag->prefix == NULL)
9701
6.91k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9702
7.16k
    else
9703
7.16k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9704
9705
    /*
9706
     * We should definitely be at the ending "S? '>'" part
9707
     */
9708
14.0k
    GROW;
9709
14.0k
    if (ctxt->instate == XML_PARSER_EOF)
9710
1
        return;
9711
14.0k
    SKIP_BLANKS;
9712
14.0k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9713
10.3k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9714
10.3k
    } else
9715
3.70k
  NEXT1;
9716
9717
    /*
9718
     * [ WFC: Element Type Match ]
9719
     * The Name in an element's end-tag must match the element type in the
9720
     * start-tag.
9721
     *
9722
     */
9723
14.0k
    if (name != (xmlChar*)1) {
9724
7.22k
        if (name == NULL) name = BAD_CAST "unparsable";
9725
7.22k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9726
7.22k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9727
7.22k
                    ctxt->name, tag->line, name);
9728
7.22k
    }
9729
9730
    /*
9731
     * SAX: End of Tag
9732
     */
9733
14.0k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9734
14.0k
  (!ctxt->disableSAX))
9735
458
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9736
458
                                tag->URI);
9737
9738
14.0k
    spacePop(ctxt);
9739
14.0k
    if (tag->nsNr != 0)
9740
1.18k
  nsPop(ctxt, tag->nsNr);
9741
14.0k
}
9742
9743
/**
9744
 * xmlParseCDSect:
9745
 * @ctxt:  an XML parser context
9746
 *
9747
 * Parse escaped pure raw content.
9748
 *
9749
 * [18] CDSect ::= CDStart CData CDEnd
9750
 *
9751
 * [19] CDStart ::= '<![CDATA['
9752
 *
9753
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9754
 *
9755
 * [21] CDEnd ::= ']]>'
9756
 */
9757
void
9758
7.70k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9759
7.70k
    xmlChar *buf = NULL;
9760
7.70k
    int len = 0;
9761
7.70k
    int size = XML_PARSER_BUFFER_SIZE;
9762
7.70k
    int r, rl;
9763
7.70k
    int s, sl;
9764
7.70k
    int cur, l;
9765
7.70k
    int count = 0;
9766
9767
    /* Check 2.6.0 was NXT(0) not RAW */
9768
7.70k
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9769
7.70k
  SKIP(9);
9770
7.70k
    } else
9771
0
        return;
9772
9773
7.70k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9774
7.70k
    r = CUR_CHAR(rl);
9775
7.70k
    if (!IS_CHAR(r)) {
9776
672
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9777
672
  ctxt->instate = XML_PARSER_CONTENT;
9778
672
        return;
9779
672
    }
9780
7.03k
    NEXTL(rl);
9781
7.03k
    s = CUR_CHAR(sl);
9782
7.03k
    if (!IS_CHAR(s)) {
9783
1.80k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9784
1.80k
  ctxt->instate = XML_PARSER_CONTENT;
9785
1.80k
        return;
9786
1.80k
    }
9787
5.22k
    NEXTL(sl);
9788
5.22k
    cur = CUR_CHAR(l);
9789
5.22k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9790
5.22k
    if (buf == NULL) {
9791
0
  xmlErrMemory(ctxt, NULL);
9792
0
  return;
9793
0
    }
9794
20.8M
    while (IS_CHAR(cur) &&
9795
20.8M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9796
20.8M
  if (len + 5 >= size) {
9797
2.63k
      xmlChar *tmp;
9798
9799
2.63k
            if ((size > XML_MAX_TEXT_LENGTH) &&
9800
2.63k
                ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9801
0
                xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9802
0
                             "CData section too big found", NULL);
9803
0
                xmlFree (buf);
9804
0
                return;
9805
0
            }
9806
2.63k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9807
2.63k
      if (tmp == NULL) {
9808
0
          xmlFree(buf);
9809
0
    xmlErrMemory(ctxt, NULL);
9810
0
    return;
9811
0
      }
9812
2.63k
      buf = tmp;
9813
2.63k
      size *= 2;
9814
2.63k
  }
9815
20.8M
  COPY_BUF(rl,buf,len,r);
9816
20.8M
  r = s;
9817
20.8M
  rl = sl;
9818
20.8M
  s = cur;
9819
20.8M
  sl = l;
9820
20.8M
  count++;
9821
20.8M
  if (count > 50) {
9822
407k
      SHRINK;
9823
407k
      GROW;
9824
407k
            if (ctxt->instate == XML_PARSER_EOF) {
9825
0
    xmlFree(buf);
9826
0
    return;
9827
0
            }
9828
407k
      count = 0;
9829
407k
  }
9830
20.8M
  NEXTL(l);
9831
20.8M
  cur = CUR_CHAR(l);
9832
20.8M
    }
9833
5.22k
    buf[len] = 0;
9834
5.22k
    ctxt->instate = XML_PARSER_CONTENT;
9835
5.22k
    if (cur != '>') {
9836
3.26k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9837
3.26k
                       "CData section not finished\n%.50s\n", buf);
9838
3.26k
  xmlFree(buf);
9839
3.26k
        return;
9840
3.26k
    }
9841
1.95k
    NEXTL(l);
9842
9843
    /*
9844
     * OK the buffer is to be consumed as cdata.
9845
     */
9846
1.95k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9847
222
  if (ctxt->sax->cdataBlock != NULL)
9848
0
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9849
222
  else if (ctxt->sax->characters != NULL)
9850
222
      ctxt->sax->characters(ctxt->userData, buf, len);
9851
222
    }
9852
1.95k
    xmlFree(buf);
9853
1.95k
}
9854
9855
/**
9856
 * xmlParseContentInternal:
9857
 * @ctxt:  an XML parser context
9858
 *
9859
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9860
 * unexpected EOF to the caller.
9861
 */
9862
9863
static void
9864
5.75k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9865
5.75k
    int nameNr = ctxt->nameNr;
9866
9867
5.75k
    GROW;
9868
601k
    while ((RAW != 0) &&
9869
601k
     (ctxt->instate != XML_PARSER_EOF)) {
9870
596k
        int id = ctxt->input->id;
9871
596k
  unsigned long cons = CUR_CONSUMED;
9872
596k
  const xmlChar *cur = ctxt->input->cur;
9873
9874
  /*
9875
   * First case : a Processing Instruction.
9876
   */
9877
596k
  if ((*cur == '<') && (cur[1] == '?')) {
9878
6.70k
      xmlParsePI(ctxt);
9879
6.70k
  }
9880
9881
  /*
9882
   * Second case : a CDSection
9883
   */
9884
  /* 2.6.0 test was *cur not RAW */
9885
589k
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9886
7.70k
      xmlParseCDSect(ctxt);
9887
7.70k
  }
9888
9889
  /*
9890
   * Third case :  a comment
9891
   */
9892
581k
  else if ((*cur == '<') && (NXT(1) == '!') &&
9893
581k
     (NXT(2) == '-') && (NXT(3) == '-')) {
9894
7.34k
      xmlParseComment(ctxt);
9895
7.34k
      ctxt->instate = XML_PARSER_CONTENT;
9896
7.34k
  }
9897
9898
  /*
9899
   * Fourth case :  a sub-element.
9900
   */
9901
574k
  else if (*cur == '<') {
9902
262k
            if (NXT(1) == '/') {
9903
14.0k
                if (ctxt->nameNr <= nameNr)
9904
141
                    break;
9905
13.9k
          xmlParseElementEnd(ctxt);
9906
248k
            } else {
9907
248k
          xmlParseElementStart(ctxt);
9908
248k
            }
9909
262k
  }
9910
9911
  /*
9912
   * Fifth case : a reference. If if has not been resolved,
9913
   *    parsing returns it's Name, create the node
9914
   */
9915
9916
312k
  else if (*cur == '&') {
9917
77.7k
      xmlParseReference(ctxt);
9918
77.7k
  }
9919
9920
  /*
9921
   * Last case, text. Note that References are handled directly.
9922
   */
9923
234k
  else {
9924
234k
      xmlParseCharData(ctxt, 0);
9925
234k
  }
9926
9927
596k
  GROW;
9928
596k
  SHRINK;
9929
9930
596k
  if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
9931
177
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9932
177
                  "detected an error in element content\n");
9933
177
      xmlHaltParser(ctxt);
9934
177
            break;
9935
177
  }
9936
596k
    }
9937
5.75k
}
9938
9939
/**
9940
 * xmlParseContent:
9941
 * @ctxt:  an XML parser context
9942
 *
9943
 * Parse a content sequence. Stops at EOF or '</'.
9944
 *
9945
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9946
 */
9947
9948
void
9949
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9950
0
    int nameNr = ctxt->nameNr;
9951
9952
0
    xmlParseContentInternal(ctxt);
9953
9954
0
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9955
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9956
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9957
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9958
0
                "Premature end of data in tag %s line %d\n",
9959
0
    name, line, NULL);
9960
0
    }
9961
0
}
9962
9963
/**
9964
 * xmlParseElement:
9965
 * @ctxt:  an XML parser context
9966
 *
9967
 * parse an XML element
9968
 *
9969
 * [39] element ::= EmptyElemTag | STag content ETag
9970
 *
9971
 * [ WFC: Element Type Match ]
9972
 * The Name in an element's end-tag must match the element type in the
9973
 * start-tag.
9974
 *
9975
 */
9976
9977
void
9978
9.85k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9979
9.85k
    if (xmlParseElementStart(ctxt) != 0)
9980
4.10k
        return;
9981
9982
5.75k
    xmlParseContentInternal(ctxt);
9983
5.75k
    if (ctxt->instate == XML_PARSER_EOF)
9984
223
  return;
9985
9986
5.52k
    if (CUR == 0) {
9987
5.38k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9988
5.38k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9989
5.38k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9990
5.38k
                "Premature end of data in tag %s line %d\n",
9991
5.38k
    name, line, NULL);
9992
5.38k
        return;
9993
5.38k
    }
9994
9995
141
    xmlParseElementEnd(ctxt);
9996
141
}
9997
9998
/**
9999
 * xmlParseElementStart:
10000
 * @ctxt:  an XML parser context
10001
 *
10002
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10003
 * opening tag was parsed, 1 if an empty element was parsed.
10004
 */
10005
static int
10006
258k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10007
258k
    const xmlChar *name;
10008
258k
    const xmlChar *prefix = NULL;
10009
258k
    const xmlChar *URI = NULL;
10010
258k
    xmlParserNodeInfo node_info;
10011
258k
    int line, tlen = 0;
10012
258k
    xmlNodePtr ret;
10013
258k
    int nsNr = ctxt->nsNr;
10014
10015
258k
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10016
258k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10017
35
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10018
35
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10019
35
        xmlParserMaxDepth);
10020
35
  xmlHaltParser(ctxt);
10021
35
  return(-1);
10022
35
    }
10023
10024
    /* Capture start position */
10025
258k
    if (ctxt->record_info) {
10026
0
        node_info.begin_pos = ctxt->input->consumed +
10027
0
                          (CUR_PTR - ctxt->input->base);
10028
0
  node_info.begin_line = ctxt->input->line;
10029
0
    }
10030
10031
258k
    if (ctxt->spaceNr == 0)
10032
0
  spacePush(ctxt, -1);
10033
258k
    else if (*ctxt->space == -2)
10034
26.6k
  spacePush(ctxt, -1);
10035
231k
    else
10036
231k
  spacePush(ctxt, *ctxt->space);
10037
10038
258k
    line = ctxt->input->line;
10039
258k
#ifdef LIBXML_SAX1_ENABLED
10040
258k
    if (ctxt->sax2)
10041
258k
#endif /* LIBXML_SAX1_ENABLED */
10042
258k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10043
0
#ifdef LIBXML_SAX1_ENABLED
10044
0
    else
10045
0
  name = xmlParseStartTag(ctxt);
10046
258k
#endif /* LIBXML_SAX1_ENABLED */
10047
258k
    if (ctxt->instate == XML_PARSER_EOF)
10048
9
  return(-1);
10049
258k
    if (name == NULL) {
10050
35.7k
  spacePop(ctxt);
10051
35.7k
        return(-1);
10052
35.7k
    }
10053
222k
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10054
222k
    ret = ctxt->node;
10055
10056
222k
#ifdef LIBXML_VALID_ENABLED
10057
    /*
10058
     * [ VC: Root Element Type ]
10059
     * The Name in the document type declaration must match the element
10060
     * type of the root element.
10061
     */
10062
222k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10063
222k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10064
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10065
222k
#endif /* LIBXML_VALID_ENABLED */
10066
10067
    /*
10068
     * Check for an Empty Element.
10069
     */
10070
222k
    if ((RAW == '/') && (NXT(1) == '>')) {
10071
11.6k
        SKIP(2);
10072
11.6k
  if (ctxt->sax2) {
10073
11.6k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10074
11.6k
    (!ctxt->disableSAX))
10075
1.48k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10076
11.6k
#ifdef LIBXML_SAX1_ENABLED
10077
11.6k
  } else {
10078
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10079
0
    (!ctxt->disableSAX))
10080
0
    ctxt->sax->endElement(ctxt->userData, name);
10081
0
#endif /* LIBXML_SAX1_ENABLED */
10082
0
  }
10083
11.6k
  namePop(ctxt);
10084
11.6k
  spacePop(ctxt);
10085
11.6k
  if (nsNr != ctxt->nsNr)
10086
9.11k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10087
11.6k
  if ( ret != NULL && ctxt->record_info ) {
10088
0
     node_info.end_pos = ctxt->input->consumed +
10089
0
            (CUR_PTR - ctxt->input->base);
10090
0
     node_info.end_line = ctxt->input->line;
10091
0
     node_info.node = ret;
10092
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10093
0
  }
10094
11.6k
  return(1);
10095
11.6k
    }
10096
210k
    if (RAW == '>') {
10097
94.5k
        NEXT1;
10098
116k
    } else {
10099
116k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10100
116k
         "Couldn't find end of Start Tag %s line %d\n",
10101
116k
                    name, line, NULL);
10102
10103
  /*
10104
   * end of parsing of this node.
10105
   */
10106
116k
  nodePop(ctxt);
10107
116k
  namePop(ctxt);
10108
116k
  spacePop(ctxt);
10109
116k
  if (nsNr != ctxt->nsNr)
10110
34.0k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10111
10112
  /*
10113
   * Capture end position and add node
10114
   */
10115
116k
  if ( ret != NULL && ctxt->record_info ) {
10116
0
     node_info.end_pos = ctxt->input->consumed +
10117
0
            (CUR_PTR - ctxt->input->base);
10118
0
     node_info.end_line = ctxt->input->line;
10119
0
     node_info.node = ret;
10120
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10121
0
  }
10122
116k
  return(-1);
10123
116k
    }
10124
10125
94.5k
    return(0);
10126
210k
}
10127
10128
/**
10129
 * xmlParseElementEnd:
10130
 * @ctxt:  an XML parser context
10131
 *
10132
 * Parse the end of an XML element.
10133
 */
10134
static void
10135
14.0k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10136
14.0k
    xmlParserNodeInfo node_info;
10137
14.0k
    xmlNodePtr ret = ctxt->node;
10138
10139
14.0k
    if (ctxt->nameNr <= 0)
10140
0
        return;
10141
10142
    /*
10143
     * parse the end of tag: '</' should be here.
10144
     */
10145
14.0k
    if (ctxt->sax2) {
10146
14.0k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10147
14.0k
  namePop(ctxt);
10148
14.0k
    }
10149
0
#ifdef LIBXML_SAX1_ENABLED
10150
0
    else
10151
0
  xmlParseEndTag1(ctxt, 0);
10152
14.0k
#endif /* LIBXML_SAX1_ENABLED */
10153
10154
    /*
10155
     * Capture end position and add node
10156
     */
10157
14.0k
    if ( ret != NULL && ctxt->record_info ) {
10158
0
       node_info.end_pos = ctxt->input->consumed +
10159
0
                          (CUR_PTR - ctxt->input->base);
10160
0
       node_info.end_line = ctxt->input->line;
10161
0
       node_info.node = ret;
10162
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10163
0
    }
10164
14.0k
}
10165
10166
/**
10167
 * xmlParseVersionNum:
10168
 * @ctxt:  an XML parser context
10169
 *
10170
 * parse the XML version value.
10171
 *
10172
 * [26] VersionNum ::= '1.' [0-9]+
10173
 *
10174
 * In practice allow [0-9].[0-9]+ at that level
10175
 *
10176
 * Returns the string giving the XML version number, or NULL
10177
 */
10178
xmlChar *
10179
178
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10180
178
    xmlChar *buf = NULL;
10181
178
    int len = 0;
10182
178
    int size = 10;
10183
178
    xmlChar cur;
10184
10185
178
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10186
178
    if (buf == NULL) {
10187
0
  xmlErrMemory(ctxt, NULL);
10188
0
  return(NULL);
10189
0
    }
10190
178
    cur = CUR;
10191
178
    if (!((cur >= '0') && (cur <= '9'))) {
10192
48
  xmlFree(buf);
10193
48
  return(NULL);
10194
48
    }
10195
130
    buf[len++] = cur;
10196
130
    NEXT;
10197
130
    cur=CUR;
10198
130
    if (cur != '.') {
10199
4
  xmlFree(buf);
10200
4
  return(NULL);
10201
4
    }
10202
126
    buf[len++] = cur;
10203
126
    NEXT;
10204
126
    cur=CUR;
10205
9.31k
    while ((cur >= '0') && (cur <= '9')) {
10206
9.19k
  if (len + 1 >= size) {
10207
104
      xmlChar *tmp;
10208
10209
104
      size *= 2;
10210
104
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10211
104
      if (tmp == NULL) {
10212
0
          xmlFree(buf);
10213
0
    xmlErrMemory(ctxt, NULL);
10214
0
    return(NULL);
10215
0
      }
10216
104
      buf = tmp;
10217
104
  }
10218
9.19k
  buf[len++] = cur;
10219
9.19k
  NEXT;
10220
9.19k
  cur=CUR;
10221
9.19k
    }
10222
126
    buf[len] = 0;
10223
126
    return(buf);
10224
126
}
10225
10226
/**
10227
 * xmlParseVersionInfo:
10228
 * @ctxt:  an XML parser context
10229
 *
10230
 * parse the XML version.
10231
 *
10232
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10233
 *
10234
 * [25] Eq ::= S? '=' S?
10235
 *
10236
 * Returns the version string, e.g. "1.0"
10237
 */
10238
10239
xmlChar *
10240
3.03k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10241
3.03k
    xmlChar *version = NULL;
10242
10243
3.03k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10244
1.43k
  SKIP(7);
10245
1.43k
  SKIP_BLANKS;
10246
1.43k
  if (RAW != '=') {
10247
6
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10248
6
      return(NULL);
10249
6
        }
10250
1.43k
  NEXT;
10251
1.43k
  SKIP_BLANKS;
10252
1.43k
  if (RAW == '"') {
10253
175
      NEXT;
10254
175
      version = xmlParseVersionNum(ctxt);
10255
175
      if (RAW != '"') {
10256
39
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10257
39
      } else
10258
136
          NEXT;
10259
1.25k
  } else if (RAW == '\''){
10260
3
      NEXT;
10261
3
      version = xmlParseVersionNum(ctxt);
10262
3
      if (RAW != '\'') {
10263
1
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10264
1
      } else
10265
2
          NEXT;
10266
1.25k
  } else {
10267
1.25k
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10268
1.25k
  }
10269
1.43k
    }
10270
3.02k
    return(version);
10271
3.03k
}
10272
10273
/**
10274
 * xmlParseEncName:
10275
 * @ctxt:  an XML parser context
10276
 *
10277
 * parse the XML encoding name
10278
 *
10279
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10280
 *
10281
 * Returns the encoding name value or NULL
10282
 */
10283
xmlChar *
10284
2.74k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10285
2.74k
    xmlChar *buf = NULL;
10286
2.74k
    int len = 0;
10287
2.74k
    int size = 10;
10288
2.74k
    xmlChar cur;
10289
10290
2.74k
    cur = CUR;
10291
2.74k
    if (((cur >= 'a') && (cur <= 'z')) ||
10292
2.74k
        ((cur >= 'A') && (cur <= 'Z'))) {
10293
2.73k
  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10294
2.73k
  if (buf == NULL) {
10295
0
      xmlErrMemory(ctxt, NULL);
10296
0
      return(NULL);
10297
0
  }
10298
10299
2.73k
  buf[len++] = cur;
10300
2.73k
  NEXT;
10301
2.73k
  cur = CUR;
10302
569k
  while (((cur >= 'a') && (cur <= 'z')) ||
10303
569k
         ((cur >= 'A') && (cur <= 'Z')) ||
10304
569k
         ((cur >= '0') && (cur <= '9')) ||
10305
569k
         (cur == '.') || (cur == '_') ||
10306
569k
         (cur == '-')) {
10307
566k
      if (len + 1 >= size) {
10308
579
          xmlChar *tmp;
10309
10310
579
    size *= 2;
10311
579
    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10312
579
    if (tmp == NULL) {
10313
0
        xmlErrMemory(ctxt, NULL);
10314
0
        xmlFree(buf);
10315
0
        return(NULL);
10316
0
    }
10317
579
    buf = tmp;
10318
579
      }
10319
566k
      buf[len++] = cur;
10320
566k
      NEXT;
10321
566k
      cur = CUR;
10322
566k
      if (cur == 0) {
10323
47
          SHRINK;
10324
47
    GROW;
10325
47
    cur = CUR;
10326
47
      }
10327
566k
        }
10328
2.73k
  buf[len] = 0;
10329
2.73k
    } else {
10330
7
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10331
7
    }
10332
2.74k
    return(buf);
10333
2.74k
}
10334
10335
/**
10336
 * xmlParseEncodingDecl:
10337
 * @ctxt:  an XML parser context
10338
 *
10339
 * parse the XML encoding declaration
10340
 *
10341
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10342
 *
10343
 * this setups the conversion filters.
10344
 *
10345
 * Returns the encoding value or NULL
10346
 */
10347
10348
const xmlChar *
10349
3.03k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10350
3.03k
    xmlChar *encoding = NULL;
10351
10352
3.03k
    SKIP_BLANKS;
10353
3.03k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10354
2.76k
  SKIP(8);
10355
2.76k
  SKIP_BLANKS;
10356
2.76k
  if (RAW != '=') {
10357
5
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10358
5
      return(NULL);
10359
5
        }
10360
2.75k
  NEXT;
10361
2.75k
  SKIP_BLANKS;
10362
2.75k
  if (RAW == '"') {
10363
2.64k
      NEXT;
10364
2.64k
      encoding = xmlParseEncName(ctxt);
10365
2.64k
      if (RAW != '"') {
10366
73
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10367
73
    xmlFree((xmlChar *) encoding);
10368
73
    return(NULL);
10369
73
      } else
10370
2.56k
          NEXT;
10371
2.64k
  } else if (RAW == '\''){
10372
103
      NEXT;
10373
103
      encoding = xmlParseEncName(ctxt);
10374
103
      if (RAW != '\'') {
10375
9
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10376
9
    xmlFree((xmlChar *) encoding);
10377
9
    return(NULL);
10378
9
      } else
10379
94
          NEXT;
10380
103
  } else {
10381
12
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10382
12
  }
10383
10384
        /*
10385
         * Non standard parsing, allowing the user to ignore encoding
10386
         */
10387
2.67k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10388
0
      xmlFree((xmlChar *) encoding);
10389
0
            return(NULL);
10390
0
  }
10391
10392
  /*
10393
   * UTF-16 encoding switch has already taken place at this stage,
10394
   * more over the little-endian/big-endian selection is already done
10395
   */
10396
2.67k
        if ((encoding != NULL) &&
10397
2.67k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10398
2.65k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10399
      /*
10400
       * If no encoding was passed to the parser, that we are
10401
       * using UTF-16 and no decoder is present i.e. the
10402
       * document is apparently UTF-8 compatible, then raise an
10403
       * encoding mismatch fatal error
10404
       */
10405
3
      if ((ctxt->encoding == NULL) &&
10406
3
          (ctxt->input->buf != NULL) &&
10407
3
          (ctxt->input->buf->encoder == NULL)) {
10408
2
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10409
2
      "Document labelled UTF-16 but has UTF-8 content\n");
10410
2
      }
10411
3
      if (ctxt->encoding != NULL)
10412
0
    xmlFree((xmlChar *) ctxt->encoding);
10413
3
      ctxt->encoding = encoding;
10414
3
  }
10415
  /*
10416
   * UTF-8 encoding is handled natively
10417
   */
10418
2.67k
        else if ((encoding != NULL) &&
10419
2.67k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10420
2.65k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10421
2
      if (ctxt->encoding != NULL)
10422
0
    xmlFree((xmlChar *) ctxt->encoding);
10423
2
      ctxt->encoding = encoding;
10424
2
  }
10425
2.66k
  else if (encoding != NULL) {
10426
2.65k
      xmlCharEncodingHandlerPtr handler;
10427
10428
2.65k
      if (ctxt->input->encoding != NULL)
10429
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10430
2.65k
      ctxt->input->encoding = encoding;
10431
10432
2.65k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10433
2.65k
      if (handler != NULL) {
10434
2.25k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10435
        /* failed to convert */
10436
2
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10437
2
        return(NULL);
10438
2
    }
10439
2.25k
      } else {
10440
403
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10441
403
      "Unsupported encoding %s\n", encoding);
10442
403
    return(NULL);
10443
403
      }
10444
2.65k
  }
10445
2.67k
    }
10446
2.53k
    return(encoding);
10447
3.03k
}
10448
10449
/**
10450
 * xmlParseSDDecl:
10451
 * @ctxt:  an XML parser context
10452
 *
10453
 * parse the XML standalone declaration
10454
 *
10455
 * [32] SDDecl ::= S 'standalone' Eq
10456
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10457
 *
10458
 * [ VC: Standalone Document Declaration ]
10459
 * TODO The standalone document declaration must have the value "no"
10460
 * if any external markup declarations contain declarations of:
10461
 *  - attributes with default values, if elements to which these
10462
 *    attributes apply appear in the document without specifications
10463
 *    of values for these attributes, or
10464
 *  - entities (other than amp, lt, gt, apos, quot), if references
10465
 *    to those entities appear in the document, or
10466
 *  - attributes with values subject to normalization, where the
10467
 *    attribute appears in the document with a value which will change
10468
 *    as a result of normalization, or
10469
 *  - element types with element content, if white space occurs directly
10470
 *    within any instance of those types.
10471
 *
10472
 * Returns:
10473
 *   1 if standalone="yes"
10474
 *   0 if standalone="no"
10475
 *  -2 if standalone attribute is missing or invalid
10476
 *    (A standalone value of -2 means that the XML declaration was found,
10477
 *     but no value was specified for the standalone attribute).
10478
 */
10479
10480
int
10481
1.07k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10482
1.07k
    int standalone = -2;
10483
10484
1.07k
    SKIP_BLANKS;
10485
1.07k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10486
69
  SKIP(10);
10487
69
        SKIP_BLANKS;
10488
69
  if (RAW != '=') {
10489
2
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10490
2
      return(standalone);
10491
2
        }
10492
67
  NEXT;
10493
67
  SKIP_BLANKS;
10494
67
        if (RAW == '\''){
10495
14
      NEXT;
10496
14
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10497
1
          standalone = 0;
10498
1
                SKIP(2);
10499
13
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10500
13
                 (NXT(2) == 's')) {
10501
6
          standalone = 1;
10502
6
    SKIP(3);
10503
7
            } else {
10504
7
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10505
7
      }
10506
14
      if (RAW != '\'') {
10507
13
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10508
13
      } else
10509
1
          NEXT;
10510
53
  } else if (RAW == '"'){
10511
51
      NEXT;
10512
51
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10513
2
          standalone = 0;
10514
2
    SKIP(2);
10515
49
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10516
49
                 (NXT(2) == 's')) {
10517
42
          standalone = 1;
10518
42
                SKIP(3);
10519
42
            } else {
10520
7
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10521
7
      }
10522
51
      if (RAW != '"') {
10523
27
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10524
27
      } else
10525
24
          NEXT;
10526
51
  } else {
10527
2
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10528
2
        }
10529
67
    }
10530
1.07k
    return(standalone);
10531
1.07k
}
10532
10533
/**
10534
 * xmlParseXMLDecl:
10535
 * @ctxt:  an XML parser context
10536
 *
10537
 * parse an XML declaration header
10538
 *
10539
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10540
 */
10541
10542
void
10543
3.03k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10544
3.03k
    xmlChar *version;
10545
10546
    /*
10547
     * This value for standalone indicates that the document has an
10548
     * XML declaration but it does not have a standalone attribute.
10549
     * It will be overwritten later if a standalone attribute is found.
10550
     */
10551
3.03k
    ctxt->input->standalone = -2;
10552
10553
    /*
10554
     * We know that '<?xml' is here.
10555
     */
10556
3.03k
    SKIP(5);
10557
10558
3.03k
    if (!IS_BLANK_CH(RAW)) {
10559
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10560
0
                 "Blank needed after '<?xml'\n");
10561
0
    }
10562
3.03k
    SKIP_BLANKS;
10563
10564
    /*
10565
     * We must have the VersionInfo here.
10566
     */
10567
3.03k
    version = xmlParseVersionInfo(ctxt);
10568
3.03k
    if (version == NULL) {
10569
2.90k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10570
2.90k
    } else {
10571
126
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10572
      /*
10573
       * Changed here for XML-1.0 5th edition
10574
       */
10575
105
      if (ctxt->options & XML_PARSE_OLD10) {
10576
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10577
0
                "Unsupported version '%s'\n",
10578
0
                version);
10579
105
      } else {
10580
105
          if ((version[0] == '1') && ((version[1] == '.'))) {
10581
68
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10582
68
                      "Unsupported version '%s'\n",
10583
68
          version, NULL);
10584
68
    } else {
10585
37
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10586
37
              "Unsupported version '%s'\n",
10587
37
              version);
10588
37
    }
10589
105
      }
10590
105
  }
10591
126
  if (ctxt->version != NULL)
10592
0
      xmlFree((void *) ctxt->version);
10593
126
  ctxt->version = version;
10594
126
    }
10595
10596
    /*
10597
     * We may have the encoding declaration
10598
     */
10599
3.03k
    if (!IS_BLANK_CH(RAW)) {
10600
2.89k
        if ((RAW == '?') && (NXT(1) == '>')) {
10601
2
      SKIP(2);
10602
2
      return;
10603
2
  }
10604
2.89k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10605
2.89k
    }
10606
3.03k
    xmlParseEncodingDecl(ctxt);
10607
3.03k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10608
3.03k
         (ctxt->instate == XML_PARSER_EOF)) {
10609
  /*
10610
   * The XML REC instructs us to stop parsing right here
10611
   */
10612
405
        return;
10613
405
    }
10614
10615
    /*
10616
     * We may have the standalone status.
10617
     */
10618
2.62k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10619
2.24k
        if ((RAW == '?') && (NXT(1) == '>')) {
10620
1.54k
      SKIP(2);
10621
1.54k
      return;
10622
1.54k
  }
10623
698
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10624
698
    }
10625
10626
    /*
10627
     * We can grow the input buffer freely at that point
10628
     */
10629
1.07k
    GROW;
10630
10631
1.07k
    SKIP_BLANKS;
10632
1.07k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10633
10634
1.07k
    SKIP_BLANKS;
10635
1.07k
    if ((RAW == '?') && (NXT(1) == '>')) {
10636
27
        SKIP(2);
10637
1.05k
    } else if (RAW == '>') {
10638
        /* Deprecated old WD ... */
10639
48
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10640
48
  NEXT;
10641
1.00k
    } else {
10642
1.00k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10643
1.00k
  MOVETO_ENDTAG(CUR_PTR);
10644
1.00k
  NEXT;
10645
1.00k
    }
10646
1.07k
}
10647
10648
/**
10649
 * xmlParseMisc:
10650
 * @ctxt:  an XML parser context
10651
 *
10652
 * parse an XML Misc* optional field.
10653
 *
10654
 * [27] Misc ::= Comment | PI |  S
10655
 */
10656
10657
void
10658
28.1k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10659
37.6k
    while (ctxt->instate != XML_PARSER_EOF) {
10660
37.6k
        SKIP_BLANKS;
10661
37.6k
        GROW;
10662
37.6k
        if ((RAW == '<') && (NXT(1) == '?')) {
10663
7.90k
      xmlParsePI(ctxt);
10664
29.7k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10665
1.59k
      xmlParseComment(ctxt);
10666
28.1k
        } else {
10667
28.1k
            break;
10668
28.1k
        }
10669
37.6k
    }
10670
28.1k
}
10671
10672
/**
10673
 * xmlParseDocument:
10674
 * @ctxt:  an XML parser context
10675
 *
10676
 * parse an XML document (and build a tree if using the standard SAX
10677
 * interface).
10678
 *
10679
 * [1] document ::= prolog element Misc*
10680
 *
10681
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10682
 *
10683
 * Returns 0, -1 in case of error. the parser context is augmented
10684
 *                as a result of the parsing.
10685
 */
10686
10687
int
10688
15.2k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10689
15.2k
    xmlChar start[4];
10690
15.2k
    xmlCharEncoding enc;
10691
10692
15.2k
    xmlInitParser();
10693
10694
15.2k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10695
0
        return(-1);
10696
10697
15.2k
    GROW;
10698
10699
    /*
10700
     * SAX: detecting the level.
10701
     */
10702
15.2k
    xmlDetectSAX2(ctxt);
10703
10704
    /*
10705
     * SAX: beginning of the document processing.
10706
     */
10707
15.2k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10708
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10709
15.2k
    if (ctxt->instate == XML_PARSER_EOF)
10710
0
  return(-1);
10711
10712
15.2k
    if ((ctxt->encoding == NULL) &&
10713
15.2k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10714
  /*
10715
   * Get the 4 first bytes and decode the charset
10716
   * if enc != XML_CHAR_ENCODING_NONE
10717
   * plug some encoding conversion routines.
10718
   */
10719
15.1k
  start[0] = RAW;
10720
15.1k
  start[1] = NXT(1);
10721
15.1k
  start[2] = NXT(2);
10722
15.1k
  start[3] = NXT(3);
10723
15.1k
  enc = xmlDetectCharEncoding(&start[0], 4);
10724
15.1k
  if (enc != XML_CHAR_ENCODING_NONE) {
10725
4.44k
      xmlSwitchEncoding(ctxt, enc);
10726
4.44k
  }
10727
15.1k
    }
10728
10729
10730
15.2k
    if (CUR == 0) {
10731
67
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10732
67
  return(-1);
10733
67
    }
10734
10735
    /*
10736
     * Check for the XMLDecl in the Prolog.
10737
     * do not GROW here to avoid the detected encoder to decode more
10738
     * than just the first line, unless the amount of data is really
10739
     * too small to hold "<?xml version="1.0" encoding="foo"
10740
     */
10741
15.1k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10742
6.06k
       GROW;
10743
6.06k
    }
10744
15.1k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10745
10746
  /*
10747
   * Note that we will switch encoding on the fly.
10748
   */
10749
3.03k
  xmlParseXMLDecl(ctxt);
10750
3.03k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10751
3.03k
      (ctxt->instate == XML_PARSER_EOF)) {
10752
      /*
10753
       * The XML REC instructs us to stop parsing right here
10754
       */
10755
405
      return(-1);
10756
405
  }
10757
2.62k
  ctxt->standalone = ctxt->input->standalone;
10758
2.62k
  SKIP_BLANKS;
10759
12.1k
    } else {
10760
12.1k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10761
12.1k
    }
10762
14.7k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10763
0
        ctxt->sax->startDocument(ctxt->userData);
10764
14.7k
    if (ctxt->instate == XML_PARSER_EOF)
10765
0
  return(-1);
10766
14.7k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10767
14.7k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10768
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10769
0
    }
10770
10771
    /*
10772
     * The Misc part of the Prolog
10773
     */
10774
14.7k
    xmlParseMisc(ctxt);
10775
10776
    /*
10777
     * Then possibly doc type declaration(s) and more Misc
10778
     * (doctypedecl Misc*)?
10779
     */
10780
14.7k
    GROW;
10781
14.7k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10782
10783
4.45k
  ctxt->inSubset = 1;
10784
4.45k
  xmlParseDocTypeDecl(ctxt);
10785
4.45k
  if (RAW == '[') {
10786
4.16k
      ctxt->instate = XML_PARSER_DTD;
10787
4.16k
      xmlParseInternalSubset(ctxt);
10788
4.16k
      if (ctxt->instate == XML_PARSER_EOF)
10789
898
    return(-1);
10790
4.16k
  }
10791
10792
  /*
10793
   * Create and update the external subset.
10794
   */
10795
3.55k
  ctxt->inSubset = 2;
10796
3.55k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10797
3.55k
      (!ctxt->disableSAX))
10798
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10799
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10800
3.55k
  if (ctxt->instate == XML_PARSER_EOF)
10801
0
      return(-1);
10802
3.55k
  ctxt->inSubset = 0;
10803
10804
3.55k
        xmlCleanSpecialAttr(ctxt);
10805
10806
3.55k
  ctxt->instate = XML_PARSER_PROLOG;
10807
3.55k
  xmlParseMisc(ctxt);
10808
3.55k
    }
10809
10810
    /*
10811
     * Time to start parsing the tree itself
10812
     */
10813
13.8k
    GROW;
10814
13.8k
    if (RAW != '<') {
10815
4.03k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10816
4.03k
           "Start tag expected, '<' not found\n");
10817
9.85k
    } else {
10818
9.85k
  ctxt->instate = XML_PARSER_CONTENT;
10819
9.85k
  xmlParseElement(ctxt);
10820
9.85k
  ctxt->instate = XML_PARSER_EPILOG;
10821
10822
10823
  /*
10824
   * The Misc part at the end
10825
   */
10826
9.85k
  xmlParseMisc(ctxt);
10827
10828
9.85k
  if (RAW != 0) {
10829
1.41k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10830
1.41k
  }
10831
9.85k
  ctxt->instate = XML_PARSER_EOF;
10832
9.85k
    }
10833
10834
    /*
10835
     * SAX: end of the document processing.
10836
     */
10837
13.8k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10838
0
        ctxt->sax->endDocument(ctxt->userData);
10839
10840
    /*
10841
     * Remove locally kept entity definitions if the tree was not built
10842
     */
10843
13.8k
    if ((ctxt->myDoc != NULL) &&
10844
13.8k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10845
499
  xmlFreeDoc(ctxt->myDoc);
10846
499
  ctxt->myDoc = NULL;
10847
499
    }
10848
10849
13.8k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10850
0
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10851
0
  if (ctxt->valid)
10852
0
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10853
0
  if (ctxt->nsWellFormed)
10854
0
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10855
0
  if (ctxt->options & XML_PARSE_OLD10)
10856
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10857
0
    }
10858
13.8k
    if (! ctxt->wellFormed) {
10859
13.8k
  ctxt->valid = 0;
10860
13.8k
  return(-1);
10861
13.8k
    }
10862
7
    return(0);
10863
13.8k
}
10864
10865
/**
10866
 * xmlParseExtParsedEnt:
10867
 * @ctxt:  an XML parser context
10868
 *
10869
 * parse a general parsed entity
10870
 * An external general parsed entity is well-formed if it matches the
10871
 * production labeled extParsedEnt.
10872
 *
10873
 * [78] extParsedEnt ::= TextDecl? content
10874
 *
10875
 * Returns 0, -1 in case of error. the parser context is augmented
10876
 *                as a result of the parsing.
10877
 */
10878
10879
int
10880
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10881
0
    xmlChar start[4];
10882
0
    xmlCharEncoding enc;
10883
10884
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10885
0
        return(-1);
10886
10887
0
    xmlDetectSAX2(ctxt);
10888
10889
0
    GROW;
10890
10891
    /*
10892
     * SAX: beginning of the document processing.
10893
     */
10894
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10895
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10896
10897
    /*
10898
     * Get the 4 first bytes and decode the charset
10899
     * if enc != XML_CHAR_ENCODING_NONE
10900
     * plug some encoding conversion routines.
10901
     */
10902
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10903
0
  start[0] = RAW;
10904
0
  start[1] = NXT(1);
10905
0
  start[2] = NXT(2);
10906
0
  start[3] = NXT(3);
10907
0
  enc = xmlDetectCharEncoding(start, 4);
10908
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10909
0
      xmlSwitchEncoding(ctxt, enc);
10910
0
  }
10911
0
    }
10912
10913
10914
0
    if (CUR == 0) {
10915
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10916
0
    }
10917
10918
    /*
10919
     * Check for the XMLDecl in the Prolog.
10920
     */
10921
0
    GROW;
10922
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10923
10924
  /*
10925
   * Note that we will switch encoding on the fly.
10926
   */
10927
0
  xmlParseXMLDecl(ctxt);
10928
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10929
      /*
10930
       * The XML REC instructs us to stop parsing right here
10931
       */
10932
0
      return(-1);
10933
0
  }
10934
0
  SKIP_BLANKS;
10935
0
    } else {
10936
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10937
0
    }
10938
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10939
0
        ctxt->sax->startDocument(ctxt->userData);
10940
0
    if (ctxt->instate == XML_PARSER_EOF)
10941
0
  return(-1);
10942
10943
    /*
10944
     * Doing validity checking on chunk doesn't make sense
10945
     */
10946
0
    ctxt->instate = XML_PARSER_CONTENT;
10947
0
    ctxt->validate = 0;
10948
0
    ctxt->loadsubset = 0;
10949
0
    ctxt->depth = 0;
10950
10951
0
    xmlParseContent(ctxt);
10952
0
    if (ctxt->instate == XML_PARSER_EOF)
10953
0
  return(-1);
10954
10955
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10956
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10957
0
    } else if (RAW != 0) {
10958
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10959
0
    }
10960
10961
    /*
10962
     * SAX: end of the document processing.
10963
     */
10964
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10965
0
        ctxt->sax->endDocument(ctxt->userData);
10966
10967
0
    if (! ctxt->wellFormed) return(-1);
10968
0
    return(0);
10969
0
}
10970
10971
#ifdef LIBXML_PUSH_ENABLED
10972
/************************************************************************
10973
 *                  *
10974
 *    Progressive parsing interfaces        *
10975
 *                  *
10976
 ************************************************************************/
10977
10978
/**
10979
 * xmlParseLookupSequence:
10980
 * @ctxt:  an XML parser context
10981
 * @first:  the first char to lookup
10982
 * @next:  the next char to lookup or zero
10983
 * @third:  the next char to lookup or zero
10984
 *
10985
 * Try to find if a sequence (first, next, third) or  just (first next) or
10986
 * (first) is available in the input stream.
10987
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10988
 * to avoid rescanning sequences of bytes, it DOES change the state of the
10989
 * parser, do not use liberally.
10990
 *
10991
 * Returns the index to the current parsing point if the full sequence
10992
 *      is available, -1 otherwise.
10993
 */
10994
static int
10995
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10996
0
                       xmlChar next, xmlChar third) {
10997
0
    int base, len;
10998
0
    xmlParserInputPtr in;
10999
0
    const xmlChar *buf;
11000
11001
0
    in = ctxt->input;
11002
0
    if (in == NULL) return(-1);
11003
0
    base = in->cur - in->base;
11004
0
    if (base < 0) return(-1);
11005
0
    if (ctxt->checkIndex > base)
11006
0
        base = ctxt->checkIndex;
11007
0
    if (in->buf == NULL) {
11008
0
  buf = in->base;
11009
0
  len = in->length;
11010
0
    } else {
11011
0
  buf = xmlBufContent(in->buf->buffer);
11012
0
  len = xmlBufUse(in->buf->buffer);
11013
0
    }
11014
    /* take into account the sequence length */
11015
0
    if (third) len -= 2;
11016
0
    else if (next) len --;
11017
0
    for (;base < len;base++) {
11018
0
        if (buf[base] == first) {
11019
0
      if (third != 0) {
11020
0
    if ((buf[base + 1] != next) ||
11021
0
        (buf[base + 2] != third)) continue;
11022
0
      } else if (next != 0) {
11023
0
    if (buf[base + 1] != next) continue;
11024
0
      }
11025
0
      ctxt->checkIndex = 0;
11026
#ifdef DEBUG_PUSH
11027
      if (next == 0)
11028
    xmlGenericError(xmlGenericErrorContext,
11029
      "PP: lookup '%c' found at %d\n",
11030
      first, base);
11031
      else if (third == 0)
11032
    xmlGenericError(xmlGenericErrorContext,
11033
      "PP: lookup '%c%c' found at %d\n",
11034
      first, next, base);
11035
      else
11036
    xmlGenericError(xmlGenericErrorContext,
11037
      "PP: lookup '%c%c%c' found at %d\n",
11038
      first, next, third, base);
11039
#endif
11040
0
      return(base - (in->cur - in->base));
11041
0
  }
11042
0
    }
11043
0
    ctxt->checkIndex = base;
11044
#ifdef DEBUG_PUSH
11045
    if (next == 0)
11046
  xmlGenericError(xmlGenericErrorContext,
11047
    "PP: lookup '%c' failed\n", first);
11048
    else if (third == 0)
11049
  xmlGenericError(xmlGenericErrorContext,
11050
    "PP: lookup '%c%c' failed\n", first, next);
11051
    else
11052
  xmlGenericError(xmlGenericErrorContext,
11053
    "PP: lookup '%c%c%c' failed\n", first, next, third);
11054
#endif
11055
0
    return(-1);
11056
0
}
11057
11058
/**
11059
 * xmlParseGetLasts:
11060
 * @ctxt:  an XML parser context
11061
 * @lastlt:  pointer to store the last '<' from the input
11062
 * @lastgt:  pointer to store the last '>' from the input
11063
 *
11064
 * Lookup the last < and > in the current chunk
11065
 */
11066
static void
11067
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11068
0
                 const xmlChar **lastgt) {
11069
0
    const xmlChar *tmp;
11070
11071
0
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11072
0
  xmlGenericError(xmlGenericErrorContext,
11073
0
        "Internal error: xmlParseGetLasts\n");
11074
0
  return;
11075
0
    }
11076
0
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11077
0
        tmp = ctxt->input->end;
11078
0
  tmp--;
11079
0
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11080
0
  if (tmp < ctxt->input->base) {
11081
0
      *lastlt = NULL;
11082
0
      *lastgt = NULL;
11083
0
  } else {
11084
0
      *lastlt = tmp;
11085
0
      tmp++;
11086
0
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11087
0
          if (*tmp == '\'') {
11088
0
        tmp++;
11089
0
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11090
0
        if (tmp < ctxt->input->end) tmp++;
11091
0
    } else if (*tmp == '"') {
11092
0
        tmp++;
11093
0
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11094
0
        if (tmp < ctxt->input->end) tmp++;
11095
0
    } else
11096
0
        tmp++;
11097
0
      }
11098
0
      if (tmp < ctxt->input->end)
11099
0
          *lastgt = tmp;
11100
0
      else {
11101
0
          tmp = *lastlt;
11102
0
    tmp--;
11103
0
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11104
0
    if (tmp >= ctxt->input->base)
11105
0
        *lastgt = tmp;
11106
0
    else
11107
0
        *lastgt = NULL;
11108
0
      }
11109
0
  }
11110
0
    } else {
11111
0
        *lastlt = NULL;
11112
0
  *lastgt = NULL;
11113
0
    }
11114
0
}
11115
/**
11116
 * xmlCheckCdataPush:
11117
 * @cur: pointer to the block of characters
11118
 * @len: length of the block in bytes
11119
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11120
 *
11121
 * Check that the block of characters is okay as SCdata content [20]
11122
 *
11123
 * Returns the number of bytes to pass if okay, a negative index where an
11124
 *         UTF-8 error occurred otherwise
11125
 */
11126
static int
11127
0
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11128
0
    int ix;
11129
0
    unsigned char c;
11130
0
    int codepoint;
11131
11132
0
    if ((utf == NULL) || (len <= 0))
11133
0
        return(0);
11134
11135
0
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11136
0
        c = utf[ix];
11137
0
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11138
0
      if (c >= 0x20)
11139
0
    ix++;
11140
0
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11141
0
          ix++;
11142
0
      else
11143
0
          return(-ix);
11144
0
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11145
0
      if (ix + 2 > len) return(complete ? -ix : ix);
11146
0
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11147
0
          return(-ix);
11148
0
      codepoint = (utf[ix] & 0x1f) << 6;
11149
0
      codepoint |= utf[ix+1] & 0x3f;
11150
0
      if (!xmlIsCharQ(codepoint))
11151
0
          return(-ix);
11152
0
      ix += 2;
11153
0
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11154
0
      if (ix + 3 > len) return(complete ? -ix : ix);
11155
0
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11156
0
          ((utf[ix+2] & 0xc0) != 0x80))
11157
0
        return(-ix);
11158
0
      codepoint = (utf[ix] & 0xf) << 12;
11159
0
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11160
0
      codepoint |= utf[ix+2] & 0x3f;
11161
0
      if (!xmlIsCharQ(codepoint))
11162
0
          return(-ix);
11163
0
      ix += 3;
11164
0
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11165
0
      if (ix + 4 > len) return(complete ? -ix : ix);
11166
0
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11167
0
          ((utf[ix+2] & 0xc0) != 0x80) ||
11168
0
    ((utf[ix+3] & 0xc0) != 0x80))
11169
0
        return(-ix);
11170
0
      codepoint = (utf[ix] & 0x7) << 18;
11171
0
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11172
0
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11173
0
      codepoint |= utf[ix+3] & 0x3f;
11174
0
      if (!xmlIsCharQ(codepoint))
11175
0
          return(-ix);
11176
0
      ix += 4;
11177
0
  } else       /* unknown encoding */
11178
0
      return(-ix);
11179
0
      }
11180
0
      return(ix);
11181
0
}
11182
11183
/**
11184
 * xmlParseTryOrFinish:
11185
 * @ctxt:  an XML parser context
11186
 * @terminate:  last chunk indicator
11187
 *
11188
 * Try to progress on parsing
11189
 *
11190
 * Returns zero if no parsing was possible
11191
 */
11192
static int
11193
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11194
0
    int ret = 0;
11195
0
    int avail, tlen;
11196
0
    xmlChar cur, next;
11197
0
    const xmlChar *lastlt, *lastgt;
11198
11199
0
    if (ctxt->input == NULL)
11200
0
        return(0);
11201
11202
#ifdef DEBUG_PUSH
11203
    switch (ctxt->instate) {
11204
  case XML_PARSER_EOF:
11205
      xmlGenericError(xmlGenericErrorContext,
11206
        "PP: try EOF\n"); break;
11207
  case XML_PARSER_START:
11208
      xmlGenericError(xmlGenericErrorContext,
11209
        "PP: try START\n"); break;
11210
  case XML_PARSER_MISC:
11211
      xmlGenericError(xmlGenericErrorContext,
11212
        "PP: try MISC\n");break;
11213
  case XML_PARSER_COMMENT:
11214
      xmlGenericError(xmlGenericErrorContext,
11215
        "PP: try COMMENT\n");break;
11216
  case XML_PARSER_PROLOG:
11217
      xmlGenericError(xmlGenericErrorContext,
11218
        "PP: try PROLOG\n");break;
11219
  case XML_PARSER_START_TAG:
11220
      xmlGenericError(xmlGenericErrorContext,
11221
        "PP: try START_TAG\n");break;
11222
  case XML_PARSER_CONTENT:
11223
      xmlGenericError(xmlGenericErrorContext,
11224
        "PP: try CONTENT\n");break;
11225
  case XML_PARSER_CDATA_SECTION:
11226
      xmlGenericError(xmlGenericErrorContext,
11227
        "PP: try CDATA_SECTION\n");break;
11228
  case XML_PARSER_END_TAG:
11229
      xmlGenericError(xmlGenericErrorContext,
11230
        "PP: try END_TAG\n");break;
11231
  case XML_PARSER_ENTITY_DECL:
11232
      xmlGenericError(xmlGenericErrorContext,
11233
        "PP: try ENTITY_DECL\n");break;
11234
  case XML_PARSER_ENTITY_VALUE:
11235
      xmlGenericError(xmlGenericErrorContext,
11236
        "PP: try ENTITY_VALUE\n");break;
11237
  case XML_PARSER_ATTRIBUTE_VALUE:
11238
      xmlGenericError(xmlGenericErrorContext,
11239
        "PP: try ATTRIBUTE_VALUE\n");break;
11240
  case XML_PARSER_DTD:
11241
      xmlGenericError(xmlGenericErrorContext,
11242
        "PP: try DTD\n");break;
11243
  case XML_PARSER_EPILOG:
11244
      xmlGenericError(xmlGenericErrorContext,
11245
        "PP: try EPILOG\n");break;
11246
  case XML_PARSER_PI:
11247
      xmlGenericError(xmlGenericErrorContext,
11248
        "PP: try PI\n");break;
11249
        case XML_PARSER_IGNORE:
11250
            xmlGenericError(xmlGenericErrorContext,
11251
        "PP: try IGNORE\n");break;
11252
    }
11253
#endif
11254
11255
0
    if ((ctxt->input != NULL) &&
11256
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11257
0
  xmlSHRINK(ctxt);
11258
0
  ctxt->checkIndex = 0;
11259
0
    }
11260
0
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11261
11262
0
    while (ctxt->instate != XML_PARSER_EOF) {
11263
0
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11264
0
      return(0);
11265
11266
0
  if (ctxt->input == NULL) break;
11267
0
  if (ctxt->input->buf == NULL)
11268
0
      avail = ctxt->input->length -
11269
0
              (ctxt->input->cur - ctxt->input->base);
11270
0
  else {
11271
      /*
11272
       * If we are operating on converted input, try to flush
11273
       * remaining chars to avoid them stalling in the non-converted
11274
       * buffer. But do not do this in document start where
11275
       * encoding="..." may not have been read and we work on a
11276
       * guessed encoding.
11277
       */
11278
0
      if ((ctxt->instate != XML_PARSER_START) &&
11279
0
          (ctxt->input->buf->raw != NULL) &&
11280
0
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11281
0
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11282
0
                                                 ctxt->input);
11283
0
    size_t current = ctxt->input->cur - ctxt->input->base;
11284
11285
0
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11286
0
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11287
0
                                      base, current);
11288
0
      }
11289
0
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11290
0
        (ctxt->input->cur - ctxt->input->base);
11291
0
  }
11292
0
        if (avail < 1)
11293
0
      goto done;
11294
0
        switch (ctxt->instate) {
11295
0
            case XML_PARSER_EOF:
11296
          /*
11297
     * Document parsing is done !
11298
     */
11299
0
          goto done;
11300
0
            case XML_PARSER_START:
11301
0
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11302
0
        xmlChar start[4];
11303
0
        xmlCharEncoding enc;
11304
11305
        /*
11306
         * Very first chars read from the document flow.
11307
         */
11308
0
        if (avail < 4)
11309
0
      goto done;
11310
11311
        /*
11312
         * Get the 4 first bytes and decode the charset
11313
         * if enc != XML_CHAR_ENCODING_NONE
11314
         * plug some encoding conversion routines,
11315
         * else xmlSwitchEncoding will set to (default)
11316
         * UTF8.
11317
         */
11318
0
        start[0] = RAW;
11319
0
        start[1] = NXT(1);
11320
0
        start[2] = NXT(2);
11321
0
        start[3] = NXT(3);
11322
0
        enc = xmlDetectCharEncoding(start, 4);
11323
0
        xmlSwitchEncoding(ctxt, enc);
11324
0
        break;
11325
0
    }
11326
11327
0
    if (avail < 2)
11328
0
        goto done;
11329
0
    cur = ctxt->input->cur[0];
11330
0
    next = ctxt->input->cur[1];
11331
0
    if (cur == 0) {
11332
0
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11333
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11334
0
                  &xmlDefaultSAXLocator);
11335
0
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11336
0
        xmlHaltParser(ctxt);
11337
#ifdef DEBUG_PUSH
11338
        xmlGenericError(xmlGenericErrorContext,
11339
          "PP: entering EOF\n");
11340
#endif
11341
0
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11342
0
      ctxt->sax->endDocument(ctxt->userData);
11343
0
        goto done;
11344
0
    }
11345
0
          if ((cur == '<') && (next == '?')) {
11346
        /* PI or XML decl */
11347
0
        if (avail < 5) return(ret);
11348
0
        if ((!terminate) &&
11349
0
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11350
0
      return(ret);
11351
0
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11352
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11353
0
                  &xmlDefaultSAXLocator);
11354
0
        if ((ctxt->input->cur[2] == 'x') &&
11355
0
      (ctxt->input->cur[3] == 'm') &&
11356
0
      (ctxt->input->cur[4] == 'l') &&
11357
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11358
0
      ret += 5;
11359
#ifdef DEBUG_PUSH
11360
      xmlGenericError(xmlGenericErrorContext,
11361
        "PP: Parsing XML Decl\n");
11362
#endif
11363
0
      xmlParseXMLDecl(ctxt);
11364
0
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11365
          /*
11366
           * The XML REC instructs us to stop parsing right
11367
           * here
11368
           */
11369
0
          xmlHaltParser(ctxt);
11370
0
          return(0);
11371
0
      }
11372
0
      ctxt->standalone = ctxt->input->standalone;
11373
0
      if ((ctxt->encoding == NULL) &&
11374
0
          (ctxt->input->encoding != NULL))
11375
0
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11376
0
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11377
0
          (!ctxt->disableSAX))
11378
0
          ctxt->sax->startDocument(ctxt->userData);
11379
0
      ctxt->instate = XML_PARSER_MISC;
11380
#ifdef DEBUG_PUSH
11381
      xmlGenericError(xmlGenericErrorContext,
11382
        "PP: entering MISC\n");
11383
#endif
11384
0
        } else {
11385
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11386
0
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11387
0
          (!ctxt->disableSAX))
11388
0
          ctxt->sax->startDocument(ctxt->userData);
11389
0
      ctxt->instate = XML_PARSER_MISC;
11390
#ifdef DEBUG_PUSH
11391
      xmlGenericError(xmlGenericErrorContext,
11392
        "PP: entering MISC\n");
11393
#endif
11394
0
        }
11395
0
    } else {
11396
0
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11397
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11398
0
                  &xmlDefaultSAXLocator);
11399
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11400
0
        if (ctxt->version == NULL) {
11401
0
            xmlErrMemory(ctxt, NULL);
11402
0
      break;
11403
0
        }
11404
0
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11405
0
            (!ctxt->disableSAX))
11406
0
      ctxt->sax->startDocument(ctxt->userData);
11407
0
        ctxt->instate = XML_PARSER_MISC;
11408
#ifdef DEBUG_PUSH
11409
        xmlGenericError(xmlGenericErrorContext,
11410
          "PP: entering MISC\n");
11411
#endif
11412
0
    }
11413
0
    break;
11414
0
            case XML_PARSER_START_TAG: {
11415
0
          const xmlChar *name;
11416
0
    const xmlChar *prefix = NULL;
11417
0
    const xmlChar *URI = NULL;
11418
0
                int line = ctxt->input->line;
11419
0
    int nsNr = ctxt->nsNr;
11420
11421
0
    if ((avail < 2) && (ctxt->inputNr == 1))
11422
0
        goto done;
11423
0
    cur = ctxt->input->cur[0];
11424
0
          if (cur != '<') {
11425
0
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11426
0
        xmlHaltParser(ctxt);
11427
0
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11428
0
      ctxt->sax->endDocument(ctxt->userData);
11429
0
        goto done;
11430
0
    }
11431
0
    if (!terminate) {
11432
0
        if (ctxt->progressive) {
11433
            /* > can be found unescaped in attribute values */
11434
0
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11435
0
          goto done;
11436
0
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11437
0
      goto done;
11438
0
        }
11439
0
    }
11440
0
    if (ctxt->spaceNr == 0)
11441
0
        spacePush(ctxt, -1);
11442
0
    else if (*ctxt->space == -2)
11443
0
        spacePush(ctxt, -1);
11444
0
    else
11445
0
        spacePush(ctxt, *ctxt->space);
11446
0
#ifdef LIBXML_SAX1_ENABLED
11447
0
    if (ctxt->sax2)
11448
0
#endif /* LIBXML_SAX1_ENABLED */
11449
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11450
0
#ifdef LIBXML_SAX1_ENABLED
11451
0
    else
11452
0
        name = xmlParseStartTag(ctxt);
11453
0
#endif /* LIBXML_SAX1_ENABLED */
11454
0
    if (ctxt->instate == XML_PARSER_EOF)
11455
0
        goto done;
11456
0
    if (name == NULL) {
11457
0
        spacePop(ctxt);
11458
0
        xmlHaltParser(ctxt);
11459
0
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11460
0
      ctxt->sax->endDocument(ctxt->userData);
11461
0
        goto done;
11462
0
    }
11463
0
#ifdef LIBXML_VALID_ENABLED
11464
    /*
11465
     * [ VC: Root Element Type ]
11466
     * The Name in the document type declaration must match
11467
     * the element type of the root element.
11468
     */
11469
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11470
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11471
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11472
0
#endif /* LIBXML_VALID_ENABLED */
11473
11474
    /*
11475
     * Check for an Empty Element.
11476
     */
11477
0
    if ((RAW == '/') && (NXT(1) == '>')) {
11478
0
        SKIP(2);
11479
11480
0
        if (ctxt->sax2) {
11481
0
      if ((ctxt->sax != NULL) &&
11482
0
          (ctxt->sax->endElementNs != NULL) &&
11483
0
          (!ctxt->disableSAX))
11484
0
          ctxt->sax->endElementNs(ctxt->userData, name,
11485
0
                                  prefix, URI);
11486
0
      if (ctxt->nsNr - nsNr > 0)
11487
0
          nsPop(ctxt, ctxt->nsNr - nsNr);
11488
0
#ifdef LIBXML_SAX1_ENABLED
11489
0
        } else {
11490
0
      if ((ctxt->sax != NULL) &&
11491
0
          (ctxt->sax->endElement != NULL) &&
11492
0
          (!ctxt->disableSAX))
11493
0
          ctxt->sax->endElement(ctxt->userData, name);
11494
0
#endif /* LIBXML_SAX1_ENABLED */
11495
0
        }
11496
0
        if (ctxt->instate == XML_PARSER_EOF)
11497
0
      goto done;
11498
0
        spacePop(ctxt);
11499
0
        if (ctxt->nameNr == 0) {
11500
0
      ctxt->instate = XML_PARSER_EPILOG;
11501
0
        } else {
11502
0
      ctxt->instate = XML_PARSER_CONTENT;
11503
0
        }
11504
0
                    ctxt->progressive = 1;
11505
0
        break;
11506
0
    }
11507
0
    if (RAW == '>') {
11508
0
        NEXT;
11509
0
    } else {
11510
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11511
0
           "Couldn't find end of Start Tag %s\n",
11512
0
           name);
11513
0
        nodePop(ctxt);
11514
0
        spacePop(ctxt);
11515
0
    }
11516
0
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11517
11518
0
    ctxt->instate = XML_PARSER_CONTENT;
11519
0
                ctxt->progressive = 1;
11520
0
                break;
11521
0
      }
11522
0
            case XML_PARSER_CONTENT: {
11523
0
    int id;
11524
0
    unsigned long cons;
11525
0
    if ((avail < 2) && (ctxt->inputNr == 1))
11526
0
        goto done;
11527
0
    cur = ctxt->input->cur[0];
11528
0
    next = ctxt->input->cur[1];
11529
11530
0
    id = ctxt->input->id;
11531
0
          cons = CUR_CONSUMED;
11532
0
    if ((cur == '<') && (next == '/')) {
11533
0
        ctxt->instate = XML_PARSER_END_TAG;
11534
0
        break;
11535
0
          } else if ((cur == '<') && (next == '?')) {
11536
0
        if ((!terminate) &&
11537
0
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11538
0
                        ctxt->progressive = XML_PARSER_PI;
11539
0
      goto done;
11540
0
                    }
11541
0
        xmlParsePI(ctxt);
11542
0
        ctxt->instate = XML_PARSER_CONTENT;
11543
0
                    ctxt->progressive = 1;
11544
0
    } else if ((cur == '<') && (next != '!')) {
11545
0
        ctxt->instate = XML_PARSER_START_TAG;
11546
0
        break;
11547
0
    } else if ((cur == '<') && (next == '!') &&
11548
0
               (ctxt->input->cur[2] == '-') &&
11549
0
         (ctxt->input->cur[3] == '-')) {
11550
0
        int term;
11551
11552
0
              if (avail < 4)
11553
0
            goto done;
11554
0
        ctxt->input->cur += 4;
11555
0
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11556
0
        ctxt->input->cur -= 4;
11557
0
        if ((!terminate) && (term < 0)) {
11558
0
                        ctxt->progressive = XML_PARSER_COMMENT;
11559
0
      goto done;
11560
0
                    }
11561
0
        xmlParseComment(ctxt);
11562
0
        ctxt->instate = XML_PARSER_CONTENT;
11563
0
                    ctxt->progressive = 1;
11564
0
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11565
0
        (ctxt->input->cur[2] == '[') &&
11566
0
        (ctxt->input->cur[3] == 'C') &&
11567
0
        (ctxt->input->cur[4] == 'D') &&
11568
0
        (ctxt->input->cur[5] == 'A') &&
11569
0
        (ctxt->input->cur[6] == 'T') &&
11570
0
        (ctxt->input->cur[7] == 'A') &&
11571
0
        (ctxt->input->cur[8] == '[')) {
11572
0
        SKIP(9);
11573
0
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11574
0
        break;
11575
0
    } else if ((cur == '<') && (next == '!') &&
11576
0
               (avail < 9)) {
11577
0
        goto done;
11578
0
    } else if (cur == '&') {
11579
0
        if ((!terminate) &&
11580
0
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11581
0
      goto done;
11582
0
        xmlParseReference(ctxt);
11583
0
    } else {
11584
        /* TODO Avoid the extra copy, handle directly !!! */
11585
        /*
11586
         * Goal of the following test is:
11587
         *  - minimize calls to the SAX 'character' callback
11588
         *    when they are mergeable
11589
         *  - handle a problem for isBlank when we only parse
11590
         *    a sequence of blank chars and the next one is
11591
         *    not available to check against '<' presence.
11592
         *  - tries to homogenize the differences in SAX
11593
         *    callbacks between the push and pull versions
11594
         *    of the parser.
11595
         */
11596
0
        if ((ctxt->inputNr == 1) &&
11597
0
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11598
0
      if (!terminate) {
11599
0
          if (ctxt->progressive) {
11600
0
        if ((lastlt == NULL) ||
11601
0
            (ctxt->input->cur > lastlt))
11602
0
            goto done;
11603
0
          } else if (xmlParseLookupSequence(ctxt,
11604
0
                                            '<', 0, 0) < 0) {
11605
0
        goto done;
11606
0
          }
11607
0
      }
11608
0
                    }
11609
0
        ctxt->checkIndex = 0;
11610
0
        xmlParseCharData(ctxt, 0);
11611
0
    }
11612
0
    if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11613
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11614
0
                    "detected an error in element content\n");
11615
0
        xmlHaltParser(ctxt);
11616
0
        break;
11617
0
    }
11618
0
    break;
11619
0
      }
11620
0
            case XML_PARSER_END_TAG:
11621
0
    if (avail < 2)
11622
0
        goto done;
11623
0
    if (!terminate) {
11624
0
        if (ctxt->progressive) {
11625
            /* > can be found unescaped in attribute values */
11626
0
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11627
0
          goto done;
11628
0
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11629
0
      goto done;
11630
0
        }
11631
0
    }
11632
0
    if (ctxt->sax2) {
11633
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11634
0
        nameNsPop(ctxt);
11635
0
    }
11636
0
#ifdef LIBXML_SAX1_ENABLED
11637
0
      else
11638
0
        xmlParseEndTag1(ctxt, 0);
11639
0
#endif /* LIBXML_SAX1_ENABLED */
11640
0
    if (ctxt->instate == XML_PARSER_EOF) {
11641
        /* Nothing */
11642
0
    } else if (ctxt->nameNr == 0) {
11643
0
        ctxt->instate = XML_PARSER_EPILOG;
11644
0
    } else {
11645
0
        ctxt->instate = XML_PARSER_CONTENT;
11646
0
    }
11647
0
    break;
11648
0
            case XML_PARSER_CDATA_SECTION: {
11649
          /*
11650
     * The Push mode need to have the SAX callback for
11651
     * cdataBlock merge back contiguous callbacks.
11652
     */
11653
0
    int base;
11654
11655
0
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11656
0
    if (base < 0) {
11657
0
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11658
0
            int tmp;
11659
11660
0
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11661
0
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11662
0
      if (tmp < 0) {
11663
0
          tmp = -tmp;
11664
0
          ctxt->input->cur += tmp;
11665
0
          goto encoding_error;
11666
0
      }
11667
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11668
0
          if (ctxt->sax->cdataBlock != NULL)
11669
0
        ctxt->sax->cdataBlock(ctxt->userData,
11670
0
                              ctxt->input->cur, tmp);
11671
0
          else if (ctxt->sax->characters != NULL)
11672
0
        ctxt->sax->characters(ctxt->userData,
11673
0
                              ctxt->input->cur, tmp);
11674
0
      }
11675
0
      if (ctxt->instate == XML_PARSER_EOF)
11676
0
          goto done;
11677
0
      SKIPL(tmp);
11678
0
      ctxt->checkIndex = 0;
11679
0
        }
11680
0
        goto done;
11681
0
    } else {
11682
0
        int tmp;
11683
11684
0
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11685
0
        if ((tmp < 0) || (tmp != base)) {
11686
0
      tmp = -tmp;
11687
0
      ctxt->input->cur += tmp;
11688
0
      goto encoding_error;
11689
0
        }
11690
0
        if ((ctxt->sax != NULL) && (base == 0) &&
11691
0
            (ctxt->sax->cdataBlock != NULL) &&
11692
0
            (!ctxt->disableSAX)) {
11693
      /*
11694
       * Special case to provide identical behaviour
11695
       * between pull and push parsers on empty CDATA
11696
       * sections
11697
       */
11698
0
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11699
0
           (!strncmp((const char *)&ctxt->input->cur[-9],
11700
0
                     "<![CDATA[", 9)))
11701
0
           ctxt->sax->cdataBlock(ctxt->userData,
11702
0
                                 BAD_CAST "", 0);
11703
0
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11704
0
      (!ctxt->disableSAX)) {
11705
0
      if (ctxt->sax->cdataBlock != NULL)
11706
0
          ctxt->sax->cdataBlock(ctxt->userData,
11707
0
              ctxt->input->cur, base);
11708
0
      else if (ctxt->sax->characters != NULL)
11709
0
          ctxt->sax->characters(ctxt->userData,
11710
0
              ctxt->input->cur, base);
11711
0
        }
11712
0
        if (ctxt->instate == XML_PARSER_EOF)
11713
0
      goto done;
11714
0
        SKIPL(base + 3);
11715
0
        ctxt->checkIndex = 0;
11716
0
        ctxt->instate = XML_PARSER_CONTENT;
11717
#ifdef DEBUG_PUSH
11718
        xmlGenericError(xmlGenericErrorContext,
11719
          "PP: entering CONTENT\n");
11720
#endif
11721
0
    }
11722
0
    break;
11723
0
      }
11724
0
            case XML_PARSER_MISC:
11725
0
    SKIP_BLANKS;
11726
0
    if (ctxt->input->buf == NULL)
11727
0
        avail = ctxt->input->length -
11728
0
                (ctxt->input->cur - ctxt->input->base);
11729
0
    else
11730
0
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11731
0
                (ctxt->input->cur - ctxt->input->base);
11732
0
    if (avail < 2)
11733
0
        goto done;
11734
0
    cur = ctxt->input->cur[0];
11735
0
    next = ctxt->input->cur[1];
11736
0
          if ((cur == '<') && (next == '?')) {
11737
0
        if ((!terminate) &&
11738
0
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11739
0
                        ctxt->progressive = XML_PARSER_PI;
11740
0
      goto done;
11741
0
                    }
11742
#ifdef DEBUG_PUSH
11743
        xmlGenericError(xmlGenericErrorContext,
11744
          "PP: Parsing PI\n");
11745
#endif
11746
0
        xmlParsePI(ctxt);
11747
0
        if (ctxt->instate == XML_PARSER_EOF)
11748
0
      goto done;
11749
0
        ctxt->instate = XML_PARSER_MISC;
11750
0
                    ctxt->progressive = 1;
11751
0
        ctxt->checkIndex = 0;
11752
0
    } else if ((cur == '<') && (next == '!') &&
11753
0
        (ctxt->input->cur[2] == '-') &&
11754
0
        (ctxt->input->cur[3] == '-')) {
11755
0
        if ((!terminate) &&
11756
0
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11757
0
                        ctxt->progressive = XML_PARSER_COMMENT;
11758
0
      goto done;
11759
0
                    }
11760
#ifdef DEBUG_PUSH
11761
        xmlGenericError(xmlGenericErrorContext,
11762
          "PP: Parsing Comment\n");
11763
#endif
11764
0
        xmlParseComment(ctxt);
11765
0
        if (ctxt->instate == XML_PARSER_EOF)
11766
0
      goto done;
11767
0
        ctxt->instate = XML_PARSER_MISC;
11768
0
                    ctxt->progressive = 1;
11769
0
        ctxt->checkIndex = 0;
11770
0
    } else if ((cur == '<') && (next == '!') &&
11771
0
        (ctxt->input->cur[2] == 'D') &&
11772
0
        (ctxt->input->cur[3] == 'O') &&
11773
0
        (ctxt->input->cur[4] == 'C') &&
11774
0
        (ctxt->input->cur[5] == 'T') &&
11775
0
        (ctxt->input->cur[6] == 'Y') &&
11776
0
        (ctxt->input->cur[7] == 'P') &&
11777
0
        (ctxt->input->cur[8] == 'E')) {
11778
0
        if ((!terminate) &&
11779
0
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11780
0
                        ctxt->progressive = XML_PARSER_DTD;
11781
0
      goto done;
11782
0
                    }
11783
#ifdef DEBUG_PUSH
11784
        xmlGenericError(xmlGenericErrorContext,
11785
          "PP: Parsing internal subset\n");
11786
#endif
11787
0
        ctxt->inSubset = 1;
11788
0
                    ctxt->progressive = 0;
11789
0
        ctxt->checkIndex = 0;
11790
0
        xmlParseDocTypeDecl(ctxt);
11791
0
        if (ctxt->instate == XML_PARSER_EOF)
11792
0
      goto done;
11793
0
        if (RAW == '[') {
11794
0
      ctxt->instate = XML_PARSER_DTD;
11795
#ifdef DEBUG_PUSH
11796
      xmlGenericError(xmlGenericErrorContext,
11797
        "PP: entering DTD\n");
11798
#endif
11799
0
        } else {
11800
      /*
11801
       * Create and update the external subset.
11802
       */
11803
0
      ctxt->inSubset = 2;
11804
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11805
0
          (ctxt->sax->externalSubset != NULL))
11806
0
          ctxt->sax->externalSubset(ctxt->userData,
11807
0
            ctxt->intSubName, ctxt->extSubSystem,
11808
0
            ctxt->extSubURI);
11809
0
      ctxt->inSubset = 0;
11810
0
      xmlCleanSpecialAttr(ctxt);
11811
0
      ctxt->instate = XML_PARSER_PROLOG;
11812
#ifdef DEBUG_PUSH
11813
      xmlGenericError(xmlGenericErrorContext,
11814
        "PP: entering PROLOG\n");
11815
#endif
11816
0
        }
11817
0
    } else if ((cur == '<') && (next == '!') &&
11818
0
               (avail < 9)) {
11819
0
        goto done;
11820
0
    } else {
11821
0
        ctxt->instate = XML_PARSER_START_TAG;
11822
0
        ctxt->progressive = XML_PARSER_START_TAG;
11823
0
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11824
#ifdef DEBUG_PUSH
11825
        xmlGenericError(xmlGenericErrorContext,
11826
          "PP: entering START_TAG\n");
11827
#endif
11828
0
    }
11829
0
    break;
11830
0
            case XML_PARSER_PROLOG:
11831
0
    SKIP_BLANKS;
11832
0
    if (ctxt->input->buf == NULL)
11833
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11834
0
    else
11835
0
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11836
0
                            (ctxt->input->cur - ctxt->input->base);
11837
0
    if (avail < 2)
11838
0
        goto done;
11839
0
    cur = ctxt->input->cur[0];
11840
0
    next = ctxt->input->cur[1];
11841
0
          if ((cur == '<') && (next == '?')) {
11842
0
        if ((!terminate) &&
11843
0
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11844
0
                        ctxt->progressive = XML_PARSER_PI;
11845
0
      goto done;
11846
0
                    }
11847
#ifdef DEBUG_PUSH
11848
        xmlGenericError(xmlGenericErrorContext,
11849
          "PP: Parsing PI\n");
11850
#endif
11851
0
        xmlParsePI(ctxt);
11852
0
        if (ctxt->instate == XML_PARSER_EOF)
11853
0
      goto done;
11854
0
        ctxt->instate = XML_PARSER_PROLOG;
11855
0
                    ctxt->progressive = 1;
11856
0
    } else if ((cur == '<') && (next == '!') &&
11857
0
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11858
0
        if ((!terminate) &&
11859
0
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11860
0
                        ctxt->progressive = XML_PARSER_COMMENT;
11861
0
      goto done;
11862
0
                    }
11863
#ifdef DEBUG_PUSH
11864
        xmlGenericError(xmlGenericErrorContext,
11865
          "PP: Parsing Comment\n");
11866
#endif
11867
0
        xmlParseComment(ctxt);
11868
0
        if (ctxt->instate == XML_PARSER_EOF)
11869
0
      goto done;
11870
0
        ctxt->instate = XML_PARSER_PROLOG;
11871
0
                    ctxt->progressive = 1;
11872
0
    } else if ((cur == '<') && (next == '!') &&
11873
0
               (avail < 4)) {
11874
0
        goto done;
11875
0
    } else {
11876
0
        ctxt->instate = XML_PARSER_START_TAG;
11877
0
        if (ctxt->progressive == 0)
11878
0
      ctxt->progressive = XML_PARSER_START_TAG;
11879
0
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11880
#ifdef DEBUG_PUSH
11881
        xmlGenericError(xmlGenericErrorContext,
11882
          "PP: entering START_TAG\n");
11883
#endif
11884
0
    }
11885
0
    break;
11886
0
            case XML_PARSER_EPILOG:
11887
0
    SKIP_BLANKS;
11888
0
    if (ctxt->input->buf == NULL)
11889
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11890
0
    else
11891
0
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11892
0
                            (ctxt->input->cur - ctxt->input->base);
11893
0
    if (avail < 2)
11894
0
        goto done;
11895
0
    cur = ctxt->input->cur[0];
11896
0
    next = ctxt->input->cur[1];
11897
0
          if ((cur == '<') && (next == '?')) {
11898
0
        if ((!terminate) &&
11899
0
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11900
0
                        ctxt->progressive = XML_PARSER_PI;
11901
0
      goto done;
11902
0
                    }
11903
#ifdef DEBUG_PUSH
11904
        xmlGenericError(xmlGenericErrorContext,
11905
          "PP: Parsing PI\n");
11906
#endif
11907
0
        xmlParsePI(ctxt);
11908
0
        if (ctxt->instate == XML_PARSER_EOF)
11909
0
      goto done;
11910
0
        ctxt->instate = XML_PARSER_EPILOG;
11911
0
                    ctxt->progressive = 1;
11912
0
    } else if ((cur == '<') && (next == '!') &&
11913
0
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11914
0
        if ((!terminate) &&
11915
0
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11916
0
                        ctxt->progressive = XML_PARSER_COMMENT;
11917
0
      goto done;
11918
0
                    }
11919
#ifdef DEBUG_PUSH
11920
        xmlGenericError(xmlGenericErrorContext,
11921
          "PP: Parsing Comment\n");
11922
#endif
11923
0
        xmlParseComment(ctxt);
11924
0
        if (ctxt->instate == XML_PARSER_EOF)
11925
0
      goto done;
11926
0
        ctxt->instate = XML_PARSER_EPILOG;
11927
0
                    ctxt->progressive = 1;
11928
0
    } else if ((cur == '<') && (next == '!') &&
11929
0
               (avail < 4)) {
11930
0
        goto done;
11931
0
    } else {
11932
0
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11933
0
        xmlHaltParser(ctxt);
11934
#ifdef DEBUG_PUSH
11935
        xmlGenericError(xmlGenericErrorContext,
11936
          "PP: entering EOF\n");
11937
#endif
11938
0
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11939
0
      ctxt->sax->endDocument(ctxt->userData);
11940
0
        goto done;
11941
0
    }
11942
0
    break;
11943
0
            case XML_PARSER_DTD: {
11944
          /*
11945
     * Sorry but progressive parsing of the internal subset
11946
     * is not expected to be supported. We first check that
11947
     * the full content of the internal subset is available and
11948
     * the parsing is launched only at that point.
11949
     * Internal subset ends up with "']' S? '>'" in an unescaped
11950
     * section and not in a ']]>' sequence which are conditional
11951
     * sections (whoever argued to keep that crap in XML deserve
11952
     * a place in hell !).
11953
     */
11954
0
    int base, i;
11955
0
    xmlChar *buf;
11956
0
          xmlChar quote = 0;
11957
0
                size_t use;
11958
11959
0
    base = ctxt->input->cur - ctxt->input->base;
11960
0
    if (base < 0) return(0);
11961
0
    if (ctxt->checkIndex > base)
11962
0
        base = ctxt->checkIndex;
11963
0
    buf = xmlBufContent(ctxt->input->buf->buffer);
11964
0
                use = xmlBufUse(ctxt->input->buf->buffer);
11965
0
    for (;(unsigned int) base < use; base++) {
11966
0
        if (quote != 0) {
11967
0
            if (buf[base] == quote)
11968
0
          quote = 0;
11969
0
      continue;
11970
0
        }
11971
0
        if ((quote == 0) && (buf[base] == '<')) {
11972
0
            int found  = 0;
11973
      /* special handling of comments */
11974
0
            if (((unsigned int) base + 4 < use) &&
11975
0
          (buf[base + 1] == '!') &&
11976
0
          (buf[base + 2] == '-') &&
11977
0
          (buf[base + 3] == '-')) {
11978
0
          for (;(unsigned int) base + 3 < use; base++) {
11979
0
        if ((buf[base] == '-') &&
11980
0
            (buf[base + 1] == '-') &&
11981
0
            (buf[base + 2] == '>')) {
11982
0
            found = 1;
11983
0
            base += 2;
11984
0
            break;
11985
0
        }
11986
0
                }
11987
0
          if (!found) {
11988
#if 0
11989
              fprintf(stderr, "unfinished comment\n");
11990
#endif
11991
0
              break; /* for */
11992
0
                }
11993
0
                continue;
11994
0
      }
11995
0
        }
11996
0
        if (buf[base] == '"') {
11997
0
            quote = '"';
11998
0
      continue;
11999
0
        }
12000
0
        if (buf[base] == '\'') {
12001
0
            quote = '\'';
12002
0
      continue;
12003
0
        }
12004
0
        if (buf[base] == ']') {
12005
#if 0
12006
            fprintf(stderr, "%c%c%c%c: ", buf[base],
12007
              buf[base + 1], buf[base + 2], buf[base + 3]);
12008
#endif
12009
0
            if ((unsigned int) base +1 >= use)
12010
0
          break;
12011
0
      if (buf[base + 1] == ']') {
12012
          /* conditional crap, skip both ']' ! */
12013
0
          base++;
12014
0
          continue;
12015
0
      }
12016
0
            for (i = 1; (unsigned int) base + i < use; i++) {
12017
0
          if (buf[base + i] == '>') {
12018
#if 0
12019
              fprintf(stderr, "found\n");
12020
#endif
12021
0
              goto found_end_int_subset;
12022
0
          }
12023
0
          if (!IS_BLANK_CH(buf[base + i])) {
12024
#if 0
12025
              fprintf(stderr, "not found\n");
12026
#endif
12027
0
              goto not_end_of_int_subset;
12028
0
          }
12029
0
      }
12030
#if 0
12031
      fprintf(stderr, "end of stream\n");
12032
#endif
12033
0
            break;
12034
12035
0
        }
12036
0
not_end_of_int_subset:
12037
0
                    continue; /* for */
12038
0
    }
12039
    /*
12040
     * We didn't find the end of the Internal subset
12041
     */
12042
0
                if (quote == 0)
12043
0
                    ctxt->checkIndex = base;
12044
0
                else
12045
0
                    ctxt->checkIndex = 0;
12046
#ifdef DEBUG_PUSH
12047
    if (next == 0)
12048
        xmlGenericError(xmlGenericErrorContext,
12049
          "PP: lookup of int subset end filed\n");
12050
#endif
12051
0
          goto done;
12052
12053
0
found_end_int_subset:
12054
0
                ctxt->checkIndex = 0;
12055
0
    xmlParseInternalSubset(ctxt);
12056
0
    if (ctxt->instate == XML_PARSER_EOF)
12057
0
        goto done;
12058
0
    ctxt->inSubset = 2;
12059
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12060
0
        (ctxt->sax->externalSubset != NULL))
12061
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12062
0
          ctxt->extSubSystem, ctxt->extSubURI);
12063
0
    ctxt->inSubset = 0;
12064
0
    xmlCleanSpecialAttr(ctxt);
12065
0
    if (ctxt->instate == XML_PARSER_EOF)
12066
0
        goto done;
12067
0
    ctxt->instate = XML_PARSER_PROLOG;
12068
0
    ctxt->checkIndex = 0;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering PROLOG\n");
12072
#endif
12073
0
                break;
12074
0
      }
12075
0
            case XML_PARSER_COMMENT:
12076
0
    xmlGenericError(xmlGenericErrorContext,
12077
0
      "PP: internal error, state == COMMENT\n");
12078
0
    ctxt->instate = XML_PARSER_CONTENT;
12079
#ifdef DEBUG_PUSH
12080
    xmlGenericError(xmlGenericErrorContext,
12081
      "PP: entering CONTENT\n");
12082
#endif
12083
0
    break;
12084
0
            case XML_PARSER_IGNORE:
12085
0
    xmlGenericError(xmlGenericErrorContext,
12086
0
      "PP: internal error, state == IGNORE");
12087
0
          ctxt->instate = XML_PARSER_DTD;
12088
#ifdef DEBUG_PUSH
12089
    xmlGenericError(xmlGenericErrorContext,
12090
      "PP: entering DTD\n");
12091
#endif
12092
0
          break;
12093
0
            case XML_PARSER_PI:
12094
0
    xmlGenericError(xmlGenericErrorContext,
12095
0
      "PP: internal error, state == PI\n");
12096
0
    ctxt->instate = XML_PARSER_CONTENT;
12097
#ifdef DEBUG_PUSH
12098
    xmlGenericError(xmlGenericErrorContext,
12099
      "PP: entering CONTENT\n");
12100
#endif
12101
0
    break;
12102
0
            case XML_PARSER_ENTITY_DECL:
12103
0
    xmlGenericError(xmlGenericErrorContext,
12104
0
      "PP: internal error, state == ENTITY_DECL\n");
12105
0
    ctxt->instate = XML_PARSER_DTD;
12106
#ifdef DEBUG_PUSH
12107
    xmlGenericError(xmlGenericErrorContext,
12108
      "PP: entering DTD\n");
12109
#endif
12110
0
    break;
12111
0
            case XML_PARSER_ENTITY_VALUE:
12112
0
    xmlGenericError(xmlGenericErrorContext,
12113
0
      "PP: internal error, state == ENTITY_VALUE\n");
12114
0
    ctxt->instate = XML_PARSER_CONTENT;
12115
#ifdef DEBUG_PUSH
12116
    xmlGenericError(xmlGenericErrorContext,
12117
      "PP: entering DTD\n");
12118
#endif
12119
0
    break;
12120
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12121
0
    xmlGenericError(xmlGenericErrorContext,
12122
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12123
0
    ctxt->instate = XML_PARSER_START_TAG;
12124
#ifdef DEBUG_PUSH
12125
    xmlGenericError(xmlGenericErrorContext,
12126
      "PP: entering START_TAG\n");
12127
#endif
12128
0
    break;
12129
0
            case XML_PARSER_SYSTEM_LITERAL:
12130
0
    xmlGenericError(xmlGenericErrorContext,
12131
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12132
0
    ctxt->instate = XML_PARSER_START_TAG;
12133
#ifdef DEBUG_PUSH
12134
    xmlGenericError(xmlGenericErrorContext,
12135
      "PP: entering START_TAG\n");
12136
#endif
12137
0
    break;
12138
0
            case XML_PARSER_PUBLIC_LITERAL:
12139
0
    xmlGenericError(xmlGenericErrorContext,
12140
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12141
0
    ctxt->instate = XML_PARSER_START_TAG;
12142
#ifdef DEBUG_PUSH
12143
    xmlGenericError(xmlGenericErrorContext,
12144
      "PP: entering START_TAG\n");
12145
#endif
12146
0
    break;
12147
0
  }
12148
0
    }
12149
0
done:
12150
#ifdef DEBUG_PUSH
12151
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12152
#endif
12153
0
    return(ret);
12154
0
encoding_error:
12155
0
    {
12156
0
        char buffer[150];
12157
12158
0
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12159
0
      ctxt->input->cur[0], ctxt->input->cur[1],
12160
0
      ctxt->input->cur[2], ctxt->input->cur[3]);
12161
0
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12162
0
         "Input is not proper UTF-8, indicate encoding !\n%s",
12163
0
         BAD_CAST buffer, NULL);
12164
0
    }
12165
0
    return(0);
12166
0
}
12167
12168
/**
12169
 * xmlParseCheckTransition:
12170
 * @ctxt:  an XML parser context
12171
 * @chunk:  a char array
12172
 * @size:  the size in byte of the chunk
12173
 *
12174
 * Check depending on the current parser state if the chunk given must be
12175
 * processed immediately or one need more data to advance on parsing.
12176
 *
12177
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12178
 */
12179
static int
12180
0
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12181
0
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12182
0
        return(-1);
12183
0
    if (ctxt->instate == XML_PARSER_START_TAG) {
12184
0
        if (memchr(chunk, '>', size) != NULL)
12185
0
            return(1);
12186
0
        return(0);
12187
0
    }
12188
0
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12189
0
        if (memchr(chunk, '>', size) != NULL)
12190
0
            return(1);
12191
0
        return(0);
12192
0
    }
12193
0
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12194
0
        if (memchr(chunk, '>', size) != NULL)
12195
0
            return(1);
12196
0
        return(0);
12197
0
    }
12198
0
    if (ctxt->progressive == XML_PARSER_PI) {
12199
0
        if (memchr(chunk, '>', size) != NULL)
12200
0
            return(1);
12201
0
        return(0);
12202
0
    }
12203
0
    if (ctxt->instate == XML_PARSER_END_TAG) {
12204
0
        if (memchr(chunk, '>', size) != NULL)
12205
0
            return(1);
12206
0
        return(0);
12207
0
    }
12208
0
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12209
0
        (ctxt->instate == XML_PARSER_DTD)) {
12210
0
        if (memchr(chunk, '>', size) != NULL)
12211
0
            return(1);
12212
0
        return(0);
12213
0
    }
12214
0
    return(1);
12215
0
}
12216
12217
/**
12218
 * xmlParseChunk:
12219
 * @ctxt:  an XML parser context
12220
 * @chunk:  a char array
12221
 * @size:  the size in byte of the chunk
12222
 * @terminate:  last chunk indicator
12223
 *
12224
 * Parse a Chunk of memory
12225
 *
12226
 * Returns zero if no error, the xmlParserErrors otherwise.
12227
 */
12228
int
12229
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12230
0
              int terminate) {
12231
0
    int end_in_lf = 0;
12232
0
    int remain = 0;
12233
0
    size_t old_avail = 0;
12234
0
    size_t avail = 0;
12235
12236
0
    if (ctxt == NULL)
12237
0
        return(XML_ERR_INTERNAL_ERROR);
12238
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12239
0
        return(ctxt->errNo);
12240
0
    if (ctxt->instate == XML_PARSER_EOF)
12241
0
        return(-1);
12242
0
    if (ctxt->instate == XML_PARSER_START)
12243
0
        xmlDetectSAX2(ctxt);
12244
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12245
0
        (chunk[size - 1] == '\r')) {
12246
0
  end_in_lf = 1;
12247
0
  size--;
12248
0
    }
12249
12250
0
xmldecl_done:
12251
12252
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12253
0
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12254
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12255
0
  size_t cur = ctxt->input->cur - ctxt->input->base;
12256
0
  int res;
12257
12258
0
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12259
        /*
12260
         * Specific handling if we autodetected an encoding, we should not
12261
         * push more than the first line ... which depend on the encoding
12262
         * And only push the rest once the final encoding was detected
12263
         */
12264
0
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12265
0
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12266
0
            unsigned int len = 45;
12267
12268
0
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12269
0
                               BAD_CAST "UTF-16")) ||
12270
0
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12271
0
                               BAD_CAST "UTF16")))
12272
0
                len = 90;
12273
0
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12274
0
                                    BAD_CAST "UCS-4")) ||
12275
0
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12276
0
                                    BAD_CAST "UCS4")))
12277
0
                len = 180;
12278
12279
0
            if (ctxt->input->buf->rawconsumed < len)
12280
0
                len -= ctxt->input->buf->rawconsumed;
12281
12282
            /*
12283
             * Change size for reading the initial declaration only
12284
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12285
             * will blindly copy extra bytes from memory.
12286
             */
12287
0
            if ((unsigned int) size > len) {
12288
0
                remain = size - len;
12289
0
                size = len;
12290
0
            } else {
12291
0
                remain = 0;
12292
0
            }
12293
0
        }
12294
0
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12295
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12296
0
  if (res < 0) {
12297
0
      ctxt->errNo = XML_PARSER_EOF;
12298
0
      xmlHaltParser(ctxt);
12299
0
      return (XML_PARSER_EOF);
12300
0
  }
12301
#ifdef DEBUG_PUSH
12302
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12303
#endif
12304
12305
0
    } else if (ctxt->instate != XML_PARSER_EOF) {
12306
0
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12307
0
      xmlParserInputBufferPtr in = ctxt->input->buf;
12308
0
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12309
0
        (in->raw != NULL)) {
12310
0
    int nbchars;
12311
0
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12312
0
    size_t current = ctxt->input->cur - ctxt->input->base;
12313
12314
0
    nbchars = xmlCharEncInput(in, terminate);
12315
0
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12316
0
    if (nbchars < 0) {
12317
        /* TODO 2.6.0 */
12318
0
        xmlGenericError(xmlGenericErrorContext,
12319
0
            "xmlParseChunk: encoder error\n");
12320
0
                    xmlHaltParser(ctxt);
12321
0
        return(XML_ERR_INVALID_ENCODING);
12322
0
    }
12323
0
      }
12324
0
  }
12325
0
    }
12326
0
    if (remain != 0) {
12327
0
        xmlParseTryOrFinish(ctxt, 0);
12328
0
    } else {
12329
0
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12330
0
            avail = xmlBufUse(ctxt->input->buf->buffer);
12331
        /*
12332
         * Depending on the current state it may not be such
12333
         * a good idea to try parsing if there is nothing in the chunk
12334
         * which would be worth doing a parser state transition and we
12335
         * need to wait for more data
12336
         */
12337
0
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12338
0
            (old_avail == 0) || (avail == 0) ||
12339
0
            (xmlParseCheckTransition(ctxt,
12340
0
                       (const char *)&ctxt->input->base[old_avail],
12341
0
                                     avail - old_avail)))
12342
0
            xmlParseTryOrFinish(ctxt, terminate);
12343
0
    }
12344
0
    if (ctxt->instate == XML_PARSER_EOF)
12345
0
        return(ctxt->errNo);
12346
12347
0
    if ((ctxt->input != NULL) &&
12348
0
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12349
0
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12350
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12351
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12352
0
        xmlHaltParser(ctxt);
12353
0
    }
12354
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12355
0
        return(ctxt->errNo);
12356
12357
0
    if (remain != 0) {
12358
0
        chunk += size;
12359
0
        size = remain;
12360
0
        remain = 0;
12361
0
        goto xmldecl_done;
12362
0
    }
12363
0
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12364
0
        (ctxt->input->buf != NULL)) {
12365
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12366
0
           ctxt->input);
12367
0
  size_t current = ctxt->input->cur - ctxt->input->base;
12368
12369
0
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12370
12371
0
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12372
0
            base, current);
12373
0
    }
12374
0
    if (terminate) {
12375
  /*
12376
   * Check for termination
12377
   */
12378
0
  int cur_avail = 0;
12379
12380
0
  if (ctxt->input != NULL) {
12381
0
      if (ctxt->input->buf == NULL)
12382
0
    cur_avail = ctxt->input->length -
12383
0
          (ctxt->input->cur - ctxt->input->base);
12384
0
      else
12385
0
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12386
0
                    (ctxt->input->cur - ctxt->input->base);
12387
0
  }
12388
12389
0
  if ((ctxt->instate != XML_PARSER_EOF) &&
12390
0
      (ctxt->instate != XML_PARSER_EPILOG)) {
12391
0
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12392
0
  }
12393
0
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12394
0
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12395
0
  }
12396
0
  if (ctxt->instate != XML_PARSER_EOF) {
12397
0
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12398
0
    ctxt->sax->endDocument(ctxt->userData);
12399
0
  }
12400
0
  ctxt->instate = XML_PARSER_EOF;
12401
0
    }
12402
0
    if (ctxt->wellFormed == 0)
12403
0
  return((xmlParserErrors) ctxt->errNo);
12404
0
    else
12405
0
        return(0);
12406
0
}
12407
12408
/************************************************************************
12409
 *                  *
12410
 *    I/O front end functions to the parser     *
12411
 *                  *
12412
 ************************************************************************/
12413
12414
/**
12415
 * xmlCreatePushParserCtxt:
12416
 * @sax:  a SAX handler
12417
 * @user_data:  The user data returned on SAX callbacks
12418
 * @chunk:  a pointer to an array of chars
12419
 * @size:  number of chars in the array
12420
 * @filename:  an optional file name or URI
12421
 *
12422
 * Create a parser context for using the XML parser in push mode.
12423
 * If @buffer and @size are non-NULL, the data is used to detect
12424
 * the encoding.  The remaining characters will be parsed so they
12425
 * don't need to be fed in again through xmlParseChunk.
12426
 * To allow content encoding detection, @size should be >= 4
12427
 * The value of @filename is used for fetching external entities
12428
 * and error/warning reports.
12429
 *
12430
 * Returns the new parser context or NULL
12431
 */
12432
12433
xmlParserCtxtPtr
12434
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12435
0
                        const char *chunk, int size, const char *filename) {
12436
0
    xmlParserCtxtPtr ctxt;
12437
0
    xmlParserInputPtr inputStream;
12438
0
    xmlParserInputBufferPtr buf;
12439
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12440
12441
    /*
12442
     * plug some encoding conversion routines
12443
     */
12444
0
    if ((chunk != NULL) && (size >= 4))
12445
0
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12446
12447
0
    buf = xmlAllocParserInputBuffer(enc);
12448
0
    if (buf == NULL) return(NULL);
12449
12450
0
    ctxt = xmlNewParserCtxt();
12451
0
    if (ctxt == NULL) {
12452
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12453
0
  xmlFreeParserInputBuffer(buf);
12454
0
  return(NULL);
12455
0
    }
12456
0
    ctxt->dictNames = 1;
12457
0
    if (sax != NULL) {
12458
0
#ifdef LIBXML_SAX1_ENABLED
12459
0
  if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12460
0
#endif /* LIBXML_SAX1_ENABLED */
12461
0
      xmlFree(ctxt->sax);
12462
0
  ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12463
0
  if (ctxt->sax == NULL) {
12464
0
      xmlErrMemory(ctxt, NULL);
12465
0
      xmlFreeParserInputBuffer(buf);
12466
0
      xmlFreeParserCtxt(ctxt);
12467
0
      return(NULL);
12468
0
  }
12469
0
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12470
0
  if (sax->initialized == XML_SAX2_MAGIC)
12471
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12472
0
  else
12473
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12474
0
  if (user_data != NULL)
12475
0
      ctxt->userData = user_data;
12476
0
    }
12477
0
    if (filename == NULL) {
12478
0
  ctxt->directory = NULL;
12479
0
    } else {
12480
0
        ctxt->directory = xmlParserGetDirectory(filename);
12481
0
    }
12482
12483
0
    inputStream = xmlNewInputStream(ctxt);
12484
0
    if (inputStream == NULL) {
12485
0
  xmlFreeParserCtxt(ctxt);
12486
0
  xmlFreeParserInputBuffer(buf);
12487
0
  return(NULL);
12488
0
    }
12489
12490
0
    if (filename == NULL)
12491
0
  inputStream->filename = NULL;
12492
0
    else {
12493
0
  inputStream->filename = (char *)
12494
0
      xmlCanonicPath((const xmlChar *) filename);
12495
0
  if (inputStream->filename == NULL) {
12496
0
      xmlFreeParserCtxt(ctxt);
12497
0
      xmlFreeParserInputBuffer(buf);
12498
0
      return(NULL);
12499
0
  }
12500
0
    }
12501
0
    inputStream->buf = buf;
12502
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12503
0
    inputPush(ctxt, inputStream);
12504
12505
    /*
12506
     * If the caller didn't provide an initial 'chunk' for determining
12507
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12508
     * that it can be automatically determined later
12509
     */
12510
0
    if ((size == 0) || (chunk == NULL)) {
12511
0
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12512
0
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12513
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12514
0
  size_t cur = ctxt->input->cur - ctxt->input->base;
12515
12516
0
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12517
12518
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12519
#ifdef DEBUG_PUSH
12520
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12521
#endif
12522
0
    }
12523
12524
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12525
0
        xmlSwitchEncoding(ctxt, enc);
12526
0
    }
12527
12528
0
    return(ctxt);
12529
0
}
12530
#endif /* LIBXML_PUSH_ENABLED */
12531
12532
/**
12533
 * xmlHaltParser:
12534
 * @ctxt:  an XML parser context
12535
 *
12536
 * Blocks further parser processing don't override error
12537
 * for internal use
12538
 */
12539
static void
12540
1.13k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12541
1.13k
    if (ctxt == NULL)
12542
0
        return;
12543
1.13k
    ctxt->instate = XML_PARSER_EOF;
12544
1.13k
    ctxt->disableSAX = 1;
12545
1.13k
    while (ctxt->inputNr > 1)
12546
0
        xmlFreeInputStream(inputPop(ctxt));
12547
1.13k
    if (ctxt->input != NULL) {
12548
        /*
12549
   * in case there was a specific allocation deallocate before
12550
   * overriding base
12551
   */
12552
1.13k
        if (ctxt->input->free != NULL) {
12553
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12554
0
      ctxt->input->free = NULL;
12555
0
  }
12556
1.13k
        if (ctxt->input->buf != NULL) {
12557
1.13k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12558
1.13k
            ctxt->input->buf = NULL;
12559
1.13k
        }
12560
1.13k
  ctxt->input->cur = BAD_CAST"";
12561
1.13k
        ctxt->input->length = 0;
12562
1.13k
  ctxt->input->base = ctxt->input->cur;
12563
1.13k
        ctxt->input->end = ctxt->input->cur;
12564
1.13k
    }
12565
1.13k
}
12566
12567
/**
12568
 * xmlStopParser:
12569
 * @ctxt:  an XML parser context
12570
 *
12571
 * Blocks further parser processing
12572
 */
12573
void
12574
9
xmlStopParser(xmlParserCtxtPtr ctxt) {
12575
9
    if (ctxt == NULL)
12576
0
        return;
12577
9
    xmlHaltParser(ctxt);
12578
9
    ctxt->errNo = XML_ERR_USER_STOP;
12579
9
}
12580
12581
/**
12582
 * xmlCreateIOParserCtxt:
12583
 * @sax:  a SAX handler
12584
 * @user_data:  The user data returned on SAX callbacks
12585
 * @ioread:  an I/O read function
12586
 * @ioclose:  an I/O close function
12587
 * @ioctx:  an I/O handler
12588
 * @enc:  the charset encoding if known
12589
 *
12590
 * Create a parser context for using the XML parser with an existing
12591
 * I/O stream
12592
 *
12593
 * Returns the new parser context or NULL
12594
 */
12595
xmlParserCtxtPtr
12596
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12597
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12598
0
  void *ioctx, xmlCharEncoding enc) {
12599
0
    xmlParserCtxtPtr ctxt;
12600
0
    xmlParserInputPtr inputStream;
12601
0
    xmlParserInputBufferPtr buf;
12602
12603
0
    if (ioread == NULL) return(NULL);
12604
12605
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12606
0
    if (buf == NULL) {
12607
0
        if (ioclose != NULL)
12608
0
            ioclose(ioctx);
12609
0
        return (NULL);
12610
0
    }
12611
12612
0
    ctxt = xmlNewParserCtxt();
12613
0
    if (ctxt == NULL) {
12614
0
  xmlFreeParserInputBuffer(buf);
12615
0
  return(NULL);
12616
0
    }
12617
0
    if (sax != NULL) {
12618
0
#ifdef LIBXML_SAX1_ENABLED
12619
0
  if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12620
0
#endif /* LIBXML_SAX1_ENABLED */
12621
0
      xmlFree(ctxt->sax);
12622
0
  ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12623
0
  if (ctxt->sax == NULL) {
12624
0
      xmlFreeParserInputBuffer(buf);
12625
0
      xmlErrMemory(ctxt, NULL);
12626
0
      xmlFreeParserCtxt(ctxt);
12627
0
      return(NULL);
12628
0
  }
12629
0
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12630
0
  if (sax->initialized == XML_SAX2_MAGIC)
12631
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12632
0
  else
12633
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12634
0
  if (user_data != NULL)
12635
0
      ctxt->userData = user_data;
12636
0
    }
12637
12638
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12639
0
    if (inputStream == NULL) {
12640
0
  xmlFreeParserCtxt(ctxt);
12641
0
  return(NULL);
12642
0
    }
12643
0
    inputPush(ctxt, inputStream);
12644
12645
0
    return(ctxt);
12646
0
}
12647
12648
#ifdef LIBXML_VALID_ENABLED
12649
/************************************************************************
12650
 *                  *
12651
 *    Front ends when parsing a DTD       *
12652
 *                  *
12653
 ************************************************************************/
12654
12655
/**
12656
 * xmlIOParseDTD:
12657
 * @sax:  the SAX handler block or NULL
12658
 * @input:  an Input Buffer
12659
 * @enc:  the charset encoding if known
12660
 *
12661
 * Load and parse a DTD
12662
 *
12663
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12664
 * @input will be freed by the function in any case.
12665
 */
12666
12667
xmlDtdPtr
12668
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12669
0
        xmlCharEncoding enc) {
12670
0
    xmlDtdPtr ret = NULL;
12671
0
    xmlParserCtxtPtr ctxt;
12672
0
    xmlParserInputPtr pinput = NULL;
12673
0
    xmlChar start[4];
12674
12675
0
    if (input == NULL)
12676
0
  return(NULL);
12677
12678
0
    ctxt = xmlNewParserCtxt();
12679
0
    if (ctxt == NULL) {
12680
0
        xmlFreeParserInputBuffer(input);
12681
0
  return(NULL);
12682
0
    }
12683
12684
    /* We are loading a DTD */
12685
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12686
12687
    /*
12688
     * Set-up the SAX context
12689
     */
12690
0
    if (sax != NULL) {
12691
0
  if (ctxt->sax != NULL)
12692
0
      xmlFree(ctxt->sax);
12693
0
        ctxt->sax = sax;
12694
0
        ctxt->userData = ctxt;
12695
0
    }
12696
0
    xmlDetectSAX2(ctxt);
12697
12698
    /*
12699
     * generate a parser input from the I/O handler
12700
     */
12701
12702
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12703
0
    if (pinput == NULL) {
12704
0
        if (sax != NULL) ctxt->sax = NULL;
12705
0
        xmlFreeParserInputBuffer(input);
12706
0
  xmlFreeParserCtxt(ctxt);
12707
0
  return(NULL);
12708
0
    }
12709
12710
    /*
12711
     * plug some encoding conversion routines here.
12712
     */
12713
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12714
0
        if (sax != NULL) ctxt->sax = NULL;
12715
0
  xmlFreeParserCtxt(ctxt);
12716
0
  return(NULL);
12717
0
    }
12718
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12719
0
        xmlSwitchEncoding(ctxt, enc);
12720
0
    }
12721
12722
0
    pinput->filename = NULL;
12723
0
    pinput->line = 1;
12724
0
    pinput->col = 1;
12725
0
    pinput->base = ctxt->input->cur;
12726
0
    pinput->cur = ctxt->input->cur;
12727
0
    pinput->free = NULL;
12728
12729
    /*
12730
     * let's parse that entity knowing it's an external subset.
12731
     */
12732
0
    ctxt->inSubset = 2;
12733
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12734
0
    if (ctxt->myDoc == NULL) {
12735
0
  xmlErrMemory(ctxt, "New Doc failed");
12736
0
  return(NULL);
12737
0
    }
12738
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12739
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12740
0
                                 BAD_CAST "none", BAD_CAST "none");
12741
12742
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12743
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12744
  /*
12745
   * Get the 4 first bytes and decode the charset
12746
   * if enc != XML_CHAR_ENCODING_NONE
12747
   * plug some encoding conversion routines.
12748
   */
12749
0
  start[0] = RAW;
12750
0
  start[1] = NXT(1);
12751
0
  start[2] = NXT(2);
12752
0
  start[3] = NXT(3);
12753
0
  enc = xmlDetectCharEncoding(start, 4);
12754
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12755
0
      xmlSwitchEncoding(ctxt, enc);
12756
0
  }
12757
0
    }
12758
12759
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12760
12761
0
    if (ctxt->myDoc != NULL) {
12762
0
  if (ctxt->wellFormed) {
12763
0
      ret = ctxt->myDoc->extSubset;
12764
0
      ctxt->myDoc->extSubset = NULL;
12765
0
      if (ret != NULL) {
12766
0
    xmlNodePtr tmp;
12767
12768
0
    ret->doc = NULL;
12769
0
    tmp = ret->children;
12770
0
    while (tmp != NULL) {
12771
0
        tmp->doc = NULL;
12772
0
        tmp = tmp->next;
12773
0
    }
12774
0
      }
12775
0
  } else {
12776
0
      ret = NULL;
12777
0
  }
12778
0
        xmlFreeDoc(ctxt->myDoc);
12779
0
        ctxt->myDoc = NULL;
12780
0
    }
12781
0
    if (sax != NULL) ctxt->sax = NULL;
12782
0
    xmlFreeParserCtxt(ctxt);
12783
12784
0
    return(ret);
12785
0
}
12786
12787
/**
12788
 * xmlSAXParseDTD:
12789
 * @sax:  the SAX handler block
12790
 * @ExternalID:  a NAME* containing the External ID of the DTD
12791
 * @SystemID:  a NAME* containing the URL to the DTD
12792
 *
12793
 * Load and parse an external subset.
12794
 *
12795
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12796
 */
12797
12798
xmlDtdPtr
12799
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12800
0
                          const xmlChar *SystemID) {
12801
0
    xmlDtdPtr ret = NULL;
12802
0
    xmlParserCtxtPtr ctxt;
12803
0
    xmlParserInputPtr input = NULL;
12804
0
    xmlCharEncoding enc;
12805
0
    xmlChar* systemIdCanonic;
12806
12807
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12808
12809
0
    ctxt = xmlNewParserCtxt();
12810
0
    if (ctxt == NULL) {
12811
0
  return(NULL);
12812
0
    }
12813
12814
    /* We are loading a DTD */
12815
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12816
12817
    /*
12818
     * Set-up the SAX context
12819
     */
12820
0
    if (sax != NULL) {
12821
0
  if (ctxt->sax != NULL)
12822
0
      xmlFree(ctxt->sax);
12823
0
        ctxt->sax = sax;
12824
0
        ctxt->userData = ctxt;
12825
0
    }
12826
12827
    /*
12828
     * Canonicalise the system ID
12829
     */
12830
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12831
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12832
0
  xmlFreeParserCtxt(ctxt);
12833
0
  return(NULL);
12834
0
    }
12835
12836
    /*
12837
     * Ask the Entity resolver to load the damn thing
12838
     */
12839
12840
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12841
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12842
0
                                   systemIdCanonic);
12843
0
    if (input == NULL) {
12844
0
        if (sax != NULL) ctxt->sax = NULL;
12845
0
  xmlFreeParserCtxt(ctxt);
12846
0
  if (systemIdCanonic != NULL)
12847
0
      xmlFree(systemIdCanonic);
12848
0
  return(NULL);
12849
0
    }
12850
12851
    /*
12852
     * plug some encoding conversion routines here.
12853
     */
12854
0
    if (xmlPushInput(ctxt, input) < 0) {
12855
0
        if (sax != NULL) ctxt->sax = NULL;
12856
0
  xmlFreeParserCtxt(ctxt);
12857
0
  if (systemIdCanonic != NULL)
12858
0
      xmlFree(systemIdCanonic);
12859
0
  return(NULL);
12860
0
    }
12861
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12862
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12863
0
  xmlSwitchEncoding(ctxt, enc);
12864
0
    }
12865
12866
0
    if (input->filename == NULL)
12867
0
  input->filename = (char *) systemIdCanonic;
12868
0
    else
12869
0
  xmlFree(systemIdCanonic);
12870
0
    input->line = 1;
12871
0
    input->col = 1;
12872
0
    input->base = ctxt->input->cur;
12873
0
    input->cur = ctxt->input->cur;
12874
0
    input->free = NULL;
12875
12876
    /*
12877
     * let's parse that entity knowing it's an external subset.
12878
     */
12879
0
    ctxt->inSubset = 2;
12880
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12881
0
    if (ctxt->myDoc == NULL) {
12882
0
  xmlErrMemory(ctxt, "New Doc failed");
12883
0
        if (sax != NULL) ctxt->sax = NULL;
12884
0
  xmlFreeParserCtxt(ctxt);
12885
0
  return(NULL);
12886
0
    }
12887
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12888
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12889
0
                                 ExternalID, SystemID);
12890
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12891
12892
0
    if (ctxt->myDoc != NULL) {
12893
0
  if (ctxt->wellFormed) {
12894
0
      ret = ctxt->myDoc->extSubset;
12895
0
      ctxt->myDoc->extSubset = NULL;
12896
0
      if (ret != NULL) {
12897
0
    xmlNodePtr tmp;
12898
12899
0
    ret->doc = NULL;
12900
0
    tmp = ret->children;
12901
0
    while (tmp != NULL) {
12902
0
        tmp->doc = NULL;
12903
0
        tmp = tmp->next;
12904
0
    }
12905
0
      }
12906
0
  } else {
12907
0
      ret = NULL;
12908
0
  }
12909
0
        xmlFreeDoc(ctxt->myDoc);
12910
0
        ctxt->myDoc = NULL;
12911
0
    }
12912
0
    if (sax != NULL) ctxt->sax = NULL;
12913
0
    xmlFreeParserCtxt(ctxt);
12914
12915
0
    return(ret);
12916
0
}
12917
12918
12919
/**
12920
 * xmlParseDTD:
12921
 * @ExternalID:  a NAME* containing the External ID of the DTD
12922
 * @SystemID:  a NAME* containing the URL to the DTD
12923
 *
12924
 * Load and parse an external subset.
12925
 *
12926
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12927
 */
12928
12929
xmlDtdPtr
12930
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12931
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12932
0
}
12933
#endif /* LIBXML_VALID_ENABLED */
12934
12935
/************************************************************************
12936
 *                  *
12937
 *    Front ends when parsing an Entity     *
12938
 *                  *
12939
 ************************************************************************/
12940
12941
/**
12942
 * xmlParseCtxtExternalEntity:
12943
 * @ctx:  the existing parsing context
12944
 * @URL:  the URL for the entity to load
12945
 * @ID:  the System ID for the entity to load
12946
 * @lst:  the return value for the set of parsed nodes
12947
 *
12948
 * Parse an external general entity within an existing parsing context
12949
 * An external general parsed entity is well-formed if it matches the
12950
 * production labeled extParsedEnt.
12951
 *
12952
 * [78] extParsedEnt ::= TextDecl? content
12953
 *
12954
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12955
 *    the parser error code otherwise
12956
 */
12957
12958
int
12959
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12960
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12961
0
    void *userData;
12962
12963
0
    if (ctx == NULL) return(-1);
12964
    /*
12965
     * If the user provided their own SAX callbacks, then reuse the
12966
     * userData callback field, otherwise the expected setup in a
12967
     * DOM builder is to have userData == ctxt
12968
     */
12969
0
    if (ctx->userData == ctx)
12970
0
        userData = NULL;
12971
0
    else
12972
0
        userData = ctx->userData;
12973
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12974
0
                                         userData, ctx->depth + 1,
12975
0
                                         URL, ID, lst);
12976
0
}
12977
12978
/**
12979
 * xmlParseExternalEntityPrivate:
12980
 * @doc:  the document the chunk pertains to
12981
 * @oldctxt:  the previous parser context if available
12982
 * @sax:  the SAX handler block (possibly NULL)
12983
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12984
 * @depth:  Used for loop detection, use 0
12985
 * @URL:  the URL for the entity to load
12986
 * @ID:  the System ID for the entity to load
12987
 * @list:  the return value for the set of parsed nodes
12988
 *
12989
 * Private version of xmlParseExternalEntity()
12990
 *
12991
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12992
 *    the parser error code otherwise
12993
 */
12994
12995
static xmlParserErrors
12996
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12997
                xmlSAXHandlerPtr sax,
12998
          void *user_data, int depth, const xmlChar *URL,
12999
0
          const xmlChar *ID, xmlNodePtr *list) {
13000
0
    xmlParserCtxtPtr ctxt;
13001
0
    xmlDocPtr newDoc;
13002
0
    xmlNodePtr newRoot;
13003
0
    xmlSAXHandlerPtr oldsax = NULL;
13004
0
    xmlParserErrors ret = XML_ERR_OK;
13005
0
    xmlChar start[4];
13006
0
    xmlCharEncoding enc;
13007
13008
0
    if (((depth > 40) &&
13009
0
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13010
0
  (depth > 1024)) {
13011
0
  return(XML_ERR_ENTITY_LOOP);
13012
0
    }
13013
13014
0
    if (list != NULL)
13015
0
        *list = NULL;
13016
0
    if ((URL == NULL) && (ID == NULL))
13017
0
  return(XML_ERR_INTERNAL_ERROR);
13018
0
    if (doc == NULL)
13019
0
  return(XML_ERR_INTERNAL_ERROR);
13020
13021
13022
0
    ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13023
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13024
0
    ctxt->userData = ctxt;
13025
0
    if (sax != NULL) {
13026
0
  oldsax = ctxt->sax;
13027
0
        ctxt->sax = sax;
13028
0
  if (user_data != NULL)
13029
0
      ctxt->userData = user_data;
13030
0
    }
13031
0
    xmlDetectSAX2(ctxt);
13032
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13033
0
    if (newDoc == NULL) {
13034
0
  xmlFreeParserCtxt(ctxt);
13035
0
  return(XML_ERR_INTERNAL_ERROR);
13036
0
    }
13037
0
    newDoc->properties = XML_DOC_INTERNAL;
13038
0
    if (doc) {
13039
0
        newDoc->intSubset = doc->intSubset;
13040
0
        newDoc->extSubset = doc->extSubset;
13041
0
        if (doc->dict) {
13042
0
            newDoc->dict = doc->dict;
13043
0
            xmlDictReference(newDoc->dict);
13044
0
        }
13045
0
        if (doc->URL != NULL) {
13046
0
            newDoc->URL = xmlStrdup(doc->URL);
13047
0
        }
13048
0
    }
13049
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13050
0
    if (newRoot == NULL) {
13051
0
  if (sax != NULL)
13052
0
      ctxt->sax = oldsax;
13053
0
  xmlFreeParserCtxt(ctxt);
13054
0
  newDoc->intSubset = NULL;
13055
0
  newDoc->extSubset = NULL;
13056
0
        xmlFreeDoc(newDoc);
13057
0
  return(XML_ERR_INTERNAL_ERROR);
13058
0
    }
13059
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13060
0
    nodePush(ctxt, newDoc->children);
13061
0
    if (doc == NULL) {
13062
0
        ctxt->myDoc = newDoc;
13063
0
    } else {
13064
0
        ctxt->myDoc = doc;
13065
0
        newRoot->doc = doc;
13066
0
    }
13067
13068
    /*
13069
     * Get the 4 first bytes and decode the charset
13070
     * if enc != XML_CHAR_ENCODING_NONE
13071
     * plug some encoding conversion routines.
13072
     */
13073
0
    GROW;
13074
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13075
0
  start[0] = RAW;
13076
0
  start[1] = NXT(1);
13077
0
  start[2] = NXT(2);
13078
0
  start[3] = NXT(3);
13079
0
  enc = xmlDetectCharEncoding(start, 4);
13080
0
  if (enc != XML_CHAR_ENCODING_NONE) {
13081
0
      xmlSwitchEncoding(ctxt, enc);
13082
0
  }
13083
0
    }
13084
13085
    /*
13086
     * Parse a possible text declaration first
13087
     */
13088
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13089
0
  xmlParseTextDecl(ctxt);
13090
        /*
13091
         * An XML-1.0 document can't reference an entity not XML-1.0
13092
         */
13093
0
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13094
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13095
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13096
0
                           "Version mismatch between document and entity\n");
13097
0
        }
13098
0
    }
13099
13100
0
    ctxt->instate = XML_PARSER_CONTENT;
13101
0
    ctxt->depth = depth;
13102
0
    if (oldctxt != NULL) {
13103
0
  ctxt->_private = oldctxt->_private;
13104
0
  ctxt->loadsubset = oldctxt->loadsubset;
13105
0
  ctxt->validate = oldctxt->validate;
13106
0
  ctxt->valid = oldctxt->valid;
13107
0
  ctxt->replaceEntities = oldctxt->replaceEntities;
13108
0
        if (oldctxt->validate) {
13109
0
            ctxt->vctxt.error = oldctxt->vctxt.error;
13110
0
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
13111
0
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
13112
0
        }
13113
0
  ctxt->external = oldctxt->external;
13114
0
        if (ctxt->dict) xmlDictFree(ctxt->dict);
13115
0
        ctxt->dict = oldctxt->dict;
13116
0
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13117
0
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13118
0
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13119
0
        ctxt->dictNames = oldctxt->dictNames;
13120
0
        ctxt->attsDefault = oldctxt->attsDefault;
13121
0
        ctxt->attsSpecial = oldctxt->attsSpecial;
13122
0
        ctxt->linenumbers = oldctxt->linenumbers;
13123
0
  ctxt->record_info = oldctxt->record_info;
13124
0
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13125
0
  ctxt->node_seq.length = oldctxt->node_seq.length;
13126
0
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13127
0
    } else {
13128
  /*
13129
   * Doing validity checking on chunk without context
13130
   * doesn't make sense
13131
   */
13132
0
  ctxt->_private = NULL;
13133
0
  ctxt->validate = 0;
13134
0
  ctxt->external = 2;
13135
0
  ctxt->loadsubset = 0;
13136
0
    }
13137
13138
0
    xmlParseContent(ctxt);
13139
13140
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13141
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13142
0
    } else if (RAW != 0) {
13143
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13144
0
    }
13145
0
    if (ctxt->node != newDoc->children) {
13146
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13147
0
    }
13148
13149
0
    if (!ctxt->wellFormed) {
13150
0
        if (ctxt->errNo == 0)
13151
0
      ret = XML_ERR_INTERNAL_ERROR;
13152
0
  else
13153
0
      ret = (xmlParserErrors)ctxt->errNo;
13154
0
    } else {
13155
0
  if (list != NULL) {
13156
0
      xmlNodePtr cur;
13157
13158
      /*
13159
       * Return the newly created nodeset after unlinking it from
13160
       * they pseudo parent.
13161
       */
13162
0
      cur = newDoc->children->children;
13163
0
      *list = cur;
13164
0
      while (cur != NULL) {
13165
0
    cur->parent = NULL;
13166
0
    cur = cur->next;
13167
0
      }
13168
0
            newDoc->children->children = NULL;
13169
0
  }
13170
0
  ret = XML_ERR_OK;
13171
0
    }
13172
13173
    /*
13174
     * Record in the parent context the number of entities replacement
13175
     * done when parsing that reference.
13176
     */
13177
0
    if (oldctxt != NULL)
13178
0
        oldctxt->nbentities += ctxt->nbentities;
13179
13180
    /*
13181
     * Also record the size of the entity parsed
13182
     */
13183
0
    if (ctxt->input != NULL && oldctxt != NULL) {
13184
0
  oldctxt->sizeentities += ctxt->input->consumed;
13185
0
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13186
0
    }
13187
    /*
13188
     * And record the last error if any
13189
     */
13190
0
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13191
0
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13192
13193
0
    if (sax != NULL)
13194
0
  ctxt->sax = oldsax;
13195
0
    if (oldctxt != NULL) {
13196
0
        ctxt->dict = NULL;
13197
0
        ctxt->attsDefault = NULL;
13198
0
        ctxt->attsSpecial = NULL;
13199
0
        oldctxt->validate = ctxt->validate;
13200
0
        oldctxt->valid = ctxt->valid;
13201
0
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13202
0
        oldctxt->node_seq.length = ctxt->node_seq.length;
13203
0
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13204
0
    }
13205
0
    ctxt->node_seq.maximum = 0;
13206
0
    ctxt->node_seq.length = 0;
13207
0
    ctxt->node_seq.buffer = NULL;
13208
0
    xmlFreeParserCtxt(ctxt);
13209
0
    newDoc->intSubset = NULL;
13210
0
    newDoc->extSubset = NULL;
13211
0
    xmlFreeDoc(newDoc);
13212
13213
0
    return(ret);
13214
0
}
13215
13216
#ifdef LIBXML_SAX1_ENABLED
13217
/**
13218
 * xmlParseExternalEntity:
13219
 * @doc:  the document the chunk pertains to
13220
 * @sax:  the SAX handler block (possibly NULL)
13221
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13222
 * @depth:  Used for loop detection, use 0
13223
 * @URL:  the URL for the entity to load
13224
 * @ID:  the System ID for the entity to load
13225
 * @lst:  the return value for the set of parsed nodes
13226
 *
13227
 * Parse an external general entity
13228
 * An external general parsed entity is well-formed if it matches the
13229
 * production labeled extParsedEnt.
13230
 *
13231
 * [78] extParsedEnt ::= TextDecl? content
13232
 *
13233
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13234
 *    the parser error code otherwise
13235
 */
13236
13237
int
13238
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13239
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13240
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13241
0
                           ID, lst));
13242
0
}
13243
13244
/**
13245
 * xmlParseBalancedChunkMemory:
13246
 * @doc:  the document the chunk pertains to (must not be NULL)
13247
 * @sax:  the SAX handler block (possibly NULL)
13248
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13249
 * @depth:  Used for loop detection, use 0
13250
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13251
 * @lst:  the return value for the set of parsed nodes
13252
 *
13253
 * Parse a well-balanced chunk of an XML document
13254
 * called by the parser
13255
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13256
 * the content production in the XML grammar:
13257
 *
13258
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13259
 *
13260
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13261
 *    the parser error code otherwise
13262
 */
13263
13264
int
13265
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13266
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13267
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13268
0
                                                depth, string, lst, 0 );
13269
0
}
13270
#endif /* LIBXML_SAX1_ENABLED */
13271
13272
/**
13273
 * xmlParseBalancedChunkMemoryInternal:
13274
 * @oldctxt:  the existing parsing context
13275
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13276
 * @user_data:  the user data field for the parser context
13277
 * @lst:  the return value for the set of parsed nodes
13278
 *
13279
 *
13280
 * Parse a well-balanced chunk of an XML document
13281
 * called by the parser
13282
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13283
 * the content production in the XML grammar:
13284
 *
13285
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13286
 *
13287
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13288
 * error code otherwise
13289
 *
13290
 * In case recover is set to 1, the nodelist will not be empty even if
13291
 * the parsed chunk is not well balanced.
13292
 */
13293
static xmlParserErrors
13294
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13295
0
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13296
0
    xmlParserCtxtPtr ctxt;
13297
0
    xmlDocPtr newDoc = NULL;
13298
0
    xmlNodePtr newRoot;
13299
0
    xmlSAXHandlerPtr oldsax = NULL;
13300
0
    xmlNodePtr content = NULL;
13301
0
    xmlNodePtr last = NULL;
13302
0
    int size;
13303
0
    xmlParserErrors ret = XML_ERR_OK;
13304
0
#ifdef SAX2
13305
0
    int i;
13306
0
#endif
13307
13308
0
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13309
0
        (oldctxt->depth >  1024)) {
13310
0
  return(XML_ERR_ENTITY_LOOP);
13311
0
    }
13312
13313
13314
0
    if (lst != NULL)
13315
0
        *lst = NULL;
13316
0
    if (string == NULL)
13317
0
        return(XML_ERR_INTERNAL_ERROR);
13318
13319
0
    size = xmlStrlen(string);
13320
13321
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13322
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13323
0
    if (user_data != NULL)
13324
0
  ctxt->userData = user_data;
13325
0
    else
13326
0
  ctxt->userData = ctxt;
13327
0
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13328
0
    ctxt->dict = oldctxt->dict;
13329
0
    ctxt->input_id = oldctxt->input_id + 1;
13330
0
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13331
0
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13332
0
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13333
13334
0
#ifdef SAX2
13335
    /* propagate namespaces down the entity */
13336
0
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13337
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13338
0
    }
13339
0
#endif
13340
13341
0
    oldsax = ctxt->sax;
13342
0
    ctxt->sax = oldctxt->sax;
13343
0
    xmlDetectSAX2(ctxt);
13344
0
    ctxt->replaceEntities = oldctxt->replaceEntities;
13345
0
    ctxt->options = oldctxt->options;
13346
13347
0
    ctxt->_private = oldctxt->_private;
13348
0
    if (oldctxt->myDoc == NULL) {
13349
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13350
0
  if (newDoc == NULL) {
13351
0
      ctxt->sax = oldsax;
13352
0
      ctxt->dict = NULL;
13353
0
      xmlFreeParserCtxt(ctxt);
13354
0
      return(XML_ERR_INTERNAL_ERROR);
13355
0
  }
13356
0
  newDoc->properties = XML_DOC_INTERNAL;
13357
0
  newDoc->dict = ctxt->dict;
13358
0
  xmlDictReference(newDoc->dict);
13359
0
  ctxt->myDoc = newDoc;
13360
0
    } else {
13361
0
  ctxt->myDoc = oldctxt->myDoc;
13362
0
        content = ctxt->myDoc->children;
13363
0
  last = ctxt->myDoc->last;
13364
0
    }
13365
0
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13366
0
    if (newRoot == NULL) {
13367
0
  ctxt->sax = oldsax;
13368
0
  ctxt->dict = NULL;
13369
0
  xmlFreeParserCtxt(ctxt);
13370
0
  if (newDoc != NULL) {
13371
0
      xmlFreeDoc(newDoc);
13372
0
  }
13373
0
  return(XML_ERR_INTERNAL_ERROR);
13374
0
    }
13375
0
    ctxt->myDoc->children = NULL;
13376
0
    ctxt->myDoc->last = NULL;
13377
0
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13378
0
    nodePush(ctxt, ctxt->myDoc->children);
13379
0
    ctxt->instate = XML_PARSER_CONTENT;
13380
0
    ctxt->depth = oldctxt->depth + 1;
13381
13382
0
    ctxt->validate = 0;
13383
0
    ctxt->loadsubset = oldctxt->loadsubset;
13384
0
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13385
  /*
13386
   * ID/IDREF registration will be done in xmlValidateElement below
13387
   */
13388
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13389
0
    }
13390
0
    ctxt->dictNames = oldctxt->dictNames;
13391
0
    ctxt->attsDefault = oldctxt->attsDefault;
13392
0
    ctxt->attsSpecial = oldctxt->attsSpecial;
13393
13394
0
    xmlParseContent(ctxt);
13395
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13396
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13397
0
    } else if (RAW != 0) {
13398
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13399
0
    }
13400
0
    if (ctxt->node != ctxt->myDoc->children) {
13401
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13402
0
    }
13403
13404
0
    if (!ctxt->wellFormed) {
13405
0
        if (ctxt->errNo == 0)
13406
0
      ret = XML_ERR_INTERNAL_ERROR;
13407
0
  else
13408
0
      ret = (xmlParserErrors)ctxt->errNo;
13409
0
    } else {
13410
0
      ret = XML_ERR_OK;
13411
0
    }
13412
13413
0
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13414
0
  xmlNodePtr cur;
13415
13416
  /*
13417
   * Return the newly created nodeset after unlinking it from
13418
   * they pseudo parent.
13419
   */
13420
0
  cur = ctxt->myDoc->children->children;
13421
0
  *lst = cur;
13422
0
  while (cur != NULL) {
13423
0
#ifdef LIBXML_VALID_ENABLED
13424
0
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13425
0
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13426
0
    (cur->type == XML_ELEMENT_NODE)) {
13427
0
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13428
0
      oldctxt->myDoc, cur);
13429
0
      }
13430
0
#endif /* LIBXML_VALID_ENABLED */
13431
0
      cur->parent = NULL;
13432
0
      cur = cur->next;
13433
0
  }
13434
0
  ctxt->myDoc->children->children = NULL;
13435
0
    }
13436
0
    if (ctxt->myDoc != NULL) {
13437
0
  xmlFreeNode(ctxt->myDoc->children);
13438
0
        ctxt->myDoc->children = content;
13439
0
        ctxt->myDoc->last = last;
13440
0
    }
13441
13442
    /*
13443
     * Record in the parent context the number of entities replacement
13444
     * done when parsing that reference.
13445
     */
13446
0
    if (oldctxt != NULL)
13447
0
        oldctxt->nbentities += ctxt->nbentities;
13448
13449
    /*
13450
     * Also record the last error if any
13451
     */
13452
0
    if (ctxt->lastError.code != XML_ERR_OK)
13453
0
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13454
13455
0
    ctxt->sax = oldsax;
13456
0
    ctxt->dict = NULL;
13457
0
    ctxt->attsDefault = NULL;
13458
0
    ctxt->attsSpecial = NULL;
13459
0
    xmlFreeParserCtxt(ctxt);
13460
0
    if (newDoc != NULL) {
13461
0
  xmlFreeDoc(newDoc);
13462
0
    }
13463
13464
0
    return(ret);
13465
0
}
13466
13467
/**
13468
 * xmlParseInNodeContext:
13469
 * @node:  the context node
13470
 * @data:  the input string
13471
 * @datalen:  the input string length in bytes
13472
 * @options:  a combination of xmlParserOption
13473
 * @lst:  the return value for the set of parsed nodes
13474
 *
13475
 * Parse a well-balanced chunk of an XML document
13476
 * within the context (DTD, namespaces, etc ...) of the given node.
13477
 *
13478
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13479
 * the content production in the XML grammar:
13480
 *
13481
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13482
 *
13483
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13484
 * error code otherwise
13485
 */
13486
xmlParserErrors
13487
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13488
0
                      int options, xmlNodePtr *lst) {
13489
0
#ifdef SAX2
13490
0
    xmlParserCtxtPtr ctxt;
13491
0
    xmlDocPtr doc = NULL;
13492
0
    xmlNodePtr fake, cur;
13493
0
    int nsnr = 0;
13494
13495
0
    xmlParserErrors ret = XML_ERR_OK;
13496
13497
    /*
13498
     * check all input parameters, grab the document
13499
     */
13500
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13501
0
        return(XML_ERR_INTERNAL_ERROR);
13502
0
    switch (node->type) {
13503
0
        case XML_ELEMENT_NODE:
13504
0
        case XML_ATTRIBUTE_NODE:
13505
0
        case XML_TEXT_NODE:
13506
0
        case XML_CDATA_SECTION_NODE:
13507
0
        case XML_ENTITY_REF_NODE:
13508
0
        case XML_PI_NODE:
13509
0
        case XML_COMMENT_NODE:
13510
0
        case XML_DOCUMENT_NODE:
13511
0
        case XML_HTML_DOCUMENT_NODE:
13512
0
      break;
13513
0
  default:
13514
0
      return(XML_ERR_INTERNAL_ERROR);
13515
13516
0
    }
13517
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13518
0
           (node->type != XML_DOCUMENT_NODE) &&
13519
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13520
0
  node = node->parent;
13521
0
    if (node == NULL)
13522
0
  return(XML_ERR_INTERNAL_ERROR);
13523
0
    if (node->type == XML_ELEMENT_NODE)
13524
0
  doc = node->doc;
13525
0
    else
13526
0
        doc = (xmlDocPtr) node;
13527
0
    if (doc == NULL)
13528
0
  return(XML_ERR_INTERNAL_ERROR);
13529
13530
    /*
13531
     * allocate a context and set-up everything not related to the
13532
     * node position in the tree
13533
     */
13534
0
    if (doc->type == XML_DOCUMENT_NODE)
13535
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13536
0
#ifdef LIBXML_HTML_ENABLED
13537
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13538
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13539
        /*
13540
         * When parsing in context, it makes no sense to add implied
13541
         * elements like html/body/etc...
13542
         */
13543
0
        options |= HTML_PARSE_NOIMPLIED;
13544
0
    }
13545
0
#endif
13546
0
    else
13547
0
        return(XML_ERR_INTERNAL_ERROR);
13548
13549
0
    if (ctxt == NULL)
13550
0
        return(XML_ERR_NO_MEMORY);
13551
13552
    /*
13553
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13554
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13555
     * we must wait until the last moment to free the original one.
13556
     */
13557
0
    if (doc->dict != NULL) {
13558
0
        if (ctxt->dict != NULL)
13559
0
      xmlDictFree(ctxt->dict);
13560
0
  ctxt->dict = doc->dict;
13561
0
    } else
13562
0
        options |= XML_PARSE_NODICT;
13563
13564
0
    if (doc->encoding != NULL) {
13565
0
        xmlCharEncodingHandlerPtr hdlr;
13566
13567
0
        if (ctxt->encoding != NULL)
13568
0
      xmlFree((xmlChar *) ctxt->encoding);
13569
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13570
13571
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13572
0
        if (hdlr != NULL) {
13573
0
            xmlSwitchToEncoding(ctxt, hdlr);
13574
0
  } else {
13575
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13576
0
        }
13577
0
    }
13578
13579
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13580
0
    xmlDetectSAX2(ctxt);
13581
0
    ctxt->myDoc = doc;
13582
    /* parsing in context, i.e. as within existing content */
13583
0
    ctxt->input_id = 2;
13584
0
    ctxt->instate = XML_PARSER_CONTENT;
13585
13586
0
    fake = xmlNewComment(NULL);
13587
0
    if (fake == NULL) {
13588
0
        xmlFreeParserCtxt(ctxt);
13589
0
  return(XML_ERR_NO_MEMORY);
13590
0
    }
13591
0
    xmlAddChild(node, fake);
13592
13593
0
    if (node->type == XML_ELEMENT_NODE) {
13594
0
  nodePush(ctxt, node);
13595
  /*
13596
   * initialize the SAX2 namespaces stack
13597
   */
13598
0
  cur = node;
13599
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13600
0
      xmlNsPtr ns = cur->nsDef;
13601
0
      const xmlChar *iprefix, *ihref;
13602
13603
0
      while (ns != NULL) {
13604
0
    if (ctxt->dict) {
13605
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13606
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13607
0
    } else {
13608
0
        iprefix = ns->prefix;
13609
0
        ihref = ns->href;
13610
0
    }
13611
13612
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13613
0
        nsPush(ctxt, iprefix, ihref);
13614
0
        nsnr++;
13615
0
    }
13616
0
    ns = ns->next;
13617
0
      }
13618
0
      cur = cur->parent;
13619
0
  }
13620
0
    }
13621
13622
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13623
  /*
13624
   * ID/IDREF registration will be done in xmlValidateElement below
13625
   */
13626
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13627
0
    }
13628
13629
0
#ifdef LIBXML_HTML_ENABLED
13630
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13631
0
        __htmlParseContent(ctxt);
13632
0
    else
13633
0
#endif
13634
0
  xmlParseContent(ctxt);
13635
13636
0
    nsPop(ctxt, nsnr);
13637
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13638
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13639
0
    } else if (RAW != 0) {
13640
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13641
0
    }
13642
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13643
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13644
0
  ctxt->wellFormed = 0;
13645
0
    }
13646
13647
0
    if (!ctxt->wellFormed) {
13648
0
        if (ctxt->errNo == 0)
13649
0
      ret = XML_ERR_INTERNAL_ERROR;
13650
0
  else
13651
0
      ret = (xmlParserErrors)ctxt->errNo;
13652
0
    } else {
13653
0
        ret = XML_ERR_OK;
13654
0
    }
13655
13656
    /*
13657
     * Return the newly created nodeset after unlinking it from
13658
     * the pseudo sibling.
13659
     */
13660
13661
0
    cur = fake->next;
13662
0
    fake->next = NULL;
13663
0
    node->last = fake;
13664
13665
0
    if (cur != NULL) {
13666
0
  cur->prev = NULL;
13667
0
    }
13668
13669
0
    *lst = cur;
13670
13671
0
    while (cur != NULL) {
13672
0
  cur->parent = NULL;
13673
0
  cur = cur->next;
13674
0
    }
13675
13676
0
    xmlUnlinkNode(fake);
13677
0
    xmlFreeNode(fake);
13678
13679
13680
0
    if (ret != XML_ERR_OK) {
13681
0
        xmlFreeNodeList(*lst);
13682
0
  *lst = NULL;
13683
0
    }
13684
13685
0
    if (doc->dict != NULL)
13686
0
        ctxt->dict = NULL;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
13689
0
    return(ret);
13690
#else /* !SAX2 */
13691
    return(XML_ERR_INTERNAL_ERROR);
13692
#endif
13693
0
}
13694
13695
#ifdef LIBXML_SAX1_ENABLED
13696
/**
13697
 * xmlParseBalancedChunkMemoryRecover:
13698
 * @doc:  the document the chunk pertains to (must not be NULL)
13699
 * @sax:  the SAX handler block (possibly NULL)
13700
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13701
 * @depth:  Used for loop detection, use 0
13702
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13703
 * @lst:  the return value for the set of parsed nodes
13704
 * @recover: return nodes even if the data is broken (use 0)
13705
 *
13706
 *
13707
 * Parse a well-balanced chunk of an XML document
13708
 * called by the parser
13709
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13710
 * the content production in the XML grammar:
13711
 *
13712
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13713
 *
13714
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13715
 *    the parser error code otherwise
13716
 *
13717
 * In case recover is set to 1, the nodelist will not be empty even if
13718
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13719
 * some extent.
13720
 */
13721
int
13722
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13723
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13724
0
     int recover) {
13725
0
    xmlParserCtxtPtr ctxt;
13726
0
    xmlDocPtr newDoc;
13727
0
    xmlSAXHandlerPtr oldsax = NULL;
13728
0
    xmlNodePtr content, newRoot;
13729
0
    int size;
13730
0
    int ret = 0;
13731
13732
0
    if (depth > 40) {
13733
0
  return(XML_ERR_ENTITY_LOOP);
13734
0
    }
13735
13736
13737
0
    if (lst != NULL)
13738
0
        *lst = NULL;
13739
0
    if (string == NULL)
13740
0
        return(-1);
13741
13742
0
    size = xmlStrlen(string);
13743
13744
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13745
0
    if (ctxt == NULL) return(-1);
13746
0
    ctxt->userData = ctxt;
13747
0
    if (sax != NULL) {
13748
0
  oldsax = ctxt->sax;
13749
0
        ctxt->sax = sax;
13750
0
  if (user_data != NULL)
13751
0
      ctxt->userData = user_data;
13752
0
    }
13753
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13754
0
    if (newDoc == NULL) {
13755
0
  xmlFreeParserCtxt(ctxt);
13756
0
  return(-1);
13757
0
    }
13758
0
    newDoc->properties = XML_DOC_INTERNAL;
13759
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13760
0
        xmlDictFree(ctxt->dict);
13761
0
  ctxt->dict = doc->dict;
13762
0
  xmlDictReference(ctxt->dict);
13763
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13764
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13765
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13766
0
  ctxt->dictNames = 1;
13767
0
    } else {
13768
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13769
0
    }
13770
    /* doc == NULL is only supported for historic reasons */
13771
0
    if (doc != NULL) {
13772
0
  newDoc->intSubset = doc->intSubset;
13773
0
  newDoc->extSubset = doc->extSubset;
13774
0
    }
13775
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13776
0
    if (newRoot == NULL) {
13777
0
  if (sax != NULL)
13778
0
      ctxt->sax = oldsax;
13779
0
  xmlFreeParserCtxt(ctxt);
13780
0
  newDoc->intSubset = NULL;
13781
0
  newDoc->extSubset = NULL;
13782
0
        xmlFreeDoc(newDoc);
13783
0
  return(-1);
13784
0
    }
13785
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13786
0
    nodePush(ctxt, newRoot);
13787
    /* doc == NULL is only supported for historic reasons */
13788
0
    if (doc == NULL) {
13789
0
  ctxt->myDoc = newDoc;
13790
0
    } else {
13791
0
  ctxt->myDoc = newDoc;
13792
0
  newDoc->children->doc = doc;
13793
  /* Ensure that doc has XML spec namespace */
13794
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13795
0
  newDoc->oldNs = doc->oldNs;
13796
0
    }
13797
0
    ctxt->instate = XML_PARSER_CONTENT;
13798
0
    ctxt->input_id = 2;
13799
0
    ctxt->depth = depth;
13800
13801
    /*
13802
     * Doing validity checking on chunk doesn't make sense
13803
     */
13804
0
    ctxt->validate = 0;
13805
0
    ctxt->loadsubset = 0;
13806
0
    xmlDetectSAX2(ctxt);
13807
13808
0
    if ( doc != NULL ){
13809
0
        content = doc->children;
13810
0
        doc->children = NULL;
13811
0
        xmlParseContent(ctxt);
13812
0
        doc->children = content;
13813
0
    }
13814
0
    else {
13815
0
        xmlParseContent(ctxt);
13816
0
    }
13817
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13818
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13819
0
    } else if (RAW != 0) {
13820
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13821
0
    }
13822
0
    if (ctxt->node != newDoc->children) {
13823
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13824
0
    }
13825
13826
0
    if (!ctxt->wellFormed) {
13827
0
        if (ctxt->errNo == 0)
13828
0
      ret = 1;
13829
0
  else
13830
0
      ret = ctxt->errNo;
13831
0
    } else {
13832
0
      ret = 0;
13833
0
    }
13834
13835
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13836
0
  xmlNodePtr cur;
13837
13838
  /*
13839
   * Return the newly created nodeset after unlinking it from
13840
   * they pseudo parent.
13841
   */
13842
0
  cur = newDoc->children->children;
13843
0
  *lst = cur;
13844
0
  while (cur != NULL) {
13845
0
      xmlSetTreeDoc(cur, doc);
13846
0
      cur->parent = NULL;
13847
0
      cur = cur->next;
13848
0
  }
13849
0
  newDoc->children->children = NULL;
13850
0
    }
13851
13852
0
    if (sax != NULL)
13853
0
  ctxt->sax = oldsax;
13854
0
    xmlFreeParserCtxt(ctxt);
13855
0
    newDoc->intSubset = NULL;
13856
0
    newDoc->extSubset = NULL;
13857
    /* This leaks the namespace list if doc == NULL */
13858
0
    newDoc->oldNs = NULL;
13859
0
    xmlFreeDoc(newDoc);
13860
13861
0
    return(ret);
13862
0
}
13863
13864
/**
13865
 * xmlSAXParseEntity:
13866
 * @sax:  the SAX handler block
13867
 * @filename:  the filename
13868
 *
13869
 * parse an XML external entity out of context and build a tree.
13870
 * It use the given SAX function block to handle the parsing callback.
13871
 * If sax is NULL, fallback to the default DOM tree building routines.
13872
 *
13873
 * [78] extParsedEnt ::= TextDecl? content
13874
 *
13875
 * This correspond to a "Well Balanced" chunk
13876
 *
13877
 * Returns the resulting document tree
13878
 */
13879
13880
xmlDocPtr
13881
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13882
0
    xmlDocPtr ret;
13883
0
    xmlParserCtxtPtr ctxt;
13884
13885
0
    ctxt = xmlCreateFileParserCtxt(filename);
13886
0
    if (ctxt == NULL) {
13887
0
  return(NULL);
13888
0
    }
13889
0
    if (sax != NULL) {
13890
0
  if (ctxt->sax != NULL)
13891
0
      xmlFree(ctxt->sax);
13892
0
        ctxt->sax = sax;
13893
0
        ctxt->userData = NULL;
13894
0
    }
13895
13896
0
    xmlParseExtParsedEnt(ctxt);
13897
13898
0
    if (ctxt->wellFormed)
13899
0
  ret = ctxt->myDoc;
13900
0
    else {
13901
0
        ret = NULL;
13902
0
        xmlFreeDoc(ctxt->myDoc);
13903
0
        ctxt->myDoc = NULL;
13904
0
    }
13905
0
    if (sax != NULL)
13906
0
        ctxt->sax = NULL;
13907
0
    xmlFreeParserCtxt(ctxt);
13908
13909
0
    return(ret);
13910
0
}
13911
13912
/**
13913
 * xmlParseEntity:
13914
 * @filename:  the filename
13915
 *
13916
 * parse an XML external entity out of context and build a tree.
13917
 *
13918
 * [78] extParsedEnt ::= TextDecl? content
13919
 *
13920
 * This correspond to a "Well Balanced" chunk
13921
 *
13922
 * Returns the resulting document tree
13923
 */
13924
13925
xmlDocPtr
13926
0
xmlParseEntity(const char *filename) {
13927
0
    return(xmlSAXParseEntity(NULL, filename));
13928
0
}
13929
#endif /* LIBXML_SAX1_ENABLED */
13930
13931
/**
13932
 * xmlCreateEntityParserCtxtInternal:
13933
 * @URL:  the entity URL
13934
 * @ID:  the entity PUBLIC ID
13935
 * @base:  a possible base for the target URI
13936
 * @pctx:  parser context used to set options on new context
13937
 *
13938
 * Create a parser context for an external entity
13939
 * Automatic support for ZLIB/Compress compressed document is provided
13940
 * by default if found at compile-time.
13941
 *
13942
 * Returns the new parser context or NULL
13943
 */
13944
static xmlParserCtxtPtr
13945
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13946
0
                    const xmlChar *base, xmlParserCtxtPtr pctx) {
13947
0
    xmlParserCtxtPtr ctxt;
13948
0
    xmlParserInputPtr inputStream;
13949
0
    char *directory = NULL;
13950
0
    xmlChar *uri;
13951
13952
0
    ctxt = xmlNewParserCtxt();
13953
0
    if (ctxt == NULL) {
13954
0
  return(NULL);
13955
0
    }
13956
13957
0
    if (pctx != NULL) {
13958
0
        ctxt->options = pctx->options;
13959
0
        ctxt->_private = pctx->_private;
13960
  /*
13961
   * this is a subparser of pctx, so the input_id should be
13962
   * incremented to distinguish from main entity
13963
   */
13964
0
  ctxt->input_id = pctx->input_id + 1;
13965
0
    }
13966
13967
    /* Don't read from stdin. */
13968
0
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13969
0
        URL = BAD_CAST "./-";
13970
13971
0
    uri = xmlBuildURI(URL, base);
13972
13973
0
    if (uri == NULL) {
13974
0
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13975
0
  if (inputStream == NULL) {
13976
0
      xmlFreeParserCtxt(ctxt);
13977
0
      return(NULL);
13978
0
  }
13979
13980
0
  inputPush(ctxt, inputStream);
13981
13982
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13983
0
      directory = xmlParserGetDirectory((char *)URL);
13984
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13985
0
      ctxt->directory = directory;
13986
0
    } else {
13987
0
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13988
0
  if (inputStream == NULL) {
13989
0
      xmlFree(uri);
13990
0
      xmlFreeParserCtxt(ctxt);
13991
0
      return(NULL);
13992
0
  }
13993
13994
0
  inputPush(ctxt, inputStream);
13995
13996
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13997
0
      directory = xmlParserGetDirectory((char *)uri);
13998
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13999
0
      ctxt->directory = directory;
14000
0
  xmlFree(uri);
14001
0
    }
14002
0
    return(ctxt);
14003
0
}
14004
14005
/**
14006
 * xmlCreateEntityParserCtxt:
14007
 * @URL:  the entity URL
14008
 * @ID:  the entity PUBLIC ID
14009
 * @base:  a possible base for the target URI
14010
 *
14011
 * Create a parser context for an external entity
14012
 * Automatic support for ZLIB/Compress compressed document is provided
14013
 * by default if found at compile-time.
14014
 *
14015
 * Returns the new parser context or NULL
14016
 */
14017
xmlParserCtxtPtr
14018
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14019
0
                    const xmlChar *base) {
14020
0
    return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14021
14022
0
}
14023
14024
/************************************************************************
14025
 *                  *
14026
 *    Front ends when parsing from a file     *
14027
 *                  *
14028
 ************************************************************************/
14029
14030
/**
14031
 * xmlCreateURLParserCtxt:
14032
 * @filename:  the filename or URL
14033
 * @options:  a combination of xmlParserOption
14034
 *
14035
 * Create a parser context for a file or URL content.
14036
 * Automatic support for ZLIB/Compress compressed document is provided
14037
 * by default if found at compile-time and for file accesses
14038
 *
14039
 * Returns the new parser context or NULL
14040
 */
14041
xmlParserCtxtPtr
14042
xmlCreateURLParserCtxt(const char *filename, int options)
14043
0
{
14044
0
    xmlParserCtxtPtr ctxt;
14045
0
    xmlParserInputPtr inputStream;
14046
0
    char *directory = NULL;
14047
14048
0
    ctxt = xmlNewParserCtxt();
14049
0
    if (ctxt == NULL) {
14050
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14051
0
  return(NULL);
14052
0
    }
14053
14054
0
    if (options)
14055
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14056
0
    ctxt->linenumbers = 1;
14057
14058
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14059
0
    if (inputStream == NULL) {
14060
0
  xmlFreeParserCtxt(ctxt);
14061
0
  return(NULL);
14062
0
    }
14063
14064
0
    inputPush(ctxt, inputStream);
14065
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14066
0
        directory = xmlParserGetDirectory(filename);
14067
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14068
0
        ctxt->directory = directory;
14069
14070
0
    return(ctxt);
14071
0
}
14072
14073
/**
14074
 * xmlCreateFileParserCtxt:
14075
 * @filename:  the filename
14076
 *
14077
 * Create a parser context for a file content.
14078
 * Automatic support for ZLIB/Compress compressed document is provided
14079
 * by default if found at compile-time.
14080
 *
14081
 * Returns the new parser context or NULL
14082
 */
14083
xmlParserCtxtPtr
14084
xmlCreateFileParserCtxt(const char *filename)
14085
0
{
14086
0
    return(xmlCreateURLParserCtxt(filename, 0));
14087
0
}
14088
14089
#ifdef LIBXML_SAX1_ENABLED
14090
/**
14091
 * xmlSAXParseFileWithData:
14092
 * @sax:  the SAX handler block
14093
 * @filename:  the filename
14094
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14095
 *             documents
14096
 * @data:  the userdata
14097
 *
14098
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14099
 * compressed document is provided by default if found at compile-time.
14100
 * It use the given SAX function block to handle the parsing callback.
14101
 * If sax is NULL, fallback to the default DOM tree building routines.
14102
 *
14103
 * User data (void *) is stored within the parser context in the
14104
 * context's _private member, so it is available nearly everywhere in libxml
14105
 *
14106
 * Returns the resulting document tree
14107
 */
14108
14109
xmlDocPtr
14110
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14111
0
                        int recovery, void *data) {
14112
0
    xmlDocPtr ret;
14113
0
    xmlParserCtxtPtr ctxt;
14114
14115
0
    xmlInitParser();
14116
14117
0
    ctxt = xmlCreateFileParserCtxt(filename);
14118
0
    if (ctxt == NULL) {
14119
0
  return(NULL);
14120
0
    }
14121
0
    if (sax != NULL) {
14122
0
  if (ctxt->sax != NULL)
14123
0
      xmlFree(ctxt->sax);
14124
0
        ctxt->sax = sax;
14125
0
    }
14126
0
    xmlDetectSAX2(ctxt);
14127
0
    if (data!=NULL) {
14128
0
  ctxt->_private = data;
14129
0
    }
14130
14131
0
    if (ctxt->directory == NULL)
14132
0
        ctxt->directory = xmlParserGetDirectory(filename);
14133
14134
0
    ctxt->recovery = recovery;
14135
14136
0
    xmlParseDocument(ctxt);
14137
14138
0
    if ((ctxt->wellFormed) || recovery) {
14139
0
        ret = ctxt->myDoc;
14140
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14141
0
      if (ctxt->input->buf->compressed > 0)
14142
0
    ret->compression = 9;
14143
0
      else
14144
0
    ret->compression = ctxt->input->buf->compressed;
14145
0
  }
14146
0
    }
14147
0
    else {
14148
0
       ret = NULL;
14149
0
       xmlFreeDoc(ctxt->myDoc);
14150
0
       ctxt->myDoc = NULL;
14151
0
    }
14152
0
    if (sax != NULL)
14153
0
        ctxt->sax = NULL;
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return(ret);
14157
0
}
14158
14159
/**
14160
 * xmlSAXParseFile:
14161
 * @sax:  the SAX handler block
14162
 * @filename:  the filename
14163
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14164
 *             documents
14165
 *
14166
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14167
 * compressed document is provided by default if found at compile-time.
14168
 * It use the given SAX function block to handle the parsing callback.
14169
 * If sax is NULL, fallback to the default DOM tree building routines.
14170
 *
14171
 * Returns the resulting document tree
14172
 */
14173
14174
xmlDocPtr
14175
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14176
0
                          int recovery) {
14177
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14178
0
}
14179
14180
/**
14181
 * xmlRecoverDoc:
14182
 * @cur:  a pointer to an array of xmlChar
14183
 *
14184
 * parse an XML in-memory document and build a tree.
14185
 * In the case the document is not Well Formed, a attempt to build a
14186
 * tree is tried anyway
14187
 *
14188
 * Returns the resulting document tree or NULL in case of failure
14189
 */
14190
14191
xmlDocPtr
14192
0
xmlRecoverDoc(const xmlChar *cur) {
14193
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14194
0
}
14195
14196
/**
14197
 * xmlParseFile:
14198
 * @filename:  the filename
14199
 *
14200
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14201
 * compressed document is provided by default if found at compile-time.
14202
 *
14203
 * Returns the resulting document tree if the file was wellformed,
14204
 * NULL otherwise.
14205
 */
14206
14207
xmlDocPtr
14208
0
xmlParseFile(const char *filename) {
14209
0
    return(xmlSAXParseFile(NULL, filename, 0));
14210
0
}
14211
14212
/**
14213
 * xmlRecoverFile:
14214
 * @filename:  the filename
14215
 *
14216
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14217
 * compressed document is provided by default if found at compile-time.
14218
 * In the case the document is not Well Formed, it attempts to build
14219
 * a tree anyway
14220
 *
14221
 * Returns the resulting document tree or NULL in case of failure
14222
 */
14223
14224
xmlDocPtr
14225
0
xmlRecoverFile(const char *filename) {
14226
0
    return(xmlSAXParseFile(NULL, filename, 1));
14227
0
}
14228
14229
14230
/**
14231
 * xmlSetupParserForBuffer:
14232
 * @ctxt:  an XML parser context
14233
 * @buffer:  a xmlChar * buffer
14234
 * @filename:  a file name
14235
 *
14236
 * Setup the parser context to parse a new buffer; Clears any prior
14237
 * contents from the parser context. The buffer parameter must not be
14238
 * NULL, but the filename parameter can be
14239
 */
14240
void
14241
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14242
                             const char* filename)
14243
0
{
14244
0
    xmlParserInputPtr input;
14245
14246
0
    if ((ctxt == NULL) || (buffer == NULL))
14247
0
        return;
14248
14249
0
    input = xmlNewInputStream(ctxt);
14250
0
    if (input == NULL) {
14251
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14252
0
        xmlClearParserCtxt(ctxt);
14253
0
        return;
14254
0
    }
14255
14256
0
    xmlClearParserCtxt(ctxt);
14257
0
    if (filename != NULL)
14258
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14259
0
    input->base = buffer;
14260
0
    input->cur = buffer;
14261
0
    input->end = &buffer[xmlStrlen(buffer)];
14262
0
    inputPush(ctxt, input);
14263
0
}
14264
14265
/**
14266
 * xmlSAXUserParseFile:
14267
 * @sax:  a SAX handler
14268
 * @user_data:  The user data returned on SAX callbacks
14269
 * @filename:  a file name
14270
 *
14271
 * parse an XML file and call the given SAX handler routines.
14272
 * Automatic support for ZLIB/Compress compressed document is provided
14273
 *
14274
 * Returns 0 in case of success or a error number otherwise
14275
 */
14276
int
14277
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14278
0
                    const char *filename) {
14279
0
    int ret = 0;
14280
0
    xmlParserCtxtPtr ctxt;
14281
14282
0
    ctxt = xmlCreateFileParserCtxt(filename);
14283
0
    if (ctxt == NULL) return -1;
14284
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14285
0
  xmlFree(ctxt->sax);
14286
0
    ctxt->sax = sax;
14287
0
    xmlDetectSAX2(ctxt);
14288
14289
0
    if (user_data != NULL)
14290
0
  ctxt->userData = user_data;
14291
14292
0
    xmlParseDocument(ctxt);
14293
14294
0
    if (ctxt->wellFormed)
14295
0
  ret = 0;
14296
0
    else {
14297
0
        if (ctxt->errNo != 0)
14298
0
      ret = ctxt->errNo;
14299
0
  else
14300
0
      ret = -1;
14301
0
    }
14302
0
    if (sax != NULL)
14303
0
  ctxt->sax = NULL;
14304
0
    if (ctxt->myDoc != NULL) {
14305
0
        xmlFreeDoc(ctxt->myDoc);
14306
0
  ctxt->myDoc = NULL;
14307
0
    }
14308
0
    xmlFreeParserCtxt(ctxt);
14309
14310
0
    return ret;
14311
0
}
14312
#endif /* LIBXML_SAX1_ENABLED */
14313
14314
/************************************************************************
14315
 *                  *
14316
 *    Front ends when parsing from memory     *
14317
 *                  *
14318
 ************************************************************************/
14319
14320
/**
14321
 * xmlCreateMemoryParserCtxt:
14322
 * @buffer:  a pointer to a char array
14323
 * @size:  the size of the array
14324
 *
14325
 * Create a parser context for an XML in-memory document.
14326
 *
14327
 * Returns the new parser context or NULL
14328
 */
14329
xmlParserCtxtPtr
14330
15.2k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14331
15.2k
    xmlParserCtxtPtr ctxt;
14332
15.2k
    xmlParserInputPtr input;
14333
15.2k
    xmlParserInputBufferPtr buf;
14334
14335
15.2k
    if (buffer == NULL)
14336
0
  return(NULL);
14337
15.2k
    if (size <= 0)
14338
0
  return(NULL);
14339
14340
15.2k
    ctxt = xmlNewParserCtxt();
14341
15.2k
    if (ctxt == NULL)
14342
0
  return(NULL);
14343
14344
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14345
15.2k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14346
15.2k
    if (buf == NULL) {
14347
0
  xmlFreeParserCtxt(ctxt);
14348
0
  return(NULL);
14349
0
    }
14350
14351
15.2k
    input = xmlNewInputStream(ctxt);
14352
15.2k
    if (input == NULL) {
14353
0
  xmlFreeParserInputBuffer(buf);
14354
0
  xmlFreeParserCtxt(ctxt);
14355
0
  return(NULL);
14356
0
    }
14357
14358
15.2k
    input->filename = NULL;
14359
15.2k
    input->buf = buf;
14360
15.2k
    xmlBufResetInput(input->buf->buffer, input);
14361
14362
15.2k
    inputPush(ctxt, input);
14363
15.2k
    return(ctxt);
14364
15.2k
}
14365
14366
#ifdef LIBXML_SAX1_ENABLED
14367
/**
14368
 * xmlSAXParseMemoryWithData:
14369
 * @sax:  the SAX handler block
14370
 * @buffer:  an pointer to a char array
14371
 * @size:  the size of the array
14372
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14373
 *             documents
14374
 * @data:  the userdata
14375
 *
14376
 * parse an XML in-memory block and use the given SAX function block
14377
 * to handle the parsing callback. If sax is NULL, fallback to the default
14378
 * DOM tree building routines.
14379
 *
14380
 * User data (void *) is stored within the parser context in the
14381
 * context's _private member, so it is available nearly everywhere in libxml
14382
 *
14383
 * Returns the resulting document tree
14384
 */
14385
14386
xmlDocPtr
14387
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14388
0
            int size, int recovery, void *data) {
14389
0
    xmlDocPtr ret;
14390
0
    xmlParserCtxtPtr ctxt;
14391
14392
0
    xmlInitParser();
14393
14394
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14395
0
    if (ctxt == NULL) return(NULL);
14396
0
    if (sax != NULL) {
14397
0
  if (ctxt->sax != NULL)
14398
0
      xmlFree(ctxt->sax);
14399
0
        ctxt->sax = sax;
14400
0
    }
14401
0
    xmlDetectSAX2(ctxt);
14402
0
    if (data!=NULL) {
14403
0
  ctxt->_private=data;
14404
0
    }
14405
14406
0
    ctxt->recovery = recovery;
14407
14408
0
    xmlParseDocument(ctxt);
14409
14410
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14411
0
    else {
14412
0
       ret = NULL;
14413
0
       xmlFreeDoc(ctxt->myDoc);
14414
0
       ctxt->myDoc = NULL;
14415
0
    }
14416
0
    if (sax != NULL)
14417
0
  ctxt->sax = NULL;
14418
0
    xmlFreeParserCtxt(ctxt);
14419
14420
0
    return(ret);
14421
0
}
14422
14423
/**
14424
 * xmlSAXParseMemory:
14425
 * @sax:  the SAX handler block
14426
 * @buffer:  an pointer to a char array
14427
 * @size:  the size of the array
14428
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14429
 *             documents
14430
 *
14431
 * parse an XML in-memory block and use the given SAX function block
14432
 * to handle the parsing callback. If sax is NULL, fallback to the default
14433
 * DOM tree building routines.
14434
 *
14435
 * Returns the resulting document tree
14436
 */
14437
xmlDocPtr
14438
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14439
0
            int size, int recovery) {
14440
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14441
0
}
14442
14443
/**
14444
 * xmlParseMemory:
14445
 * @buffer:  an pointer to a char array
14446
 * @size:  the size of the array
14447
 *
14448
 * parse an XML in-memory block and build a tree.
14449
 *
14450
 * Returns the resulting document tree
14451
 */
14452
14453
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14454
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14455
0
}
14456
14457
/**
14458
 * xmlRecoverMemory:
14459
 * @buffer:  an pointer to a char array
14460
 * @size:  the size of the array
14461
 *
14462
 * parse an XML in-memory block and build a tree.
14463
 * In the case the document is not Well Formed, an attempt to
14464
 * build a tree is tried anyway
14465
 *
14466
 * Returns the resulting document tree or NULL in case of error
14467
 */
14468
14469
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14470
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14471
0
}
14472
14473
/**
14474
 * xmlSAXUserParseMemory:
14475
 * @sax:  a SAX handler
14476
 * @user_data:  The user data returned on SAX callbacks
14477
 * @buffer:  an in-memory XML document input
14478
 * @size:  the length of the XML document in bytes
14479
 *
14480
 * A better SAX parsing routine.
14481
 * parse an XML in-memory buffer and call the given SAX handler routines.
14482
 *
14483
 * Returns 0 in case of success or a error number otherwise
14484
 */
14485
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14486
15.2k
        const char *buffer, int size) {
14487
15.2k
    int ret = 0;
14488
15.2k
    xmlParserCtxtPtr ctxt;
14489
14490
15.2k
    xmlInitParser();
14491
14492
15.2k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14493
15.2k
    if (ctxt == NULL) return -1;
14494
15.2k
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14495
15.2k
        xmlFree(ctxt->sax);
14496
15.2k
    ctxt->sax = sax;
14497
15.2k
    xmlDetectSAX2(ctxt);
14498
14499
15.2k
    if (user_data != NULL)
14500
15.2k
  ctxt->userData = user_data;
14501
14502
15.2k
    xmlParseDocument(ctxt);
14503
14504
15.2k
    if (ctxt->wellFormed)
14505
7
  ret = 0;
14506
15.2k
    else {
14507
15.2k
        if (ctxt->errNo != 0)
14508
15.2k
      ret = ctxt->errNo;
14509
0
  else
14510
0
      ret = -1;
14511
15.2k
    }
14512
15.2k
    if (sax != NULL)
14513
15.2k
        ctxt->sax = NULL;
14514
15.2k
    if (ctxt->myDoc != NULL) {
14515
624
        xmlFreeDoc(ctxt->myDoc);
14516
624
  ctxt->myDoc = NULL;
14517
624
    }
14518
15.2k
    xmlFreeParserCtxt(ctxt);
14519
14520
15.2k
    return ret;
14521
15.2k
}
14522
#endif /* LIBXML_SAX1_ENABLED */
14523
14524
/**
14525
 * xmlCreateDocParserCtxt:
14526
 * @cur:  a pointer to an array of xmlChar
14527
 *
14528
 * Creates a parser context for an XML in-memory document.
14529
 *
14530
 * Returns the new parser context or NULL
14531
 */
14532
xmlParserCtxtPtr
14533
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14534
0
    int len;
14535
14536
0
    if (cur == NULL)
14537
0
  return(NULL);
14538
0
    len = xmlStrlen(cur);
14539
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14540
0
}
14541
14542
#ifdef LIBXML_SAX1_ENABLED
14543
/**
14544
 * xmlSAXParseDoc:
14545
 * @sax:  the SAX handler block
14546
 * @cur:  a pointer to an array of xmlChar
14547
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14548
 *             documents
14549
 *
14550
 * parse an XML in-memory document and build a tree.
14551
 * It use the given SAX function block to handle the parsing callback.
14552
 * If sax is NULL, fallback to the default DOM tree building routines.
14553
 *
14554
 * Returns the resulting document tree
14555
 */
14556
14557
xmlDocPtr
14558
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14559
0
    xmlDocPtr ret;
14560
0
    xmlParserCtxtPtr ctxt;
14561
0
    xmlSAXHandlerPtr oldsax = NULL;
14562
14563
0
    if (cur == NULL) return(NULL);
14564
14565
14566
0
    ctxt = xmlCreateDocParserCtxt(cur);
14567
0
    if (ctxt == NULL) return(NULL);
14568
0
    if (sax != NULL) {
14569
0
        oldsax = ctxt->sax;
14570
0
        ctxt->sax = sax;
14571
0
        ctxt->userData = NULL;
14572
0
    }
14573
0
    xmlDetectSAX2(ctxt);
14574
14575
0
    xmlParseDocument(ctxt);
14576
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14577
0
    else {
14578
0
       ret = NULL;
14579
0
       xmlFreeDoc(ctxt->myDoc);
14580
0
       ctxt->myDoc = NULL;
14581
0
    }
14582
0
    if (sax != NULL)
14583
0
  ctxt->sax = oldsax;
14584
0
    xmlFreeParserCtxt(ctxt);
14585
14586
0
    return(ret);
14587
0
}
14588
14589
/**
14590
 * xmlParseDoc:
14591
 * @cur:  a pointer to an array of xmlChar
14592
 *
14593
 * parse an XML in-memory document and build a tree.
14594
 *
14595
 * Returns the resulting document tree
14596
 */
14597
14598
xmlDocPtr
14599
0
xmlParseDoc(const xmlChar *cur) {
14600
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14601
0
}
14602
#endif /* LIBXML_SAX1_ENABLED */
14603
14604
#ifdef LIBXML_LEGACY_ENABLED
14605
/************************************************************************
14606
 *                  *
14607
 *  Specific function to keep track of entities references    *
14608
 *  and used by the XSLT debugger         *
14609
 *                  *
14610
 ************************************************************************/
14611
14612
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14613
14614
/**
14615
 * xmlAddEntityReference:
14616
 * @ent : A valid entity
14617
 * @firstNode : A valid first node for children of entity
14618
 * @lastNode : A valid last node of children entity
14619
 *
14620
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14621
 */
14622
static void
14623
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14624
                      xmlNodePtr lastNode)
14625
{
14626
    if (xmlEntityRefFunc != NULL) {
14627
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14628
    }
14629
}
14630
14631
14632
/**
14633
 * xmlSetEntityReferenceFunc:
14634
 * @func: A valid function
14635
 *
14636
 * Set the function to call call back when a xml reference has been made
14637
 */
14638
void
14639
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14640
{
14641
    xmlEntityRefFunc = func;
14642
}
14643
#endif /* LIBXML_LEGACY_ENABLED */
14644
14645
/************************************************************************
14646
 *                  *
14647
 *        Miscellaneous       *
14648
 *                  *
14649
 ************************************************************************/
14650
14651
#ifdef LIBXML_XPATH_ENABLED
14652
#include <libxml/xpath.h>
14653
#endif
14654
14655
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14656
static int xmlParserInitialized = 0;
14657
14658
/**
14659
 * xmlInitParser:
14660
 *
14661
 * Initialization function for the XML parser.
14662
 * This is not reentrant. Call once before processing in case of
14663
 * use in multithreaded programs.
14664
 */
14665
14666
void
14667
45.7k
xmlInitParser(void) {
14668
45.7k
    if (xmlParserInitialized != 0)
14669
45.7k
  return;
14670
14671
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14672
    if (xmlFree == free)
14673
        atexit(xmlCleanupParser);
14674
#endif
14675
14676
1
#ifdef LIBXML_THREAD_ENABLED
14677
1
    __xmlGlobalInitMutexLock();
14678
1
    if (xmlParserInitialized == 0) {
14679
1
#endif
14680
1
  xmlInitThreads();
14681
1
  xmlInitGlobals();
14682
1
  if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14683
1
      (xmlGenericError == NULL))
14684
1
      initGenericErrorDefaultFunc(NULL);
14685
1
  xmlInitMemory();
14686
1
        xmlInitializeDict();
14687
1
  xmlInitCharEncodingHandlers();
14688
1
  xmlDefaultSAXHandlerInit();
14689
1
  xmlRegisterDefaultInputCallbacks();
14690
1
#ifdef LIBXML_OUTPUT_ENABLED
14691
1
  xmlRegisterDefaultOutputCallbacks();
14692
1
#endif /* LIBXML_OUTPUT_ENABLED */
14693
1
#ifdef LIBXML_HTML_ENABLED
14694
1
  htmlInitAutoClose();
14695
1
  htmlDefaultSAXHandlerInit();
14696
1
#endif
14697
1
#ifdef LIBXML_XPATH_ENABLED
14698
1
  xmlXPathInit();
14699
1
#endif
14700
1
  xmlParserInitialized = 1;
14701
1
#ifdef LIBXML_THREAD_ENABLED
14702
1
    }
14703
1
    __xmlGlobalInitMutexUnlock();
14704
1
#endif
14705
1
}
14706
14707
/**
14708
 * xmlCleanupParser:
14709
 *
14710
 * This function name is somewhat misleading. It does not clean up
14711
 * parser state, it cleans up memory allocated by the library itself.
14712
 * It is a cleanup function for the XML library. It tries to reclaim all
14713
 * related global memory allocated for the library processing.
14714
 * It doesn't deallocate any document related memory. One should
14715
 * call xmlCleanupParser() only when the process has finished using
14716
 * the library and all XML/HTML documents built with it.
14717
 * See also xmlInitParser() which has the opposite function of preparing
14718
 * the library for operations.
14719
 *
14720
 * WARNING: if your application is multithreaded or has plugin support
14721
 *          calling this may crash the application if another thread or
14722
 *          a plugin is still using libxml2. It's sometimes very hard to
14723
 *          guess if libxml2 is in use in the application, some libraries
14724
 *          or plugins may use it without notice. In case of doubt abstain
14725
 *          from calling this function or do it just before calling exit()
14726
 *          to avoid leak reports from valgrind !
14727
 */
14728
14729
void
14730
0
xmlCleanupParser(void) {
14731
0
    if (!xmlParserInitialized)
14732
0
  return;
14733
14734
0
    xmlCleanupCharEncodingHandlers();
14735
0
#ifdef LIBXML_CATALOG_ENABLED
14736
0
    xmlCatalogCleanup();
14737
0
#endif
14738
0
    xmlDictCleanup();
14739
0
    xmlCleanupInputCallbacks();
14740
0
#ifdef LIBXML_OUTPUT_ENABLED
14741
0
    xmlCleanupOutputCallbacks();
14742
0
#endif
14743
0
#ifdef LIBXML_SCHEMAS_ENABLED
14744
0
    xmlSchemaCleanupTypes();
14745
0
    xmlRelaxNGCleanupTypes();
14746
0
#endif
14747
0
    xmlCleanupGlobals();
14748
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14749
0
    xmlCleanupMemory();
14750
0
    xmlParserInitialized = 0;
14751
0
}
14752
14753
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function tagged with ATTRIBUTE_DESTRUCTOR that runs the library
 * cleanup, but only while the standard free() is still the
 * deallocator in use.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14766
14767
/************************************************************************
14768
 *                  *
14769
 *  New set (2.6.0) of simpler and more flexible APIs   *
14770
 *                  *
14771
 ************************************************************************/
14772
14773
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL "dict" means the string is always freed.
 * NULL strings are ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * a single statement and can be used safely in an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14784
14785
/**
14786
 * xmlCtxtReset:
14787
 * @ctxt: an XML parser context
14788
 *
14789
 * Reset a parser context
14790
 */
14791
void
14792
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14793
0
{
14794
0
    xmlParserInputPtr input;
14795
0
    xmlDictPtr dict;
14796
14797
0
    if (ctxt == NULL)
14798
0
        return;
14799
14800
0
    dict = ctxt->dict;
14801
14802
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14803
0
        xmlFreeInputStream(input);
14804
0
    }
14805
0
    ctxt->inputNr = 0;
14806
0
    ctxt->input = NULL;
14807
14808
0
    ctxt->spaceNr = 0;
14809
0
    if (ctxt->spaceTab != NULL) {
14810
0
  ctxt->spaceTab[0] = -1;
14811
0
  ctxt->space = &ctxt->spaceTab[0];
14812
0
    } else {
14813
0
        ctxt->space = NULL;
14814
0
    }
14815
14816
14817
0
    ctxt->nodeNr = 0;
14818
0
    ctxt->node = NULL;
14819
14820
0
    ctxt->nameNr = 0;
14821
0
    ctxt->name = NULL;
14822
14823
0
    DICT_FREE(ctxt->version);
14824
0
    ctxt->version = NULL;
14825
0
    DICT_FREE(ctxt->encoding);
14826
0
    ctxt->encoding = NULL;
14827
0
    DICT_FREE(ctxt->directory);
14828
0
    ctxt->directory = NULL;
14829
0
    DICT_FREE(ctxt->extSubURI);
14830
0
    ctxt->extSubURI = NULL;
14831
0
    DICT_FREE(ctxt->extSubSystem);
14832
0
    ctxt->extSubSystem = NULL;
14833
0
    if (ctxt->myDoc != NULL)
14834
0
        xmlFreeDoc(ctxt->myDoc);
14835
0
    ctxt->myDoc = NULL;
14836
14837
0
    ctxt->standalone = -1;
14838
0
    ctxt->hasExternalSubset = 0;
14839
0
    ctxt->hasPErefs = 0;
14840
0
    ctxt->html = 0;
14841
0
    ctxt->external = 0;
14842
0
    ctxt->instate = XML_PARSER_START;
14843
0
    ctxt->token = 0;
14844
14845
0
    ctxt->wellFormed = 1;
14846
0
    ctxt->nsWellFormed = 1;
14847
0
    ctxt->disableSAX = 0;
14848
0
    ctxt->valid = 1;
14849
#if 0
14850
    ctxt->vctxt.userData = ctxt;
14851
    ctxt->vctxt.error = xmlParserValidityError;
14852
    ctxt->vctxt.warning = xmlParserValidityWarning;
14853
#endif
14854
0
    ctxt->record_info = 0;
14855
0
    ctxt->checkIndex = 0;
14856
0
    ctxt->inSubset = 0;
14857
0
    ctxt->errNo = XML_ERR_OK;
14858
0
    ctxt->depth = 0;
14859
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14860
0
    ctxt->catalogs = NULL;
14861
0
    ctxt->nbentities = 0;
14862
0
    ctxt->sizeentities = 0;
14863
0
    ctxt->sizeentcopy = 0;
14864
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14865
14866
0
    if (ctxt->attsDefault != NULL) {
14867
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14868
0
        ctxt->attsDefault = NULL;
14869
0
    }
14870
0
    if (ctxt->attsSpecial != NULL) {
14871
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14872
0
        ctxt->attsSpecial = NULL;
14873
0
    }
14874
14875
0
#ifdef LIBXML_CATALOG_ENABLED
14876
0
    if (ctxt->catalogs != NULL)
14877
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14878
0
#endif
14879
0
    if (ctxt->lastError.code != XML_ERR_OK)
14880
0
        xmlResetError(&ctxt->lastError);
14881
0
}
14882
14883
/**
14884
 * xmlCtxtResetPush:
14885
 * @ctxt: an XML parser context
14886
 * @chunk:  a pointer to an array of chars
14887
 * @size:  number of chars in the array
14888
 * @filename:  an optional file name or URI
14889
 * @encoding:  the document encoding, or NULL
14890
 *
14891
 * Reset a push parser context
14892
 *
14893
 * Returns 0 in case of success and 1 in case of error
14894
 */
14895
int
14896
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14897
                 int size, const char *filename, const char *encoding)
14898
0
{
14899
0
    xmlParserInputPtr inputStream;
14900
0
    xmlParserInputBufferPtr buf;
14901
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14902
14903
0
    if (ctxt == NULL)
14904
0
        return(1);
14905
14906
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14907
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14908
14909
0
    buf = xmlAllocParserInputBuffer(enc);
14910
0
    if (buf == NULL)
14911
0
        return(1);
14912
14913
0
    if (ctxt == NULL) {
14914
0
        xmlFreeParserInputBuffer(buf);
14915
0
        return(1);
14916
0
    }
14917
14918
0
    xmlCtxtReset(ctxt);
14919
14920
0
    if (filename == NULL) {
14921
0
        ctxt->directory = NULL;
14922
0
    } else {
14923
0
        ctxt->directory = xmlParserGetDirectory(filename);
14924
0
    }
14925
14926
0
    inputStream = xmlNewInputStream(ctxt);
14927
0
    if (inputStream == NULL) {
14928
0
        xmlFreeParserInputBuffer(buf);
14929
0
        return(1);
14930
0
    }
14931
14932
0
    if (filename == NULL)
14933
0
        inputStream->filename = NULL;
14934
0
    else
14935
0
        inputStream->filename = (char *)
14936
0
            xmlCanonicPath((const xmlChar *) filename);
14937
0
    inputStream->buf = buf;
14938
0
    xmlBufResetInput(buf->buffer, inputStream);
14939
14940
0
    inputPush(ctxt, inputStream);
14941
14942
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14943
0
        (ctxt->input->buf != NULL)) {
14944
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14945
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14946
14947
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14948
14949
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14950
#ifdef DEBUG_PUSH
14951
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14952
#endif
14953
0
    }
14954
14955
0
    if (encoding != NULL) {
14956
0
        xmlCharEncodingHandlerPtr hdlr;
14957
14958
0
        if (ctxt->encoding != NULL)
14959
0
      xmlFree((xmlChar *) ctxt->encoding);
14960
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14961
14962
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14963
0
        if (hdlr != NULL) {
14964
0
            xmlSwitchToEncoding(ctxt, hdlr);
14965
0
  } else {
14966
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14967
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14968
0
        }
14969
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14970
0
        xmlSwitchEncoding(ctxt, enc);
14971
0
    }
14972
14973
0
    return(0);
14974
0
}
14975
14976
14977
/**
14978
 * xmlCtxtUseOptionsInternal:
14979
 * @ctxt: an XML parser context
14980
 * @options:  a combination of xmlParserOption
14981
 * @encoding:  the user provided encoding to use
14982
 *
14983
 * Applies the options to the parser context
14984
 *
14985
 * Returns 0 in case of success, the set of unknown or unimplemented options
14986
 *         in case of error.
14987
 */
14988
static int
14989
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14990
0
{
14991
0
    if (ctxt == NULL)
14992
0
        return(-1);
14993
0
    if (encoding != NULL) {
14994
0
        if (ctxt->encoding != NULL)
14995
0
      xmlFree((xmlChar *) ctxt->encoding);
14996
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14997
0
    }
14998
0
    if (options & XML_PARSE_RECOVER) {
14999
0
        ctxt->recovery = 1;
15000
0
        options -= XML_PARSE_RECOVER;
15001
0
  ctxt->options |= XML_PARSE_RECOVER;
15002
0
    } else
15003
0
        ctxt->recovery = 0;
15004
0
    if (options & XML_PARSE_DTDLOAD) {
15005
0
        ctxt->loadsubset = XML_DETECT_IDS;
15006
0
        options -= XML_PARSE_DTDLOAD;
15007
0
  ctxt->options |= XML_PARSE_DTDLOAD;
15008
0
    } else
15009
0
        ctxt->loadsubset = 0;
15010
0
    if (options & XML_PARSE_DTDATTR) {
15011
0
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15012
0
        options -= XML_PARSE_DTDATTR;
15013
0
  ctxt->options |= XML_PARSE_DTDATTR;
15014
0
    }
15015
0
    if (options & XML_PARSE_NOENT) {
15016
0
        ctxt->replaceEntities = 1;
15017
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15018
0
        options -= XML_PARSE_NOENT;
15019
0
  ctxt->options |= XML_PARSE_NOENT;
15020
0
    } else
15021
0
        ctxt->replaceEntities = 0;
15022
0
    if (options & XML_PARSE_PEDANTIC) {
15023
0
        ctxt->pedantic = 1;
15024
0
        options -= XML_PARSE_PEDANTIC;
15025
0
  ctxt->options |= XML_PARSE_PEDANTIC;
15026
0
    } else
15027
0
        ctxt->pedantic = 0;
15028
0
    if (options & XML_PARSE_NOBLANKS) {
15029
0
        ctxt->keepBlanks = 0;
15030
0
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15031
0
        options -= XML_PARSE_NOBLANKS;
15032
0
  ctxt->options |= XML_PARSE_NOBLANKS;
15033
0
    } else
15034
0
        ctxt->keepBlanks = 1;
15035
0
    if (options & XML_PARSE_DTDVALID) {
15036
0
        ctxt->validate = 1;
15037
0
        if (options & XML_PARSE_NOWARNING)
15038
0
            ctxt->vctxt.warning = NULL;
15039
0
        if (options & XML_PARSE_NOERROR)
15040
0
            ctxt->vctxt.error = NULL;
15041
0
        options -= XML_PARSE_DTDVALID;
15042
0
  ctxt->options |= XML_PARSE_DTDVALID;
15043
0
    } else
15044
0
        ctxt->validate = 0;
15045
0
    if (options & XML_PARSE_NOWARNING) {
15046
0
        ctxt->sax->warning = NULL;
15047
0
        options -= XML_PARSE_NOWARNING;
15048
0
    }
15049
0
    if (options & XML_PARSE_NOERROR) {
15050
0
        ctxt->sax->error = NULL;
15051
0
        ctxt->sax->fatalError = NULL;
15052
0
        options -= XML_PARSE_NOERROR;
15053
0
    }
15054
0
#ifdef LIBXML_SAX1_ENABLED
15055
0
    if (options & XML_PARSE_SAX1) {
15056
0
        ctxt->sax->startElement = xmlSAX2StartElement;
15057
0
        ctxt->sax->endElement = xmlSAX2EndElement;
15058
0
        ctxt->sax->startElementNs = NULL;
15059
0
        ctxt->sax->endElementNs = NULL;
15060
0
        ctxt->sax->initialized = 1;
15061
0
        options -= XML_PARSE_SAX1;
15062
0
  ctxt->options |= XML_PARSE_SAX1;
15063
0
    }
15064
0
#endif /* LIBXML_SAX1_ENABLED */
15065
0
    if (options & XML_PARSE_NODICT) {
15066
0
        ctxt->dictNames = 0;
15067
0
        options -= XML_PARSE_NODICT;
15068
0
  ctxt->options |= XML_PARSE_NODICT;
15069
0
    } else {
15070
0
        ctxt->dictNames = 1;
15071
0
    }
15072
0
    if (options & XML_PARSE_NOCDATA) {
15073
0
        ctxt->sax->cdataBlock = NULL;
15074
0
        options -= XML_PARSE_NOCDATA;
15075
0
  ctxt->options |= XML_PARSE_NOCDATA;
15076
0
    }
15077
0
    if (options & XML_PARSE_NSCLEAN) {
15078
0
  ctxt->options |= XML_PARSE_NSCLEAN;
15079
0
        options -= XML_PARSE_NSCLEAN;
15080
0
    }
15081
0
    if (options & XML_PARSE_NONET) {
15082
0
  ctxt->options |= XML_PARSE_NONET;
15083
0
        options -= XML_PARSE_NONET;
15084
0
    }
15085
0
    if (options & XML_PARSE_COMPACT) {
15086
0
  ctxt->options |= XML_PARSE_COMPACT;
15087
0
        options -= XML_PARSE_COMPACT;
15088
0
    }
15089
0
    if (options & XML_PARSE_OLD10) {
15090
0
  ctxt->options |= XML_PARSE_OLD10;
15091
0
        options -= XML_PARSE_OLD10;
15092
0
    }
15093
0
    if (options & XML_PARSE_NOBASEFIX) {
15094
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
15095
0
        options -= XML_PARSE_NOBASEFIX;
15096
0
    }
15097
0
    if (options & XML_PARSE_HUGE) {
15098
0
  ctxt->options |= XML_PARSE_HUGE;
15099
0
        options -= XML_PARSE_HUGE;
15100
0
        if (ctxt->dict != NULL)
15101
0
            xmlDictSetLimit(ctxt->dict, 0);
15102
0
    }
15103
0
    if (options & XML_PARSE_OLDSAX) {
15104
0
  ctxt->options |= XML_PARSE_OLDSAX;
15105
0
        options -= XML_PARSE_OLDSAX;
15106
0
    }
15107
0
    if (options & XML_PARSE_IGNORE_ENC) {
15108
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15109
0
        options -= XML_PARSE_IGNORE_ENC;
15110
0
    }
15111
0
    if (options & XML_PARSE_BIG_LINES) {
15112
0
  ctxt->options |= XML_PARSE_BIG_LINES;
15113
0
        options -= XML_PARSE_BIG_LINES;
15114
0
    }
15115
0
    ctxt->linenumbers = 1;
15116
0
    return (options);
15117
0
}
15118
15119
/**
15120
 * xmlCtxtUseOptions:
15121
 * @ctxt: an XML parser context
15122
 * @options:  a combination of xmlParserOption
15123
 *
15124
 * Applies the options to the parser context
15125
 *
15126
 * Returns 0 in case of success, the set of unknown or unimplemented options
15127
 *         in case of error.
15128
 */
15129
int
15130
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15131
0
{
15132
0
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15133
0
}
15134
15135
/**
15136
 * xmlDoRead:
15137
 * @ctxt:  an XML parser context
15138
 * @URL:  the base URL to use for the document
15139
 * @encoding:  the document encoding, or NULL
15140
 * @options:  a combination of xmlParserOption
15141
 * @reuse:  keep the context for reuse
15142
 *
15143
 * Common front-end for the xmlRead functions
15144
 *
15145
 * Returns the resulting document tree or NULL
15146
 */
15147
static xmlDocPtr
15148
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15149
          int options, int reuse)
15150
0
{
15151
0
    xmlDocPtr ret;
15152
15153
0
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15154
0
    if (encoding != NULL) {
15155
0
        xmlCharEncodingHandlerPtr hdlr;
15156
15157
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15158
0
  if (hdlr != NULL)
15159
0
      xmlSwitchToEncoding(ctxt, hdlr);
15160
0
    }
15161
0
    if ((URL != NULL) && (ctxt->input != NULL) &&
15162
0
        (ctxt->input->filename == NULL))
15163
0
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15164
0
    xmlParseDocument(ctxt);
15165
0
    if ((ctxt->wellFormed) || ctxt->recovery)
15166
0
        ret = ctxt->myDoc;
15167
0
    else {
15168
0
        ret = NULL;
15169
0
  if (ctxt->myDoc != NULL) {
15170
0
      xmlFreeDoc(ctxt->myDoc);
15171
0
  }
15172
0
    }
15173
0
    ctxt->myDoc = NULL;
15174
0
    if (!reuse) {
15175
0
  xmlFreeParserCtxt(ctxt);
15176
0
    }
15177
15178
0
    return (ret);
15179
0
}
15180
15181
/**
15182
 * xmlReadDoc:
15183
 * @cur:  a pointer to a zero terminated string
15184
 * @URL:  the base URL to use for the document
15185
 * @encoding:  the document encoding, or NULL
15186
 * @options:  a combination of xmlParserOption
15187
 *
15188
 * parse an XML in-memory document and build a tree.
15189
 *
15190
 * Returns the resulting document tree
15191
 */
15192
xmlDocPtr
15193
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15194
0
{
15195
0
    xmlParserCtxtPtr ctxt;
15196
15197
0
    if (cur == NULL)
15198
0
        return (NULL);
15199
0
    xmlInitParser();
15200
15201
0
    ctxt = xmlCreateDocParserCtxt(cur);
15202
0
    if (ctxt == NULL)
15203
0
        return (NULL);
15204
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15205
0
}
15206
15207
/**
15208
 * xmlReadFile:
15209
 * @filename:  a file or URL
15210
 * @encoding:  the document encoding, or NULL
15211
 * @options:  a combination of xmlParserOption
15212
 *
15213
 * parse an XML file from the filesystem or the network.
15214
 *
15215
 * Returns the resulting document tree
15216
 */
15217
xmlDocPtr
15218
xmlReadFile(const char *filename, const char *encoding, int options)
15219
0
{
15220
0
    xmlParserCtxtPtr ctxt;
15221
15222
0
    xmlInitParser();
15223
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15224
0
    if (ctxt == NULL)
15225
0
        return (NULL);
15226
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15227
0
}
15228
15229
/**
15230
 * xmlReadMemory:
15231
 * @buffer:  a pointer to a char array
15232
 * @size:  the size of the array
15233
 * @URL:  the base URL to use for the document
15234
 * @encoding:  the document encoding, or NULL
15235
 * @options:  a combination of xmlParserOption
15236
 *
15237
 * parse an XML in-memory document and build a tree.
15238
 *
15239
 * Returns the resulting document tree
15240
 */
15241
xmlDocPtr
15242
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15243
0
{
15244
0
    xmlParserCtxtPtr ctxt;
15245
15246
0
    xmlInitParser();
15247
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15248
0
    if (ctxt == NULL)
15249
0
        return (NULL);
15250
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15251
0
}
15252
15253
/**
15254
 * xmlReadFd:
15255
 * @fd:  an open file descriptor
15256
 * @URL:  the base URL to use for the document
15257
 * @encoding:  the document encoding, or NULL
15258
 * @options:  a combination of xmlParserOption
15259
 *
15260
 * parse an XML from a file descriptor and build a tree.
15261
 * NOTE that the file descriptor will not be closed when the
15262
 *      reader is closed or reset.
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15268
0
{
15269
0
    xmlParserCtxtPtr ctxt;
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (fd < 0)
15274
0
        return (NULL);
15275
0
    xmlInitParser();
15276
15277
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15278
0
    if (input == NULL)
15279
0
        return (NULL);
15280
0
    input->closecallback = NULL;
15281
0
    ctxt = xmlNewParserCtxt();
15282
0
    if (ctxt == NULL) {
15283
0
        xmlFreeParserInputBuffer(input);
15284
0
        return (NULL);
15285
0
    }
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
        xmlFreeParserInputBuffer(input);
15289
0
  xmlFreeParserCtxt(ctxt);
15290
0
        return (NULL);
15291
0
    }
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15294
0
}
15295
15296
/**
15297
 * xmlReadIO:
15298
 * @ioread:  an I/O read function
15299
 * @ioclose:  an I/O close function
15300
 * @ioctx:  an I/O handler
15301
 * @URL:  the base URL to use for the document
15302
 * @encoding:  the document encoding, or NULL
15303
 * @options:  a combination of xmlParserOption
15304
 *
15305
 * parse an XML document from I/O functions and source and build a tree.
15306
 *
15307
 * Returns the resulting document tree
15308
 */
15309
xmlDocPtr
15310
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15311
          void *ioctx, const char *URL, const char *encoding, int options)
15312
0
{
15313
0
    xmlParserCtxtPtr ctxt;
15314
0
    xmlParserInputBufferPtr input;
15315
0
    xmlParserInputPtr stream;
15316
15317
0
    if (ioread == NULL)
15318
0
        return (NULL);
15319
0
    xmlInitParser();
15320
15321
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15322
0
                                         XML_CHAR_ENCODING_NONE);
15323
0
    if (input == NULL) {
15324
0
        if (ioclose != NULL)
15325
0
            ioclose(ioctx);
15326
0
        return (NULL);
15327
0
    }
15328
0
    ctxt = xmlNewParserCtxt();
15329
0
    if (ctxt == NULL) {
15330
0
        xmlFreeParserInputBuffer(input);
15331
0
        return (NULL);
15332
0
    }
15333
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15334
0
    if (stream == NULL) {
15335
0
        xmlFreeParserInputBuffer(input);
15336
0
  xmlFreeParserCtxt(ctxt);
15337
0
        return (NULL);
15338
0
    }
15339
0
    inputPush(ctxt, stream);
15340
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15341
0
}
15342
15343
/**
15344
 * xmlCtxtReadDoc:
15345
 * @ctxt:  an XML parser context
15346
 * @cur:  a pointer to a zero terminated string
15347
 * @URL:  the base URL to use for the document
15348
 * @encoding:  the document encoding, or NULL
15349
 * @options:  a combination of xmlParserOption
15350
 *
15351
 * parse an XML in-memory document and build a tree.
15352
 * This reuses the existing @ctxt parser context
15353
 *
15354
 * Returns the resulting document tree
15355
 */
15356
xmlDocPtr
15357
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15358
               const char *URL, const char *encoding, int options)
15359
0
{
15360
0
    xmlParserInputPtr stream;
15361
15362
0
    if (cur == NULL)
15363
0
        return (NULL);
15364
0
    if (ctxt == NULL)
15365
0
        return (NULL);
15366
0
    xmlInitParser();
15367
15368
0
    xmlCtxtReset(ctxt);
15369
15370
0
    stream = xmlNewStringInputStream(ctxt, cur);
15371
0
    if (stream == NULL) {
15372
0
        return (NULL);
15373
0
    }
15374
0
    inputPush(ctxt, stream);
15375
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15376
0
}
15377
15378
/**
15379
 * xmlCtxtReadFile:
15380
 * @ctxt:  an XML parser context
15381
 * @filename:  a file or URL
15382
 * @encoding:  the document encoding, or NULL
15383
 * @options:  a combination of xmlParserOption
15384
 *
15385
 * parse an XML file from the filesystem or the network.
15386
 * This reuses the existing @ctxt parser context
15387
 *
15388
 * Returns the resulting document tree
15389
 */
15390
xmlDocPtr
15391
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15392
                const char *encoding, int options)
15393
0
{
15394
0
    xmlParserInputPtr stream;
15395
15396
0
    if (filename == NULL)
15397
0
        return (NULL);
15398
0
    if (ctxt == NULL)
15399
0
        return (NULL);
15400
0
    xmlInitParser();
15401
15402
0
    xmlCtxtReset(ctxt);
15403
15404
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15405
0
    if (stream == NULL) {
15406
0
        return (NULL);
15407
0
    }
15408
0
    inputPush(ctxt, stream);
15409
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15410
0
}
15411
15412
/**
15413
 * xmlCtxtReadMemory:
15414
 * @ctxt:  an XML parser context
15415
 * @buffer:  a pointer to a char array
15416
 * @size:  the size of the array
15417
 * @URL:  the base URL to use for the document
15418
 * @encoding:  the document encoding, or NULL
15419
 * @options:  a combination of xmlParserOption
15420
 *
15421
 * parse an XML in-memory document and build a tree.
15422
 * This reuses the existing @ctxt parser context
15423
 *
15424
 * Returns the resulting document tree
15425
 */
15426
xmlDocPtr
15427
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15428
                  const char *URL, const char *encoding, int options)
15429
0
{
15430
0
    xmlParserInputBufferPtr input;
15431
0
    xmlParserInputPtr stream;
15432
15433
0
    if (ctxt == NULL)
15434
0
        return (NULL);
15435
0
    if (buffer == NULL)
15436
0
        return (NULL);
15437
0
    xmlInitParser();
15438
15439
0
    xmlCtxtReset(ctxt);
15440
15441
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15442
0
    if (input == NULL) {
15443
0
  return(NULL);
15444
0
    }
15445
15446
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15447
0
    if (stream == NULL) {
15448
0
  xmlFreeParserInputBuffer(input);
15449
0
  return(NULL);
15450
0
    }
15451
15452
0
    inputPush(ctxt, stream);
15453
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15454
0
}
15455
15456
/**
15457
 * xmlCtxtReadFd:
15458
 * @ctxt:  an XML parser context
15459
 * @fd:  an open file descriptor
15460
 * @URL:  the base URL to use for the document
15461
 * @encoding:  the document encoding, or NULL
15462
 * @options:  a combination of xmlParserOption
15463
 *
15464
 * parse an XML from a file descriptor and build a tree.
15465
 * This reuses the existing @ctxt parser context
15466
 * NOTE that the file descriptor will not be closed when the
15467
 *      reader is closed or reset.
15468
 *
15469
 * Returns the resulting document tree
15470
 */
15471
xmlDocPtr
15472
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15473
              const char *URL, const char *encoding, int options)
15474
0
{
15475
0
    xmlParserInputBufferPtr input;
15476
0
    xmlParserInputPtr stream;
15477
15478
0
    if (fd < 0)
15479
0
        return (NULL);
15480
0
    if (ctxt == NULL)
15481
0
        return (NULL);
15482
0
    xmlInitParser();
15483
15484
0
    xmlCtxtReset(ctxt);
15485
15486
15487
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15488
0
    if (input == NULL)
15489
0
        return (NULL);
15490
0
    input->closecallback = NULL;
15491
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15492
0
    if (stream == NULL) {
15493
0
        xmlFreeParserInputBuffer(input);
15494
0
        return (NULL);
15495
0
    }
15496
0
    inputPush(ctxt, stream);
15497
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15498
0
}
15499
15500
/**
15501
 * xmlCtxtReadIO:
15502
 * @ctxt:  an XML parser context
15503
 * @ioread:  an I/O read function
15504
 * @ioclose:  an I/O close function
15505
 * @ioctx:  an I/O handler
15506
 * @URL:  the base URL to use for the document
15507
 * @encoding:  the document encoding, or NULL
15508
 * @options:  a combination of xmlParserOption
15509
 *
15510
 * parse an XML document from I/O functions and source and build a tree.
15511
 * This reuses the existing @ctxt parser context
15512
 *
15513
 * Returns the resulting document tree
15514
 */
15515
xmlDocPtr
15516
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15517
              xmlInputCloseCallback ioclose, void *ioctx,
15518
        const char *URL,
15519
              const char *encoding, int options)
15520
0
{
15521
0
    xmlParserInputBufferPtr input;
15522
0
    xmlParserInputPtr stream;
15523
15524
0
    if (ioread == NULL)
15525
0
        return (NULL);
15526
0
    if (ctxt == NULL)
15527
0
        return (NULL);
15528
0
    xmlInitParser();
15529
15530
0
    xmlCtxtReset(ctxt);
15531
15532
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15533
0
                                         XML_CHAR_ENCODING_NONE);
15534
0
    if (input == NULL) {
15535
0
        if (ioclose != NULL)
15536
0
            ioclose(ioctx);
15537
0
        return (NULL);
15538
0
    }
15539
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15540
0
    if (stream == NULL) {
15541
0
        xmlFreeParserInputBuffer(input);
15542
0
        return (NULL);
15543
0
    }
15544
0
    inputPush(ctxt, stream);
15545
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15546
0
}
15547