Coverage Report

Created: 2023-06-07 06:05

/src/libxml2-2.10.3/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/valid.h>
61
#include <libxml/entities.h>
62
#include <libxml/xmlerror.h>
63
#include <libxml/encoding.h>
64
#include <libxml/xmlIO.h>
65
#include <libxml/uri.h>
66
#ifdef LIBXML_CATALOG_ENABLED
67
#include <libxml/catalog.h>
68
#endif
69
#ifdef LIBXML_SCHEMAS_ENABLED
70
#include <libxml/xmlschemastypes.h>
71
#include <libxml/relaxng.h>
72
#endif
73
74
#include "buf.h"
75
#include "enc.h"
76
77
struct _xmlStartTag {
78
    const xmlChar *prefix;
79
    const xmlChar *URI;
80
    int line;
81
    int nsNr;
82
};
83
84
static void
85
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
86
87
static xmlParserCtxtPtr
88
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
89
                    const xmlChar *base, xmlParserCtxtPtr pctx);
90
91
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
92
93
static int
94
xmlParseElementStart(xmlParserCtxtPtr ctxt);
95
96
static void
97
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
98
99
/************************************************************************
100
 *                  *
101
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
102
 *                  *
103
 ************************************************************************/
104
105
968k
#define XML_MAX_HUGE_LENGTH 1000000000
106
107
0
#define XML_PARSER_BIG_ENTITY 1000
108
#define XML_PARSER_LOT_ENTITY 5000
109
110
/*
111
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
112
 *    replacement over the size in byte of the input indicates that you have
113
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
114
 *    replacement per byte of input.
115
 */
116
0
#define XML_PARSER_NON_LINEAR 10
117
118
/*
119
 * xmlParserEntityCheck
120
 *
121
 * Function to check non-linear entity expansion behaviour
122
 * This is here to detect and stop exponential (non-linear) entity expansion
123
 * This is not a limitation of the parser but a safety
124
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
125
 * parser option.
126
 */
127
static int
128
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
129
                     xmlEntityPtr ent, size_t replacement)
130
448
{
131
448
    size_t consumed = 0;
132
448
    int i;
133
134
448
    if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
135
448
        return (0);
136
0
    if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
137
0
        return (1);
138
139
    /*
140
     * This may look absurd but is needed to detect
141
     * entities problems
142
     */
143
0
    if ((ent != NULL) && (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
144
0
  (ent->content != NULL) && (ent->checked == 0) &&
145
0
  (ctxt->errNo != XML_ERR_ENTITY_LOOP)) {
146
0
  unsigned long oldnbent = ctxt->nbentities, diff;
147
0
  xmlChar *rep;
148
149
0
  ent->checked = 1;
150
151
0
        ++ctxt->depth;
152
0
  rep = xmlStringDecodeEntities(ctxt, ent->content,
153
0
          XML_SUBSTITUTE_REF, 0, 0, 0);
154
0
        --ctxt->depth;
155
0
  if ((rep == NULL) || (ctxt->errNo == XML_ERR_ENTITY_LOOP)) {
156
0
      ent->content[0] = 0;
157
0
  }
158
159
0
        diff = ctxt->nbentities - oldnbent + 1;
160
0
        if (diff > INT_MAX / 2)
161
0
            diff = INT_MAX / 2;
162
0
  ent->checked = diff * 2;
163
0
  if (rep != NULL) {
164
0
      if (xmlStrchr(rep, '<'))
165
0
    ent->checked |= 1;
166
0
      xmlFree(rep);
167
0
      rep = NULL;
168
0
  }
169
0
    }
170
171
    /*
172
     * Prevent entity exponential check, not just replacement while
173
     * parsing the DTD
174
     * The check is potentially costly so do that only once in a thousand
175
     */
176
0
    if ((ctxt->instate == XML_PARSER_DTD) && (ctxt->nbentities > 10000) &&
177
0
        (ctxt->nbentities % 1024 == 0)) {
178
0
  for (i = 0;i < ctxt->inputNr;i++) {
179
0
      consumed += ctxt->inputTab[i]->consumed +
180
0
                 (ctxt->inputTab[i]->cur - ctxt->inputTab[i]->base);
181
0
  }
182
0
  if (ctxt->nbentities > consumed * XML_PARSER_NON_LINEAR) {
183
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
184
0
      ctxt->instate = XML_PARSER_EOF;
185
0
      return (1);
186
0
  }
187
0
  consumed = 0;
188
0
    }
189
190
191
192
0
    if (replacement != 0) {
193
0
  if (replacement < XML_MAX_TEXT_LENGTH)
194
0
      return(0);
195
196
        /*
197
   * If the volume of entity copy reaches 10 times the
198
   * amount of parsed data and over the large text threshold
199
   * then that's very likely to be an abuse.
200
   */
201
0
        if (ctxt->input != NULL) {
202
0
      consumed = ctxt->input->consumed +
203
0
                 (ctxt->input->cur - ctxt->input->base);
204
0
  }
205
0
        consumed += ctxt->sizeentities;
206
207
0
        if (replacement < XML_PARSER_NON_LINEAR * consumed)
208
0
      return(0);
209
0
    } else if (size != 0) {
210
        /*
211
         * Do the check based on the replacement size of the entity
212
         */
213
0
        if (size < XML_PARSER_BIG_ENTITY)
214
0
      return(0);
215
216
        /*
217
         * A limit on the amount of text data reasonably used
218
         */
219
0
        if (ctxt->input != NULL) {
220
0
            consumed = ctxt->input->consumed +
221
0
                (ctxt->input->cur - ctxt->input->base);
222
0
        }
223
0
        consumed += ctxt->sizeentities;
224
225
0
        if ((size < XML_PARSER_NON_LINEAR * consumed) &&
226
0
      (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
227
0
            return (0);
228
0
    } else if (ent != NULL) {
229
        /*
230
         * use the number of parsed entities in the replacement
231
         */
232
0
        size = ent->checked / 2;
233
234
        /*
235
         * The amount of data parsed counting entities size only once
236
         */
237
0
        if (ctxt->input != NULL) {
238
0
            consumed = ctxt->input->consumed +
239
0
                (ctxt->input->cur - ctxt->input->base);
240
0
        }
241
0
        consumed += ctxt->sizeentities;
242
243
        /*
244
         * Check the density of entities for the amount of data
245
   * knowing an entity reference will take at least 3 bytes
246
         */
247
0
        if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
248
0
            return (0);
249
0
    } else {
250
        /*
251
         * strange we got no data for checking
252
         */
253
0
  if (((ctxt->lastError.code != XML_ERR_UNDECLARED_ENTITY) &&
254
0
       (ctxt->lastError.code != XML_WAR_UNDECLARED_ENTITY)) ||
255
0
      (ctxt->nbentities <= 10000))
256
0
      return (0);
257
0
    }
258
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
259
0
    return (1);
260
0
}
261
262
/**
263
 * xmlParserMaxDepth:
264
 *
265
 * arbitrary depth limit for the XML documents that we allow to
266
 * process. This is not a limitation of the parser but a safety
267
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
268
 * parser option.
269
 */
270
unsigned int xmlParserMaxDepth = 256;
271
272
273
274
#define SAX2 1
275
8.65M
#define XML_PARSER_BIG_BUFFER_SIZE 300
276
14.8M
#define XML_PARSER_BUFFER_SIZE 100
277
8.81k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
278
279
/**
280
 * XML_PARSER_CHUNK_SIZE
281
 *
282
 * When calling GROW that's the minimal amount of data
283
 * the parser expected to have received. It is not a hard
284
 * limit but an optimization when reading strings like Names
285
 * It is not strictly needed as long as the input's available characters
286
 * are followed by 0, which should be provided by the I/O level
287
 */
288
219M
#define XML_PARSER_CHUNK_SIZE 100
289
290
/*
291
 * List of XML prefixed PI allowed by W3C specs
292
 */
293
294
static const char* const xmlW3CPIs[] = {
295
    "xml-stylesheet",
296
    "xml-model",
297
    NULL
298
};
299
300
301
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
302
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
303
                                              const xmlChar **str);
304
305
static xmlParserErrors
306
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
307
                xmlSAXHandlerPtr sax,
308
          void *user_data, int depth, const xmlChar *URL,
309
          const xmlChar *ID, xmlNodePtr *list);
310
311
static int
312
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
313
                          const char *encoding);
314
#ifdef LIBXML_LEGACY_ENABLED
315
static void
316
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
317
                      xmlNodePtr lastNode);
318
#endif /* LIBXML_LEGACY_ENABLED */
319
320
static xmlParserErrors
321
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
322
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
323
324
static int
325
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
326
327
/************************************************************************
328
 *                  *
329
 *    Some factorized error routines        *
330
 *                  *
331
 ************************************************************************/
332
333
/**
334
 * xmlErrAttributeDup:
335
 * @ctxt:  an XML parser context
336
 * @prefix:  the attribute prefix
337
 * @localname:  the attribute localname
338
 *
339
 * Handle a redefinition of attribute error
340
 */
341
static void
342
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
343
                   const xmlChar * localname)
344
180k
{
345
180k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
346
180k
        (ctxt->instate == XML_PARSER_EOF))
347
0
  return;
348
180k
    if (ctxt != NULL)
349
180k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
350
351
180k
    if (prefix == NULL)
352
171k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
353
171k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
354
171k
                        (const char *) localname, NULL, NULL, 0, 0,
355
171k
                        "Attribute %s redefined\n", localname);
356
8.34k
    else
357
8.34k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
358
8.34k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
359
8.34k
                        (const char *) prefix, (const char *) localname,
360
8.34k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
361
8.34k
                        localname);
362
180k
    if (ctxt != NULL) {
363
180k
  ctxt->wellFormed = 0;
364
180k
  if (ctxt->recovery == 0)
365
180k
      ctxt->disableSAX = 1;
366
180k
    }
367
180k
}
368
369
/**
370
 * xmlFatalErr:
371
 * @ctxt:  an XML parser context
372
 * @error:  the error number
373
 * @extra:  extra information string
374
 *
375
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
376
 */
377
static void
378
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
379
306k
{
380
306k
    const char *errmsg;
381
382
306k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
383
306k
        (ctxt->instate == XML_PARSER_EOF))
384
993
  return;
385
305k
    switch (error) {
386
9.59k
        case XML_ERR_INVALID_HEX_CHARREF:
387
9.59k
            errmsg = "CharRef: invalid hexadecimal value";
388
9.59k
            break;
389
209k
        case XML_ERR_INVALID_DEC_CHARREF:
390
209k
            errmsg = "CharRef: invalid decimal value";
391
209k
            break;
392
0
        case XML_ERR_INVALID_CHARREF:
393
0
            errmsg = "CharRef: invalid value";
394
0
            break;
395
1.77k
        case XML_ERR_INTERNAL_ERROR:
396
1.77k
            errmsg = "internal error";
397
1.77k
            break;
398
0
        case XML_ERR_PEREF_AT_EOF:
399
0
            errmsg = "PEReference at end of document";
400
0
            break;
401
0
        case XML_ERR_PEREF_IN_PROLOG:
402
0
            errmsg = "PEReference in prolog";
403
0
            break;
404
0
        case XML_ERR_PEREF_IN_EPILOG:
405
0
            errmsg = "PEReference in epilog";
406
0
            break;
407
0
        case XML_ERR_PEREF_NO_NAME:
408
0
            errmsg = "PEReference: no name";
409
0
            break;
410
720
        case XML_ERR_PEREF_SEMICOL_MISSING:
411
720
            errmsg = "PEReference: expecting ';'";
412
720
            break;
413
0
        case XML_ERR_ENTITY_LOOP:
414
0
            errmsg = "Detected an entity reference loop";
415
0
            break;
416
0
        case XML_ERR_ENTITY_NOT_STARTED:
417
0
            errmsg = "EntityValue: \" or ' expected";
418
0
            break;
419
225
        case XML_ERR_ENTITY_PE_INTERNAL:
420
225
            errmsg = "PEReferences forbidden in internal subset";
421
225
            break;
422
60
        case XML_ERR_ENTITY_NOT_FINISHED:
423
60
            errmsg = "EntityValue: \" or ' expected";
424
60
            break;
425
942
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
426
942
            errmsg = "AttValue: \" or ' expected";
427
942
            break;
428
833
        case XML_ERR_LT_IN_ATTRIBUTE:
429
833
            errmsg = "Unescaped '<' not allowed in attributes values";
430
833
            break;
431
2.29k
        case XML_ERR_LITERAL_NOT_STARTED:
432
2.29k
            errmsg = "SystemLiteral \" or ' expected";
433
2.29k
            break;
434
964
        case XML_ERR_LITERAL_NOT_FINISHED:
435
964
            errmsg = "Unfinished System or Public ID \" or ' expected";
436
964
            break;
437
224
        case XML_ERR_MISPLACED_CDATA_END:
438
224
            errmsg = "Sequence ']]>' not allowed in content";
439
224
            break;
440
1.70k
        case XML_ERR_URI_REQUIRED:
441
1.70k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
442
1.70k
            break;
443
586
        case XML_ERR_PUBID_REQUIRED:
444
586
            errmsg = "PUBLIC, the Public Identifier is missing";
445
586
            break;
446
1.52k
        case XML_ERR_HYPHEN_IN_COMMENT:
447
1.52k
            errmsg = "Comment must not contain '--' (double-hyphen)";
448
1.52k
            break;
449
1.21k
        case XML_ERR_PI_NOT_STARTED:
450
1.21k
            errmsg = "xmlParsePI : no target name";
451
1.21k
            break;
452
720
        case XML_ERR_RESERVED_XML_NAME:
453
720
            errmsg = "Invalid PI name";
454
720
            break;
455
469
        case XML_ERR_NOTATION_NOT_STARTED:
456
469
            errmsg = "NOTATION: Name expected here";
457
469
            break;
458
1.16k
        case XML_ERR_NOTATION_NOT_FINISHED:
459
1.16k
            errmsg = "'>' required to close NOTATION declaration";
460
1.16k
            break;
461
1.78k
        case XML_ERR_VALUE_REQUIRED:
462
1.78k
            errmsg = "Entity value required";
463
1.78k
            break;
464
457
        case XML_ERR_URI_FRAGMENT:
465
457
            errmsg = "Fragment not allowed";
466
457
            break;
467
458
        case XML_ERR_ATTLIST_NOT_STARTED:
468
458
            errmsg = "'(' required to start ATTLIST enumeration";
469
458
            break;
470
243
        case XML_ERR_NMTOKEN_REQUIRED:
471
243
            errmsg = "NmToken expected in ATTLIST enumeration";
472
243
            break;
473
570
        case XML_ERR_ATTLIST_NOT_FINISHED:
474
570
            errmsg = "')' required to finish ATTLIST enumeration";
475
570
            break;
476
625
        case XML_ERR_MIXED_NOT_STARTED:
477
625
            errmsg = "MixedContentDecl : '|' or ')*' expected";
478
625
            break;
479
0
        case XML_ERR_PCDATA_REQUIRED:
480
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
481
0
            break;
482
801
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
483
801
            errmsg = "ContentDecl : Name or '(' expected";
484
801
            break;
485
1.80k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
486
1.80k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
487
1.80k
            break;
488
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
489
0
            errmsg =
490
0
                "PEReference: forbidden within markup decl in internal subset";
491
0
            break;
492
4.51k
        case XML_ERR_GT_REQUIRED:
493
4.51k
            errmsg = "expected '>'";
494
4.51k
            break;
495
0
        case XML_ERR_CONDSEC_INVALID:
496
0
            errmsg = "XML conditional section '[' expected";
497
0
            break;
498
0
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
499
0
            errmsg = "Content error in the external subset";
500
0
            break;
501
0
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
502
0
            errmsg =
503
0
                "conditional section INCLUDE or IGNORE keyword expected";
504
0
            break;
505
0
        case XML_ERR_CONDSEC_NOT_FINISHED:
506
0
            errmsg = "XML conditional section not closed";
507
0
            break;
508
0
        case XML_ERR_XMLDECL_NOT_STARTED:
509
0
            errmsg = "Text declaration '<?xml' required";
510
0
            break;
511
425
        case XML_ERR_XMLDECL_NOT_FINISHED:
512
425
            errmsg = "parsing XML declaration: '?>' expected";
513
425
            break;
514
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
515
0
            errmsg = "external parsed entities cannot be standalone";
516
0
            break;
517
52.4k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
518
52.4k
            errmsg = "EntityRef: expecting ';'";
519
52.4k
            break;
520
2.14k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
521
2.14k
            errmsg = "DOCTYPE improperly terminated";
522
2.14k
            break;
523
0
        case XML_ERR_LTSLASH_REQUIRED:
524
0
            errmsg = "EndTag: '</' not found";
525
0
            break;
526
19
        case XML_ERR_EQUAL_REQUIRED:
527
19
            errmsg = "expected '='";
528
19
            break;
529
197
        case XML_ERR_STRING_NOT_CLOSED:
530
197
            errmsg = "String not closed expecting \" or '";
531
197
            break;
532
19
        case XML_ERR_STRING_NOT_STARTED:
533
19
            errmsg = "String not started expecting ' or \"";
534
19
            break;
535
12
        case XML_ERR_ENCODING_NAME:
536
12
            errmsg = "Invalid XML encoding name";
537
12
            break;
538
18
        case XML_ERR_STANDALONE_VALUE:
539
18
            errmsg = "standalone accepts only 'yes' or 'no'";
540
18
            break;
541
300
        case XML_ERR_DOCUMENT_EMPTY:
542
300
            errmsg = "Document is empty";
543
300
            break;
544
2.62k
        case XML_ERR_DOCUMENT_END:
545
2.62k
            errmsg = "Extra content at the end of the document";
546
2.62k
            break;
547
0
        case XML_ERR_NOT_WELL_BALANCED:
548
0
            errmsg = "chunk is not well balanced";
549
0
            break;
550
0
        case XML_ERR_EXTRA_CONTENT:
551
0
            errmsg = "extra content at the end of well balanced chunk";
552
0
            break;
553
788
        case XML_ERR_VERSION_MISSING:
554
788
            errmsg = "Malformed declaration expecting version";
555
788
            break;
556
33
        case XML_ERR_NAME_TOO_LONG:
557
33
            errmsg = "Name too long";
558
33
            break;
559
#if 0
560
        case:
561
            errmsg = "";
562
            break;
563
#endif
564
0
        default:
565
0
            errmsg = "Unregistered error message";
566
305k
    }
567
305k
    if (ctxt != NULL)
568
305k
  ctxt->errNo = error;
569
305k
    if (info == NULL) {
570
303k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
571
303k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
572
303k
                        errmsg);
573
303k
    } else {
574
1.81k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
575
1.81k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
576
1.81k
                        errmsg, info);
577
1.81k
    }
578
305k
    if (ctxt != NULL) {
579
305k
  ctxt->wellFormed = 0;
580
305k
  if (ctxt->recovery == 0)
581
305k
      ctxt->disableSAX = 1;
582
305k
    }
583
305k
}
584
585
/**
586
 * xmlFatalErrMsg:
587
 * @ctxt:  an XML parser context
588
 * @error:  the error number
589
 * @msg:  the error message
590
 *
591
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
592
 */
593
static void LIBXML_ATTR_FORMAT(3,0)
594
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
595
               const char *msg)
596
232k
{
597
232k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
598
232k
        (ctxt->instate == XML_PARSER_EOF))
599
0
  return;
600
232k
    if (ctxt != NULL)
601
232k
  ctxt->errNo = error;
602
232k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
603
232k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
604
232k
    if (ctxt != NULL) {
605
232k
  ctxt->wellFormed = 0;
606
232k
  if (ctxt->recovery == 0)
607
232k
      ctxt->disableSAX = 1;
608
232k
    }
609
232k
}
610
611
/**
612
 * xmlWarningMsg:
613
 * @ctxt:  an XML parser context
614
 * @error:  the error number
615
 * @msg:  the error message
616
 * @str1:  extra data
617
 * @str2:  extra data
618
 *
619
 * Handle a warning.
620
 */
621
static void LIBXML_ATTR_FORMAT(3,0)
622
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
623
              const char *msg, const xmlChar *str1, const xmlChar *str2)
624
4.41k
{
625
4.41k
    xmlStructuredErrorFunc schannel = NULL;
626
627
4.41k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
628
4.41k
        (ctxt->instate == XML_PARSER_EOF))
629
0
  return;
630
4.41k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
631
4.41k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
632
4.41k
        schannel = ctxt->sax->serror;
633
4.41k
    if (ctxt != NULL) {
634
4.41k
        __xmlRaiseError(schannel,
635
4.41k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
636
4.41k
                    ctxt->userData,
637
4.41k
                    ctxt, NULL, XML_FROM_PARSER, error,
638
4.41k
                    XML_ERR_WARNING, NULL, 0,
639
4.41k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
640
4.41k
        msg, (const char *) str1, (const char *) str2);
641
4.41k
    } else {
642
0
        __xmlRaiseError(schannel, NULL, NULL,
643
0
                    ctxt, NULL, XML_FROM_PARSER, error,
644
0
                    XML_ERR_WARNING, NULL, 0,
645
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
646
0
        msg, (const char *) str1, (const char *) str2);
647
0
    }
648
4.41k
}
649
650
/**
651
 * xmlValidityError:
652
 * @ctxt:  an XML parser context
653
 * @error:  the error number
654
 * @msg:  the error message
655
 * @str1:  extra data
656
 *
657
 * Handle a validity error.
658
 */
659
static void LIBXML_ATTR_FORMAT(3,0)
660
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
661
              const char *msg, const xmlChar *str1, const xmlChar *str2)
662
2.08k
{
663
2.08k
    xmlStructuredErrorFunc schannel = NULL;
664
665
2.08k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
666
2.08k
        (ctxt->instate == XML_PARSER_EOF))
667
0
  return;
668
2.08k
    if (ctxt != NULL) {
669
2.08k
  ctxt->errNo = error;
670
2.08k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
671
2.08k
      schannel = ctxt->sax->serror;
672
2.08k
    }
673
2.08k
    if (ctxt != NULL) {
674
2.08k
        __xmlRaiseError(schannel,
675
2.08k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
676
2.08k
                    ctxt, NULL, XML_FROM_DTD, error,
677
2.08k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
678
2.08k
        (const char *) str2, NULL, 0, 0,
679
2.08k
        msg, (const char *) str1, (const char *) str2);
680
2.08k
  ctxt->valid = 0;
681
2.08k
    } else {
682
0
        __xmlRaiseError(schannel, NULL, NULL,
683
0
                    ctxt, NULL, XML_FROM_DTD, error,
684
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
685
0
        (const char *) str2, NULL, 0, 0,
686
0
        msg, (const char *) str1, (const char *) str2);
687
0
    }
688
2.08k
}
689
690
/**
691
 * xmlFatalErrMsgInt:
692
 * @ctxt:  an XML parser context
693
 * @error:  the error number
694
 * @msg:  the error message
695
 * @val:  an integer value
696
 *
697
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
698
 */
699
static void LIBXML_ATTR_FORMAT(3,0)
700
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
701
                  const char *msg, int val)
702
247k
{
703
247k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
704
247k
        (ctxt->instate == XML_PARSER_EOF))
705
0
  return;
706
247k
    if (ctxt != NULL)
707
247k
  ctxt->errNo = error;
708
247k
    __xmlRaiseError(NULL, NULL, NULL,
709
247k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
710
247k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
711
247k
    if (ctxt != NULL) {
712
247k
  ctxt->wellFormed = 0;
713
247k
  if (ctxt->recovery == 0)
714
247k
      ctxt->disableSAX = 1;
715
247k
    }
716
247k
}
717
718
/**
719
 * xmlFatalErrMsgStrIntStr:
720
 * @ctxt:  an XML parser context
721
 * @error:  the error number
722
 * @msg:  the error message
723
 * @str1:  a string info
724
 * @val:  an integer value
725
 * @str2:  a string info
726
 *
727
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
728
 */
729
static void LIBXML_ATTR_FORMAT(3,0)
730
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
731
                  const char *msg, const xmlChar *str1, int val,
732
      const xmlChar *str2)
733
163
{
734
163
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
735
163
        (ctxt->instate == XML_PARSER_EOF))
736
0
  return;
737
163
    if (ctxt != NULL)
738
163
  ctxt->errNo = error;
739
163
    __xmlRaiseError(NULL, NULL, NULL,
740
163
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
741
163
                    NULL, 0, (const char *) str1, (const char *) str2,
742
163
        NULL, val, 0, msg, str1, val, str2);
743
163
    if (ctxt != NULL) {
744
163
  ctxt->wellFormed = 0;
745
163
  if (ctxt->recovery == 0)
746
163
      ctxt->disableSAX = 1;
747
163
    }
748
163
}
749
750
/**
751
 * xmlFatalErrMsgStr:
752
 * @ctxt:  an XML parser context
753
 * @error:  the error number
754
 * @msg:  the error message
755
 * @val:  a string value
756
 *
757
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
758
 */
759
static void LIBXML_ATTR_FORMAT(3,0)
760
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
761
                  const char *msg, const xmlChar * val)
762
28.4k
{
763
28.4k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
764
28.4k
        (ctxt->instate == XML_PARSER_EOF))
765
0
  return;
766
28.4k
    if (ctxt != NULL)
767
28.4k
  ctxt->errNo = error;
768
28.4k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
769
28.4k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
770
28.4k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
771
28.4k
                    val);
772
28.4k
    if (ctxt != NULL) {
773
28.4k
  ctxt->wellFormed = 0;
774
28.4k
  if (ctxt->recovery == 0)
775
28.4k
      ctxt->disableSAX = 1;
776
28.4k
    }
777
28.4k
}
778
779
/**
780
 * xmlErrMsgStr:
781
 * @ctxt:  an XML parser context
782
 * @error:  the error number
783
 * @msg:  the error message
784
 * @val:  a string value
785
 *
786
 * Handle a non fatal parser error
787
 */
788
static void LIBXML_ATTR_FORMAT(3,0)
789
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
790
                  const char *msg, const xmlChar * val)
791
1.03k
{
792
1.03k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
793
1.03k
        (ctxt->instate == XML_PARSER_EOF))
794
0
  return;
795
1.03k
    if (ctxt != NULL)
796
1.03k
  ctxt->errNo = error;
797
1.03k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
798
1.03k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
799
1.03k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
800
1.03k
                    val);
801
1.03k
}
802
803
/**
804
 * xmlNsErr:
805
 * @ctxt:  an XML parser context
806
 * @error:  the error number
807
 * @msg:  the message
808
 * @info1:  extra information string
809
 * @info2:  extra information string
810
 *
811
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
812
 */
813
static void LIBXML_ATTR_FORMAT(3,0)
814
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
815
         const char *msg,
816
         const xmlChar * info1, const xmlChar * info2,
817
         const xmlChar * info3)
818
69.0k
{
819
69.0k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
820
69.0k
        (ctxt->instate == XML_PARSER_EOF))
821
0
  return;
822
69.0k
    if (ctxt != NULL)
823
69.0k
  ctxt->errNo = error;
824
69.0k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
825
69.0k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
826
69.0k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
827
69.0k
                    info1, info2, info3);
828
69.0k
    if (ctxt != NULL)
829
69.0k
  ctxt->nsWellFormed = 0;
830
69.0k
}
831
832
/**
833
 * xmlNsWarn:
834
 * @ctxt:  an XML parser context
835
 * @error:  the error number
836
 * @msg:  the message
837
 * @info1:  extra information string
838
 * @info2:  extra information string
839
 *
840
 * Handle a namespace warning error
841
 */
842
static void LIBXML_ATTR_FORMAT(3,0)
843
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
844
         const char *msg,
845
         const xmlChar * info1, const xmlChar * info2,
846
         const xmlChar * info3)
847
10.0k
{
848
10.0k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
849
10.0k
        (ctxt->instate == XML_PARSER_EOF))
850
0
  return;
851
10.0k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
852
10.0k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
853
10.0k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
854
10.0k
                    info1, info2, info3);
855
10.0k
}
856
857
/************************************************************************
858
 *                  *
859
 *    Library wide options          *
860
 *                  *
861
 ************************************************************************/
862
863
/**
864
  * xmlHasFeature:
865
  * @feature: the feature to be examined
866
  *
867
  * Examines if the library has been compiled with a given feature.
868
  *
869
  * Returns a non-zero value if the feature exist, otherwise zero.
870
  * Returns zero (0) if the feature does not exist or an unknown
871
  * unknown feature is requested, non-zero otherwise.
872
  */
873
int
874
xmlHasFeature(xmlFeature feature)
875
0
{
876
0
    switch (feature) {
877
0
  case XML_WITH_THREAD:
878
0
#ifdef LIBXML_THREAD_ENABLED
879
0
      return(1);
880
#else
881
      return(0);
882
#endif
883
0
        case XML_WITH_TREE:
884
0
#ifdef LIBXML_TREE_ENABLED
885
0
            return(1);
886
#else
887
            return(0);
888
#endif
889
0
        case XML_WITH_OUTPUT:
890
0
#ifdef LIBXML_OUTPUT_ENABLED
891
0
            return(1);
892
#else
893
            return(0);
894
#endif
895
0
        case XML_WITH_PUSH:
896
0
#ifdef LIBXML_PUSH_ENABLED
897
0
            return(1);
898
#else
899
            return(0);
900
#endif
901
0
        case XML_WITH_READER:
902
0
#ifdef LIBXML_READER_ENABLED
903
0
            return(1);
904
#else
905
            return(0);
906
#endif
907
0
        case XML_WITH_PATTERN:
908
0
#ifdef LIBXML_PATTERN_ENABLED
909
0
            return(1);
910
#else
911
            return(0);
912
#endif
913
0
        case XML_WITH_WRITER:
914
0
#ifdef LIBXML_WRITER_ENABLED
915
0
            return(1);
916
#else
917
            return(0);
918
#endif
919
0
        case XML_WITH_SAX1:
920
0
#ifdef LIBXML_SAX1_ENABLED
921
0
            return(1);
922
#else
923
            return(0);
924
#endif
925
0
        case XML_WITH_FTP:
926
#ifdef LIBXML_FTP_ENABLED
927
            return(1);
928
#else
929
0
            return(0);
930
0
#endif
931
0
        case XML_WITH_HTTP:
932
0
#ifdef LIBXML_HTTP_ENABLED
933
0
            return(1);
934
#else
935
            return(0);
936
#endif
937
0
        case XML_WITH_VALID:
938
0
#ifdef LIBXML_VALID_ENABLED
939
0
            return(1);
940
#else
941
            return(0);
942
#endif
943
0
        case XML_WITH_HTML:
944
0
#ifdef LIBXML_HTML_ENABLED
945
0
            return(1);
946
#else
947
            return(0);
948
#endif
949
0
        case XML_WITH_LEGACY:
950
#ifdef LIBXML_LEGACY_ENABLED
951
            return(1);
952
#else
953
0
            return(0);
954
0
#endif
955
0
        case XML_WITH_C14N:
956
0
#ifdef LIBXML_C14N_ENABLED
957
0
            return(1);
958
#else
959
            return(0);
960
#endif
961
0
        case XML_WITH_CATALOG:
962
0
#ifdef LIBXML_CATALOG_ENABLED
963
0
            return(1);
964
#else
965
            return(0);
966
#endif
967
0
        case XML_WITH_XPATH:
968
0
#ifdef LIBXML_XPATH_ENABLED
969
0
            return(1);
970
#else
971
            return(0);
972
#endif
973
0
        case XML_WITH_XPTR:
974
0
#ifdef LIBXML_XPTR_ENABLED
975
0
            return(1);
976
#else
977
            return(0);
978
#endif
979
0
        case XML_WITH_XINCLUDE:
980
0
#ifdef LIBXML_XINCLUDE_ENABLED
981
0
            return(1);
982
#else
983
            return(0);
984
#endif
985
0
        case XML_WITH_ICONV:
986
0
#ifdef LIBXML_ICONV_ENABLED
987
0
            return(1);
988
#else
989
            return(0);
990
#endif
991
0
        case XML_WITH_ISO8859X:
992
0
#ifdef LIBXML_ISO8859X_ENABLED
993
0
            return(1);
994
#else
995
            return(0);
996
#endif
997
0
        case XML_WITH_UNICODE:
998
0
#ifdef LIBXML_UNICODE_ENABLED
999
0
            return(1);
1000
#else
1001
            return(0);
1002
#endif
1003
0
        case XML_WITH_REGEXP:
1004
0
#ifdef LIBXML_REGEXP_ENABLED
1005
0
            return(1);
1006
#else
1007
            return(0);
1008
#endif
1009
0
        case XML_WITH_AUTOMATA:
1010
0
#ifdef LIBXML_AUTOMATA_ENABLED
1011
0
            return(1);
1012
#else
1013
            return(0);
1014
#endif
1015
0
        case XML_WITH_EXPR:
1016
#ifdef LIBXML_EXPR_ENABLED
1017
            return(1);
1018
#else
1019
0
            return(0);
1020
0
#endif
1021
0
        case XML_WITH_SCHEMAS:
1022
0
#ifdef LIBXML_SCHEMAS_ENABLED
1023
0
            return(1);
1024
#else
1025
            return(0);
1026
#endif
1027
0
        case XML_WITH_SCHEMATRON:
1028
0
#ifdef LIBXML_SCHEMATRON_ENABLED
1029
0
            return(1);
1030
#else
1031
            return(0);
1032
#endif
1033
0
        case XML_WITH_MODULES:
1034
#ifdef LIBXML_MODULES_ENABLED
1035
            return(1);
1036
#else
1037
0
            return(0);
1038
0
#endif
1039
0
        case XML_WITH_DEBUG:
1040
0
#ifdef LIBXML_DEBUG_ENABLED
1041
0
            return(1);
1042
#else
1043
            return(0);
1044
#endif
1045
0
        case XML_WITH_DEBUG_MEM:
1046
#ifdef DEBUG_MEMORY_LOCATION
1047
            return(1);
1048
#else
1049
0
            return(0);
1050
0
#endif
1051
0
        case XML_WITH_DEBUG_RUN:
1052
#ifdef LIBXML_DEBUG_RUNTIME
1053
            return(1);
1054
#else
1055
0
            return(0);
1056
0
#endif
1057
0
        case XML_WITH_ZLIB:
1058
#ifdef LIBXML_ZLIB_ENABLED
1059
            return(1);
1060
#else
1061
0
            return(0);
1062
0
#endif
1063
0
        case XML_WITH_LZMA:
1064
#ifdef LIBXML_LZMA_ENABLED
1065
            return(1);
1066
#else
1067
0
            return(0);
1068
0
#endif
1069
0
        case XML_WITH_ICU:
1070
#ifdef LIBXML_ICU_ENABLED
1071
            return(1);
1072
#else
1073
0
            return(0);
1074
0
#endif
1075
0
        default:
1076
0
      break;
1077
0
     }
1078
0
     return(0);
1079
0
}
1080
1081
/************************************************************************
1082
 *                  *
1083
 *    SAX2 defaulted attributes handling      *
1084
 *                  *
1085
 ************************************************************************/
1086
1087
/**
1088
 * xmlDetectSAX2:
1089
 * @ctxt:  an XML parser context
1090
 *
1091
 * Do the SAX2 detection and specific initialization
1092
 */
1093
static void
1094
22.1k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1095
22.1k
    xmlSAXHandlerPtr sax;
1096
1097
    /* Avoid unused variable warning if features are disabled. */
1098
22.1k
    (void) sax;
1099
1100
22.1k
    if (ctxt == NULL) return;
1101
22.1k
    sax = ctxt->sax;
1102
22.1k
#ifdef LIBXML_SAX1_ENABLED
1103
22.1k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1104
22.1k
        ((sax->startElementNs != NULL) ||
1105
22.1k
         (sax->endElementNs != NULL) ||
1106
22.1k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1107
22.1k
        ctxt->sax2 = 1;
1108
#else
1109
    ctxt->sax2 = 1;
1110
#endif /* LIBXML_SAX1_ENABLED */
1111
1112
22.1k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1113
22.1k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1114
22.1k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1115
22.1k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1116
22.1k
    (ctxt->str_xml_ns == NULL)) {
1117
0
        xmlErrMemory(ctxt, NULL);
1118
0
    }
1119
22.1k
}
1120
1121
typedef struct _xmlDefAttrs xmlDefAttrs;
1122
typedef xmlDefAttrs *xmlDefAttrsPtr;
1123
struct _xmlDefAttrs {
1124
    int nbAttrs;  /* number of defaulted attributes on that element */
1125
    int maxAttrs;       /* the size of the array */
1126
#if __STDC_VERSION__ >= 199901L
1127
    /* Using a C99 flexible array member avoids UBSan errors. */
1128
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1129
#else
1130
    const xmlChar *values[5];
1131
#endif
1132
};
1133
1134
/**
1135
 * xmlAttrNormalizeSpace:
1136
 * @src: the source string
1137
 * @dst: the target string
1138
 *
1139
 * Normalize the space in non CDATA attribute values:
1140
 * If the attribute type is not CDATA, then the XML processor MUST further
1141
 * process the normalized attribute value by discarding any leading and
1142
 * trailing space (#x20) characters, and by replacing sequences of space
1143
 * (#x20) characters by a single space (#x20) character.
1144
 * Note that the size of dst need to be at least src, and if one doesn't need
1145
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1146
 * passing src as dst is just fine.
1147
 *
1148
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1149
 *         is needed.
1150
 */
1151
static xmlChar *
1152
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1153
21.1k
{
1154
21.1k
    if ((src == NULL) || (dst == NULL))
1155
0
        return(NULL);
1156
1157
21.3k
    while (*src == 0x20) src++;
1158
196M
    while (*src != 0) {
1159
196M
  if (*src == 0x20) {
1160
81.5k
      while (*src == 0x20) src++;
1161
19.4k
      if (*src != 0)
1162
18.5k
    *dst++ = 0x20;
1163
196M
  } else {
1164
196M
      *dst++ = *src++;
1165
196M
  }
1166
196M
    }
1167
21.1k
    *dst = 0;
1168
21.1k
    if (dst == src)
1169
20.0k
       return(NULL);
1170
1.00k
    return(dst);
1171
21.1k
}
1172
1173
/**
1174
 * xmlAttrNormalizeSpace2:
1175
 * @src: the source string
1176
 *
1177
 * Normalize the space in non CDATA attribute values, a slightly more complex
1178
 * front end to avoid allocation problems when running on attribute values
1179
 * coming from the input.
1180
 *
1181
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1182
 *         is needed.
1183
 */
1184
static const xmlChar *
1185
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1186
2.44k
{
1187
2.44k
    int i;
1188
2.44k
    int remove_head = 0;
1189
2.44k
    int need_realloc = 0;
1190
2.44k
    const xmlChar *cur;
1191
1192
2.44k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1193
0
        return(NULL);
1194
2.44k
    i = *len;
1195
2.44k
    if (i <= 0)
1196
512
        return(NULL);
1197
1198
1.93k
    cur = src;
1199
2.63k
    while (*cur == 0x20) {
1200
697
        cur++;
1201
697
  remove_head++;
1202
697
    }
1203
114k
    while (*cur != 0) {
1204
113k
  if (*cur == 0x20) {
1205
1.75k
      cur++;
1206
1.75k
      if ((*cur == 0x20) || (*cur == 0)) {
1207
647
          need_realloc = 1;
1208
647
    break;
1209
647
      }
1210
1.75k
  } else
1211
111k
      cur++;
1212
113k
    }
1213
1.93k
    if (need_realloc) {
1214
647
        xmlChar *ret;
1215
1216
647
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1217
647
  if (ret == NULL) {
1218
0
      xmlErrMemory(ctxt, NULL);
1219
0
      return(NULL);
1220
0
  }
1221
647
  xmlAttrNormalizeSpace(ret, ret);
1222
647
  *len = (int) strlen((const char *)ret);
1223
647
        return(ret);
1224
1.29k
    } else if (remove_head) {
1225
321
        *len -= remove_head;
1226
321
        memmove(src, src + remove_head, 1 + *len);
1227
321
  return(src);
1228
321
    }
1229
969
    return(NULL);
1230
1.93k
}
1231
1232
/**
1233
 * xmlAddDefAttrs:
1234
 * @ctxt:  an XML parser context
1235
 * @fullname:  the element fullname
1236
 * @fullattr:  the attribute fullname
1237
 * @value:  the attribute value
1238
 *
1239
 * Add a defaulted attribute for an element
1240
 */
1241
static void
1242
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1243
               const xmlChar *fullname,
1244
               const xmlChar *fullattr,
1245
20.5k
               const xmlChar *value) {
1246
20.5k
    xmlDefAttrsPtr defaults;
1247
20.5k
    int len;
1248
20.5k
    const xmlChar *name;
1249
20.5k
    const xmlChar *prefix;
1250
1251
    /*
1252
     * Allows to detect attribute redefinitions
1253
     */
1254
20.5k
    if (ctxt->attsSpecial != NULL) {
1255
19.3k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1256
3.53k
      return;
1257
19.3k
    }
1258
1259
17.0k
    if (ctxt->attsDefault == NULL) {
1260
1.18k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1261
1.18k
  if (ctxt->attsDefault == NULL)
1262
0
      goto mem_error;
1263
1.18k
    }
1264
1265
    /*
1266
     * split the element name into prefix:localname , the string found
1267
     * are within the DTD and then not associated to namespace names.
1268
     */
1269
17.0k
    name = xmlSplitQName3(fullname, &len);
1270
17.0k
    if (name == NULL) {
1271
7.80k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1272
7.80k
  prefix = NULL;
1273
9.23k
    } else {
1274
9.23k
        name = xmlDictLookup(ctxt->dict, name, -1);
1275
9.23k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1276
9.23k
    }
1277
1278
    /*
1279
     * make sure there is some storage
1280
     */
1281
17.0k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1282
17.0k
    if (defaults == NULL) {
1283
2.50k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1284
2.50k
                     (4 * 5) * sizeof(const xmlChar *));
1285
2.50k
  if (defaults == NULL)
1286
0
      goto mem_error;
1287
2.50k
  defaults->nbAttrs = 0;
1288
2.50k
  defaults->maxAttrs = 4;
1289
2.50k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1290
2.50k
                          defaults, NULL) < 0) {
1291
0
      xmlFree(defaults);
1292
0
      goto mem_error;
1293
0
  }
1294
14.5k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1295
2.03k
        xmlDefAttrsPtr temp;
1296
1297
2.03k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1298
2.03k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1299
2.03k
  if (temp == NULL)
1300
0
      goto mem_error;
1301
2.03k
  defaults = temp;
1302
2.03k
  defaults->maxAttrs *= 2;
1303
2.03k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1304
2.03k
                          defaults, NULL) < 0) {
1305
0
      xmlFree(defaults);
1306
0
      goto mem_error;
1307
0
  }
1308
2.03k
    }
1309
1310
    /*
1311
     * Split the element name into prefix:localname , the string found
1312
     * are within the DTD and hen not associated to namespace names.
1313
     */
1314
17.0k
    name = xmlSplitQName3(fullattr, &len);
1315
17.0k
    if (name == NULL) {
1316
7.30k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1317
7.30k
  prefix = NULL;
1318
9.73k
    } else {
1319
9.73k
        name = xmlDictLookup(ctxt->dict, name, -1);
1320
9.73k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1321
9.73k
    }
1322
1323
17.0k
    defaults->values[5 * defaults->nbAttrs] = name;
1324
17.0k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1325
    /* intern the string and precompute the end */
1326
17.0k
    len = xmlStrlen(value);
1327
17.0k
    value = xmlDictLookup(ctxt->dict, value, len);
1328
17.0k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1329
17.0k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1330
17.0k
    if (ctxt->external)
1331
0
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1332
17.0k
    else
1333
17.0k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1334
17.0k
    defaults->nbAttrs++;
1335
1336
17.0k
    return;
1337
1338
0
mem_error:
1339
0
    xmlErrMemory(ctxt, NULL);
1340
0
    return;
1341
17.0k
}
1342
1343
/**
1344
 * xmlAddSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 * @fullname:  the element fullname
1347
 * @fullattr:  the attribute fullname
1348
 * @type:  the attribute type
1349
 *
1350
 * Register this attribute type
1351
 */
1352
static void
1353
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1354
      const xmlChar *fullname,
1355
      const xmlChar *fullattr,
1356
      int type)
1357
21.0k
{
1358
21.0k
    if (ctxt->attsSpecial == NULL) {
1359
1.32k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1360
1.32k
  if (ctxt->attsSpecial == NULL)
1361
0
      goto mem_error;
1362
1.32k
    }
1363
1364
21.0k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1365
3.83k
        return;
1366
1367
17.2k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1368
17.2k
                     (void *) (ptrdiff_t) type);
1369
17.2k
    return;
1370
1371
0
mem_error:
1372
0
    xmlErrMemory(ctxt, NULL);
1373
0
    return;
1374
21.0k
}
1375
1376
/**
1377
 * xmlCleanSpecialAttrCallback:
1378
 *
1379
 * Removes CDATA attributes from the special attribute table
1380
 */
1381
static void
1382
xmlCleanSpecialAttrCallback(void *payload, void *data,
1383
                            const xmlChar *fullname, const xmlChar *fullattr,
1384
17.2k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1385
17.2k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1386
1387
17.2k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1388
874
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1389
874
    }
1390
17.2k
}
1391
1392
/**
1393
 * xmlCleanSpecialAttr:
1394
 * @ctxt:  an XML parser context
1395
 *
1396
 * Trim the list of attributes defined to remove all those of type
1397
 * CDATA as they are not special. This call should be done when finishing
1398
 * to parse the DTD and before starting to parse the document root.
1399
 */
1400
static void
1401
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1402
4.16k
{
1403
4.16k
    if (ctxt->attsSpecial == NULL)
1404
2.85k
        return;
1405
1406
1.31k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1407
1408
1.31k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1409
29
        xmlHashFree(ctxt->attsSpecial, NULL);
1410
29
        ctxt->attsSpecial = NULL;
1411
29
    }
1412
1.31k
    return;
1413
4.16k
}
1414
1415
/**
1416
 * xmlCheckLanguageID:
1417
 * @lang:  pointer to the string value
1418
 *
1419
 * Checks that the value conforms to the LanguageID production:
1420
 *
1421
 * NOTE: this is somewhat deprecated, those productions were removed from
1422
 *       the XML Second edition.
1423
 *
1424
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1425
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1426
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1427
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1428
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1429
 * [38] Subcode ::= ([a-z] | [A-Z])+
1430
 *
1431
 * The current REC reference the successors of RFC 1766, currently 5646
1432
 *
1433
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1434
 * langtag       = language
1435
 *                 ["-" script]
1436
 *                 ["-" region]
1437
 *                 *("-" variant)
1438
 *                 *("-" extension)
1439
 *                 ["-" privateuse]
1440
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1441
 *                 ["-" extlang]       ; sometimes followed by
1442
 *                                     ; extended language subtags
1443
 *               / 4ALPHA              ; or reserved for future use
1444
 *               / 5*8ALPHA            ; or registered language subtag
1445
 *
1446
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1447
 *                 *2("-" 3ALPHA)      ; permanently reserved
1448
 *
1449
 * script        = 4ALPHA              ; ISO 15924 code
1450
 *
1451
 * region        = 2ALPHA              ; ISO 3166-1 code
1452
 *               / 3DIGIT              ; UN M.49 code
1453
 *
1454
 * variant       = 5*8alphanum         ; registered variants
1455
 *               / (DIGIT 3alphanum)
1456
 *
1457
 * extension     = singleton 1*("-" (2*8alphanum))
1458
 *
1459
 *                                     ; Single alphanumerics
1460
 *                                     ; "x" reserved for private use
1461
 * singleton     = DIGIT               ; 0 - 9
1462
 *               / %x41-57             ; A - W
1463
 *               / %x59-5A             ; Y - Z
1464
 *               / %x61-77             ; a - w
1465
 *               / %x79-7A             ; y - z
1466
 *
1467
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1468
 * The parser below doesn't try to cope with extension or privateuse
1469
 * that could be added but that's not interoperable anyway
1470
 *
1471
 * Returns 1 if correct 0 otherwise
1472
 **/
1473
int
1474
xmlCheckLanguageID(const xmlChar * lang)
1475
0
{
1476
0
    const xmlChar *cur = lang, *nxt;
1477
1478
0
    if (cur == NULL)
1479
0
        return (0);
1480
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1481
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1482
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1483
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1484
        /*
1485
         * Still allow IANA code and user code which were coming
1486
         * from the previous version of the XML-1.0 specification
1487
         * it's deprecated but we should not fail
1488
         */
1489
0
        cur += 2;
1490
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1491
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1492
0
            cur++;
1493
0
        return(cur[0] == 0);
1494
0
    }
1495
0
    nxt = cur;
1496
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1497
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1498
0
           nxt++;
1499
0
    if (nxt - cur >= 4) {
1500
        /*
1501
         * Reserved
1502
         */
1503
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1504
0
            return(0);
1505
0
        return(1);
1506
0
    }
1507
0
    if (nxt - cur < 2)
1508
0
        return(0);
1509
    /* we got an ISO 639 code */
1510
0
    if (nxt[0] == 0)
1511
0
        return(1);
1512
0
    if (nxt[0] != '-')
1513
0
        return(0);
1514
1515
0
    nxt++;
1516
0
    cur = nxt;
1517
    /* now we can have extlang or script or region or variant */
1518
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1519
0
        goto region_m49;
1520
1521
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1522
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1523
0
           nxt++;
1524
0
    if (nxt - cur == 4)
1525
0
        goto script;
1526
0
    if (nxt - cur == 2)
1527
0
        goto region;
1528
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1529
0
        goto variant;
1530
0
    if (nxt - cur != 3)
1531
0
        return(0);
1532
    /* we parsed an extlang */
1533
0
    if (nxt[0] == 0)
1534
0
        return(1);
1535
0
    if (nxt[0] != '-')
1536
0
        return(0);
1537
1538
0
    nxt++;
1539
0
    cur = nxt;
1540
    /* now we can have script or region or variant */
1541
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1542
0
        goto region_m49;
1543
1544
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1545
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1546
0
           nxt++;
1547
0
    if (nxt - cur == 2)
1548
0
        goto region;
1549
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1550
0
        goto variant;
1551
0
    if (nxt - cur != 4)
1552
0
        return(0);
1553
    /* we parsed a script */
1554
0
script:
1555
0
    if (nxt[0] == 0)
1556
0
        return(1);
1557
0
    if (nxt[0] != '-')
1558
0
        return(0);
1559
1560
0
    nxt++;
1561
0
    cur = nxt;
1562
    /* now we can have region or variant */
1563
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1564
0
        goto region_m49;
1565
1566
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1567
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1568
0
           nxt++;
1569
1570
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1571
0
        goto variant;
1572
0
    if (nxt - cur != 2)
1573
0
        return(0);
1574
    /* we parsed a region */
1575
0
region:
1576
0
    if (nxt[0] == 0)
1577
0
        return(1);
1578
0
    if (nxt[0] != '-')
1579
0
        return(0);
1580
1581
0
    nxt++;
1582
0
    cur = nxt;
1583
    /* now we can just have a variant */
1584
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1585
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1586
0
           nxt++;
1587
1588
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1589
0
        return(0);
1590
1591
    /* we parsed a variant */
1592
0
variant:
1593
0
    if (nxt[0] == 0)
1594
0
        return(1);
1595
0
    if (nxt[0] != '-')
1596
0
        return(0);
1597
    /* extensions and private use subtags not checked */
1598
0
    return (1);
1599
1600
0
region_m49:
1601
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1602
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1603
0
        nxt += 3;
1604
0
        goto region;
1605
0
    }
1606
0
    return(0);
1607
0
}
1608
1609
/************************************************************************
1610
 *                  *
1611
 *    Parser stacks related functions and macros    *
1612
 *                  *
1613
 ************************************************************************/
1614
1615
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1616
                                            const xmlChar ** str);
1617
1618
#ifdef SAX2
1619
/**
1620
 * nsPush:
1621
 * @ctxt:  an XML parser context
1622
 * @prefix:  the namespace prefix or NULL
1623
 * @URL:  the namespace name
1624
 *
1625
 * Pushes a new parser namespace on top of the ns stack
1626
 *
1627
 * Returns -1 in case of error, -2 if the namespace should be discarded
1628
 *     and the index in the stack otherwise.
1629
 */
1630
static int
1631
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1632
18.4k
{
1633
18.4k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1634
18.4k
        int i;
1635
25.2k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1636
19.1k
      if (ctxt->nsTab[i] == prefix) {
1637
    /* in scope */
1638
12.3k
          if (ctxt->nsTab[i + 1] == URL)
1639
5.14k
        return(-2);
1640
    /* out of scope keep it */
1641
7.20k
    break;
1642
12.3k
      }
1643
19.1k
  }
1644
18.4k
    }
1645
13.3k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1646
3.05k
  ctxt->nsMax = 10;
1647
3.05k
  ctxt->nsNr = 0;
1648
3.05k
  ctxt->nsTab = (const xmlChar **)
1649
3.05k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1650
3.05k
  if (ctxt->nsTab == NULL) {
1651
0
      xmlErrMemory(ctxt, NULL);
1652
0
      ctxt->nsMax = 0;
1653
0
            return (-1);
1654
0
  }
1655
10.2k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1656
277
        const xmlChar ** tmp;
1657
277
        ctxt->nsMax *= 2;
1658
277
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1659
277
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1660
277
        if (tmp == NULL) {
1661
0
            xmlErrMemory(ctxt, NULL);
1662
0
      ctxt->nsMax /= 2;
1663
0
            return (-1);
1664
0
        }
1665
277
  ctxt->nsTab = tmp;
1666
277
    }
1667
13.3k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1668
13.3k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1669
13.3k
    return (ctxt->nsNr);
1670
13.3k
}
1671
/**
1672
 * nsPop:
1673
 * @ctxt: an XML parser context
1674
 * @nr:  the number to pop
1675
 *
1676
 * Pops the top @nr parser prefix/namespace from the ns stack
1677
 *
1678
 * Returns the number of namespaces removed
1679
 */
1680
static int
1681
nsPop(xmlParserCtxtPtr ctxt, int nr)
1682
1.42k
{
1683
1.42k
    int i;
1684
1685
1.42k
    if (ctxt->nsTab == NULL) return(0);
1686
1.42k
    if (ctxt->nsNr < nr) {
1687
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1688
0
        nr = ctxt->nsNr;
1689
0
    }
1690
1.42k
    if (ctxt->nsNr <= 0)
1691
0
        return (0);
1692
1693
6.64k
    for (i = 0;i < nr;i++) {
1694
5.22k
         ctxt->nsNr--;
1695
5.22k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1696
5.22k
    }
1697
1.42k
    return(nr);
1698
1.42k
}
1699
#endif
1700
1701
static int
1702
4.27k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1703
4.27k
    const xmlChar **atts;
1704
4.27k
    int *attallocs;
1705
4.27k
    int maxatts;
1706
1707
4.27k
    if (ctxt->atts == NULL) {
1708
3.74k
  maxatts = 55; /* allow for 10 attrs by default */
1709
3.74k
  atts = (const xmlChar **)
1710
3.74k
         xmlMalloc(maxatts * sizeof(xmlChar *));
1711
3.74k
  if (atts == NULL) goto mem_error;
1712
3.74k
  ctxt->atts = atts;
1713
3.74k
  attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1714
3.74k
  if (attallocs == NULL) goto mem_error;
1715
3.74k
  ctxt->attallocs = attallocs;
1716
3.74k
  ctxt->maxatts = maxatts;
1717
3.74k
    } else if (nr + 5 > ctxt->maxatts) {
1718
536
  maxatts = (nr + 5) * 2;
1719
536
  atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1720
536
             maxatts * sizeof(const xmlChar *));
1721
536
  if (atts == NULL) goto mem_error;
1722
536
  ctxt->atts = atts;
1723
536
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1724
536
                               (maxatts / 5) * sizeof(int));
1725
536
  if (attallocs == NULL) goto mem_error;
1726
536
  ctxt->attallocs = attallocs;
1727
536
  ctxt->maxatts = maxatts;
1728
536
    }
1729
4.27k
    return(ctxt->maxatts);
1730
0
mem_error:
1731
0
    xmlErrMemory(ctxt, NULL);
1732
0
    return(-1);
1733
4.27k
}
1734
1735
/**
1736
 * inputPush:
1737
 * @ctxt:  an XML parser context
1738
 * @value:  the parser input
1739
 *
1740
 * Pushes a new parser input on top of the input stack
1741
 *
1742
 * Returns -1 in case of error, the index in the stack otherwise
1743
 */
1744
int
1745
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1746
18.3k
{
1747
18.3k
    if ((ctxt == NULL) || (value == NULL))
1748
0
        return(-1);
1749
18.3k
    if (ctxt->inputNr >= ctxt->inputMax) {
1750
0
        ctxt->inputMax *= 2;
1751
0
        ctxt->inputTab =
1752
0
            (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1753
0
                                             ctxt->inputMax *
1754
0
                                             sizeof(ctxt->inputTab[0]));
1755
0
        if (ctxt->inputTab == NULL) {
1756
0
            xmlErrMemory(ctxt, NULL);
1757
0
      ctxt->inputMax /= 2;
1758
0
            return (-1);
1759
0
        }
1760
0
    }
1761
18.3k
    ctxt->inputTab[ctxt->inputNr] = value;
1762
18.3k
    ctxt->input = value;
1763
18.3k
    return (ctxt->inputNr++);
1764
18.3k
}
1765
/**
1766
 * inputPop:
1767
 * @ctxt: an XML parser context
1768
 *
1769
 * Pops the top parser input from the input stack
1770
 *
1771
 * Returns the input just removed
1772
 */
1773
xmlParserInputPtr
1774
inputPop(xmlParserCtxtPtr ctxt)
1775
54.9k
{
1776
54.9k
    xmlParserInputPtr ret;
1777
1778
54.9k
    if (ctxt == NULL)
1779
0
        return(NULL);
1780
54.9k
    if (ctxt->inputNr <= 0)
1781
36.6k
        return (NULL);
1782
18.3k
    ctxt->inputNr--;
1783
18.3k
    if (ctxt->inputNr > 0)
1784
0
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1785
18.3k
    else
1786
18.3k
        ctxt->input = NULL;
1787
18.3k
    ret = ctxt->inputTab[ctxt->inputNr];
1788
18.3k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1789
18.3k
    return (ret);
1790
54.9k
}
1791
/**
1792
 * nodePush:
1793
 * @ctxt:  an XML parser context
1794
 * @value:  the element node
1795
 *
1796
 * Pushes a new element node on top of the node stack
1797
 *
1798
 * Returns -1 in case of error, the index in the stack otherwise
1799
 */
1800
int
1801
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1802
0
{
1803
0
    if (ctxt == NULL) return(0);
1804
0
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1805
0
        xmlNodePtr *tmp;
1806
1807
0
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1808
0
                                      ctxt->nodeMax * 2 *
1809
0
                                      sizeof(ctxt->nodeTab[0]));
1810
0
        if (tmp == NULL) {
1811
0
            xmlErrMemory(ctxt, NULL);
1812
0
            return (-1);
1813
0
        }
1814
0
        ctxt->nodeTab = tmp;
1815
0
  ctxt->nodeMax *= 2;
1816
0
    }
1817
0
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1818
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1819
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1820
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1821
0
        xmlParserMaxDepth);
1822
0
  xmlHaltParser(ctxt);
1823
0
  return(-1);
1824
0
    }
1825
0
    ctxt->nodeTab[ctxt->nodeNr] = value;
1826
0
    ctxt->node = value;
1827
0
    return (ctxt->nodeNr++);
1828
0
}
1829
1830
/**
1831
 * nodePop:
1832
 * @ctxt: an XML parser context
1833
 *
1834
 * Pops the top element node from the node stack
1835
 *
1836
 * Returns the node just removed
1837
 */
1838
xmlNodePtr
1839
nodePop(xmlParserCtxtPtr ctxt)
1840
6.34k
{
1841
6.34k
    xmlNodePtr ret;
1842
1843
6.34k
    if (ctxt == NULL) return(NULL);
1844
6.34k
    if (ctxt->nodeNr <= 0)
1845
6.34k
        return (NULL);
1846
0
    ctxt->nodeNr--;
1847
0
    if (ctxt->nodeNr > 0)
1848
0
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1849
0
    else
1850
0
        ctxt->node = NULL;
1851
0
    ret = ctxt->nodeTab[ctxt->nodeNr];
1852
0
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1853
0
    return (ret);
1854
6.34k
}
1855
1856
/**
1857
 * nameNsPush:
1858
 * @ctxt:  an XML parser context
1859
 * @value:  the element name
1860
 * @prefix:  the element prefix
1861
 * @URI:  the element namespace name
1862
 * @line:  the current line number for error messages
1863
 * @nsNr:  the number of namespaces pushed on the namespace table
1864
 *
1865
 * Pushes a new element name/prefix/URL on top of the name stack
1866
 *
1867
 * Returns -1 in case of error, the index in the stack otherwise
1868
 */
1869
static int
1870
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1871
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1872
1.47M
{
1873
1.47M
    xmlStartTag *tag;
1874
1875
1.47M
    if (ctxt->nameNr >= ctxt->nameMax) {
1876
1.46k
        const xmlChar * *tmp;
1877
1.46k
        xmlStartTag *tmp2;
1878
1.46k
        ctxt->nameMax *= 2;
1879
1.46k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1880
1.46k
                                    ctxt->nameMax *
1881
1.46k
                                    sizeof(ctxt->nameTab[0]));
1882
1.46k
        if (tmp == NULL) {
1883
0
      ctxt->nameMax /= 2;
1884
0
      goto mem_error;
1885
0
        }
1886
1.46k
  ctxt->nameTab = tmp;
1887
1.46k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1888
1.46k
                                    ctxt->nameMax *
1889
1.46k
                                    sizeof(ctxt->pushTab[0]));
1890
1.46k
        if (tmp2 == NULL) {
1891
0
      ctxt->nameMax /= 2;
1892
0
      goto mem_error;
1893
0
        }
1894
1.46k
  ctxt->pushTab = tmp2;
1895
1.47M
    } else if (ctxt->pushTab == NULL) {
1896
9.99k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1897
9.99k
                                            sizeof(ctxt->pushTab[0]));
1898
9.99k
        if (ctxt->pushTab == NULL)
1899
0
            goto mem_error;
1900
9.99k
    }
1901
1.47M
    ctxt->nameTab[ctxt->nameNr] = value;
1902
1.47M
    ctxt->name = value;
1903
1.47M
    tag = &ctxt->pushTab[ctxt->nameNr];
1904
1.47M
    tag->prefix = prefix;
1905
1.47M
    tag->URI = URI;
1906
1.47M
    tag->line = line;
1907
1.47M
    tag->nsNr = nsNr;
1908
1.47M
    return (ctxt->nameNr++);
1909
0
mem_error:
1910
0
    xmlErrMemory(ctxt, NULL);
1911
0
    return (-1);
1912
1.47M
}
1913
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost entry from the name stack and makes the entry
 * below it (if any) the current name.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1939
1940
/**
1941
 * namePush:
1942
 * @ctxt:  an XML parser context
1943
 * @value:  the element name
1944
 *
1945
 * Pushes a new element name on top of the name stack
1946
 *
1947
 * Returns -1 in case of error, the index in the stack otherwise
1948
 */
1949
int
1950
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1951
0
{
1952
0
    if (ctxt == NULL) return (-1);
1953
1954
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1955
0
        const xmlChar * *tmp;
1956
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1957
0
                                    ctxt->nameMax * 2 *
1958
0
                                    sizeof(ctxt->nameTab[0]));
1959
0
        if (tmp == NULL) {
1960
0
      goto mem_error;
1961
0
        }
1962
0
  ctxt->nameTab = tmp;
1963
0
        ctxt->nameMax *= 2;
1964
0
    }
1965
0
    ctxt->nameTab[ctxt->nameNr] = value;
1966
0
    ctxt->name = value;
1967
0
    return (ctxt->nameNr++);
1968
0
mem_error:
1969
0
    xmlErrMemory(ctxt, NULL);
1970
0
    return (-1);
1971
0
}
1972
/**
1973
 * namePop:
1974
 * @ctxt: an XML parser context
1975
 *
1976
 * Pops the top element name from the name stack
1977
 *
1978
 * Returns the name just removed
1979
 */
1980
const xmlChar *
1981
namePop(xmlParserCtxtPtr ctxt)
1982
0
{
1983
0
    const xmlChar *ret;
1984
1985
0
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1986
0
        return (NULL);
1987
0
    ctxt->nameNr--;
1988
0
    if (ctxt->nameNr > 0)
1989
0
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1990
0
    else
1991
0
        ctxt->name = NULL;
1992
0
    ret = ctxt->nameTab[ctxt->nameNr];
1993
0
    ctxt->nameTab[ctxt->nameNr] = NULL;
1994
0
    return (ret);
1995
0
}
1996
1997
1.73M
/*
 * spacePush:
 * Pushes @val on top of the space stack, doubling the table when it is
 * full, and points ctxt->space at the new top entry.
 * Returns the index used, or -1 if the table could not be grown.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* undo the doubling: the old table is still the live one */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr++;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    return(idx);
}
2015
2016
296k
/*
 * spacePop:
 * Removes the top entry of the space stack, marks the vacated slot
 * with -1 and points ctxt->space at the new top (or at slot 0 when the
 * stack becomes empty).
 * Returns the value removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];
    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2028
2029
/*
2030
 * Macros for accessing the content. Those should be used only by the parser,
2031
 * and not exported.
2032
 *
2033
 * Dirty macros, i.e. one often needs to make assumptions about the context to
2034
 * use them
2035
 *
2036
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2037
 *           To be used with extreme caution since operations consuming
2038
 *           characters may move the input buffer to a different location !
2039
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2040
 *           This should be used internally by the parser
2041
 *           only to compare to ASCII values otherwise it would break when
2042
 *           running with UTF-8 encoding.
2043
 *   RAW     same as CUR but in the input buffer, bypass any token
2044
 *           extraction that may have been done
2045
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2046
 *           to compare on ASCII based substring.
2047
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2048
 *           strings without newlines within the parser.
2049
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2050
 *           defined char within the parser.
2051
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
2052
 *
2053
 *   NEXT    Skip to the next character, this does the proper decoding
2054
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2055
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2056
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2057
 *           to the number of xmlChars used for the encoding [0-5].
2058
 *   CUR_SCHAR  same but operate on a string instead of the context
2059
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2060
 *            the index
2061
 *   GROW, SHRINK  handling of input buffers
2062
 */
2063
2064
16.8M
/* RAW: the byte at the current input position (same expansion as CUR). */
#define RAW (*ctxt->input->cur)
2065
8.23M
/* CUR: the byte at the current input position. */
#define CUR (*ctxt->input->cur)
2066
120M
/* NXT(val): the byte val positions after the current one; no bounds check. */
#define NXT(val) ctxt->input->cur[(val)]
2067
1.41M
/* CUR_PTR: the current read pointer itself (an lvalue). */
#define CUR_PTR ctxt->input->cur
2068
68.4k
/* BASE_PTR: the base pointer of the current input. */
#define BASE_PTR ctxt->input->base
2069
2070
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn in order. Each macro is built on the
 * previous one, so comparison stops at the first mismatch.
 * Note: s is not parenthesized in the expansion and is evaluated once
 * per compared byte, so pass a simple pointer expression.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
2071
475k
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
2072
237k
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
2073
#define CMP5( s, c1, c2, c3, c4, c5 ) \
2074
416k
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
2075
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
2076
284k
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
2077
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
2078
198k
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
2079
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
2080
132k
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
2081
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
2082
43.1k
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
2083
43.1k
    ((unsigned char *) s)[ 8 ] == c9 )
2084
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
2085
2.84k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
2086
2.84k
    ((unsigned char *) s)[ 9 ] == c10 )
2087
2088
935k
/*
 * SKIP(val): advance the read pointer and the column counter by val
 * bytes, then refill the input if the new current byte is 0.
 * The line counter is not touched, so the skipped bytes must not
 * contain a newline.
 */
#define SKIP(val) do {             \
2089
935k
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2090
935k
    if (*ctxt->input->cur == 0)           \
2091
935k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2092
935k
  } while (0)
2093
2094
126k
/*
 * SKIPL(val): advance the read pointer by val bytes one at a time,
 * keeping line/col in step (a '\n' bumps the line and resets col to 1),
 * then refill the input if the new current byte is 0.
 * Note: val is not parenthesized and is re-evaluated on every loop
 * iteration; the macro declares a local named skipl.
 */
#define SKIPL(val) do {             \
2095
126k
    int skipl;                \
2096
25.6M
    for(skipl=0; skipl<val; skipl++) {         \
2097
25.5M
  if (*(ctxt->input->cur) == '\n') {       \
2098
15.0k
  ctxt->input->line++; ctxt->input->col = 1;      \
2099
25.5M
  } else ctxt->input->col++;         \
2100
25.5M
  ctxt->input->cur++;           \
2101
25.5M
    }                  \
2102
126k
    if (*ctxt->input->cur == 0)           \
2103
126k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2104
126k
  } while (0)
2105
2106
4.66M
/*
 * SHRINK: call xmlSHRINK() when not in progressive mode, more than
 * 2 * INPUT_CHUNK bytes lie between base and cur, and fewer than
 * 2 * INPUT_CHUNK bytes remain between cur and end.
 * NOTE(review): expands to a bare `if (...) stmt;` (trailing semicolon
 * included, no do/while wrapper), so it is unsafe as the body of an
 * unbraced if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&       \
2107
4.66M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2108
4.66M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2109
4.66M
  xmlSHRINK (ctxt);
2110
2111
7.61k
/*
 * xmlSHRINK:
 * Out-of-line part of the SHRINK macro: shrinks the current input and
 * refills it when the byte at the read position is 0 afterwards.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;

    xmlParserInputShrink(in);
    if (in->cur[0] == 0) {
        xmlParserInputGrow(in, INPUT_CHUNK);
    }
}
2116
2117
176M
/*
 * GROW: call xmlGROW() when not in progressive mode and fewer than
 * INPUT_CHUNK bytes remain between cur and end.
 * NOTE(review): expands to a bare `if (...) stmt;` (trailing semicolon
 * included, no do/while wrapper), so it is unsafe as the body of an
 * unbraced if/else.
 */
#define GROW if ((ctxt->progressive == 0) &&       \
2118
176M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2119
176M
  xmlGROW (ctxt);
2120
2121
165k
/*
 * xmlGROW:
 * Out-of-line part of the GROW macro. Refuses to read further (fatal
 * error, parser halted) when the distance from cur to either end of the
 * buffer exceeds XML_MAX_LOOKUP_LIMIT, the input has a real read
 * callback and XML_PARSE_HUGE is not set. Otherwise grows the input,
 * sanity-checks that cur still lies within [base, end], and grows once
 * more if the current byte is 0.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;
    ptrdiff_t ahead = in->end - in->cur;
    ptrdiff_t behind = in->cur - in->base;
    int overLimit;

    overLimit = (ahead > XML_MAX_LOOKUP_LIMIT) ||
                (behind > XML_MAX_LOOKUP_LIMIT);

    if (overLimit &&
        (in->buf) &&
        (in->buf->readcallback != xmlInputReadCallbackNop) &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(in, INPUT_CHUNK);

    if ((in->cur > in->end) || (in->cur < in->base)) {
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    if ((in->cur != NULL) && (*in->cur == 0)) {
        xmlParserInputGrow(in, INPUT_CHUNK);
    }
}
2144
2145
4.38M
/* SKIP_BLANKS: skip blanks at the current position via xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2146
2147
9.36M
/* NEXT: advance by one character via xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)
2148
2149
1.76M
/*
 * NEXT1: advance column and read pointer by one byte, then refill the
 * input if the new current byte is 0. The line counter is not touched.
 * NOTE(review): expands to a plain brace block, not do/while(0), so
 * `NEXT1;` is unsafe directly before an `else`.
 */
#define NEXT1 {               \
2150
1.76M
  ctxt->input->col++;           \
2151
1.76M
  ctxt->input->cur++;           \
2152
1.76M
  if (*ctxt->input->cur == 0)         \
2153
1.76M
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);   \
2154
1.76M
    }
2155
2156
501M
/*
 * NEXTL(l): account for one character in line/col (a '\n' at the
 * current position bumps the line and resets col to 1, anything else
 * adds one column), then advance the read pointer by l bytes.
 * Does not refill the input; l is not parenthesized.
 */
#define NEXTL(l) do {             \
2157
501M
    if (*(ctxt->input->cur) == '\n') {         \
2158
34.2k
  ctxt->input->line++; ctxt->input->col = 1;      \
2159
501M
    } else ctxt->input->col++;           \
2160
501M
    ctxt->input->cur += l;        \
2161
501M
  } while (0)
2162
2163
502M
/* CUR_CHAR(l): xmlCurrentChar() on the context; l receives the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2164
39.0M
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on string s; l receives the length. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2165
2166
/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i. When l
 * is 1 a single byte is stored and i is incremented; otherwise i is
 * advanced by the return value of xmlCopyCharMultiByte().
 * NOTE(review): expands to an unbraced if/else without a trailing
 * semicolon; arguments are not parenthesized.
 */
#define COPY_BUF(l,b,i,v)           \
2167
332M
    if (l == 1) b[i++] = (xmlChar) v;         \
2168
332M
    else i += xmlCopyCharMultiByte(&b[i],v)
2169
2170
/* CUR_CONSUMED: input->consumed plus the offset of cur from base. */
#define CUR_CONSUMED \
2171
4.09M
    (ctxt->input->consumed + (ctxt->input->cur - ctxt->input->base))
2172
2173
/**
2174
 * xmlSkipBlankChars:
2175
 * @ctxt:  the XML parser context
2176
 *
2177
 * skip all blanks character found at that point in the input streams.
2178
 * It pops up finished entities in the process if allowable at that point.
2179
 *
2180
 * Returns the number of space chars skipped
2181
 */
2182
2183
int
2184
4.38M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2185
4.38M
    int res = 0;
2186
2187
    /*
2188
     * It's Okay to use CUR/NEXT here since all the blanks are on
2189
     * the ASCII range.
2190
     */
2191
4.38M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2192
4.38M
        (ctxt->instate == XML_PARSER_START)) {
2193
4.02M
  const xmlChar *cur;
2194
  /*
2195
   * if we are in the document content, go really fast
2196
   */
2197
4.02M
  cur = ctxt->input->cur;
2198
4.02M
  while (IS_BLANK_CH(*cur)) {
2199
1.67M
      if (*cur == '\n') {
2200
4.37k
    ctxt->input->line++; ctxt->input->col = 1;
2201
1.67M
      } else {
2202
1.67M
    ctxt->input->col++;
2203
1.67M
      }
2204
1.67M
      cur++;
2205
1.67M
      if (res < INT_MAX)
2206
1.67M
    res++;
2207
1.67M
      if (*cur == 0) {
2208
735
    ctxt->input->cur = cur;
2209
735
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2210
735
    cur = ctxt->input->cur;
2211
735
      }
2212
1.67M
  }
2213
4.02M
  ctxt->input->cur = cur;
2214
4.02M
    } else {
2215
363k
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2216
2217
1.72M
  while (1) {
2218
1.72M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2219
1.36M
    NEXT;
2220
1.36M
      } else if (CUR == '%') {
2221
                /*
2222
                 * Need to handle support of entities branching here
2223
                 */
2224
5.76k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2225
5.76k
                    break;
2226
0
          xmlParsePEReference(ctxt);
2227
357k
            } else if (CUR == 0) {
2228
776
                if (ctxt->inputNr <= 1)
2229
776
                    break;
2230
0
                xmlPopInput(ctxt);
2231
356k
            } else {
2232
356k
                break;
2233
356k
            }
2234
2235
            /*
2236
             * Also increase the counter when entering or exiting a PERef.
2237
             * The spec says: "When a parameter-entity reference is recognized
2238
             * in the DTD and included, its replacement text MUST be enlarged
2239
             * by the attachment of one leading and one following space (#x20)
2240
             * character."
2241
             */
2242
1.36M
      if (res < INT_MAX)
2243
1.36M
    res++;
2244
1.36M
        }
2245
363k
    }
2246
4.38M
    return(res);
2247
4.38M
}
2248
2249
/************************************************************************
2250
 *                  *
2251
 *    Commodity functions to handle entities      *
2252
 *                  *
2253
 ************************************************************************/
2254
2255
/**
2256
 * xmlPopInput:
2257
 * @ctxt:  an XML parser context
2258
 *
2259
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2260
 *          pop it and return the next char.
2261
 *
2262
 * Returns the current xmlChar in the parser context
2263
 */
2264
xmlChar
2265
0
xmlPopInput(xmlParserCtxtPtr ctxt) {
2266
0
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2267
0
    if (xmlParserDebugEntities)
2268
0
  xmlGenericError(xmlGenericErrorContext,
2269
0
    "Popping input %d\n", ctxt->inputNr);
2270
0
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2271
0
        (ctxt->instate != XML_PARSER_EOF))
2272
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2273
0
                    "Unfinished entity outside the DTD");
2274
0
    xmlFreeInputStream(inputPop(ctxt));
2275
0
    if (*ctxt->input->cur == 0)
2276
0
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2277
0
    return(CUR);
2278
0
}
2279
2280
/**
2281
 * xmlPushInput:
2282
 * @ctxt:  an XML parser context
2283
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2284
 *
2285
 * xmlPushInput: switch to a new input stream which is stacked on top
2286
 *               of the previous one(s).
2287
 * Returns -1 in case of error or the index in the input stack
2288
 */
2289
int
2290
0
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2291
0
    int ret;
2292
0
    if (input == NULL) return(-1);
2293
2294
0
    if (xmlParserDebugEntities) {
2295
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2296
0
      xmlGenericError(xmlGenericErrorContext,
2297
0
        "%s(%d): ", ctxt->input->filename,
2298
0
        ctxt->input->line);
2299
0
  xmlGenericError(xmlGenericErrorContext,
2300
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2301
0
    }
2302
0
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2303
0
        (ctxt->inputNr > 1024)) {
2304
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2305
0
        while (ctxt->inputNr > 1)
2306
0
            xmlFreeInputStream(inputPop(ctxt));
2307
0
  return(-1);
2308
0
    }
2309
0
    ret = inputPush(ctxt, input);
2310
0
    if (ctxt->instate == XML_PARSER_EOF)
2311
0
        return(-1);
2312
0
    GROW;
2313
0
    return(ret);
2314
0
}
2315
2316
/**
2317
 * xmlParseCharRef:
2318
 * @ctxt:  an XML parser context
2319
 *
2320
 * parse Reference declarations
2321
 *
2322
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2323
 *                  '&#x' [0-9a-fA-F]+ ';'
2324
 *
2325
 * [ WFC: Legal Character ]
2326
 * Characters referred to using character references must match the
2327
 * production for Char.
2328
 *
2329
 * Returns the value parsed (as an int), 0 in case of error
2330
 */
2331
int
2332
379k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2333
379k
    int val = 0;
2334
379k
    int count = 0;
2335
2336
    /*
2337
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2338
     */
2339
379k
    if ((RAW == '&') && (NXT(1) == '#') &&
2340
379k
        (NXT(2) == 'x')) {
2341
24.8k
  SKIP(3);
2342
24.8k
  GROW;
2343
280k
  while (RAW != ';') { /* loop blocked by count */
2344
264k
      if (count++ > 20) {
2345
11.1k
    count = 0;
2346
11.1k
    GROW;
2347
11.1k
                if (ctxt->instate == XML_PARSER_EOF)
2348
0
                    return(0);
2349
11.1k
      }
2350
264k
      if ((RAW >= '0') && (RAW <= '9'))
2351
161k
          val = val * 16 + (CUR - '0');
2352
103k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2353
43.5k
          val = val * 16 + (CUR - 'a') + 10;
2354
59.8k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2355
51.1k
          val = val * 16 + (CUR - 'A') + 10;
2356
8.74k
      else {
2357
8.74k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2358
8.74k
    val = 0;
2359
8.74k
    break;
2360
8.74k
      }
2361
256k
      if (val > 0x110000)
2362
143k
          val = 0x110000;
2363
2364
256k
      NEXT;
2365
256k
      count++;
2366
256k
  }
2367
24.8k
  if (RAW == ';') {
2368
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2369
16.0k
      ctxt->input->col++;
2370
16.0k
      ctxt->input->cur++;
2371
16.0k
  }
2372
354k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2373
354k
  SKIP(2);
2374
354k
  GROW;
2375
1.11M
  while (RAW != ';') { /* loop blocked by count */
2376
971k
      if (count++ > 20) {
2377
13.1k
    count = 0;
2378
13.1k
    GROW;
2379
13.1k
                if (ctxt->instate == XML_PARSER_EOF)
2380
0
                    return(0);
2381
13.1k
      }
2382
971k
      if ((RAW >= '0') && (RAW <= '9'))
2383
762k
          val = val * 10 + (CUR - '0');
2384
209k
      else {
2385
209k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2386
209k
    val = 0;
2387
209k
    break;
2388
209k
      }
2389
762k
      if (val > 0x110000)
2390
5.39k
          val = 0x110000;
2391
2392
762k
      NEXT;
2393
762k
      count++;
2394
762k
  }
2395
354k
  if (RAW == ';') {
2396
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2397
145k
      ctxt->input->col++;
2398
145k
      ctxt->input->cur++;
2399
145k
  }
2400
354k
    } else {
2401
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2402
0
    }
2403
2404
    /*
2405
     * [ WFC: Legal Character ]
2406
     * Characters referred to using character references must match the
2407
     * production for Char.
2408
     */
2409
379k
    if (val >= 0x110000) {
2410
3.87k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2411
3.87k
                "xmlParseCharRef: character reference out of bounds\n",
2412
3.87k
          val);
2413
375k
    } else if (IS_CHAR(val)) {
2414
137k
        return(val);
2415
238k
    } else {
2416
238k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2417
238k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2418
238k
                    val);
2419
238k
    }
2420
242k
    return(0);
2421
379k
}
2422
2423
/**
2424
 * xmlParseStringCharRef:
2425
 * @ctxt:  an XML parser context
2426
 * @str:  a pointer to an index in the string
2427
 *
2428
 * parse Reference declarations, variant parsing from a string rather
2429
 * than an an input flow.
2430
 *
2431
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2432
 *                  '&#x' [0-9a-fA-F]+ ';'
2433
 *
2434
 * [ WFC: Legal Character ]
2435
 * Characters referred to using character references must match the
2436
 * production for Char.
2437
 *
2438
 * Returns the value parsed (as an int), 0 in case of error, str will be
2439
 *         updated to the current value of the index
2440
 */
2441
static int
2442
4.95k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2443
4.95k
    const xmlChar *ptr;
2444
4.95k
    xmlChar cur;
2445
4.95k
    int val = 0;
2446
2447
4.95k
    if ((str == NULL) || (*str == NULL)) return(0);
2448
4.95k
    ptr = *str;
2449
4.95k
    cur = *ptr;
2450
4.95k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2451
2.36k
  ptr += 3;
2452
2.36k
  cur = *ptr;
2453
8.28k
  while (cur != ';') { /* Non input consuming loop */
2454
6.77k
      if ((cur >= '0') && (cur <= '9'))
2455
1.82k
          val = val * 16 + (cur - '0');
2456
4.95k
      else if ((cur >= 'a') && (cur <= 'f'))
2457
2.47k
          val = val * 16 + (cur - 'a') + 10;
2458
2.48k
      else if ((cur >= 'A') && (cur <= 'F'))
2459
1.62k
          val = val * 16 + (cur - 'A') + 10;
2460
852
      else {
2461
852
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2462
852
    val = 0;
2463
852
    break;
2464
852
      }
2465
5.92k
      if (val > 0x110000)
2466
803
          val = 0x110000;
2467
2468
5.92k
      ptr++;
2469
5.92k
      cur = *ptr;
2470
5.92k
  }
2471
2.36k
  if (cur == ';')
2472
1.51k
      ptr++;
2473
2.59k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2474
2.59k
  ptr += 2;
2475
2.59k
  cur = *ptr;
2476
8.87k
  while (cur != ';') { /* Non input consuming loops */
2477
7.06k
      if ((cur >= '0') && (cur <= '9'))
2478
6.27k
          val = val * 10 + (cur - '0');
2479
788
      else {
2480
788
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2481
788
    val = 0;
2482
788
    break;
2483
788
      }
2484
6.27k
      if (val > 0x110000)
2485
1.17k
          val = 0x110000;
2486
2487
6.27k
      ptr++;
2488
6.27k
      cur = *ptr;
2489
6.27k
  }
2490
2.59k
  if (cur == ';')
2491
1.80k
      ptr++;
2492
2.59k
    } else {
2493
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2494
0
  return(0);
2495
0
    }
2496
4.95k
    *str = ptr;
2497
2498
    /*
2499
     * [ WFC: Legal Character ]
2500
     * Characters referred to using character references must match the
2501
     * production for Char.
2502
     */
2503
4.95k
    if (val >= 0x110000) {
2504
221
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2505
221
                "xmlParseStringCharRef: character reference out of bounds\n",
2506
221
                val);
2507
4.73k
    } else if (IS_CHAR(val)) {
2508
2.42k
        return(val);
2509
2.42k
    } else {
2510
2.31k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2511
2.31k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2512
2.31k
        val);
2513
2.31k
    }
2514
2.53k
    return(0);
2515
4.95k
}
2516
2517
/**
2518
 * xmlParserHandlePEReference:
2519
 * @ctxt:  the parser context
2520
 *
2521
 * [69] PEReference ::= '%' Name ';'
2522
 *
2523
 * [ WFC: No Recursion ]
2524
 * A parsed entity must not contain a recursive
2525
 * reference to itself, either directly or indirectly.
2526
 *
2527
 * [ WFC: Entity Declared ]
2528
 * In a document without any DTD, a document with only an internal DTD
2529
 * subset which contains no parameter entity references, or a document
2530
 * with "standalone='yes'", ...  ... The declaration of a parameter
2531
 * entity must precede any reference to it...
2532
 *
2533
 * [ VC: Entity Declared ]
2534
 * In a document with an external subset or external parameter entities
2535
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2536
 * must precede any reference to it...
2537
 *
2538
 * [ WFC: In DTD ]
2539
 * Parameter-entity references may only appear in the DTD.
2540
 * NOTE: misleading but this is handled.
2541
 *
2542
 * A PEReference may have been detected in the current input stream
2543
 * the handling is done accordingly to
2544
 *      http://www.w3.org/TR/REC-xml#entproc
2545
 * i.e.
2546
 *   - Included in literal in entity values
2547
 *   - Included as Parameter Entity reference within DTDs
2548
 */
2549
void
2550
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2551
0
    switch(ctxt->instate) {
2552
0
  case XML_PARSER_CDATA_SECTION:
2553
0
      return;
2554
0
        case XML_PARSER_COMMENT:
2555
0
      return;
2556
0
  case XML_PARSER_START_TAG:
2557
0
      return;
2558
0
  case XML_PARSER_END_TAG:
2559
0
      return;
2560
0
        case XML_PARSER_EOF:
2561
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2562
0
      return;
2563
0
        case XML_PARSER_PROLOG:
2564
0
  case XML_PARSER_START:
2565
0
  case XML_PARSER_MISC:
2566
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2567
0
      return;
2568
0
  case XML_PARSER_ENTITY_DECL:
2569
0
        case XML_PARSER_CONTENT:
2570
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2571
0
        case XML_PARSER_PI:
2572
0
  case XML_PARSER_SYSTEM_LITERAL:
2573
0
  case XML_PARSER_PUBLIC_LITERAL:
2574
      /* we just ignore it there */
2575
0
      return;
2576
0
        case XML_PARSER_EPILOG:
2577
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2578
0
      return;
2579
0
  case XML_PARSER_ENTITY_VALUE:
2580
      /*
2581
       * NOTE: in the case of entity values, we don't do the
2582
       *       substitution here since we need the literal
2583
       *       entity value to be able to save the internal
2584
       *       subset of the document.
2585
       *       This will be handled by xmlStringDecodeEntities
2586
       */
2587
0
      return;
2588
0
        case XML_PARSER_DTD:
2589
      /*
2590
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2591
       * In the internal DTD subset, parameter-entity references
2592
       * can occur only where markup declarations can occur, not
2593
       * within markup declarations.
2594
       * In that case this is handled in xmlParseMarkupDecl
2595
       */
2596
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2597
0
    return;
2598
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2599
0
    return;
2600
0
            break;
2601
0
        case XML_PARSER_IGNORE:
2602
0
            return;
2603
0
    }
2604
2605
0
    xmlParsePEReference(ctxt);
2606
0
}
2607
2608
/*
2609
 * Macro used to grow the current buffer.
2610
 * buffer##_size is expected to be a size_t
2611
 * mem_error: is expected to handle memory allocation failures
2612
 *
 * The new size is twice the current size plus n. If that computation
 * wraps around (new size smaller than the old one) or the reallocation
 * fails, control jumps to the caller's mem_error label and buffer and
 * buffer##_size are left unchanged. The macro declares locals named
 * tmp and new_size inside its own brace block.
 */
2613
10.9k
#define growBuffer(buffer, n) {           \
2614
10.9k
    xmlChar *tmp;             \
2615
10.9k
    size_t new_size = buffer##_size * 2 + n;                            \
2616
10.9k
    if (new_size < buffer##_size) goto mem_error;                       \
2617
10.9k
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
2618
10.9k
    if (tmp == NULL) goto mem_error;         \
2619
10.9k
    buffer = tmp;             \
2620
10.9k
    buffer##_size = new_size;                                           \
2621
10.9k
}
2622
2623
/**
2624
 * xmlStringLenDecodeEntities:
2625
 * @ctxt:  the parser context
2626
 * @str:  the input string
2627
 * @len: the string length
2628
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2629
 * @end:  an end marker xmlChar, 0 if none
2630
 * @end2:  an end marker xmlChar, 0 if none
2631
 * @end3:  an end marker xmlChar, 0 if none
2632
 *
2633
 * Takes a entity string content and process to do the adequate substitutions.
2634
 *
2635
 * [67] Reference ::= EntityRef | CharRef
2636
 *
2637
 * [69] PEReference ::= '%' Name ';'
2638
 *
2639
 * Returns A newly allocated string with the substitution done. The caller
2640
 *      must deallocate it !
2641
 */
2642
xmlChar *
2643
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2644
7.25k
          int what, xmlChar end, xmlChar  end2, xmlChar end3) {
2645
7.25k
    xmlChar *buffer = NULL;
2646
7.25k
    size_t buffer_size = 0;
2647
7.25k
    size_t nbchars = 0;
2648
2649
7.25k
    xmlChar *current = NULL;
2650
7.25k
    xmlChar *rep = NULL;
2651
7.25k
    const xmlChar *last;
2652
7.25k
    xmlEntityPtr ent;
2653
7.25k
    int c,l;
2654
2655
7.25k
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2656
0
  return(NULL);
2657
7.25k
    last = str + len;
2658
2659
7.25k
    if (((ctxt->depth > 40) &&
2660
7.25k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2661
7.25k
  (ctxt->depth > 1024)) {
2662
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2663
0
  return(NULL);
2664
0
    }
2665
2666
    /*
2667
     * allocate a translation buffer.
2668
     */
2669
7.25k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2670
7.25k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2671
7.25k
    if (buffer == NULL) goto mem_error;
2672
2673
    /*
2674
     * OK loop until we reach one of the ending char or a size limit.
2675
     * we are operating on already parsed values.
2676
     */
2677
7.25k
    if (str < last)
2678
5.51k
  c = CUR_SCHAR(str, l);
2679
1.74k
    else
2680
1.74k
        c = 0;
2681
14.6M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2682
14.6M
           (c != end2) && (c != end3) &&
2683
14.6M
           (ctxt->instate != XML_PARSER_EOF)) {
2684
2685
14.6M
  if (c == 0) break;
2686
14.6M
        if ((c == '&') && (str[1] == '#')) {
2687
4.95k
      int val = xmlParseStringCharRef(ctxt, &str);
2688
4.95k
      if (val == 0)
2689
2.53k
                goto int_error;
2690
2.42k
      COPY_BUF(0,buffer,nbchars,val);
2691
2.42k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2692
414
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2693
414
      }
2694
14.6M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2695
0
      if (xmlParserDebugEntities)
2696
0
    xmlGenericError(xmlGenericErrorContext,
2697
0
      "String decoding Entity Reference: %.30s\n",
2698
0
      str);
2699
0
      ent = xmlParseStringEntityRef(ctxt, &str);
2700
0
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2701
0
      if (ent != NULL)
2702
0
          ctxt->nbentities += ent->checked / 2;
2703
0
      if ((ent != NULL) &&
2704
0
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2705
0
    if (ent->content != NULL) {
2706
0
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2707
0
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2708
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2709
0
        }
2710
0
    } else {
2711
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2712
0
          "predefined entity has no content\n");
2713
0
                    goto int_error;
2714
0
    }
2715
0
      } else if ((ent != NULL) && (ent->content != NULL)) {
2716
0
    ctxt->depth++;
2717
0
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2718
0
                            0, 0, 0);
2719
0
    ctxt->depth--;
2720
0
    if (rep == NULL) {
2721
0
                    ent->content[0] = 0;
2722
0
                    goto int_error;
2723
0
                }
2724
2725
0
                current = rep;
2726
0
                while (*current != 0) { /* non input consuming loop */
2727
0
                    buffer[nbchars++] = *current++;
2728
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2729
0
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2730
0
                            goto int_error;
2731
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2732
0
                    }
2733
0
                }
2734
0
                xmlFree(rep);
2735
0
                rep = NULL;
2736
0
      } else if (ent != NULL) {
2737
0
    int i = xmlStrlen(ent->name);
2738
0
    const xmlChar *cur = ent->name;
2739
2740
0
    buffer[nbchars++] = '&';
2741
0
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2742
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2743
0
    }
2744
0
    for (;i > 0;i--)
2745
0
        buffer[nbchars++] = *cur++;
2746
0
    buffer[nbchars++] = ';';
2747
0
      }
2748
14.6M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2749
0
      if (xmlParserDebugEntities)
2750
0
    xmlGenericError(xmlGenericErrorContext,
2751
0
      "String decoding PE Reference: %.30s\n", str);
2752
0
      ent = xmlParseStringPEReference(ctxt, &str);
2753
0
      xmlParserEntityCheck(ctxt, 0, ent, 0);
2754
0
      if (ent != NULL)
2755
0
          ctxt->nbentities += ent->checked / 2;
2756
0
      if (ent != NULL) {
2757
0
                if (ent->content == NULL) {
2758
        /*
2759
         * Note: external parsed entities will not be loaded,
2760
         * it is not required for a non-validating parser to
2761
         * complete external PEReferences coming from the
2762
         * internal subset
2763
         */
2764
0
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2765
0
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2766
0
      (ctxt->validate != 0)) {
2767
0
      xmlLoadEntityContent(ctxt, ent);
2768
0
        } else {
2769
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2770
0
      "not validating will not read content for PE entity %s\n",
2771
0
                          ent->name, NULL);
2772
0
        }
2773
0
    }
2774
0
    ctxt->depth++;
2775
0
    rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2776
0
                            0, 0, 0);
2777
0
    ctxt->depth--;
2778
0
    if (rep == NULL) {
2779
0
                    if (ent->content != NULL)
2780
0
                        ent->content[0] = 0;
2781
0
                    goto int_error;
2782
0
                }
2783
0
                current = rep;
2784
0
                while (*current != 0) { /* non input consuming loop */
2785
0
                    buffer[nbchars++] = *current++;
2786
0
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2787
0
                        if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
2788
0
                            goto int_error;
2789
0
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2790
0
                    }
2791
0
                }
2792
0
                xmlFree(rep);
2793
0
                rep = NULL;
2794
0
      }
2795
14.6M
  } else {
2796
14.6M
      COPY_BUF(l,buffer,nbchars,c);
2797
14.6M
      str += l;
2798
14.6M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2799
1.69k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2800
1.69k
      }
2801
14.6M
  }
2802
14.6M
  if (str < last)
2803
14.6M
      c = CUR_SCHAR(str, l);
2804
2.98k
  else
2805
2.98k
      c = 0;
2806
14.6M
    }
2807
4.72k
    buffer[nbchars] = 0;
2808
4.72k
    return(buffer);
2809
2810
0
mem_error:
2811
0
    xmlErrMemory(ctxt, NULL);
2812
2.53k
int_error:
2813
2.53k
    if (rep != NULL)
2814
0
        xmlFree(rep);
2815
2.53k
    if (buffer != NULL)
2816
2.53k
        xmlFree(buffer);
2817
2.53k
    return(NULL);
2818
0
}
2819
2820
/**
2821
 * xmlStringDecodeEntities:
2822
 * @ctxt:  the parser context
2823
 * @str:  the input string
2824
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2825
 * @end:  an end marker xmlChar, 0 if none
2826
 * @end2:  an end marker xmlChar, 0 if none
2827
 * @end3:  an end marker xmlChar, 0 if none
2828
 *
2829
 * Takes a entity string content and process to do the adequate substitutions.
2830
 *
2831
 * [67] Reference ::= EntityRef | CharRef
2832
 *
2833
 * [69] PEReference ::= '%' Name ';'
2834
 *
2835
 * Returns A newly allocated string with the substitution done. The caller
2836
 *      must deallocate it !
2837
 */
2838
xmlChar *
2839
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2840
7.25k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2841
7.25k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2842
7.25k
    return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2843
7.25k
           end, end2, end3));
2844
7.25k
}
2845
2846
/************************************************************************
2847
 *                  *
2848
 *    Commodity functions, cleanup needed ?     *
2849
 *                  *
2850
 ************************************************************************/
2851
2852
/**
2853
 * areBlanks:
2854
 * @ctxt:  an XML parser context
2855
 * @str:  a xmlChar *
2856
 * @len:  the size of @str
2857
 * @blank_chars: we know the chars are blanks
2858
 *
2859
 * Is this a sequence of blank chars that one can ignore ?
2860
 *
2861
 * Returns 1 if ignorable 0 otherwise.
2862
 */
2863
2864
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2865
209k
                     int blank_chars) {
2866
209k
    int i, ret;
2867
209k
    xmlNodePtr lastChild;
2868
2869
    /*
2870
     * Don't spend time trying to differentiate them, the same callback is
2871
     * used !
2872
     */
2873
209k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2874
0
  return(0);
2875
2876
    /*
2877
     * Check for xml:space value.
2878
     */
2879
209k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2880
209k
        (*(ctxt->space) == -2))
2881
97.8k
  return(0);
2882
2883
    /*
2884
     * Check that the string is made of blanks
2885
     */
2886
112k
    if (blank_chars == 0) {
2887
106k
  for (i = 0;i < len;i++)
2888
99.9k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2889
47.9k
    }
2890
2891
    /*
2892
     * Look if the element is mixed content in the DTD if available
2893
     */
2894
70.5k
    if (ctxt->node == NULL) return(0);
2895
0
    if (ctxt->myDoc != NULL) {
2896
0
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2897
0
        if (ret == 0) return(1);
2898
0
        if (ret == 1) return(0);
2899
0
    }
2900
2901
    /*
2902
     * Otherwise, heuristic :-\
2903
     */
2904
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2905
0
    if ((ctxt->node->children == NULL) &&
2906
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2907
2908
0
    lastChild = xmlGetLastChild(ctxt->node);
2909
0
    if (lastChild == NULL) {
2910
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2911
0
            (ctxt->node->content != NULL)) return(0);
2912
0
    } else if (xmlNodeIsText(lastChild))
2913
0
        return(0);
2914
0
    else if ((ctxt->node->children != NULL) &&
2915
0
             (xmlNodeIsText(ctxt->node->children)))
2916
0
        return(0);
2917
0
    return(1);
2918
0
}
2919
2920
/************************************************************************
2921
 *                  *
2922
 *    Extra stuff for namespace support     *
2923
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2924
 *                  *
2925
 ************************************************************************/
2926
2927
/**
2928
 * xmlSplitQName:
2929
 * @ctxt:  an XML parser context
2930
 * @name:  an XML parser context
2931
 * @prefix:  a xmlChar **
2932
 *
2933
 * parse an UTF8 encoded XML qualified name string
2934
 *
2935
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2936
 *
2937
 * [NS 6] Prefix ::= NCName
2938
 *
2939
 * [NS 7] LocalPart ::= NCName
2940
 *
2941
 * Returns the local part, and prefix is updated
2942
 *   to get the Prefix if any.
2943
 */
2944
2945
xmlChar *
2946
0
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2947
0
    xmlChar buf[XML_MAX_NAMELEN + 5];
2948
0
    xmlChar *buffer = NULL;
2949
0
    int len = 0;
2950
0
    int max = XML_MAX_NAMELEN;
2951
0
    xmlChar *ret = NULL;
2952
0
    const xmlChar *cur = name;
2953
0
    int c;
2954
2955
0
    if (prefix == NULL) return(NULL);
2956
0
    *prefix = NULL;
2957
2958
0
    if (cur == NULL) return(NULL);
2959
2960
#ifndef XML_XML_NAMESPACE
2961
    /* xml: prefix is not really a namespace */
2962
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2963
        (cur[2] == 'l') && (cur[3] == ':'))
2964
  return(xmlStrdup(name));
2965
#endif
2966
2967
    /* nasty but well=formed */
2968
0
    if (cur[0] == ':')
2969
0
  return(xmlStrdup(name));
2970
2971
0
    c = *cur++;
2972
0
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2973
0
  buf[len++] = c;
2974
0
  c = *cur++;
2975
0
    }
2976
0
    if (len >= max) {
2977
  /*
2978
   * Okay someone managed to make a huge name, so he's ready to pay
2979
   * for the processing speed.
2980
   */
2981
0
  max = len * 2;
2982
2983
0
  buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
2984
0
  if (buffer == NULL) {
2985
0
      xmlErrMemory(ctxt, NULL);
2986
0
      return(NULL);
2987
0
  }
2988
0
  memcpy(buffer, buf, len);
2989
0
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2990
0
      if (len + 10 > max) {
2991
0
          xmlChar *tmp;
2992
2993
0
    max *= 2;
2994
0
    tmp = (xmlChar *) xmlRealloc(buffer,
2995
0
            max * sizeof(xmlChar));
2996
0
    if (tmp == NULL) {
2997
0
        xmlFree(buffer);
2998
0
        xmlErrMemory(ctxt, NULL);
2999
0
        return(NULL);
3000
0
    }
3001
0
    buffer = tmp;
3002
0
      }
3003
0
      buffer[len++] = c;
3004
0
      c = *cur++;
3005
0
  }
3006
0
  buffer[len] = 0;
3007
0
    }
3008
3009
0
    if ((c == ':') && (*cur == 0)) {
3010
0
        if (buffer != NULL)
3011
0
      xmlFree(buffer);
3012
0
  *prefix = NULL;
3013
0
  return(xmlStrdup(name));
3014
0
    }
3015
3016
0
    if (buffer == NULL)
3017
0
  ret = xmlStrndup(buf, len);
3018
0
    else {
3019
0
  ret = buffer;
3020
0
  buffer = NULL;
3021
0
  max = XML_MAX_NAMELEN;
3022
0
    }
3023
3024
3025
0
    if (c == ':') {
3026
0
  c = *cur;
3027
0
        *prefix = ret;
3028
0
  if (c == 0) {
3029
0
      return(xmlStrndup(BAD_CAST "", 0));
3030
0
  }
3031
0
  len = 0;
3032
3033
  /*
3034
   * Check that the first character is proper to start
3035
   * a new name
3036
   */
3037
0
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3038
0
        ((c >= 0x41) && (c <= 0x5A)) ||
3039
0
        (c == '_') || (c == ':'))) {
3040
0
      int l;
3041
0
      int first = CUR_SCHAR(cur, l);
3042
3043
0
      if (!IS_LETTER(first) && (first != '_')) {
3044
0
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3045
0
          "Name %s is not XML Namespace compliant\n",
3046
0
          name);
3047
0
      }
3048
0
  }
3049
0
  cur++;
3050
3051
0
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3052
0
      buf[len++] = c;
3053
0
      c = *cur++;
3054
0
  }
3055
0
  if (len >= max) {
3056
      /*
3057
       * Okay someone managed to make a huge name, so he's ready to pay
3058
       * for the processing speed.
3059
       */
3060
0
      max = len * 2;
3061
3062
0
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3063
0
      if (buffer == NULL) {
3064
0
          xmlErrMemory(ctxt, NULL);
3065
0
    return(NULL);
3066
0
      }
3067
0
      memcpy(buffer, buf, len);
3068
0
      while (c != 0) { /* tested bigname2.xml */
3069
0
    if (len + 10 > max) {
3070
0
        xmlChar *tmp;
3071
3072
0
        max *= 2;
3073
0
        tmp = (xmlChar *) xmlRealloc(buffer,
3074
0
                max * sizeof(xmlChar));
3075
0
        if (tmp == NULL) {
3076
0
      xmlErrMemory(ctxt, NULL);
3077
0
      xmlFree(buffer);
3078
0
      return(NULL);
3079
0
        }
3080
0
        buffer = tmp;
3081
0
    }
3082
0
    buffer[len++] = c;
3083
0
    c = *cur++;
3084
0
      }
3085
0
      buffer[len] = 0;
3086
0
  }
3087
3088
0
  if (buffer == NULL)
3089
0
      ret = xmlStrndup(buf, len);
3090
0
  else {
3091
0
      ret = buffer;
3092
0
  }
3093
0
    }
3094
3095
0
    return(ret);
3096
0
}
3097
3098
/************************************************************************
3099
 *                  *
3100
 *      The parser itself       *
3101
 *  Relates to http://www.w3.org/TR/REC-xml       *
3102
 *                  *
3103
 ************************************************************************/
3104
3105
/************************************************************************
3106
 *                  *
3107
 *  Routines to parse Name, NCName and NmToken      *
3108
 *                  *
3109
 ************************************************************************/
3110
#ifdef DEBUG
/*
 * Statistics kept only in DEBUG builds: each counter is meant to be
 * bumped on entry of the name parsing routine it is named after.
 */

/* entry points */
static unsigned long nbParseName = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;

/* slow paths */
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCNameComplex = 0;
#endif
3118
3119
/*
3120
 * The two following functions are related to the change of accepted
3121
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3122
 * They correspond to the modified production [4] and the new production [4a]
3123
 * changes in that revision. Also note that the macros used for the
3124
 * productions Letter, Digit, CombiningChar and Extender are not needed
3125
 * anymore.
3126
 * We still keep compatibility to pre-revision5 parsing semantic if the
3127
 * new XML_PARSE_OLD10 option is given to the parser.
3128
 */
3129
static int
3130
46.0k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3131
46.0k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3132
        /*
3133
   * Use the new checks of production [4] [4a] amd [5] of the
3134
   * Update 5 of XML-1.0
3135
   */
3136
46.0k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3137
46.0k
      (((c >= 'a') && (c <= 'z')) ||
3138
45.5k
       ((c >= 'A') && (c <= 'Z')) ||
3139
45.5k
       (c == '_') || (c == ':') ||
3140
45.5k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3141
45.5k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3142
45.5k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3143
45.5k
       ((c >= 0x370) && (c <= 0x37D)) ||
3144
45.5k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3145
45.5k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3146
45.5k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3147
45.5k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148
45.5k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3149
45.5k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3150
45.5k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151
45.5k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3152
43.0k
      return(1);
3153
46.0k
    } else {
3154
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3155
0
      return(1);
3156
0
    }
3157
3.07k
    return(0);
3158
46.0k
}
3159
3160
static int
3161
121M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3162
121M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3163
        /*
3164
   * Use the new checks of production [4] [4a] amd [5] of the
3165
   * Update 5 of XML-1.0
3166
   */
3167
121M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3168
121M
      (((c >= 'a') && (c <= 'z')) ||
3169
121M
       ((c >= 'A') && (c <= 'Z')) ||
3170
121M
       ((c >= '0') && (c <= '9')) || /* !start */
3171
121M
       (c == '_') || (c == ':') ||
3172
121M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3173
121M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3174
121M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3175
121M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3176
121M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3177
121M
       ((c >= 0x370) && (c <= 0x37D)) ||
3178
121M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3179
121M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3180
121M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3181
121M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3182
121M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3183
121M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3184
121M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3185
121M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3186
121M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3187
121M
       return(1);
3188
121M
    } else {
3189
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3190
0
            (c == '.') || (c == '-') ||
3191
0
      (c == '_') || (c == ':') ||
3192
0
      (IS_COMBINING(c)) ||
3193
0
      (IS_EXTENDER(c)))
3194
0
      return(1);
3195
0
    }
3196
19.5k
    return(0);
3197
121M
}
3198
3199
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3200
                                          int *len, int *alloc, int normalize);
3201
3202
static const xmlChar *
3203
241k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3204
241k
    int len = 0, l;
3205
241k
    int c;
3206
241k
    int count = 0;
3207
241k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3208
241k
                    XML_MAX_TEXT_LENGTH :
3209
241k
                    XML_MAX_NAME_LENGTH;
3210
3211
#ifdef DEBUG
3212
    nbParseNameComplex++;
3213
#endif
3214
3215
    /*
3216
     * Handler for more complex cases
3217
     */
3218
241k
    GROW;
3219
241k
    if (ctxt->instate == XML_PARSER_EOF)
3220
0
        return(NULL);
3221
241k
    c = CUR_CHAR(l);
3222
241k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3223
        /*
3224
   * Use the new checks of production [4] [4a] amd [5] of the
3225
   * Update 5 of XML-1.0
3226
   */
3227
241k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3228
241k
      (!(((c >= 'a') && (c <= 'z')) ||
3229
227k
         ((c >= 'A') && (c <= 'Z')) ||
3230
227k
         (c == '_') || (c == ':') ||
3231
227k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3232
227k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3233
227k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3234
227k
         ((c >= 0x370) && (c <= 0x37D)) ||
3235
227k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3236
227k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3237
227k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3238
227k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3239
227k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3240
227k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3241
227k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3242
227k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3243
184k
      return(NULL);
3244
184k
  }
3245
56.6k
  len += l;
3246
56.6k
  NEXTL(l);
3247
56.6k
  c = CUR_CHAR(l);
3248
122M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3249
122M
         (((c >= 'a') && (c <= 'z')) ||
3250
122M
          ((c >= 'A') && (c <= 'Z')) ||
3251
122M
          ((c >= '0') && (c <= '9')) || /* !start */
3252
122M
          (c == '_') || (c == ':') ||
3253
122M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3254
122M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3255
122M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3256
122M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3257
122M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3258
122M
          ((c >= 0x370) && (c <= 0x37D)) ||
3259
122M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3260
122M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3261
122M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3262
122M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3263
122M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3264
122M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3265
122M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3266
122M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3267
122M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3268
122M
    )) {
3269
122M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3270
1.20M
    count = 0;
3271
1.20M
    GROW;
3272
1.20M
                if (ctxt->instate == XML_PARSER_EOF)
3273
0
                    return(NULL);
3274
1.20M
      }
3275
122M
            if (len <= INT_MAX - l)
3276
122M
          len += l;
3277
122M
      NEXTL(l);
3278
122M
      c = CUR_CHAR(l);
3279
122M
  }
3280
56.6k
    } else {
3281
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3282
0
      (!IS_LETTER(c) && (c != '_') &&
3283
0
       (c != ':'))) {
3284
0
      return(NULL);
3285
0
  }
3286
0
  len += l;
3287
0
  NEXTL(l);
3288
0
  c = CUR_CHAR(l);
3289
3290
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3291
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3292
0
    (c == '.') || (c == '-') ||
3293
0
    (c == '_') || (c == ':') ||
3294
0
    (IS_COMBINING(c)) ||
3295
0
    (IS_EXTENDER(c)))) {
3296
0
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3297
0
    count = 0;
3298
0
    GROW;
3299
0
                if (ctxt->instate == XML_PARSER_EOF)
3300
0
                    return(NULL);
3301
0
      }
3302
0
            if (len <= INT_MAX - l)
3303
0
          len += l;
3304
0
      NEXTL(l);
3305
0
      c = CUR_CHAR(l);
3306
0
  }
3307
0
    }
3308
56.6k
    if (len > maxLength) {
3309
15
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3310
15
        return(NULL);
3311
15
    }
3312
56.6k
    if (ctxt->input->cur - ctxt->input->base < len) {
3313
        /*
3314
         * There were a couple of bugs where PERefs lead to to a change
3315
         * of the buffer. Check the buffer size to avoid passing an invalid
3316
         * pointer to xmlDictLookup.
3317
         */
3318
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3319
0
                    "unexpected change of input buffer");
3320
0
        return (NULL);
3321
0
    }
3322
56.6k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3323
340
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3324
56.3k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3325
56.6k
}
3326
3327
/**
3328
 * xmlParseName:
3329
 * @ctxt:  an XML parser context
3330
 *
3331
 * parse an XML name.
3332
 *
3333
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3334
 *                  CombiningChar | Extender
3335
 *
3336
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3337
 *
3338
 * [6] Names ::= Name (#x20 Name)*
3339
 *
3340
 * Returns the Name parsed or NULL
3341
 */
3342
3343
const xmlChar *
3344
402k
xmlParseName(xmlParserCtxtPtr ctxt) {
3345
402k
    const xmlChar *in;
3346
402k
    const xmlChar *ret;
3347
402k
    size_t count = 0;
3348
402k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3349
402k
                       XML_MAX_TEXT_LENGTH :
3350
402k
                       XML_MAX_NAME_LENGTH;
3351
3352
402k
    GROW;
3353
3354
#ifdef DEBUG
3355
    nbParseName++;
3356
#endif
3357
3358
    /*
3359
     * Accelerator for simple ASCII names
3360
     */
3361
402k
    in = ctxt->input->cur;
3362
402k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3363
402k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3364
402k
  (*in == '_') || (*in == ':')) {
3365
178k
  in++;
3366
723k
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3367
723k
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3368
723k
         ((*in >= 0x30) && (*in <= 0x39)) ||
3369
723k
         (*in == '_') || (*in == '-') ||
3370
723k
         (*in == ':') || (*in == '.'))
3371
545k
      in++;
3372
178k
  if ((*in > 0) && (*in < 0x80)) {
3373
161k
      count = in - ctxt->input->cur;
3374
161k
            if (count > maxLength) {
3375
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3376
0
                return(NULL);
3377
0
            }
3378
161k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3379
161k
      ctxt->input->cur = in;
3380
161k
      ctxt->input->col += count;
3381
161k
      if (ret == NULL)
3382
0
          xmlErrMemory(ctxt, NULL);
3383
161k
      return(ret);
3384
161k
  }
3385
178k
    }
3386
    /* accelerator for special cases */
3387
241k
    return(xmlParseNameComplex(ctxt));
3388
402k
}
3389
3390
static const xmlChar *
3391
43.0k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3392
43.0k
    int len = 0, l;
3393
43.0k
    int c;
3394
43.0k
    int count = 0;
3395
43.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3396
43.0k
                    XML_MAX_TEXT_LENGTH :
3397
43.0k
                    XML_MAX_NAME_LENGTH;
3398
43.0k
    size_t startPosition = 0;
3399
3400
#ifdef DEBUG
3401
    nbParseNCNameComplex++;
3402
#endif
3403
3404
    /*
3405
     * Handler for more complex cases
3406
     */
3407
43.0k
    GROW;
3408
43.0k
    startPosition = CUR_PTR - BASE_PTR;
3409
43.0k
    c = CUR_CHAR(l);
3410
43.0k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3411
43.0k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3412
17.5k
  return(NULL);
3413
17.5k
    }
3414
3415
85.4M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3416
85.4M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3417
85.4M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3418
836k
      count = 0;
3419
836k
      GROW;
3420
836k
            if (ctxt->instate == XML_PARSER_EOF)
3421
0
                return(NULL);
3422
836k
  }
3423
85.4M
        if (len <= INT_MAX - l)
3424
85.4M
      len += l;
3425
85.4M
  NEXTL(l);
3426
85.4M
  c = CUR_CHAR(l);
3427
85.4M
  if (c == 0) {
3428
1.27k
      count = 0;
3429
      /*
3430
       * when shrinking to extend the buffer we really need to preserve
3431
       * the part of the name we already parsed. Hence rolling back
3432
       * by current length.
3433
       */
3434
1.27k
      ctxt->input->cur -= l;
3435
1.27k
      GROW;
3436
1.27k
            if (ctxt->instate == XML_PARSER_EOF)
3437
0
                return(NULL);
3438
1.27k
      ctxt->input->cur += l;
3439
1.27k
      c = CUR_CHAR(l);
3440
1.27k
  }
3441
85.4M
    }
3442
25.4k
    if (len > maxLength) {
3443
12
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3444
12
        return(NULL);
3445
12
    }
3446
25.4k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3447
25.4k
}
3448
3449
/**
3450
 * xmlParseNCName:
3451
 * @ctxt:  an XML parser context
3452
 * @len:  length of the string parsed
3453
 *
3454
 * parse an XML name.
3455
 *
3456
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3457
 *                      CombiningChar | Extender
3458
 *
3459
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3460
 *
3461
 * Returns the Name parsed or NULL
3462
 */
3463
3464
static const xmlChar *
3465
2.50M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3466
2.50M
    const xmlChar *in, *e;
3467
2.50M
    const xmlChar *ret;
3468
2.50M
    size_t count = 0;
3469
2.50M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3470
2.50M
                       XML_MAX_TEXT_LENGTH :
3471
2.50M
                       XML_MAX_NAME_LENGTH;
3472
3473
#ifdef DEBUG
3474
    nbParseNCName++;
3475
#endif
3476
3477
    /*
3478
     * Accelerator for simple ASCII names
3479
     */
3480
2.50M
    in = ctxt->input->cur;
3481
2.50M
    e = ctxt->input->end;
3482
2.50M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3483
2.50M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3484
2.50M
   (*in == '_')) && (in < e)) {
3485
2.46M
  in++;
3486
5.59M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3487
5.59M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3488
5.59M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3489
5.59M
          (*in == '_') || (*in == '-') ||
3490
5.59M
          (*in == '.')) && (in < e))
3491
3.12M
      in++;
3492
2.46M
  if (in >= e)
3493
709
      goto complex;
3494
2.46M
  if ((*in > 0) && (*in < 0x80)) {
3495
2.45M
      count = in - ctxt->input->cur;
3496
2.45M
            if (count > maxLength) {
3497
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3498
0
                return(NULL);
3499
0
            }
3500
2.45M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3501
2.45M
      ctxt->input->cur = in;
3502
2.45M
      ctxt->input->col += count;
3503
2.45M
      if (ret == NULL) {
3504
0
          xmlErrMemory(ctxt, NULL);
3505
0
      }
3506
2.45M
      return(ret);
3507
2.45M
  }
3508
2.46M
    }
3509
43.0k
complex:
3510
43.0k
    return(xmlParseNCNameComplex(ctxt));
3511
2.50M
}
3512
3513
/**
3514
 * xmlParseNameAndCompare:
3515
 * @ctxt:  an XML parser context
3516
 *
3517
 * parse an XML name and compares for match
3518
 * (specialized for endtag parsing)
3519
 *
3520
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3521
 * and the name for mismatch
3522
 */
3523
3524
static const xmlChar *
3525
29.9k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3526
29.9k
    register const xmlChar *cmp = other;
3527
29.9k
    register const xmlChar *in;
3528
29.9k
    const xmlChar *ret;
3529
3530
29.9k
    GROW;
3531
29.9k
    if (ctxt->instate == XML_PARSER_EOF)
3532
0
        return(NULL);
3533
3534
29.9k
    in = ctxt->input->cur;
3535
174k
    while (*in != 0 && *in == *cmp) {
3536
144k
  ++in;
3537
144k
  ++cmp;
3538
144k
    }
3539
29.9k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3540
  /* success */
3541
29.8k
  ctxt->input->col += in - ctxt->input->cur;
3542
29.8k
  ctxt->input->cur = in;
3543
29.8k
  return (const xmlChar*) 1;
3544
29.8k
    }
3545
    /* failure (or end of input buffer), check with full function */
3546
151
    ret = xmlParseName (ctxt);
3547
    /* strings coming from the dictionary direct compare possible */
3548
151
    if (ret == other) {
3549
18
  return (const xmlChar*) 1;
3550
18
    }
3551
133
    return ret;
3552
151
}
3553
3554
/**
3555
 * xmlParseStringName:
3556
 * @ctxt:  an XML parser context
3557
 * @str:  a pointer to the string pointer (IN/OUT)
3558
 *
3559
 * parse an XML name.
3560
 *
3561
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3562
 *                  CombiningChar | Extender
3563
 *
3564
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3565
 *
3566
 * [6] Names ::= Name (#x20 Name)*
3567
 *
3568
 * Returns the Name parsed or NULL. The @str pointer
3569
 * is updated to the current location in the string.
3570
 */
3571
3572
static xmlChar *
3573
4.12k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3574
4.12k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3575
4.12k
    const xmlChar *cur = *str;
3576
4.12k
    int len = 0, l;
3577
4.12k
    int c;
3578
4.12k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3579
4.12k
                    XML_MAX_TEXT_LENGTH :
3580
4.12k
                    XML_MAX_NAME_LENGTH;
3581
3582
#ifdef DEBUG
3583
    nbParseStringName++;
3584
#endif
3585
3586
4.12k
    c = CUR_SCHAR(cur, l);
3587
4.12k
    if (!xmlIsNameStartChar(ctxt, c)) {
3588
1.26k
  return(NULL);
3589
1.26k
    }
3590
3591
2.85k
    COPY_BUF(l,buf,len,c);
3592
2.85k
    cur += l;
3593
2.85k
    c = CUR_SCHAR(cur, l);
3594
45.6k
    while (xmlIsNameChar(ctxt, c)) {
3595
43.5k
  COPY_BUF(l,buf,len,c);
3596
43.5k
  cur += l;
3597
43.5k
  c = CUR_SCHAR(cur, l);
3598
43.5k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3599
      /*
3600
       * Okay someone managed to make a huge name, so he's ready to pay
3601
       * for the processing speed.
3602
       */
3603
693
      xmlChar *buffer;
3604
693
      int max = len * 2;
3605
3606
693
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3607
693
      if (buffer == NULL) {
3608
0
          xmlErrMemory(ctxt, NULL);
3609
0
    return(NULL);
3610
0
      }
3611
693
      memcpy(buffer, buf, len);
3612
24.2M
      while (xmlIsNameChar(ctxt, c)) {
3613
24.2M
    if (len + 10 > max) {
3614
2.86k
        xmlChar *tmp;
3615
3616
2.86k
        max *= 2;
3617
2.86k
        tmp = (xmlChar *) xmlRealloc(buffer,
3618
2.86k
                                  max * sizeof(xmlChar));
3619
2.86k
        if (tmp == NULL) {
3620
0
      xmlErrMemory(ctxt, NULL);
3621
0
      xmlFree(buffer);
3622
0
      return(NULL);
3623
0
        }
3624
2.86k
        buffer = tmp;
3625
2.86k
    }
3626
24.2M
    COPY_BUF(l,buffer,len,c);
3627
24.2M
    cur += l;
3628
24.2M
    c = CUR_SCHAR(cur, l);
3629
24.2M
                if (len > maxLength) {
3630
2
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3631
2
                    xmlFree(buffer);
3632
2
                    return(NULL);
3633
2
                }
3634
24.2M
      }
3635
691
      buffer[len] = 0;
3636
691
      *str = cur;
3637
691
      return(buffer);
3638
693
  }
3639
43.5k
    }
3640
2.16k
    if (len > maxLength) {
3641
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3642
0
        return(NULL);
3643
0
    }
3644
2.16k
    *str = cur;
3645
2.16k
    return(xmlStrndup(buf, len));
3646
2.16k
}
3647
3648
/**
3649
 * xmlParseNmtoken:
3650
 * @ctxt:  an XML parser context
3651
 *
3652
 * parse an XML Nmtoken.
3653
 *
3654
 * [7] Nmtoken ::= (NameChar)+
3655
 *
3656
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3657
 *
3658
 * Returns the Nmtoken parsed or NULL
3659
 */
3660
3661
xmlChar *
3662
5.63k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3663
5.63k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3664
5.63k
    int len = 0, l;
3665
5.63k
    int c;
3666
5.63k
    int count = 0;
3667
5.63k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3668
5.63k
                    XML_MAX_TEXT_LENGTH :
3669
5.63k
                    XML_MAX_NAME_LENGTH;
3670
3671
#ifdef DEBUG
3672
    nbParseNmToken++;
3673
#endif
3674
3675
5.63k
    GROW;
3676
5.63k
    if (ctxt->instate == XML_PARSER_EOF)
3677
0
        return(NULL);
3678
5.63k
    c = CUR_CHAR(l);
3679
3680
38.7k
    while (xmlIsNameChar(ctxt, c)) {
3681
33.7k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3682
0
      count = 0;
3683
0
      GROW;
3684
0
  }
3685
33.7k
  COPY_BUF(l,buf,len,c);
3686
33.7k
  NEXTL(l);
3687
33.7k
  c = CUR_CHAR(l);
3688
33.7k
  if (c == 0) {
3689
60
      count = 0;
3690
60
      GROW;
3691
60
      if (ctxt->instate == XML_PARSER_EOF)
3692
0
    return(NULL);
3693
60
            c = CUR_CHAR(l);
3694
60
  }
3695
33.7k
  if (len >= XML_MAX_NAMELEN) {
3696
      /*
3697
       * Okay someone managed to make a huge token, so he's ready to pay
3698
       * for the processing speed.
3699
       */
3700
625
      xmlChar *buffer;
3701
625
      int max = len * 2;
3702
3703
625
      buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
3704
625
      if (buffer == NULL) {
3705
0
          xmlErrMemory(ctxt, NULL);
3706
0
    return(NULL);
3707
0
      }
3708
625
      memcpy(buffer, buf, len);
3709
11.3M
      while (xmlIsNameChar(ctxt, c)) {
3710
11.3M
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3711
111k
        count = 0;
3712
111k
        GROW;
3713
111k
                    if (ctxt->instate == XML_PARSER_EOF) {
3714
0
                        xmlFree(buffer);
3715
0
                        return(NULL);
3716
0
                    }
3717
111k
    }
3718
11.3M
    if (len + 10 > max) {
3719
853
        xmlChar *tmp;
3720
3721
853
        max *= 2;
3722
853
        tmp = (xmlChar *) xmlRealloc(buffer,
3723
853
                                  max * sizeof(xmlChar));
3724
853
        if (tmp == NULL) {
3725
0
      xmlErrMemory(ctxt, NULL);
3726
0
      xmlFree(buffer);
3727
0
      return(NULL);
3728
0
        }
3729
853
        buffer = tmp;
3730
853
    }
3731
11.3M
    COPY_BUF(l,buffer,len,c);
3732
11.3M
    NEXTL(l);
3733
11.3M
    c = CUR_CHAR(l);
3734
11.3M
                if (len > maxLength) {
3735
2
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3736
2
                    xmlFree(buffer);
3737
2
                    return(NULL);
3738
2
                }
3739
11.3M
      }
3740
623
      buffer[len] = 0;
3741
623
      return(buffer);
3742
625
  }
3743
33.7k
    }
3744
5.00k
    if (len == 0)
3745
1.33k
        return(NULL);
3746
3.67k
    if (len > maxLength) {
3747
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3748
0
        return(NULL);
3749
0
    }
3750
3.67k
    return(xmlStrndup(buf, len));
3751
3.67k
}
3752
3753
/**
3754
 * xmlParseEntityValue:
3755
 * @ctxt:  an XML parser context
3756
 * @orig:  if non-NULL store a copy of the original entity value
3757
 *
3758
 * parse a value for ENTITY declarations
3759
 *
3760
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3761
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3762
 *
3763
 * Returns the EntityValue parsed with reference substituted or NULL
3764
 */
3765
3766
xmlChar *
3767
10.0k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3768
10.0k
    xmlChar *buf = NULL;
3769
10.0k
    int len = 0;
3770
10.0k
    int size = XML_PARSER_BUFFER_SIZE;
3771
10.0k
    int c, l;
3772
10.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3773
10.0k
                    XML_MAX_HUGE_LENGTH :
3774
10.0k
                    XML_MAX_TEXT_LENGTH;
3775
10.0k
    xmlChar stop;
3776
10.0k
    xmlChar *ret = NULL;
3777
10.0k
    const xmlChar *cur = NULL;
3778
10.0k
    xmlParserInputPtr input;
3779
3780
10.0k
    if (RAW == '"') stop = '"';
3781
5.01k
    else if (RAW == '\'') stop = '\'';
3782
0
    else {
3783
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3784
0
  return(NULL);
3785
0
    }
3786
10.0k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
3787
10.0k
    if (buf == NULL) {
3788
0
  xmlErrMemory(ctxt, NULL);
3789
0
  return(NULL);
3790
0
    }
3791
3792
    /*
3793
     * The content of the entity definition is copied in a buffer.
3794
     */
3795
3796
10.0k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3797
10.0k
    input = ctxt->input;
3798
10.0k
    GROW;
3799
10.0k
    if (ctxt->instate == XML_PARSER_EOF)
3800
0
        goto error;
3801
10.0k
    NEXT;
3802
10.0k
    c = CUR_CHAR(l);
3803
    /*
3804
     * NOTE: 4.4.5 Included in Literal
3805
     * When a parameter entity reference appears in a literal entity
3806
     * value, ... a single or double quote character in the replacement
3807
     * text is always treated as a normal data character and will not
3808
     * terminate the literal.
3809
     * In practice it means we stop the loop only when back at parsing
3810
     * the initial entity and the quote is found
3811
     */
3812
45.1M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3813
45.1M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3814
45.0M
  if (len + 5 >= size) {
3815
3.38k
      xmlChar *tmp;
3816
3817
3.38k
      size *= 2;
3818
3.38k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3819
3.38k
      if (tmp == NULL) {
3820
0
    xmlErrMemory(ctxt, NULL);
3821
0
                goto error;
3822
0
      }
3823
3.38k
      buf = tmp;
3824
3.38k
  }
3825
45.0M
  COPY_BUF(l,buf,len,c);
3826
45.0M
  NEXTL(l);
3827
3828
45.0M
  GROW;
3829
45.0M
  c = CUR_CHAR(l);
3830
45.0M
  if (c == 0) {
3831
32
      GROW;
3832
32
      c = CUR_CHAR(l);
3833
32
  }
3834
3835
45.0M
        if (len > maxLength) {
3836
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3837
0
                           "entity value too long\n");
3838
0
            goto error;
3839
0
        }
3840
45.0M
    }
3841
10.0k
    buf[len] = 0;
3842
10.0k
    if (ctxt->instate == XML_PARSER_EOF)
3843
0
        goto error;
3844
10.0k
    if (c != stop) {
3845
60
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3846
60
        goto error;
3847
60
    }
3848
9.94k
    NEXT;
3849
3850
    /*
3851
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3852
     * reference constructs. Note Charref will be handled in
3853
     * xmlStringDecodeEntities()
3854
     */
3855
9.94k
    cur = buf;
3856
8.03M
    while (*cur != 0) { /* non input consuming */
3857
8.02M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3858
4.12k
      xmlChar *name;
3859
4.12k
      xmlChar tmp = *cur;
3860
4.12k
            int nameOk = 0;
3861
3862
4.12k
      cur++;
3863
4.12k
      name = xmlParseStringName(ctxt, &cur);
3864
4.12k
            if (name != NULL) {
3865
2.85k
                nameOk = 1;
3866
2.85k
                xmlFree(name);
3867
2.85k
            }
3868
4.12k
            if ((nameOk == 0) || (*cur != ';')) {
3869
2.46k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3870
2.46k
      "EntityValue: '%c' forbidden except for entities references\n",
3871
2.46k
                            tmp);
3872
2.46k
                goto error;
3873
2.46k
      }
3874
1.66k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3875
1.66k
    (ctxt->inputNr == 1)) {
3876
225
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3877
225
                goto error;
3878
225
      }
3879
1.43k
      if (*cur == 0)
3880
0
          break;
3881
1.43k
  }
3882
8.02M
  cur++;
3883
8.02M
    }
3884
3885
    /*
3886
     * Then PEReference entities are substituted.
3887
     *
3888
     * NOTE: 4.4.7 Bypassed
3889
     * When a general entity reference appears in the EntityValue in
3890
     * an entity declaration, it is bypassed and left as is.
3891
     * so XML_SUBSTITUTE_REF is not set here.
3892
     */
3893
7.25k
    ++ctxt->depth;
3894
7.25k
    ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3895
7.25k
                                  0, 0, 0);
3896
7.25k
    --ctxt->depth;
3897
7.25k
    if (orig != NULL) {
3898
7.25k
        *orig = buf;
3899
7.25k
        buf = NULL;
3900
7.25k
    }
3901
3902
10.0k
error:
3903
10.0k
    if (buf != NULL)
3904
2.74k
        xmlFree(buf);
3905
10.0k
    return(ret);
3906
7.25k
}
3907
3908
/**
3909
 * xmlParseAttValueComplex:
3910
 * @ctxt:  an XML parser context
3911
 * @len:   the resulting attribute len
3912
 * @normalize:  whether to apply the inner normalization
3913
 *
3914
 * parse a value for an attribute, this is the fallback function
3915
 * of xmlParseAttValue() when the attribute parsing requires handling
3916
 * of non-ASCII characters, or normalization compaction.
3917
 *
3918
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3919
 */
3920
static xmlChar *
3921
156k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3922
156k
    xmlChar limit = 0;
3923
156k
    xmlChar *buf = NULL;
3924
156k
    xmlChar *rep = NULL;
3925
156k
    size_t len = 0;
3926
156k
    size_t buf_size = 0;
3927
156k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3928
156k
                       XML_MAX_HUGE_LENGTH :
3929
156k
                       XML_MAX_TEXT_LENGTH;
3930
156k
    int c, l, in_space = 0;
3931
156k
    xmlChar *current = NULL;
3932
156k
    xmlEntityPtr ent;
3933
3934
156k
    if (NXT(0) == '"') {
3935
153k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3936
153k
  limit = '"';
3937
153k
        NEXT;
3938
153k
    } else if (NXT(0) == '\'') {
3939
2.89k
  limit = '\'';
3940
2.89k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3941
2.89k
        NEXT;
3942
2.89k
    } else {
3943
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3944
0
  return(NULL);
3945
0
    }
3946
3947
    /*
3948
     * allocate a translation buffer.
3949
     */
3950
156k
    buf_size = XML_PARSER_BUFFER_SIZE;
3951
156k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3952
156k
    if (buf == NULL) goto mem_error;
3953
3954
    /*
3955
     * OK loop until we reach one of the ending char or a size limit.
3956
     */
3957
156k
    c = CUR_CHAR(l);
3958
117M
    while (((NXT(0) != limit) && /* checked */
3959
117M
            (IS_CHAR(c)) && (c != '<')) &&
3960
117M
            (ctxt->instate != XML_PARSER_EOF)) {
3961
117M
  if (c == '&') {
3962
652k
      in_space = 0;
3963
652k
      if (NXT(1) == '#') {
3964
378k
    int val = xmlParseCharRef(ctxt);
3965
3966
378k
    if (val == '&') {
3967
7.57k
        if (ctxt->replaceEntities) {
3968
0
      if (len + 10 > buf_size) {
3969
0
          growBuffer(buf, 10);
3970
0
      }
3971
0
      buf[len++] = '&';
3972
7.57k
        } else {
3973
      /*
3974
       * The reparsing will be done in xmlStringGetNodeList()
3975
       * called by the attribute() function in SAX.c
3976
       */
3977
7.57k
      if (len + 10 > buf_size) {
3978
432
          growBuffer(buf, 10);
3979
432
      }
3980
7.57k
      buf[len++] = '&';
3981
7.57k
      buf[len++] = '#';
3982
7.57k
      buf[len++] = '3';
3983
7.57k
      buf[len++] = '8';
3984
7.57k
      buf[len++] = ';';
3985
7.57k
        }
3986
371k
    } else if (val != 0) {
3987
129k
        if (len + 10 > buf_size) {
3988
446
      growBuffer(buf, 10);
3989
446
        }
3990
129k
        len += xmlCopyChar(0, &buf[len], val);
3991
129k
    }
3992
378k
      } else {
3993
273k
    ent = xmlParseEntityRef(ctxt);
3994
273k
    ctxt->nbentities++;
3995
273k
    if (ent != NULL)
3996
44.3k
        ctxt->nbentities += ent->owner;
3997
273k
    if ((ent != NULL) &&
3998
273k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3999
30.2k
        if (len + 10 > buf_size) {
4000
672
      growBuffer(buf, 10);
4001
672
        }
4002
30.2k
        if ((ctxt->replaceEntities == 0) &&
4003
30.2k
            (ent->content[0] == '&')) {
4004
18.6k
      buf[len++] = '&';
4005
18.6k
      buf[len++] = '#';
4006
18.6k
      buf[len++] = '3';
4007
18.6k
      buf[len++] = '8';
4008
18.6k
      buf[len++] = ';';
4009
18.6k
        } else {
4010
11.5k
      buf[len++] = ent->content[0];
4011
11.5k
        }
4012
243k
    } else if ((ent != NULL) &&
4013
243k
               (ctxt->replaceEntities != 0)) {
4014
0
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4015
0
      ++ctxt->depth;
4016
0
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4017
0
                  XML_SUBSTITUTE_REF,
4018
0
                  0, 0, 0);
4019
0
      --ctxt->depth;
4020
0
      if (rep != NULL) {
4021
0
          current = rep;
4022
0
          while (*current != 0) { /* non input consuming */
4023
0
                                if ((*current == 0xD) || (*current == 0xA) ||
4024
0
                                    (*current == 0x9)) {
4025
0
                                    buf[len++] = 0x20;
4026
0
                                    current++;
4027
0
                                } else
4028
0
                                    buf[len++] = *current++;
4029
0
        if (len + 10 > buf_size) {
4030
0
            growBuffer(buf, 10);
4031
0
        }
4032
0
          }
4033
0
          xmlFree(rep);
4034
0
          rep = NULL;
4035
0
      }
4036
0
        } else {
4037
0
      if (len + 10 > buf_size) {
4038
0
          growBuffer(buf, 10);
4039
0
      }
4040
0
      if (ent->content != NULL)
4041
0
          buf[len++] = ent->content[0];
4042
0
        }
4043
243k
    } else if (ent != NULL) {
4044
14.1k
        int i = xmlStrlen(ent->name);
4045
14.1k
        const xmlChar *cur = ent->name;
4046
4047
        /*
4048
         * This may look absurd but is needed to detect
4049
         * entities problems
4050
         */
4051
14.1k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4052
14.1k
      (ent->content != NULL) && (ent->checked == 0)) {
4053
0
      unsigned long oldnbent = ctxt->nbentities, diff;
4054
4055
0
      ++ctxt->depth;
4056
0
      rep = xmlStringDecodeEntities(ctxt, ent->content,
4057
0
              XML_SUBSTITUTE_REF, 0, 0, 0);
4058
0
      --ctxt->depth;
4059
4060
0
                        diff = ctxt->nbentities - oldnbent + 1;
4061
0
                        if (diff > INT_MAX / 2)
4062
0
                            diff = INT_MAX / 2;
4063
0
                        ent->checked = diff * 2;
4064
0
      if (rep != NULL) {
4065
0
          if (xmlStrchr(rep, '<'))
4066
0
              ent->checked |= 1;
4067
0
          xmlFree(rep);
4068
0
          rep = NULL;
4069
0
      } else {
4070
0
                            ent->content[0] = 0;
4071
0
                        }
4072
0
        }
4073
4074
        /*
4075
         * Just output the reference
4076
         */
4077
14.1k
        buf[len++] = '&';
4078
14.2k
        while (len + i + 10 > buf_size) {
4079
218
      growBuffer(buf, i + 10);
4080
218
        }
4081
14.1k
        for (;i > 0;i--)
4082
0
      buf[len++] = *cur++;
4083
14.1k
        buf[len++] = ';';
4084
14.1k
    }
4085
273k
      }
4086
116M
  } else {
4087
116M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4088
1.40M
          if ((len != 0) || (!normalize)) {
4089
1.40M
        if ((!normalize) || (!in_space)) {
4090
1.39M
      COPY_BUF(l,buf,len,0x20);
4091
1.39M
      while (len + 10 > buf_size) {
4092
1.08k
          growBuffer(buf, 10);
4093
1.08k
      }
4094
1.39M
        }
4095
1.40M
        in_space = 1;
4096
1.40M
    }
4097
115M
      } else {
4098
115M
          in_space = 0;
4099
115M
    COPY_BUF(l,buf,len,c);
4100
115M
    if (len + 10 > buf_size) {
4101
16.8k
        growBuffer(buf, 10);
4102
16.8k
    }
4103
115M
      }
4104
116M
      NEXTL(l);
4105
116M
  }
4106
117M
  GROW;
4107
117M
  c = CUR_CHAR(l);
4108
117M
        if (len > maxLength) {
4109
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4110
0
                           "AttValue length too long\n");
4111
0
            goto mem_error;
4112
0
        }
4113
117M
    }
4114
156k
    if (ctxt->instate == XML_PARSER_EOF)
4115
0
        goto error;
4116
4117
156k
    if ((in_space) && (normalize)) {
4118
2.61k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4119
894
    }
4120
156k
    buf[len] = 0;
4121
156k
    if (RAW == '<') {
4122
833
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4123
155k
    } else if (RAW != limit) {
4124
3.31k
  if ((c != 0) && (!IS_CHAR(c))) {
4125
114
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4126
114
         "invalid character in attribute value\n");
4127
3.20k
  } else {
4128
3.20k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4129
3.20k
         "AttValue: ' expected\n");
4130
3.20k
        }
4131
3.31k
    } else
4132
152k
  NEXT;
4133
4134
156k
    if (attlen != NULL) *attlen = (int) len;
4135
156k
    return(buf);
4136
4137
0
mem_error:
4138
0
    xmlErrMemory(ctxt, NULL);
4139
0
error:
4140
0
    if (buf != NULL)
4141
0
        xmlFree(buf);
4142
0
    if (rep != NULL)
4143
0
        xmlFree(rep);
4144
0
    return(NULL);
4145
0
}
4146
4147
/**
4148
 * xmlParseAttValue:
4149
 * @ctxt:  an XML parser context
4150
 *
4151
 * parse a value for an attribute
4152
 * Note: the parser won't do substitution of entities here, this
4153
 * will be handled later in xmlStringGetNodeList
4154
 *
4155
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4156
 *                   "'" ([^<&'] | Reference)* "'"
4157
 *
4158
 * 3.3.3 Attribute-Value Normalization:
4159
 * Before the value of an attribute is passed to the application or
4160
 * checked for validity, the XML processor must normalize it as follows:
4161
 * - a character reference is processed by appending the referenced
4162
 *   character to the attribute value
4163
 * - an entity reference is processed by recursively processing the
4164
 *   replacement text of the entity
4165
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4166
 *   appending #x20 to the normalized value, except that only a single
4167
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4168
 *   parsed entity or the literal entity value of an internal parsed entity
4169
 * - other characters are processed by appending them to the normalized value
4170
 * If the declared value is not CDATA, then the XML processor must further
4171
 * process the normalized attribute value by discarding any leading and
4172
 * trailing space (#x20) characters, and by replacing sequences of space
4173
 * (#x20) characters by a single space (#x20) character.
4174
 * All attributes for which no declaration has been read should be treated
4175
 * by a non-validating parser as if declared CDATA.
4176
 *
4177
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4178
 */
4179
4180
4181
xmlChar *
4182
22.3k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4183
22.3k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4184
22.3k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4185
22.3k
}
4186
4187
/**
4188
 * xmlParseSystemLiteral:
4189
 * @ctxt:  an XML parser context
4190
 *
4191
 * parse an XML Literal
4192
 *
4193
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4194
 *
4195
 * Returns the SystemLiteral parsed or NULL
4196
 */
4197
4198
xmlChar *
4199
4.70k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4200
4.70k
    xmlChar *buf = NULL;
4201
4.70k
    int len = 0;
4202
4.70k
    int size = XML_PARSER_BUFFER_SIZE;
4203
4.70k
    int cur, l;
4204
4.70k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4205
4.70k
                    XML_MAX_TEXT_LENGTH :
4206
4.70k
                    XML_MAX_NAME_LENGTH;
4207
4.70k
    xmlChar stop;
4208
4.70k
    int state = ctxt->instate;
4209
4.70k
    int count = 0;
4210
4211
4.70k
    SHRINK;
4212
4.70k
    if (RAW == '"') {
4213
1.66k
        NEXT;
4214
1.66k
  stop = '"';
4215
3.04k
    } else if (RAW == '\'') {
4216
1.34k
        NEXT;
4217
1.34k
  stop = '\'';
4218
1.70k
    } else {
4219
1.70k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4220
1.70k
  return(NULL);
4221
1.70k
    }
4222
4223
3.00k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4224
3.00k
    if (buf == NULL) {
4225
0
        xmlErrMemory(ctxt, NULL);
4226
0
  return(NULL);
4227
0
    }
4228
3.00k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4229
3.00k
    cur = CUR_CHAR(l);
4230
29.4M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4231
29.4M
  if (len + 5 >= size) {
4232
1.92k
      xmlChar *tmp;
4233
4234
1.92k
      size *= 2;
4235
1.92k
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4236
1.92k
      if (tmp == NULL) {
4237
0
          xmlFree(buf);
4238
0
    xmlErrMemory(ctxt, NULL);
4239
0
    ctxt->instate = (xmlParserInputState) state;
4240
0
    return(NULL);
4241
0
      }
4242
1.92k
      buf = tmp;
4243
1.92k
  }
4244
29.4M
  count++;
4245
29.4M
  if (count > 50) {
4246
577k
      SHRINK;
4247
577k
      GROW;
4248
577k
      count = 0;
4249
577k
            if (ctxt->instate == XML_PARSER_EOF) {
4250
0
          xmlFree(buf);
4251
0
    return(NULL);
4252
0
            }
4253
577k
  }
4254
29.4M
  COPY_BUF(l,buf,len,cur);
4255
29.4M
  NEXTL(l);
4256
29.4M
  cur = CUR_CHAR(l);
4257
29.4M
  if (cur == 0) {
4258
169
      GROW;
4259
169
      SHRINK;
4260
169
      cur = CUR_CHAR(l);
4261
169
  }
4262
29.4M
        if (len > maxLength) {
4263
2
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4264
2
            xmlFree(buf);
4265
2
            ctxt->instate = (xmlParserInputState) state;
4266
2
            return(NULL);
4267
2
        }
4268
29.4M
    }
4269
3.00k
    buf[len] = 0;
4270
3.00k
    ctxt->instate = (xmlParserInputState) state;
4271
3.00k
    if (!IS_CHAR(cur)) {
4272
244
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4273
2.75k
    } else {
4274
2.75k
  NEXT;
4275
2.75k
    }
4276
3.00k
    return(buf);
4277
3.00k
}
4278
4279
/**
4280
 * xmlParsePubidLiteral:
4281
 * @ctxt:  an XML parser context
4282
 *
4283
 * parse an XML public literal
4284
 *
4285
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4286
 *
4287
 * Returns the PubidLiteral parsed or NULL.
4288
 */
4289
4290
xmlChar *
4291
3.80k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4292
3.80k
    xmlChar *buf = NULL;
4293
3.80k
    int len = 0;
4294
3.80k
    int size = XML_PARSER_BUFFER_SIZE;
4295
3.80k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4296
3.80k
                    XML_MAX_TEXT_LENGTH :
4297
3.80k
                    XML_MAX_NAME_LENGTH;
4298
3.80k
    xmlChar cur;
4299
3.80k
    xmlChar stop;
4300
3.80k
    int count = 0;
4301
3.80k
    xmlParserInputState oldstate = ctxt->instate;
4302
4303
3.80k
    SHRINK;
4304
3.80k
    if (RAW == '"') {
4305
2.17k
        NEXT;
4306
2.17k
  stop = '"';
4307
2.17k
    } else if (RAW == '\'') {
4308
1.04k
        NEXT;
4309
1.04k
  stop = '\'';
4310
1.04k
    } else {
4311
586
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4312
586
  return(NULL);
4313
586
    }
4314
3.22k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4315
3.22k
    if (buf == NULL) {
4316
0
  xmlErrMemory(ctxt, NULL);
4317
0
  return(NULL);
4318
0
    }
4319
3.22k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4320
3.22k
    cur = CUR;
4321
60.4k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4322
57.1k
  if (len + 1 >= size) {
4323
194
      xmlChar *tmp;
4324
4325
194
      size *= 2;
4326
194
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4327
194
      if (tmp == NULL) {
4328
0
    xmlErrMemory(ctxt, NULL);
4329
0
    xmlFree(buf);
4330
0
    return(NULL);
4331
0
      }
4332
194
      buf = tmp;
4333
194
  }
4334
57.1k
  buf[len++] = cur;
4335
57.1k
  count++;
4336
57.1k
  if (count > 50) {
4337
937
      SHRINK;
4338
937
      GROW;
4339
937
      count = 0;
4340
937
            if (ctxt->instate == XML_PARSER_EOF) {
4341
0
    xmlFree(buf);
4342
0
    return(NULL);
4343
0
            }
4344
937
  }
4345
57.1k
  NEXT;
4346
57.1k
  cur = CUR;
4347
57.1k
  if (cur == 0) {
4348
86
      GROW;
4349
86
      SHRINK;
4350
86
      cur = CUR;
4351
86
  }
4352
57.1k
        if (len > maxLength) {
4353
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4354
0
            xmlFree(buf);
4355
0
            return(NULL);
4356
0
        }
4357
57.1k
    }
4358
3.22k
    buf[len] = 0;
4359
3.22k
    if (cur != stop) {
4360
720
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4361
2.50k
    } else {
4362
2.50k
  NEXT;
4363
2.50k
    }
4364
3.22k
    ctxt->instate = oldstate;
4365
3.22k
    return(buf);
4366
3.22k
}
4367
4368
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
4369
4370
/*
4371
 * used for the test in the inner loop of the char data testing
4372
 */
4373
static const unsigned char test_char_data[256] = {
    /*
     * Lookup table driving the inner scanning loop of xmlParseCharData():
     * a non-zero entry means the byte can be skipped over directly, a zero
     * entry makes the loop stop so the byte is examined individually.
     * Every non-zero entry equals its own index.
     */

    /* 0x00 - 0x1F: control bytes, only TAB (0x09) is skippable.
     * LF (0x0A) and CR (0x0D) are zero: they are handled separately. */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

    /* 0x20 - 0x3F: '&' (0x26) and '<' (0x3C) stop the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,

    /* 0x40 - 0x5F: ']' (0x5D) stops the scan */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,

    /* 0x60 - 0x7F: all skippable */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,

    /* 0x80 - 0xFF: non-ascii bytes always stop the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4407
4408
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero the function first runs a byte-oriented fast path
 * which hands slices of the input buffer straight to the SAX callbacks
 * without copying. As soon as a byte is met that the fast path does not
 * handle (or when @cdata is non-zero) the work is delegated to
 * xmlParseCharDataComplex().
 *
 * NOTE(review): the fast path only tests ctxt->sax for NULL before calling
 * the callbacks; unlike xmlParseCharDataComplex() it does not look at
 * ctxt->disableSAX. Confirm whether that difference is intended.
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position snapshot: written back to the input just before falling
     * through to the complex parser, refreshed after each SAX delivery.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* skip a leading run of spaces and line feeds */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            /*
             * The run made only of spaces / line feeds is directly
             * followed by '<': deliver it (if not empty) and stop.
             */
            if (*in == '<') {
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    /* advance the input before invoking any callback */
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                               tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* skip every byte whose test_char_data[] entry is non-zero */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed here: report it and resume just
                 * after the first ']' */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in + 1;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver the bytes between the input cursor and 'in' */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /* chunk starts with a blank: it may be ignorable */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: the input cursor is moved onto the LF,
                     * so the CR is not part of the next chunk delivered.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                /* CR not followed by LF: step back and leave it in place */
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            if (ctxt->instate == XML_PARSER_EOF)
                return;
            /* reload: SHRINK/GROW may have changed the input cursor */
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
        nbchar = 0;
    }
    /* hand over to the generic parser with the last recorded position */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4565
4566
/**
4567
 * xmlParseCharDataComplex:
4568
 * @ctxt:  an XML parser context
4569
 * @cdata:  int indicating whether we are within a CDATA section
4570
 *
4571
 * parse a CharData section.this is the fallback function
4572
 * of xmlParseCharData() when the parsing requires handling
4573
 * of non-ASCII characters.
4574
 */
4575
static void
4576
43.0k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
4577
43.0k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4578
43.0k
    int nbchar = 0;
4579
43.0k
    int cur, l;
4580
43.0k
    int count = 0;
4581
4582
43.0k
    SHRINK;
4583
43.0k
    GROW;
4584
43.0k
    cur = CUR_CHAR(l);
4585
8.31M
    while ((cur != '<') && /* checked */
4586
8.31M
           (cur != '&') &&
4587
8.31M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4588
8.27M
  if ((cur == ']') && (NXT(1) == ']') &&
4589
8.27M
      (NXT(2) == '>')) {
4590
223
      if (cdata) break;
4591
223
      else {
4592
223
    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4593
223
      }
4594
223
  }
4595
8.27M
  COPY_BUF(l,buf,nbchar,cur);
4596
  /* move current position before possible calling of ctxt->sax->characters */
4597
8.27M
  NEXTL(l);
4598
8.27M
  cur = CUR_CHAR(l);
4599
8.27M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4600
68.2k
      buf[nbchar] = 0;
4601
4602
      /*
4603
       * OK the segment is to be consumed as chars.
4604
       */
4605
68.2k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4606
68.0k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4607
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4608
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4609
0
                                     buf, nbchar);
4610
68.0k
    } else {
4611
68.0k
        if (ctxt->sax->characters != NULL)
4612
68.0k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4613
68.0k
        if ((ctxt->sax->characters !=
4614
68.0k
             ctxt->sax->ignorableWhitespace) &&
4615
68.0k
      (*ctxt->space == -1))
4616
1.24k
      *ctxt->space = -2;
4617
68.0k
    }
4618
68.0k
      }
4619
68.2k
      nbchar = 0;
4620
            /* something really bad happened in the SAX callback */
4621
68.2k
            if (ctxt->instate != XML_PARSER_CONTENT)
4622
0
                return;
4623
68.2k
  }
4624
8.27M
  count++;
4625
8.27M
  if (count > 50) {
4626
153k
      SHRINK;
4627
153k
      GROW;
4628
153k
      count = 0;
4629
153k
            if (ctxt->instate == XML_PARSER_EOF)
4630
0
    return;
4631
153k
  }
4632
8.27M
    }
4633
43.0k
    if (nbchar != 0) {
4634
42.1k
        buf[nbchar] = 0;
4635
  /*
4636
   * OK the segment is to be consumed as chars.
4637
   */
4638
42.1k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4639
42.0k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4640
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4641
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4642
42.0k
      } else {
4643
42.0k
    if (ctxt->sax->characters != NULL)
4644
42.0k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4645
42.0k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4646
42.0k
        (*ctxt->space == -1))
4647
33.4k
        *ctxt->space = -2;
4648
42.0k
      }
4649
42.0k
  }
4650
42.1k
    }
4651
43.0k
    if ((cur != 0) && (!IS_CHAR(cur))) {
4652
  /* Generate the error and skip the offending character */
4653
77
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4654
77
                          "PCDATA invalid Char value %d\n",
4655
77
                    cur);
4656
77
  NEXTL(l);
4657
77
    }
4658
43.0k
}
4659
4660
/**
4661
 * xmlParseExternalID:
4662
 * @ctxt:  an XML parser context
4663
 * @publicID:  a xmlChar** receiving PubidLiteral
4664
 * @strict: indicate whether we should restrict parsing to only
4665
 *          production [75], see NOTE below
4666
 *
4667
 * Parse an External ID or a Public ID
4668
 *
4669
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4670
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4671
 *
4672
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4673
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4674
 *
4675
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4676
 *
4677
 * Returns the function returns SystemLiteral and in the second
4678
 *                case publicID receives PubidLiteral, is strict is off
4679
 *                it is possible to return NULL and have publicID set.
4680
 */
4681
4682
xmlChar *
4683
11.8k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4684
11.8k
    xmlChar *URI = NULL;
4685
4686
11.8k
    SHRINK;
4687
4688
11.8k
    *publicID = NULL;
4689
11.8k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4690
1.34k
        SKIP(6);
4691
1.34k
  if (SKIP_BLANKS == 0) {
4692
322
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4693
322
                     "Space required after 'SYSTEM'\n");
4694
322
  }
4695
1.34k
  URI = xmlParseSystemLiteral(ctxt);
4696
1.34k
  if (URI == NULL) {
4697
85
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4698
85
        }
4699
10.5k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4700
3.80k
        SKIP(6);
4701
3.80k
  if (SKIP_BLANKS == 0) {
4702
2.12k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4703
2.12k
        "Space required after 'PUBLIC'\n");
4704
2.12k
  }
4705
3.80k
  *publicID = xmlParsePubidLiteral(ctxt);
4706
3.80k
  if (*publicID == NULL) {
4707
586
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4708
586
  }
4709
3.80k
  if (strict) {
4710
      /*
4711
       * We don't handle [83] so "S SystemLiteral" is required.
4712
       */
4713
3.08k
      if (SKIP_BLANKS == 0) {
4714
2.15k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4715
2.15k
      "Space required after the Public Identifier\n");
4716
2.15k
      }
4717
3.08k
  } else {
4718
      /*
4719
       * We handle [83] so we return immediately, if
4720
       * "S SystemLiteral" is not detected. We skip blanks if no
4721
             * system literal was found, but this is harmless since we must
4722
             * be at the end of a NotationDecl.
4723
       */
4724
722
      if (SKIP_BLANKS == 0) return(NULL);
4725
531
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4726
531
  }
4727
3.36k
  URI = xmlParseSystemLiteral(ctxt);
4728
3.36k
  if (URI == NULL) {
4729
1.62k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4730
1.62k
        }
4731
3.36k
    }
4732
11.4k
    return(URI);
4733
11.8k
}
4734
4735
/**
4736
 * xmlParseCommentComplex:
4737
 * @ctxt:  an XML parser context
4738
 * @buf:  the already parsed part of the buffer
4739
 * @len:  number of bytes in the buffer
4740
 * @size:  allocated size of the buffer
4741
 *
4742
 * Skip an XML (SGML) comment <!-- .... -->
4743
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4744
 *  must not occur within comments. "
4745
 * This is the slow routine in case the accelerator for ascii didn't work
4746
 *
4747
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4748
 */
4749
static void
4750
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4751
5.69k
                       size_t len, size_t size) {
4752
5.69k
    int q, ql;
4753
5.69k
    int r, rl;
4754
5.69k
    int cur, l;
4755
5.69k
    size_t count = 0;
4756
5.69k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4757
5.69k
                       XML_MAX_HUGE_LENGTH :
4758
5.69k
                       XML_MAX_TEXT_LENGTH;
4759
5.69k
    int inputid;
4760
4761
5.69k
    inputid = ctxt->input->id;
4762
4763
5.69k
    if (buf == NULL) {
4764
5.69k
        len = 0;
4765
5.69k
  size = XML_PARSER_BUFFER_SIZE;
4766
5.69k
  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4767
5.69k
  if (buf == NULL) {
4768
0
      xmlErrMemory(ctxt, NULL);
4769
0
      return;
4770
0
  }
4771
5.69k
    }
4772
5.69k
    GROW; /* Assure there's enough input data */
4773
5.69k
    q = CUR_CHAR(ql);
4774
5.69k
    if (q == 0)
4775
185
        goto not_terminated;
4776
5.50k
    if (!IS_CHAR(q)) {
4777
47
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4778
47
                          "xmlParseComment: invalid xmlChar value %d\n",
4779
47
                    q);
4780
47
  xmlFree (buf);
4781
47
  return;
4782
47
    }
4783
5.46k
    NEXTL(ql);
4784
5.46k
    r = CUR_CHAR(rl);
4785
5.46k
    if (r == 0)
4786
68
        goto not_terminated;
4787
5.39k
    if (!IS_CHAR(r)) {
4788
23
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4789
23
                          "xmlParseComment: invalid xmlChar value %d\n",
4790
23
                    q);
4791
23
  xmlFree (buf);
4792
23
  return;
4793
23
    }
4794
5.37k
    NEXTL(rl);
4795
5.37k
    cur = CUR_CHAR(l);
4796
5.37k
    if (cur == 0)
4797
85
        goto not_terminated;
4798
35.6M
    while (IS_CHAR(cur) && /* checked */
4799
35.6M
           ((cur != '>') ||
4800
35.6M
      (r != '-') || (q != '-'))) {
4801
35.6M
  if ((r == '-') && (q == '-')) {
4802
1.52k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4803
1.52k
  }
4804
35.6M
  if (len + 5 >= size) {
4805
5.05k
      xmlChar *new_buf;
4806
5.05k
            size_t new_size;
4807
4808
5.05k
      new_size = size * 2;
4809
5.05k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4810
5.05k
      if (new_buf == NULL) {
4811
0
    xmlFree (buf);
4812
0
    xmlErrMemory(ctxt, NULL);
4813
0
    return;
4814
0
      }
4815
5.05k
      buf = new_buf;
4816
5.05k
            size = new_size;
4817
5.05k
  }
4818
35.6M
  COPY_BUF(ql,buf,len,q);
4819
35.6M
  q = r;
4820
35.6M
  ql = rl;
4821
35.6M
  r = cur;
4822
35.6M
  rl = l;
4823
4824
35.6M
  count++;
4825
35.6M
  if (count > 50) {
4826
698k
      SHRINK;
4827
698k
      GROW;
4828
698k
      count = 0;
4829
698k
            if (ctxt->instate == XML_PARSER_EOF) {
4830
0
    xmlFree(buf);
4831
0
    return;
4832
0
            }
4833
698k
  }
4834
35.6M
  NEXTL(l);
4835
35.6M
  cur = CUR_CHAR(l);
4836
35.6M
  if (cur == 0) {
4837
439
      SHRINK;
4838
439
      GROW;
4839
439
      cur = CUR_CHAR(l);
4840
439
  }
4841
4842
35.6M
        if (len > maxLength) {
4843
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4844
0
                         "Comment too big found", NULL);
4845
0
            xmlFree (buf);
4846
0
            return;
4847
0
        }
4848
35.6M
    }
4849
5.28k
    buf[len] = 0;
4850
5.28k
    if (cur == 0) {
4851
439
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4852
439
                       "Comment not terminated \n<!--%.50s\n", buf);
4853
4.84k
    } else if (!IS_CHAR(cur)) {
4854
98
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4855
98
                          "xmlParseComment: invalid xmlChar value %d\n",
4856
98
                    cur);
4857
4.74k
    } else {
4858
4.74k
  if (inputid != ctxt->input->id) {
4859
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4860
0
               "Comment doesn't start and stop in the same"
4861
0
                           " entity\n");
4862
0
  }
4863
4.74k
        NEXT;
4864
4.74k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4865
4.74k
      (!ctxt->disableSAX))
4866
0
      ctxt->sax->comment(ctxt->userData, buf);
4867
4.74k
    }
4868
5.28k
    xmlFree(buf);
4869
5.28k
    return;
4870
338
not_terminated:
4871
338
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4872
338
       "Comment not terminated\n", NULL);
4873
338
    xmlFree(buf);
4874
338
    return;
4875
5.28k
}
4876
4877
/**
4878
 * xmlParseComment:
4879
 * @ctxt:  an XML parser context
4880
 *
4881
 * Skip an XML (SGML) comment <!-- .... -->
4882
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4883
 *  must not occur within comments. "
4884
 *
4885
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4886
 */
4887
void
4888
12.0k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4889
12.0k
    xmlChar *buf = NULL;
4890
12.0k
    size_t size = XML_PARSER_BUFFER_SIZE;
4891
12.0k
    size_t len = 0;
4892
12.0k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4893
12.0k
                       XML_MAX_HUGE_LENGTH :
4894
12.0k
                       XML_MAX_TEXT_LENGTH;
4895
12.0k
    xmlParserInputState state;
4896
12.0k
    const xmlChar *in;
4897
12.0k
    size_t nbchar = 0;
4898
12.0k
    int ccol;
4899
12.0k
    int inputid;
4900
4901
    /*
4902
     * Check that there is a comment right here.
4903
     */
4904
12.0k
    if ((RAW != '<') || (NXT(1) != '!') ||
4905
12.0k
        (NXT(2) != '-') || (NXT(3) != '-')) return;
4906
11.9k
    state = ctxt->instate;
4907
11.9k
    ctxt->instate = XML_PARSER_COMMENT;
4908
11.9k
    inputid = ctxt->input->id;
4909
11.9k
    SKIP(4);
4910
11.9k
    SHRINK;
4911
11.9k
    GROW;
4912
4913
    /*
4914
     * Accelerated common case where input don't need to be
4915
     * modified before passing it to the handler.
4916
     */
4917
11.9k
    in = ctxt->input->cur;
4918
11.9k
    do {
4919
11.9k
  if (*in == 0xA) {
4920
21.3k
      do {
4921
21.3k
    ctxt->input->line++; ctxt->input->col = 1;
4922
21.3k
    in++;
4923
21.3k
      } while (*in == 0xA);
4924
5.40k
  }
4925
27.7k
get_more:
4926
27.7k
        ccol = ctxt->input->col;
4927
789k
  while (((*in > '-') && (*in <= 0x7F)) ||
4928
789k
         ((*in >= 0x20) && (*in < '-')) ||
4929
789k
         (*in == 0x09)) {
4930
762k
        in++;
4931
762k
        ccol++;
4932
762k
  }
4933
27.7k
  ctxt->input->col = ccol;
4934
27.7k
  if (*in == 0xA) {
4935
22.9k
      do {
4936
22.9k
    ctxt->input->line++; ctxt->input->col = 1;
4937
22.9k
    in++;
4938
22.9k
      } while (*in == 0xA);
4939
8.42k
      goto get_more;
4940
8.42k
  }
4941
19.3k
  nbchar = in - ctxt->input->cur;
4942
  /*
4943
   * save current set of data
4944
   */
4945
19.3k
  if (nbchar > 0) {
4946
16.2k
      if ((ctxt->sax != NULL) &&
4947
16.2k
    (ctxt->sax->comment != NULL)) {
4948
0
    if (buf == NULL) {
4949
0
        if ((*in == '-') && (in[1] == '-'))
4950
0
            size = nbchar + 1;
4951
0
        else
4952
0
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4953
0
        buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4954
0
        if (buf == NULL) {
4955
0
            xmlErrMemory(ctxt, NULL);
4956
0
      ctxt->instate = state;
4957
0
      return;
4958
0
        }
4959
0
        len = 0;
4960
0
    } else if (len + nbchar + 1 >= size) {
4961
0
        xmlChar *new_buf;
4962
0
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4963
0
        new_buf = (xmlChar *) xmlRealloc(buf,
4964
0
                                         size * sizeof(xmlChar));
4965
0
        if (new_buf == NULL) {
4966
0
            xmlFree (buf);
4967
0
      xmlErrMemory(ctxt, NULL);
4968
0
      ctxt->instate = state;
4969
0
      return;
4970
0
        }
4971
0
        buf = new_buf;
4972
0
    }
4973
0
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4974
0
    len += nbchar;
4975
0
    buf[len] = 0;
4976
0
      }
4977
16.2k
  }
4978
19.3k
        if (len > maxLength) {
4979
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4980
0
                         "Comment too big found", NULL);
4981
0
            xmlFree (buf);
4982
0
            return;
4983
0
        }
4984
19.3k
  ctxt->input->cur = in;
4985
19.3k
  if (*in == 0xA) {
4986
0
      in++;
4987
0
      ctxt->input->line++; ctxt->input->col = 1;
4988
0
  }
4989
19.3k
  if (*in == 0xD) {
4990
1.40k
      in++;
4991
1.40k
      if (*in == 0xA) {
4992
290
    ctxt->input->cur = in;
4993
290
    in++;
4994
290
    ctxt->input->line++; ctxt->input->col = 1;
4995
290
    goto get_more;
4996
290
      }
4997
1.11k
      in--;
4998
1.11k
  }
4999
19.0k
  SHRINK;
5000
19.0k
  GROW;
5001
19.0k
        if (ctxt->instate == XML_PARSER_EOF) {
5002
0
            xmlFree(buf);
5003
0
            return;
5004
0
        }
5005
19.0k
  in = ctxt->input->cur;
5006
19.0k
  if (*in == '-') {
5007
13.3k
      if (in[1] == '-') {
5008
7.68k
          if (in[2] == '>') {
5009
6.30k
        if (ctxt->input->id != inputid) {
5010
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5011
0
                     "comment doesn't start and stop in the"
5012
0
                                       " same entity\n");
5013
0
        }
5014
6.30k
        SKIP(3);
5015
6.30k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5016
6.30k
            (!ctxt->disableSAX)) {
5017
0
      if (buf != NULL)
5018
0
          ctxt->sax->comment(ctxt->userData, buf);
5019
0
      else
5020
0
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5021
0
        }
5022
6.30k
        if (buf != NULL)
5023
0
            xmlFree(buf);
5024
6.30k
        if (ctxt->instate != XML_PARSER_EOF)
5025
6.30k
      ctxt->instate = state;
5026
6.30k
        return;
5027
6.30k
    }
5028
1.38k
    if (buf != NULL) {
5029
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5030
0
                          "Double hyphen within comment: "
5031
0
                                      "<!--%.50s\n",
5032
0
              buf);
5033
0
    } else
5034
1.38k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5035
1.38k
                          "Double hyphen within comment\n", NULL);
5036
1.38k
                if (ctxt->instate == XML_PARSER_EOF) {
5037
0
                    xmlFree(buf);
5038
0
                    return;
5039
0
                }
5040
1.38k
    in++;
5041
1.38k
    ctxt->input->col++;
5042
1.38k
      }
5043
7.02k
      in++;
5044
7.02k
      ctxt->input->col++;
5045
7.02k
      goto get_more;
5046
13.3k
  }
5047
19.0k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5048
5.69k
    xmlParseCommentComplex(ctxt, buf, len, size);
5049
5.69k
    ctxt->instate = state;
5050
5.69k
    return;
5051
11.9k
}
5052
5053
5054
/**
5055
 * xmlParsePITarget:
5056
 * @ctxt:  an XML parser context
5057
 *
5058
 * parse the name of a PI
5059
 *
5060
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5061
 *
5062
 * Returns the PITarget name or NULL
5063
 */
5064
5065
const xmlChar *
5066
21.8k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5067
21.8k
    const xmlChar *name;
5068
5069
21.8k
    name = xmlParseName(ctxt);
5070
21.8k
    if ((name != NULL) &&
5071
21.8k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5072
21.8k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5073
21.8k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5074
2.42k
  int i;
5075
2.42k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5076
2.42k
      (name[2] == 'l') && (name[3] == 0)) {
5077
672
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5078
672
     "XML declaration allowed only at the start of the document\n");
5079
672
      return(name);
5080
1.75k
  } else if (name[3] == 0) {
5081
720
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5082
720
      return(name);
5083
720
  }
5084
2.96k
  for (i = 0;;i++) {
5085
2.96k
      if (xmlW3CPIs[i] == NULL) break;
5086
2.00k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5087
68
          return(name);
5088
2.00k
  }
5089
965
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5090
965
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5091
965
          NULL, NULL);
5092
965
    }
5093
20.3k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5094
872
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5095
872
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5096
872
    }
5097
20.3k
    return(name);
5098
21.8k
}
5099
5100
#ifdef LIBXML_CATALOG_ENABLED
5101
/**
5102
 * xmlParseCatalogPI:
5103
 * @ctxt:  an XML parser context
5104
 * @catalog:  the PI value string
5105
 *
5106
 * parse an XML Catalog Processing Instruction.
5107
 *
5108
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5109
 *
5110
 * Occurs only if allowed by the user and if happening in the Misc
5111
 * part of the document before any doctype information
5112
 * This will add the given catalog to the parsing context in order
5113
 * to be used if there is a resolution need further down in the document
5114
 */
5115
5116
static void
5117
2.69k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5118
2.69k
    xmlChar *URL = NULL;
5119
2.69k
    const xmlChar *tmp, *base;
5120
2.69k
    xmlChar marker;
5121
5122
2.69k
    tmp = catalog;
5123
2.69k
    while (IS_BLANK_CH(*tmp)) tmp++;
5124
2.69k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5125
509
  goto error;
5126
2.18k
    tmp += 7;
5127
2.18k
    while (IS_BLANK_CH(*tmp)) tmp++;
5128
2.18k
    if (*tmp != '=') {
5129
461
  return;
5130
461
    }
5131
1.72k
    tmp++;
5132
1.72k
    while (IS_BLANK_CH(*tmp)) tmp++;
5133
1.72k
    marker = *tmp;
5134
1.72k
    if ((marker != '\'') && (marker != '"'))
5135
351
  goto error;
5136
1.37k
    tmp++;
5137
1.37k
    base = tmp;
5138
3.16k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5139
1.37k
    if (*tmp == 0)
5140
224
  goto error;
5141
1.14k
    URL = xmlStrndup(base, tmp - base);
5142
1.14k
    tmp++;
5143
1.14k
    while (IS_BLANK_CH(*tmp)) tmp++;
5144
1.14k
    if (*tmp != 0)
5145
347
  goto error;
5146
5147
799
    if (URL != NULL) {
5148
799
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5149
799
  xmlFree(URL);
5150
799
    }
5151
799
    return;
5152
5153
1.43k
error:
5154
1.43k
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5155
1.43k
            "Catalog PI syntax error: %s\n",
5156
1.43k
      catalog, NULL);
5157
1.43k
    if (URL != NULL)
5158
347
  xmlFree(URL);
5159
1.43k
}
5160
#endif
5161
5162
/**
5163
 * xmlParsePI:
5164
 * @ctxt:  an XML parser context
5165
 *
5166
 * parse an XML Processing Instruction.
5167
 *
5168
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5169
 *
5170
 * The processing is transferred to SAX once parsed.
5171
 */
5172
5173
void
5174
21.8k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5175
21.8k
    xmlChar *buf = NULL;
5176
21.8k
    size_t len = 0;
5177
21.8k
    size_t size = XML_PARSER_BUFFER_SIZE;
5178
21.8k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5179
21.8k
                       XML_MAX_HUGE_LENGTH :
5180
21.8k
                       XML_MAX_TEXT_LENGTH;
5181
21.8k
    int cur, l;
5182
21.8k
    const xmlChar *target;
5183
21.8k
    xmlParserInputState state;
5184
21.8k
    int count = 0;
5185
5186
21.8k
    if ((RAW == '<') && (NXT(1) == '?')) {
5187
21.8k
  int inputid = ctxt->input->id;
5188
21.8k
  state = ctxt->instate;
5189
21.8k
        ctxt->instate = XML_PARSER_PI;
5190
  /*
5191
   * this is a Processing Instruction.
5192
   */
5193
21.8k
  SKIP(2);
5194
21.8k
  SHRINK;
5195
5196
  /*
5197
   * Parse the target name and check for special support like
5198
   * namespace.
5199
   */
5200
21.8k
        target = xmlParsePITarget(ctxt);
5201
21.8k
  if (target != NULL) {
5202
20.6k
      if ((RAW == '?') && (NXT(1) == '>')) {
5203
3.83k
    if (inputid != ctxt->input->id) {
5204
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5205
0
                             "PI declaration doesn't start and stop in"
5206
0
                                   " the same entity\n");
5207
0
    }
5208
3.83k
    SKIP(2);
5209
5210
    /*
5211
     * SAX: PI detected.
5212
     */
5213
3.83k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5214
3.83k
        (ctxt->sax->processingInstruction != NULL))
5215
0
        ctxt->sax->processingInstruction(ctxt->userData,
5216
0
                                         target, NULL);
5217
3.83k
    if (ctxt->instate != XML_PARSER_EOF)
5218
3.83k
        ctxt->instate = state;
5219
3.83k
    return;
5220
3.83k
      }
5221
16.7k
      buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
5222
16.7k
      if (buf == NULL) {
5223
0
    xmlErrMemory(ctxt, NULL);
5224
0
    ctxt->instate = state;
5225
0
    return;
5226
0
      }
5227
16.7k
      if (SKIP_BLANKS == 0) {
5228
2.90k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5229
2.90k
        "ParsePI: PI %s space expected\n", target);
5230
2.90k
      }
5231
16.7k
      cur = CUR_CHAR(l);
5232
46.6M
      while (IS_CHAR(cur) && /* checked */
5233
46.6M
       ((cur != '?') || (NXT(1) != '>'))) {
5234
46.6M
    if (len + 5 >= size) {
5235
4.33k
        xmlChar *tmp;
5236
4.33k
                    size_t new_size = size * 2;
5237
4.33k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5238
4.33k
        if (tmp == NULL) {
5239
0
      xmlErrMemory(ctxt, NULL);
5240
0
      xmlFree(buf);
5241
0
      ctxt->instate = state;
5242
0
      return;
5243
0
        }
5244
4.33k
        buf = tmp;
5245
4.33k
                    size = new_size;
5246
4.33k
    }
5247
46.6M
    count++;
5248
46.6M
    if (count > 50) {
5249
912k
        SHRINK;
5250
912k
        GROW;
5251
912k
                    if (ctxt->instate == XML_PARSER_EOF) {
5252
0
                        xmlFree(buf);
5253
0
                        return;
5254
0
                    }
5255
912k
        count = 0;
5256
912k
    }
5257
46.6M
    COPY_BUF(l,buf,len,cur);
5258
46.6M
    NEXTL(l);
5259
46.6M
    cur = CUR_CHAR(l);
5260
46.6M
    if (cur == 0) {
5261
461
        SHRINK;
5262
461
        GROW;
5263
461
        cur = CUR_CHAR(l);
5264
461
    }
5265
46.6M
                if (len > maxLength) {
5266
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5267
0
                                      "PI %s too big found", target);
5268
0
                    xmlFree(buf);
5269
0
                    ctxt->instate = state;
5270
0
                    return;
5271
0
                }
5272
46.6M
      }
5273
16.7k
      buf[len] = 0;
5274
16.7k
      if (cur != '?') {
5275
1.01k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5276
1.01k
          "ParsePI: PI %s never end ...\n", target);
5277
15.7k
      } else {
5278
15.7k
    if (inputid != ctxt->input->id) {
5279
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5280
0
                             "PI declaration doesn't start and stop in"
5281
0
                                   " the same entity\n");
5282
0
    }
5283
15.7k
    SKIP(2);
5284
5285
15.7k
#ifdef LIBXML_CATALOG_ENABLED
5286
15.7k
    if (((state == XML_PARSER_MISC) ||
5287
15.7k
               (state == XML_PARSER_START)) &&
5288
15.7k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5289
2.69k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5290
2.69k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5291
2.69k
      (allow == XML_CATA_ALLOW_ALL))
5292
2.69k
      xmlParseCatalogPI(ctxt, buf);
5293
2.69k
    }
5294
15.7k
#endif
5295
5296
5297
    /*
5298
     * SAX: PI detected.
5299
     */
5300
15.7k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5301
15.7k
        (ctxt->sax->processingInstruction != NULL))
5302
0
        ctxt->sax->processingInstruction(ctxt->userData,
5303
0
                                         target, buf);
5304
15.7k
      }
5305
16.7k
      xmlFree(buf);
5306
16.7k
  } else {
5307
1.21k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5308
1.21k
  }
5309
18.0k
  if (ctxt->instate != XML_PARSER_EOF)
5310
18.0k
      ctxt->instate = state;
5311
18.0k
    }
5312
21.8k
}
5313
5314
/**
5315
 * xmlParseNotationDecl:
5316
 * @ctxt:  an XML parser context
5317
 *
5318
 * parse a notation declaration
5319
 *
5320
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5321
 *
5322
 * Hence there is actually 3 choices:
5323
 *     'PUBLIC' S PubidLiteral
5324
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5325
 * and 'SYSTEM' S SystemLiteral
5326
 *
5327
 * See the NOTE on xmlParseExternalID().
5328
 */
5329
5330
void
5331
2.38k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5332
2.38k
    const xmlChar *name;
5333
2.38k
    xmlChar *Pubid;
5334
2.38k
    xmlChar *Systemid;
5335
5336
2.38k
    if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5337
2.34k
  int inputid = ctxt->input->id;
5338
2.34k
  SHRINK;
5339
2.34k
  SKIP(10);
5340
2.34k
  if (SKIP_BLANKS == 0) {
5341
107
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5342
107
         "Space required after '<!NOTATION'\n");
5343
107
      return;
5344
107
  }
5345
5346
2.23k
        name = xmlParseName(ctxt);
5347
2.23k
  if (name == NULL) {
5348
446
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5349
446
      return;
5350
446
  }
5351
1.79k
  if (xmlStrchr(name, ':') != NULL) {
5352
444
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5353
444
         "colons are forbidden from notation names '%s'\n",
5354
444
         name, NULL, NULL);
5355
444
  }
5356
1.79k
  if (SKIP_BLANKS == 0) {
5357
777
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5358
777
         "Space required after the NOTATION name'\n");
5359
777
      return;
5360
777
  }
5361
5362
  /*
5363
   * Parse the IDs.
5364
   */
5365
1.01k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5366
1.01k
  SKIP_BLANKS;
5367
5368
1.01k
  if (RAW == '>') {
5369
243
      if (inputid != ctxt->input->id) {
5370
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5371
0
                         "Notation declaration doesn't start and stop"
5372
0
                               " in the same entity\n");
5373
0
      }
5374
243
      NEXT;
5375
243
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5376
243
    (ctxt->sax->notationDecl != NULL))
5377
0
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5378
771
  } else {
5379
771
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5380
771
  }
5381
1.01k
  if (Systemid != NULL) xmlFree(Systemid);
5382
1.01k
  if (Pubid != NULL) xmlFree(Pubid);
5383
1.01k
    }
5384
2.38k
}
5385
5386
/**
5387
 * xmlParseEntityDecl:
5388
 * @ctxt:  an XML parser context
5389
 *
5390
 * parse <!ENTITY declarations
5391
 *
5392
 * [70] EntityDecl ::= GEDecl | PEDecl
5393
 *
5394
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5395
 *
5396
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5397
 *
5398
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5399
 *
5400
 * [74] PEDef ::= EntityValue | ExternalID
5401
 *
5402
 * [76] NDataDecl ::= S 'NDATA' S Name
5403
 *
5404
 * [ VC: Notation Declared ]
5405
 * The Name must match the declared name of a notation.
5406
 */
5407
5408
void
5409
16.1k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5410
16.1k
    const xmlChar *name = NULL;
5411
16.1k
    xmlChar *value = NULL;
5412
16.1k
    xmlChar *URI = NULL, *literal = NULL;
5413
16.1k
    const xmlChar *ndata = NULL;
5414
16.1k
    int isParameter = 0;
5415
16.1k
    xmlChar *orig = NULL;
5416
5417
    /* GROW; done in the caller */
5418
16.1k
    if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
5419
16.0k
  int inputid = ctxt->input->id;
5420
16.0k
  SHRINK;
5421
16.0k
  SKIP(8);
5422
16.0k
  if (SKIP_BLANKS == 0) {
5423
10.4k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5424
10.4k
         "Space required after '<!ENTITY'\n");
5425
10.4k
  }
5426
5427
16.0k
  if (RAW == '%') {
5428
2.93k
      NEXT;
5429
2.93k
      if (SKIP_BLANKS == 0) {
5430
1.99k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5431
1.99k
             "Space required after '%%'\n");
5432
1.99k
      }
5433
2.93k
      isParameter = 1;
5434
2.93k
  }
5435
5436
16.0k
        name = xmlParseName(ctxt);
5437
16.0k
  if (name == NULL) {
5438
692
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5439
692
                     "xmlParseEntityDecl: no name\n");
5440
692
            return;
5441
692
  }
5442
15.3k
  if (xmlStrchr(name, ':') != NULL) {
5443
1.47k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5444
1.47k
         "colons are forbidden from entities names '%s'\n",
5445
1.47k
         name, NULL, NULL);
5446
1.47k
  }
5447
15.3k
  if (SKIP_BLANKS == 0) {
5448
7.81k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449
7.81k
         "Space required after the entity name\n");
5450
7.81k
  }
5451
5452
15.3k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5453
  /*
5454
   * handle the various case of definitions...
5455
   */
5456
15.3k
  if (isParameter) {
5457
2.86k
      if ((RAW == '"') || (RAW == '\'')) {
5458
1.18k
          value = xmlParseEntityValue(ctxt, &orig);
5459
1.18k
    if (value) {
5460
363
        if ((ctxt->sax != NULL) &&
5461
363
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5462
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5463
0
                        XML_INTERNAL_PARAMETER_ENTITY,
5464
0
            NULL, NULL, value);
5465
363
    }
5466
1.67k
      } else {
5467
1.67k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5468
1.67k
    if ((URI == NULL) && (literal == NULL)) {
5469
186
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5470
186
    }
5471
1.67k
    if (URI) {
5472
933
        xmlURIPtr uri;
5473
5474
933
        uri = xmlParseURI((const char *) URI);
5475
933
        if (uri == NULL) {
5476
95
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5477
95
             "Invalid URI: %s\n", URI);
5478
      /*
5479
       * This really ought to be a well formedness error
5480
       * but the XML Core WG decided otherwise c.f. issue
5481
       * E26 of the XML erratas.
5482
       */
5483
838
        } else {
5484
838
      if (uri->fragment != NULL) {
5485
          /*
5486
           * Okay this is foolish to block those but not
5487
           * invalid URIs.
5488
           */
5489
34
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5490
804
      } else {
5491
804
          if ((ctxt->sax != NULL) &&
5492
804
        (!ctxt->disableSAX) &&
5493
804
        (ctxt->sax->entityDecl != NULL))
5494
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5495
0
              XML_EXTERNAL_PARAMETER_ENTITY,
5496
0
              literal, URI, NULL);
5497
804
      }
5498
838
      xmlFreeURI(uri);
5499
838
        }
5500
933
    }
5501
1.67k
      }
5502
12.4k
  } else {
5503
12.4k
      if ((RAW == '"') || (RAW == '\'')) {
5504
8.81k
          value = xmlParseEntityValue(ctxt, &orig);
5505
8.81k
    if ((ctxt->sax != NULL) &&
5506
8.81k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5507
0
        ctxt->sax->entityDecl(ctxt->userData, name,
5508
0
        XML_INTERNAL_GENERAL_ENTITY,
5509
0
        NULL, NULL, value);
5510
    /*
5511
     * For expat compatibility in SAX mode.
5512
     */
5513
8.81k
    if ((ctxt->myDoc == NULL) ||
5514
8.81k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5515
8.81k
        if (ctxt->myDoc == NULL) {
5516
894
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5517
894
      if (ctxt->myDoc == NULL) {
5518
0
          xmlErrMemory(ctxt, "New Doc failed");
5519
0
          return;
5520
0
      }
5521
894
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5522
894
        }
5523
8.81k
        if (ctxt->myDoc->intSubset == NULL)
5524
894
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525
894
              BAD_CAST "fake", NULL, NULL);
5526
5527
8.81k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5528
8.81k
                    NULL, NULL, value);
5529
8.81k
    }
5530
8.81k
      } else {
5531
3.66k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5532
3.66k
    if ((URI == NULL) && (literal == NULL)) {
5533
1.60k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5534
1.60k
    }
5535
3.66k
    if (URI) {
5536
1.59k
        xmlURIPtr uri;
5537
5538
1.59k
        uri = xmlParseURI((const char *)URI);
5539
1.59k
        if (uri == NULL) {
5540
944
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5541
944
             "Invalid URI: %s\n", URI);
5542
      /*
5543
       * This really ought to be a well formedness error
5544
       * but the XML Core WG decided otherwise c.f. issue
5545
       * E26 of the XML erratas.
5546
       */
5547
944
        } else {
5548
650
      if (uri->fragment != NULL) {
5549
          /*
5550
           * Okay this is foolish to block those but not
5551
           * invalid URIs.
5552
           */
5553
423
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5554
423
      }
5555
650
      xmlFreeURI(uri);
5556
650
        }
5557
1.59k
    }
5558
3.66k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5559
631
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560
631
           "Space required before 'NDATA'\n");
5561
631
    }
5562
3.66k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5563
456
        SKIP(5);
5564
456
        if (SKIP_BLANKS == 0) {
5565
47
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5566
47
               "Space required after 'NDATA'\n");
5567
47
        }
5568
456
        ndata = xmlParseName(ctxt);
5569
456
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5570
456
            (ctxt->sax->unparsedEntityDecl != NULL))
5571
0
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5572
0
            literal, URI, ndata);
5573
3.20k
    } else {
5574
3.20k
        if ((ctxt->sax != NULL) &&
5575
3.20k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5576
0
      ctxt->sax->entityDecl(ctxt->userData, name,
5577
0
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5578
0
            literal, URI, NULL);
5579
        /*
5580
         * For expat compatibility in SAX mode.
5581
         * assuming the entity replacement was asked for
5582
         */
5583
3.20k
        if ((ctxt->replaceEntities != 0) &&
5584
3.20k
      ((ctxt->myDoc == NULL) ||
5585
0
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5586
0
      if (ctxt->myDoc == NULL) {
5587
0
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
0
          if (ctxt->myDoc == NULL) {
5589
0
              xmlErrMemory(ctxt, "New Doc failed");
5590
0
        return;
5591
0
          }
5592
0
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
0
      }
5594
5595
0
      if (ctxt->myDoc->intSubset == NULL)
5596
0
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5597
0
            BAD_CAST "fake", NULL, NULL);
5598
0
      xmlSAX2EntityDecl(ctxt, name,
5599
0
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5600
0
                  literal, URI, NULL);
5601
0
        }
5602
3.20k
    }
5603
3.66k
      }
5604
12.4k
  }
5605
15.3k
  if (ctxt->instate == XML_PARSER_EOF)
5606
0
      goto done;
5607
15.3k
  SKIP_BLANKS;
5608
15.3k
  if (RAW != '>') {
5609
992
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5610
992
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5611
992
      xmlHaltParser(ctxt);
5612
14.3k
  } else {
5613
14.3k
      if (inputid != ctxt->input->id) {
5614
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5615
0
                         "Entity declaration doesn't start and stop in"
5616
0
                               " the same entity\n");
5617
0
      }
5618
14.3k
      NEXT;
5619
14.3k
  }
5620
15.3k
  if (orig != NULL) {
5621
      /*
5622
       * Ugly mechanism to save the raw entity value.
5623
       */
5624
7.25k
      xmlEntityPtr cur = NULL;
5625
5626
7.25k
      if (isParameter) {
5627
732
          if ((ctxt->sax != NULL) &&
5628
732
        (ctxt->sax->getParameterEntity != NULL))
5629
0
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5630
6.52k
      } else {
5631
6.52k
          if ((ctxt->sax != NULL) &&
5632
6.52k
        (ctxt->sax->getEntity != NULL))
5633
6.52k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5634
6.52k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5635
0
        cur = xmlSAX2GetEntity(ctxt, name);
5636
0
    }
5637
6.52k
      }
5638
7.25k
            if ((cur != NULL) && (cur->orig == NULL)) {
5639
1
    cur->orig = orig;
5640
1
                orig = NULL;
5641
1
      }
5642
7.25k
  }
5643
5644
15.3k
done:
5645
15.3k
  if (value != NULL) xmlFree(value);
5646
15.3k
  if (URI != NULL) xmlFree(URI);
5647
15.3k
  if (literal != NULL) xmlFree(literal);
5648
15.3k
        if (orig != NULL) xmlFree(orig);
5649
15.3k
    }
5650
16.1k
}
5651
5652
/**
5653
 * xmlParseDefaultDecl:
5654
 * @ctxt:  an XML parser context
5655
 * @value:  Receive a possible fixed default value for the attribute
5656
 *
5657
 * Parse an attribute default declaration
5658
 *
5659
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5660
 *
5661
 * [ VC: Required Attribute ]
5662
 * if the default declaration is the keyword #REQUIRED, then the
5663
 * attribute must be specified for all elements of the type in the
5664
 * attribute-list declaration.
5665
 *
5666
 * [ VC: Attribute Default Legal ]
5667
 * The declared default value must meet the lexical constraints of
5668
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5669
 *
5670
 * [ VC: Fixed Attribute Default ]
5671
 * if an attribute has a default value declared with the #FIXED
5672
 * keyword, instances of that attribute must match the default value.
5673
 *
5674
 * [ WFC: No < in Attribute Values ]
5675
 * handled in xmlParseAttValue()
5676
 *
5677
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5678
 *          or XML_ATTRIBUTE_FIXED.
5679
 */
5680
5681
int
5682
22.8k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5683
22.8k
    int val;
5684
22.8k
    xmlChar *ret;
5685
5686
22.8k
    *value = NULL;
5687
22.8k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5688
159
  SKIP(9);
5689
159
  return(XML_ATTRIBUTE_REQUIRED);
5690
159
    }
5691
22.6k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5692
292
  SKIP(8);
5693
292
  return(XML_ATTRIBUTE_IMPLIED);
5694
292
    }
5695
22.3k
    val = XML_ATTRIBUTE_NONE;
5696
22.3k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5697
130
  SKIP(6);
5698
130
  val = XML_ATTRIBUTE_FIXED;
5699
130
  if (SKIP_BLANKS == 0) {
5700
52
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701
52
         "Space required after '#FIXED'\n");
5702
52
  }
5703
130
    }
5704
22.3k
    ret = xmlParseAttValue(ctxt);
5705
22.3k
    ctxt->instate = XML_PARSER_DTD;
5706
22.3k
    if (ret == NULL) {
5707
867
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5708
867
           "Attribute default value declaration error\n");
5709
867
    } else
5710
21.5k
        *value = ret;
5711
22.3k
    return(val);
5712
22.6k
}
5713
5714
/**
5715
 * xmlParseNotationType:
5716
 * @ctxt:  an XML parser context
5717
 *
5718
 * parse an Notation attribute type.
5719
 *
5720
 * Note: the leading 'NOTATION' S part has already being parsed...
5721
 *
5722
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5723
 *
5724
 * [ VC: Notation Attributes ]
5725
 * Values of this type must match one of the notation names included
5726
 * in the declaration; all notation names in the declaration must be declared.
5727
 *
5728
 * Returns: the notation attribute tree built while parsing
5729
 */
5730
5731
xmlEnumerationPtr
5732
900
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5733
900
    const xmlChar *name;
5734
900
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5735
5736
900
    if (RAW != '(') {
5737
23
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5738
23
  return(NULL);
5739
23
    }
5740
877
    SHRINK;
5741
2.26k
    do {
5742
2.26k
        NEXT;
5743
2.26k
  SKIP_BLANKS;
5744
2.26k
        name = xmlParseName(ctxt);
5745
2.26k
  if (name == NULL) {
5746
274
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747
274
         "Name expected in NOTATION declaration\n");
5748
274
            xmlFreeEnumeration(ret);
5749
274
      return(NULL);
5750
274
  }
5751
1.98k
  tmp = ret;
5752
5.92k
  while (tmp != NULL) {
5753
4.75k
      if (xmlStrEqual(name, tmp->name)) {
5754
813
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5755
813
    "standalone: attribute notation value token %s duplicated\n",
5756
813
         name, NULL);
5757
813
    if (!xmlDictOwns(ctxt->dict, name))
5758
0
        xmlFree((xmlChar *) name);
5759
813
    break;
5760
813
      }
5761
3.93k
      tmp = tmp->next;
5762
3.93k
  }
5763
1.98k
  if (tmp == NULL) {
5764
1.17k
      cur = xmlCreateEnumeration(name);
5765
1.17k
      if (cur == NULL) {
5766
0
                xmlFreeEnumeration(ret);
5767
0
                return(NULL);
5768
0
            }
5769
1.17k
      if (last == NULL) ret = last = cur;
5770
564
      else {
5771
564
    last->next = cur;
5772
564
    last = cur;
5773
564
      }
5774
1.17k
  }
5775
1.98k
  SKIP_BLANKS;
5776
1.98k
    } while (RAW == '|');
5777
603
    if (RAW != ')') {
5778
397
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5779
397
        xmlFreeEnumeration(ret);
5780
397
  return(NULL);
5781
397
    }
5782
206
    NEXT;
5783
206
    return(ret);
5784
603
}
5785
5786
/**
5787
 * xmlParseEnumerationType:
5788
 * @ctxt:  an XML parser context
5789
 *
5790
 * parse an Enumeration attribute type.
5791
 *
5792
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5793
 *
5794
 * [ VC: Enumeration ]
5795
 * Values of this type must match one of the Nmtoken tokens in
5796
 * the declaration
5797
 *
5798
 * Returns: the enumeration attribute tree built while parsing
5799
 */
5800
5801
xmlEnumerationPtr
5802
1.84k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5803
1.84k
    xmlChar *name;
5804
1.84k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5805
5806
1.84k
    if (RAW != '(') {
5807
458
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5808
458
  return(NULL);
5809
458
    }
5810
1.38k
    SHRINK;
5811
3.58k
    do {
5812
3.58k
        NEXT;
5813
3.58k
  SKIP_BLANKS;
5814
3.58k
        name = xmlParseNmtoken(ctxt);
5815
3.58k
  if (name == NULL) {
5816
243
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5817
243
      return(ret);
5818
243
  }
5819
3.33k
  tmp = ret;
5820
9.53k
  while (tmp != NULL) {
5821
7.46k
      if (xmlStrEqual(name, tmp->name)) {
5822
1.26k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5823
1.26k
    "standalone: attribute enumeration value token %s duplicated\n",
5824
1.26k
         name, NULL);
5825
1.26k
    if (!xmlDictOwns(ctxt->dict, name))
5826
1.26k
        xmlFree(name);
5827
1.26k
    break;
5828
1.26k
      }
5829
6.20k
      tmp = tmp->next;
5830
6.20k
  }
5831
3.33k
  if (tmp == NULL) {
5832
2.06k
      cur = xmlCreateEnumeration(name);
5833
2.06k
      if (!xmlDictOwns(ctxt->dict, name))
5834
2.06k
    xmlFree(name);
5835
2.06k
      if (cur == NULL) {
5836
0
                xmlFreeEnumeration(ret);
5837
0
                return(NULL);
5838
0
            }
5839
2.06k
      if (last == NULL) ret = last = cur;
5840
729
      else {
5841
729
    last->next = cur;
5842
729
    last = cur;
5843
729
      }
5844
2.06k
  }
5845
3.33k
  SKIP_BLANKS;
5846
3.33k
    } while (RAW == '|');
5847
1.14k
    if (RAW != ')') {
5848
570
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5849
570
  return(ret);
5850
570
    }
5851
574
    NEXT;
5852
574
    return(ret);
5853
1.14k
}
5854
5855
/**
5856
 * xmlParseEnumeratedType:
5857
 * @ctxt:  an XML parser context
5858
 * @tree:  the enumeration tree built while parsing
5859
 *
5860
 * parse an Enumerated attribute type.
5861
 *
5862
 * [57] EnumeratedType ::= NotationType | Enumeration
5863
 *
5864
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5865
 *
5866
 *
5867
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5868
 */
5869
5870
int
5871
2.95k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5872
2.95k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5873
1.10k
  SKIP(8);
5874
1.10k
  if (SKIP_BLANKS == 0) {
5875
207
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5876
207
         "Space required after 'NOTATION'\n");
5877
207
      return(0);
5878
207
  }
5879
900
  *tree = xmlParseNotationType(ctxt);
5880
900
  if (*tree == NULL) return(0);
5881
206
  return(XML_ATTRIBUTE_NOTATION);
5882
900
    }
5883
1.84k
    *tree = xmlParseEnumerationType(ctxt);
5884
1.84k
    if (*tree == NULL) return(0);
5885
1.34k
    return(XML_ATTRIBUTE_ENUMERATION);
5886
1.84k
}
5887
5888
/**
5889
 * xmlParseAttributeType:
5890
 * @ctxt:  an XML parser context
5891
 * @tree:  the enumeration tree built while parsing
5892
 *
5893
 * parse the Attribute list def for an element
5894
 *
5895
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5896
 *
5897
 * [55] StringType ::= 'CDATA'
5898
 *
5899
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5900
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5901
 *
5902
 * Validity constraints for attribute values syntax are checked in
5903
 * xmlValidateAttributeValue()
5904
 *
5905
 * [ VC: ID ]
5906
 * Values of type ID must match the Name production. A name must not
5907
 * appear more than once in an XML document as a value of this type;
5908
 * i.e., ID values must uniquely identify the elements which bear them.
5909
 *
5910
 * [ VC: One ID per Element Type ]
5911
 * No element type may have more than one ID attribute specified.
5912
 *
5913
 * [ VC: ID Attribute Default ]
5914
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5915
 *
5916
 * [ VC: IDREF ]
5917
 * Values of type IDREF must match the Name production, and values
5918
 * of type IDREFS must match Names; each IDREF Name must match the value
5919
 * of an ID attribute on some element in the XML document; i.e. IDREF
5920
 * values must match the value of some ID attribute.
5921
 *
5922
 * [ VC: Entity Name ]
5923
 * Values of type ENTITY must match the Name production, values
5924
 * of type ENTITIES must match Names; each Entity Name must match the
5925
 * name of an unparsed entity declared in the DTD.
5926
 *
5927
 * [ VC: Name Token ]
5928
 * Values of type NMTOKEN must match the Nmtoken production; values
5929
 * of type NMTOKENS must match Nmtokens.
5930
 *
5931
 * Returns the attribute type
5932
 */
5933
int
5934
25.5k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5935
25.5k
    SHRINK;
5936
25.5k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5937
1.09k
  SKIP(5);
5938
1.09k
  return(XML_ATTRIBUTE_CDATA);
5939
24.4k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5940
200
  SKIP(6);
5941
200
  return(XML_ATTRIBUTE_IDREFS);
5942
24.2k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5943
99
  SKIP(5);
5944
99
  return(XML_ATTRIBUTE_IDREF);
5945
24.1k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5946
20.7k
        SKIP(2);
5947
20.7k
  return(XML_ATTRIBUTE_ID);
5948
20.7k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5949
11
  SKIP(6);
5950
11
  return(XML_ATTRIBUTE_ENTITY);
5951
3.45k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5952
204
  SKIP(8);
5953
204
  return(XML_ATTRIBUTE_ENTITIES);
5954
3.25k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5955
93
  SKIP(8);
5956
93
  return(XML_ATTRIBUTE_NMTOKENS);
5957
3.15k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5958
205
  SKIP(7);
5959
205
  return(XML_ATTRIBUTE_NMTOKEN);
5960
205
     }
5961
2.95k
     return(xmlParseEnumeratedType(ctxt, tree));
5962
25.5k
}
5963
5964
/**
5965
 * xmlParseAttributeListDecl:
5966
 * @ctxt:  an XML parser context
5967
 *
5968
 * : parse the Attribute list def for an element
5969
 *
5970
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5971
 *
5972
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5973
 *
5974
 */
5975
void
5976
8.53k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5977
8.53k
    const xmlChar *elemName;
5978
8.53k
    const xmlChar *attrName;
5979
8.53k
    xmlEnumerationPtr tree;
5980
5981
8.53k
    if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5982
8.46k
  int inputid = ctxt->input->id;
5983
5984
8.46k
  SKIP(9);
5985
8.46k
  if (SKIP_BLANKS == 0) {
5986
4.24k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5987
4.24k
                     "Space required after '<!ATTLIST'\n");
5988
4.24k
  }
5989
8.46k
        elemName = xmlParseName(ctxt);
5990
8.46k
  if (elemName == NULL) {
5991
118
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5992
118
         "ATTLIST: no name for Element\n");
5993
118
      return;
5994
118
  }
5995
8.34k
  SKIP_BLANKS;
5996
8.34k
  GROW;
5997
29.4k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5998
26.8k
      int type;
5999
26.8k
      int def;
6000
26.8k
      xmlChar *defaultValue = NULL;
6001
6002
26.8k
      GROW;
6003
26.8k
            tree = NULL;
6004
26.8k
      attrName = xmlParseName(ctxt);
6005
26.8k
      if (attrName == NULL) {
6006
878
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6007
878
             "ATTLIST: no name for Attribute\n");
6008
878
    break;
6009
878
      }
6010
26.0k
      GROW;
6011
26.0k
      if (SKIP_BLANKS == 0) {
6012
438
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6013
438
            "Space required after the attribute name\n");
6014
438
    break;
6015
438
      }
6016
6017
25.5k
      type = xmlParseAttributeType(ctxt, &tree);
6018
25.5k
      if (type <= 0) {
6019
1.40k
          break;
6020
1.40k
      }
6021
6022
24.1k
      GROW;
6023
24.1k
      if (SKIP_BLANKS == 0) {
6024
1.33k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6025
1.33k
             "Space required after the attribute type\n");
6026
1.33k
          if (tree != NULL)
6027
772
        xmlFreeEnumeration(tree);
6028
1.33k
    break;
6029
1.33k
      }
6030
6031
22.8k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6032
22.8k
      if (def <= 0) {
6033
0
                if (defaultValue != NULL)
6034
0
        xmlFree(defaultValue);
6035
0
          if (tree != NULL)
6036
0
        xmlFreeEnumeration(tree);
6037
0
          break;
6038
0
      }
6039
22.8k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6040
20.4k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6041
6042
22.8k
      GROW;
6043
22.8k
            if (RAW != '>') {
6044
20.3k
    if (SKIP_BLANKS == 0) {
6045
1.76k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6046
1.76k
      "Space required after the attribute default value\n");
6047
1.76k
        if (defaultValue != NULL)
6048
946
      xmlFree(defaultValue);
6049
1.76k
        if (tree != NULL)
6050
484
      xmlFreeEnumeration(tree);
6051
1.76k
        break;
6052
1.76k
    }
6053
20.3k
      }
6054
21.0k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6055
21.0k
    (ctxt->sax->attributeDecl != NULL))
6056
0
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6057
0
                          type, def, defaultValue, tree);
6058
21.0k
      else if (tree != NULL)
6059
290
    xmlFreeEnumeration(tree);
6060
6061
21.0k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6062
21.0k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6063
21.0k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6064
20.5k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6065
20.5k
      }
6066
21.0k
      if (ctxt->sax2) {
6067
21.0k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6068
21.0k
      }
6069
21.0k
      if (defaultValue != NULL)
6070
20.5k
          xmlFree(defaultValue);
6071
21.0k
      GROW;
6072
21.0k
  }
6073
8.34k
  if (RAW == '>') {
6074
2.57k
      if (inputid != ctxt->input->id) {
6075
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6076
0
                               "Attribute list declaration doesn't start and"
6077
0
                               " stop in the same entity\n");
6078
0
      }
6079
2.57k
      NEXT;
6080
2.57k
  }
6081
8.34k
    }
6082
8.53k
}
6083
6084
/**
6085
 * xmlParseElementMixedContentDecl:
6086
 * @ctxt:  an XML parser context
6087
 * @inputchk:  the input used for the current entity, needed for boundary checks
6088
 *
6089
 * parse the declaration for a Mixed Element content
6090
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6091
 *
6092
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6093
 *                '(' S? '#PCDATA' S? ')'
6094
 *
6095
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6096
 *
6097
 * [ VC: No Duplicate Types ]
6098
 * The same name must not appear more than once in a single
6099
 * mixed-content declaration.
6100
 *
6101
 * returns: the list of the xmlElementContentPtr describing the element choices
6102
 */
6103
xmlElementContentPtr
6104
1.87k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6105
1.87k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6106
1.87k
    const xmlChar *elem = NULL;
6107
6108
1.87k
    GROW;
6109
1.87k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6110
1.87k
  SKIP(7);
6111
1.87k
  SKIP_BLANKS;
6112
1.87k
  SHRINK;
6113
1.87k
  if (RAW == ')') {
6114
555
      if (ctxt->input->id != inputchk) {
6115
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6116
0
                               "Element content declaration doesn't start and"
6117
0
                               " stop in the same entity\n");
6118
0
      }
6119
555
      NEXT;
6120
555
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6121
555
      if (ret == NULL)
6122
0
          return(NULL);
6123
555
      if (RAW == '*') {
6124
96
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6125
96
    NEXT;
6126
96
      }
6127
555
      return(ret);
6128
555
  }
6129
1.32k
  if ((RAW == '(') || (RAW == '|')) {
6130
842
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6131
842
      if (ret == NULL) return(NULL);
6132
842
  }
6133
2.43k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6134
1.52k
      NEXT;
6135
1.52k
      if (elem == NULL) {
6136
840
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6137
840
    if (ret == NULL) {
6138
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6139
0
                    return(NULL);
6140
0
                }
6141
840
    ret->c1 = cur;
6142
840
    if (cur != NULL)
6143
840
        cur->parent = ret;
6144
840
    cur = ret;
6145
840
      } else {
6146
682
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6147
682
    if (n == NULL) {
6148
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6149
0
                    return(NULL);
6150
0
                }
6151
682
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6152
682
    if (n->c1 != NULL)
6153
682
        n->c1->parent = n;
6154
682
          cur->c2 = n;
6155
682
    if (n != NULL)
6156
682
        n->parent = cur;
6157
682
    cur = n;
6158
682
      }
6159
1.52k
      SKIP_BLANKS;
6160
1.52k
      elem = xmlParseName(ctxt);
6161
1.52k
      if (elem == NULL) {
6162
409
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6163
409
      "xmlParseElementMixedContentDecl : Name expected\n");
6164
409
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6165
409
    return(NULL);
6166
409
      }
6167
1.11k
      SKIP_BLANKS;
6168
1.11k
      GROW;
6169
1.11k
  }
6170
912
  if ((RAW == ')') && (NXT(1) == '*')) {
6171
287
      if (elem != NULL) {
6172
287
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6173
287
                                   XML_ELEMENT_CONTENT_ELEMENT);
6174
287
    if (cur->c2 != NULL)
6175
287
        cur->c2->parent = cur;
6176
287
            }
6177
287
            if (ret != NULL)
6178
287
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6179
287
      if (ctxt->input->id != inputchk) {
6180
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6181
0
                               "Element content declaration doesn't start and"
6182
0
                               " stop in the same entity\n");
6183
0
      }
6184
287
      SKIP(2);
6185
625
  } else {
6186
625
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6187
625
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6188
625
      return(NULL);
6189
625
  }
6190
6191
912
    } else {
6192
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6193
0
    }
6194
287
    return(ret);
6195
1.87k
}
6196
6197
/**
6198
 * xmlParseElementChildrenContentDeclPriv:
6199
 * @ctxt:  an XML parser context
6200
 * @inputchk:  the input used for the current entity, needed for boundary checks
6201
 * @depth: the level of recursion
6202
 *
6203
 * parse the declaration for an element children content model
6204
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6205
 *
6206
 *
6207
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6208
 *
6209
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6210
 *
6211
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6212
 *
6213
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6214
 *
6215
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6216
 * TODO Parameter-entity replacement text must be properly nested
6217
 *  with parenthesized groups. That is to say, if either of the
6218
 *  opening or closing parentheses in a choice, seq, or Mixed
6219
 *  construct is contained in the replacement text for a parameter
6220
 *  entity, both must be contained in the same replacement text. For
6221
 *  interoperability, if a parameter-entity reference appears in a
6222
 *  choice, seq, or Mixed construct, its replacement text should not
6223
 *  be empty, and neither the first nor last non-blank character of
6224
 *  the replacement text should be a connector (| or ,).
6225
 *
6226
 * Returns the tree of xmlElementContentPtr describing the element
6227
 *          hierarchy.
6228
 */
6229
static xmlElementContentPtr
6230
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6231
54.2k
                                       int depth) {
6232
54.2k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6233
54.2k
    const xmlChar *elem;
6234
54.2k
    xmlChar type = 0;
6235
6236
54.2k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6237
54.2k
        (depth >  2048)) {
6238
2
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6239
2
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6240
2
                          depth);
6241
2
  return(NULL);
6242
2
    }
6243
54.2k
    SKIP_BLANKS;
6244
54.2k
    GROW;
6245
54.2k
    if (RAW == '(') {
6246
43.7k
  int inputid = ctxt->input->id;
6247
6248
        /* Recurse on first child */
6249
43.7k
  NEXT;
6250
43.7k
  SKIP_BLANKS;
6251
43.7k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6252
43.7k
                                                           depth + 1);
6253
43.7k
        if (cur == NULL)
6254
40.1k
            return(NULL);
6255
3.53k
  SKIP_BLANKS;
6256
3.53k
  GROW;
6257
10.5k
    } else {
6258
10.5k
  elem = xmlParseName(ctxt);
6259
10.5k
  if (elem == NULL) {
6260
266
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6261
266
      return(NULL);
6262
266
  }
6263
10.2k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6264
10.2k
  if (cur == NULL) {
6265
0
      xmlErrMemory(ctxt, NULL);
6266
0
      return(NULL);
6267
0
  }
6268
10.2k
  GROW;
6269
10.2k
  if (RAW == '?') {
6270
3.77k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6271
3.77k
      NEXT;
6272
6.51k
  } else if (RAW == '*') {
6273
283
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6274
283
      NEXT;
6275
6.23k
  } else if (RAW == '+') {
6276
352
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6277
352
      NEXT;
6278
5.88k
  } else {
6279
5.88k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6280
5.88k
  }
6281
10.2k
  GROW;
6282
10.2k
    }
6283
13.8k
    SKIP_BLANKS;
6284
13.8k
    SHRINK;
6285
21.7k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6286
        /*
6287
   * Each loop we parse one separator and one element.
6288
   */
6289
14.5k
        if (RAW == ',') {
6290
2.11k
      if (type == 0) type = CUR;
6291
6292
      /*
6293
       * Detect "Name | Name , Name" error
6294
       */
6295
1.45k
      else if (type != CUR) {
6296
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6297
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6298
1
                      type);
6299
1
    if ((last != NULL) && (last != ret))
6300
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6301
1
    if (ret != NULL)
6302
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6303
1
    return(NULL);
6304
1
      }
6305
2.11k
      NEXT;
6306
6307
2.11k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6308
2.11k
      if (op == NULL) {
6309
0
    if ((last != NULL) && (last != ret))
6310
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6311
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6312
0
    return(NULL);
6313
0
      }
6314
2.11k
      if (last == NULL) {
6315
663
    op->c1 = ret;
6316
663
    if (ret != NULL)
6317
663
        ret->parent = op;
6318
663
    ret = cur = op;
6319
1.45k
      } else {
6320
1.45k
          cur->c2 = op;
6321
1.45k
    if (op != NULL)
6322
1.45k
        op->parent = cur;
6323
1.45k
    op->c1 = last;
6324
1.45k
    if (last != NULL)
6325
1.45k
        last->parent = op;
6326
1.45k
    cur =op;
6327
1.45k
    last = NULL;
6328
1.45k
      }
6329
12.3k
  } else if (RAW == '|') {
6330
10.5k
      if (type == 0) type = CUR;
6331
6332
      /*
6333
       * Detect "Name , Name | Name" error
6334
       */
6335
3.31k
      else if (type != CUR) {
6336
2
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6337
2
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6338
2
          type);
6339
2
    if ((last != NULL) && (last != ret))
6340
2
        xmlFreeDocElementContent(ctxt->myDoc, last);
6341
2
    if (ret != NULL)
6342
2
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6343
2
    return(NULL);
6344
2
      }
6345
10.5k
      NEXT;
6346
6347
10.5k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6348
10.5k
      if (op == NULL) {
6349
0
    if ((last != NULL) && (last != ret))
6350
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6351
0
    if (ret != NULL)
6352
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6353
0
    return(NULL);
6354
0
      }
6355
10.5k
      if (last == NULL) {
6356
7.28k
    op->c1 = ret;
6357
7.28k
    if (ret != NULL)
6358
7.28k
        ret->parent = op;
6359
7.28k
    ret = cur = op;
6360
7.28k
      } else {
6361
3.30k
          cur->c2 = op;
6362
3.30k
    if (op != NULL)
6363
3.30k
        op->parent = cur;
6364
3.30k
    op->c1 = last;
6365
3.30k
    if (last != NULL)
6366
3.30k
        last->parent = op;
6367
3.30k
    cur =op;
6368
3.30k
    last = NULL;
6369
3.30k
      }
6370
10.5k
  } else {
6371
1.80k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6372
1.80k
      if ((last != NULL) && (last != ret))
6373
669
          xmlFreeDocElementContent(ctxt->myDoc, last);
6374
1.80k
      if (ret != NULL)
6375
1.80k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6376
1.80k
      return(NULL);
6377
1.80k
  }
6378
12.7k
  GROW;
6379
12.7k
  SKIP_BLANKS;
6380
12.7k
  GROW;
6381
12.7k
  if (RAW == '(') {
6382
7.65k
      int inputid = ctxt->input->id;
6383
      /* Recurse on second child */
6384
7.65k
      NEXT;
6385
7.65k
      SKIP_BLANKS;
6386
7.65k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6387
7.65k
                                                          depth + 1);
6388
7.65k
            if (last == NULL) {
6389
4.29k
    if (ret != NULL)
6390
4.29k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6391
4.29k
    return(NULL);
6392
4.29k
            }
6393
3.35k
      SKIP_BLANKS;
6394
5.05k
  } else {
6395
5.05k
      elem = xmlParseName(ctxt);
6396
5.05k
      if (elem == NULL) {
6397
535
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6398
535
    if (ret != NULL)
6399
535
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6400
535
    return(NULL);
6401
535
      }
6402
4.52k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6403
4.52k
      if (last == NULL) {
6404
0
    if (ret != NULL)
6405
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6406
0
    return(NULL);
6407
0
      }
6408
4.52k
      if (RAW == '?') {
6409
693
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6410
693
    NEXT;
6411
3.82k
      } else if (RAW == '*') {
6412
461
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6413
461
    NEXT;
6414
3.36k
      } else if (RAW == '+') {
6415
209
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6416
209
    NEXT;
6417
3.15k
      } else {
6418
3.15k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6419
3.15k
      }
6420
4.52k
  }
6421
7.87k
  SKIP_BLANKS;
6422
7.87k
  GROW;
6423
7.87k
    }
6424
7.19k
    if ((cur != NULL) && (last != NULL)) {
6425
2.44k
        cur->c2 = last;
6426
2.44k
  if (last != NULL)
6427
2.44k
      last->parent = cur;
6428
2.44k
    }
6429
7.19k
    if (ctxt->input->id != inputchk) {
6430
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6431
0
                       "Element content declaration doesn't start and stop in"
6432
0
                       " the same entity\n");
6433
0
    }
6434
7.19k
    NEXT;
6435
7.19k
    if (RAW == '?') {
6436
688
  if (ret != NULL) {
6437
688
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6438
688
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6439
316
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6440
372
      else
6441
372
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6442
688
  }
6443
688
  NEXT;
6444
6.50k
    } else if (RAW == '*') {
6445
731
  if (ret != NULL) {
6446
731
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6447
731
      cur = ret;
6448
      /*
6449
       * Some normalization:
6450
       * (a | b* | c?)* == (a | b | c)*
6451
       */
6452
1.92k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6453
1.19k
    if ((cur->c1 != NULL) &&
6454
1.19k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6455
1.19k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6456
526
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6457
1.19k
    if ((cur->c2 != NULL) &&
6458
1.19k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6459
1.19k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6460
423
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6461
1.19k
    cur = cur->c2;
6462
1.19k
      }
6463
731
  }
6464
731
  NEXT;
6465
5.77k
    } else if (RAW == '+') {
6466
4.38k
  if (ret != NULL) {
6467
4.38k
      int found = 0;
6468
6469
4.38k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6470
4.38k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6471
2.79k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6472
1.59k
      else
6473
1.59k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6474
      /*
6475
       * Some normalization:
6476
       * (a | b*)+ == (a | b)*
6477
       * (a | b?)+ == (a | b)*
6478
       */
6479
11.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6480
7.14k
    if ((cur->c1 != NULL) &&
6481
7.14k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6482
7.14k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6483
1.26k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6484
1.26k
        found = 1;
6485
1.26k
    }
6486
7.14k
    if ((cur->c2 != NULL) &&
6487
7.14k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6488
7.14k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6489
957
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6490
957
        found = 1;
6491
957
    }
6492
7.14k
    cur = cur->c2;
6493
7.14k
      }
6494
4.38k
      if (found)
6495
1.20k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6496
4.38k
  }
6497
4.38k
  NEXT;
6498
4.38k
    }
6499
7.19k
    return(ret);
6500
13.8k
}
6501
6502
/**
6503
 * xmlParseElementChildrenContentDecl:
6504
 * @ctxt:  an XML parser context
6505
 * @inputchk:  the input used for the current entity, needed for boundary checks
6506
 *
6507
 * parse the declaration for an element-only (children) content model
6508
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6509
 *
6510
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6511
 *
6512
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6513
 *
6514
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6515
 *
6516
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6517
 *
6518
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6519
 * TODO Parameter-entity replacement text must be properly nested
6520
 *  with parenthesized groups. That is to say, if either of the
6521
 *  opening or closing parentheses in a choice, seq, or Mixed
6522
 *  construct is contained in the replacement text for a parameter
6523
 *  entity, both must be contained in the same replacement text. For
6524
 *  interoperability, if a parameter-entity reference appears in a
6525
 *  choice, seq, or Mixed construct, its replacement text should not
6526
 *  be empty, and neither the first nor last non-blank character of
6527
 *  the replacement text should be a connector (| or ,).
6528
 *
6529
 * Returns the tree of xmlElementContentPtr describing the element
6530
 *          hierarchy.
6531
 */
6532
xmlElementContentPtr
6533
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6534
    /* stub left for API/ABI compat */
6535
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6536
0
}
6537
6538
/**
6539
 * xmlParseElementContentDecl:
6540
 * @ctxt:  an XML parser context
6541
 * @name:  the name of the element being defined.
6542
 * @result:  the Element Content pointer will be stored here if any
6543
 *
6544
 * parse the declaration for an Element content either Mixed or Children,
6545
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6546
 *
6547
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6548
 *
6549
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6550
 */
6551
6552
int
6553
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6554
4.79k
                           xmlElementContentPtr *result) {
6555
6556
4.79k
    xmlElementContentPtr tree = NULL;
6557
4.79k
    int inputid = ctxt->input->id;
6558
4.79k
    int res;
6559
6560
4.79k
    *result = NULL;
6561
6562
4.79k
    if (RAW != '(') {
6563
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6564
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6565
0
  return(-1);
6566
0
    }
6567
4.79k
    NEXT;
6568
4.79k
    GROW;
6569
4.79k
    if (ctxt->instate == XML_PARSER_EOF)
6570
0
        return(-1);
6571
4.79k
    SKIP_BLANKS;
6572
4.79k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6573
1.87k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6574
1.87k
  res = XML_ELEMENT_TYPE_MIXED;
6575
2.91k
    } else {
6576
2.91k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6577
2.91k
  res = XML_ELEMENT_TYPE_ELEMENT;
6578
2.91k
    }
6579
4.79k
    SKIP_BLANKS;
6580
4.79k
    *result = tree;
6581
4.79k
    return(res);
6582
4.79k
}
6583
6584
/**
6585
 * xmlParseElementDecl:
6586
 * @ctxt:  an XML parser context
6587
 *
6588
 * parse an Element declaration.
6589
 *
6590
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6591
 *
6592
 * [ VC: Unique Element Type Declaration ]
6593
 * No element type may be declared more than once
6594
 *
6595
 * Returns the type of the element, or -1 in case of error
6596
 */
6597
int
6598
6.10k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6599
6.10k
    const xmlChar *name;
6600
6.10k
    int ret = -1;
6601
6.10k
    xmlElementContentPtr content  = NULL;
6602
6603
    /* GROW; done in the caller */
6604
6.10k
    if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6605
6.04k
  int inputid = ctxt->input->id;
6606
6607
6.04k
  SKIP(9);
6608
6.04k
  if (SKIP_BLANKS == 0) {
6609
124
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6610
124
               "Space required after 'ELEMENT'\n");
6611
124
      return(-1);
6612
124
  }
6613
5.92k
        name = xmlParseName(ctxt);
6614
5.92k
  if (name == NULL) {
6615
346
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6616
346
         "xmlParseElementDecl: no name for Element\n");
6617
346
      return(-1);
6618
346
  }
6619
5.57k
  if (SKIP_BLANKS == 0) {
6620
4.61k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6621
4.61k
         "Space required after the element name\n");
6622
4.61k
  }
6623
5.57k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6624
11
      SKIP(5);
6625
      /*
6626
       * Element must always be empty.
6627
       */
6628
11
      ret = XML_ELEMENT_TYPE_EMPTY;
6629
5.56k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6630
5.56k
             (NXT(2) == 'Y')) {
6631
67
      SKIP(3);
6632
      /*
6633
       * Element is a generic container.
6634
       */
6635
67
      ret = XML_ELEMENT_TYPE_ANY;
6636
5.49k
  } else if (RAW == '(') {
6637
4.79k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6638
4.79k
  } else {
6639
      /*
6640
       * [ WFC: PEs in Internal Subset ] error handling.
6641
       */
6642
709
      if ((RAW == '%') && (ctxt->external == 0) &&
6643
709
          (ctxt->inputNr == 1)) {
6644
67
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6645
67
    "PEReference: forbidden within markup decl in internal subset\n");
6646
642
      } else {
6647
642
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6648
642
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6649
642
            }
6650
709
      return(-1);
6651
709
  }
6652
6653
4.86k
  SKIP_BLANKS;
6654
6655
4.86k
  if (RAW != '>') {
6656
4.33k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6657
4.33k
      if (content != NULL) {
6658
703
    xmlFreeDocElementContent(ctxt->myDoc, content);
6659
703
      }
6660
4.33k
  } else {
6661
538
      if (inputid != ctxt->input->id) {
6662
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6663
0
                               "Element declaration doesn't start and stop in"
6664
0
                               " the same entity\n");
6665
0
      }
6666
6667
538
      NEXT;
6668
538
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6669
538
    (ctxt->sax->elementDecl != NULL)) {
6670
0
    if (content != NULL)
6671
0
        content->parent = NULL;
6672
0
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6673
0
                           content);
6674
0
    if ((content != NULL) && (content->parent == NULL)) {
6675
        /*
6676
         * this is a trick: if xmlAddElementDecl is called,
6677
         * instead of copying the full tree it is plugged directly
6678
         * if called from the parser. Avoid duplicating the
6679
         * interfaces or changing the API/ABI
6680
         */
6681
0
        xmlFreeDocElementContent(ctxt->myDoc, content);
6682
0
    }
6683
538
      } else if (content != NULL) {
6684
446
    xmlFreeDocElementContent(ctxt->myDoc, content);
6685
446
      }
6686
538
  }
6687
4.86k
    }
6688
4.92k
    return(ret);
6689
6.10k
}
6690
6691
/**
6692
 * xmlParseConditionalSections:
6693
 * @ctxt:  an XML parser context
6694
 *
6695
 * [61] conditionalSect ::= includeSect | ignoreSect
6696
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6697
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6698
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6699
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6700
 */
6701
6702
static void
6703
0
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6704
0
    int *inputIds = NULL;
6705
0
    size_t inputIdsSize = 0;
6706
0
    size_t depth = 0;
6707
6708
0
    while (ctxt->instate != XML_PARSER_EOF) {
6709
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6710
0
            int id = ctxt->input->id;
6711
6712
0
            SKIP(3);
6713
0
            SKIP_BLANKS;
6714
6715
0
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6716
0
                SKIP(7);
6717
0
                SKIP_BLANKS;
6718
0
                if (RAW != '[') {
6719
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6720
0
                    xmlHaltParser(ctxt);
6721
0
                    goto error;
6722
0
                }
6723
0
                if (ctxt->input->id != id) {
6724
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6725
0
                                   "All markup of the conditional section is"
6726
0
                                   " not in the same entity\n");
6727
0
                }
6728
0
                NEXT;
6729
6730
0
                if (inputIdsSize <= depth) {
6731
0
                    int *tmp;
6732
6733
0
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6734
0
                    tmp = (int *) xmlRealloc(inputIds,
6735
0
                            inputIdsSize * sizeof(int));
6736
0
                    if (tmp == NULL) {
6737
0
                        xmlErrMemory(ctxt, NULL);
6738
0
                        goto error;
6739
0
                    }
6740
0
                    inputIds = tmp;
6741
0
                }
6742
0
                inputIds[depth] = id;
6743
0
                depth++;
6744
0
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6745
0
                int state;
6746
0
                xmlParserInputState instate;
6747
0
                size_t ignoreDepth = 0;
6748
6749
0
                SKIP(6);
6750
0
                SKIP_BLANKS;
6751
0
                if (RAW != '[') {
6752
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6753
0
                    xmlHaltParser(ctxt);
6754
0
                    goto error;
6755
0
                }
6756
0
                if (ctxt->input->id != id) {
6757
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6758
0
                                   "All markup of the conditional section is"
6759
0
                                   " not in the same entity\n");
6760
0
                }
6761
0
                NEXT;
6762
6763
                /*
6764
                 * Parse up to the end of the conditional section but disable
6765
                 * SAX event generating DTD building in the meantime
6766
                 */
6767
0
                state = ctxt->disableSAX;
6768
0
                instate = ctxt->instate;
6769
0
                if (ctxt->recovery == 0) ctxt->disableSAX = 1;
6770
0
                ctxt->instate = XML_PARSER_IGNORE;
6771
6772
0
                while (RAW != 0) {
6773
0
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6774
0
                        SKIP(3);
6775
0
                        ignoreDepth++;
6776
                        /* Check for integer overflow */
6777
0
                        if (ignoreDepth == 0) {
6778
0
                            xmlErrMemory(ctxt, NULL);
6779
0
                            goto error;
6780
0
                        }
6781
0
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6782
0
                               (NXT(2) == '>')) {
6783
0
                        if (ignoreDepth == 0)
6784
0
                            break;
6785
0
                        SKIP(3);
6786
0
                        ignoreDepth--;
6787
0
                    } else {
6788
0
                        NEXT;
6789
0
                    }
6790
0
                }
6791
6792
0
                ctxt->disableSAX = state;
6793
0
                ctxt->instate = instate;
6794
6795
0
    if (RAW == 0) {
6796
0
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6797
0
                    goto error;
6798
0
    }
6799
0
                if (ctxt->input->id != id) {
6800
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6801
0
                                   "All markup of the conditional section is"
6802
0
                                   " not in the same entity\n");
6803
0
                }
6804
0
                SKIP(3);
6805
0
            } else {
6806
0
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6807
0
                xmlHaltParser(ctxt);
6808
0
                goto error;
6809
0
            }
6810
0
        } else if ((depth > 0) &&
6811
0
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6812
0
            depth--;
6813
0
            if (ctxt->input->id != inputIds[depth]) {
6814
0
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6815
0
                               "All markup of the conditional section is not"
6816
0
                               " in the same entity\n");
6817
0
            }
6818
0
            SKIP(3);
6819
0
        } else {
6820
0
            int id = ctxt->input->id;
6821
0
            unsigned long cons = CUR_CONSUMED;
6822
6823
0
            xmlParseMarkupDecl(ctxt);
6824
6825
0
            if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
6826
0
                xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6827
0
                xmlHaltParser(ctxt);
6828
0
                goto error;
6829
0
            }
6830
0
        }
6831
6832
0
        if (depth == 0)
6833
0
            break;
6834
6835
0
        SKIP_BLANKS;
6836
0
        GROW;
6837
0
    }
6838
6839
0
error:
6840
0
    xmlFree(inputIds);
6841
0
}
6842
6843
/**
6844
 * xmlParseMarkupDecl:
6845
 * @ctxt:  an XML parser context
6846
 *
6847
 * parse Markup declarations
6848
 *
6849
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6850
 *                     NotationDecl | PI | Comment
6851
 *
6852
 * [ VC: Proper Declaration/PE Nesting ]
6853
 * Parameter-entity replacement text must be properly nested with
6854
 * markup declarations. That is to say, if either the first character
6855
 * or the last character of a markup declaration (markupdecl above) is
6856
 * contained in the replacement text for a parameter-entity reference,
6857
 * both must be contained in the same replacement text.
6858
 *
6859
 * [ WFC: PEs in Internal Subset ]
6860
 * In the internal DTD subset, parameter-entity references can occur
6861
 * only where markup declarations can occur, not within markup declarations.
6862
 * (This does not apply to references that occur in external parameter
6863
 * entities or to the external subset.)
6864
 */
6865
void
6866
43.4k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6867
43.4k
    GROW;
6868
43.4k
    if (CUR == '<') {
6869
40.0k
        if (NXT(1) == '!') {
6870
34.8k
      switch (NXT(2)) {
6871
22.2k
          case 'E':
6872
22.2k
        if (NXT(3) == 'L')
6873
6.10k
      xmlParseElementDecl(ctxt);
6874
16.1k
        else if (NXT(3) == 'N')
6875
16.1k
      xmlParseEntityDecl(ctxt);
6876
22.2k
        break;
6877
8.53k
          case 'A':
6878
8.53k
        xmlParseAttributeListDecl(ctxt);
6879
8.53k
        break;
6880
2.38k
          case 'N':
6881
2.38k
        xmlParseNotationDecl(ctxt);
6882
2.38k
        break;
6883
1.62k
          case '-':
6884
1.62k
        xmlParseComment(ctxt);
6885
1.62k
        break;
6886
63
    default:
6887
        /* there is an error but it will be detected later */
6888
63
        break;
6889
34.8k
      }
6890
34.8k
  } else if (NXT(1) == '?') {
6891
5.04k
      xmlParsePI(ctxt);
6892
5.04k
  }
6893
40.0k
    }
6894
6895
    /*
6896
     * detect requirement to exit there and act accordingly
6897
     * and avoid having instate overridden later on
6898
     */
6899
43.4k
    if (ctxt->instate == XML_PARSER_EOF)
6900
992
        return;
6901
6902
42.4k
    ctxt->instate = XML_PARSER_DTD;
6903
42.4k
}
6904
6905
/**
6906
 * xmlParseTextDecl:
6907
 * @ctxt:  an XML parser context
6908
 *
6909
 * parse an XML declaration header for external entities
6910
 *
6911
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6912
 */
6913
6914
void
6915
0
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6916
0
    xmlChar *version;
6917
0
    const xmlChar *encoding;
6918
0
    int oldstate;
6919
6920
    /*
6921
     * We know that '<?xml' is here.
6922
     */
6923
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6924
0
  SKIP(5);
6925
0
    } else {
6926
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6927
0
  return;
6928
0
    }
6929
6930
    /* Avoid expansion of parameter entities when skipping blanks. */
6931
0
    oldstate = ctxt->instate;
6932
0
    ctxt->instate = XML_PARSER_START;
6933
6934
0
    if (SKIP_BLANKS == 0) {
6935
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6936
0
           "Space needed after '<?xml'\n");
6937
0
    }
6938
6939
    /*
6940
     * We may have the VersionInfo here.
6941
     */
6942
0
    version = xmlParseVersionInfo(ctxt);
6943
0
    if (version == NULL)
6944
0
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6945
0
    else {
6946
0
  if (SKIP_BLANKS == 0) {
6947
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6948
0
               "Space needed here\n");
6949
0
  }
6950
0
    }
6951
0
    ctxt->input->version = version;
6952
6953
    /*
6954
     * We must have the encoding declaration
6955
     */
6956
0
    encoding = xmlParseEncodingDecl(ctxt);
6957
0
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6958
  /*
6959
   * The XML REC instructs us to stop parsing right here
6960
   */
6961
0
        ctxt->instate = oldstate;
6962
0
        return;
6963
0
    }
6964
0
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6965
0
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6966
0
           "Missing encoding in text declaration\n");
6967
0
    }
6968
6969
0
    SKIP_BLANKS;
6970
0
    if ((RAW == '?') && (NXT(1) == '>')) {
6971
0
        SKIP(2);
6972
0
    } else if (RAW == '>') {
6973
        /* Deprecated old WD ... */
6974
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6975
0
  NEXT;
6976
0
    } else {
6977
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6978
0
  MOVETO_ENDTAG(CUR_PTR);
6979
0
  NEXT;
6980
0
    }
6981
6982
0
    ctxt->instate = oldstate;
6983
0
}
6984
6985
/**
6986
 * xmlParseExternalSubset:
6987
 * @ctxt:  an XML parser context
6988
 * @ExternalID: the external identifier
6989
 * @SystemID: the system identifier (or URL)
6990
 *
6991
 * parse Markup declarations from an external subset
6992
 *
6993
 * [30] extSubset ::= textDecl? extSubsetDecl
6994
 *
6995
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6996
 */
6997
void
6998
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6999
0
                       const xmlChar *SystemID) {
7000
0
    xmlDetectSAX2(ctxt);
7001
0
    GROW;
7002
7003
0
    if ((ctxt->encoding == NULL) &&
7004
0
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7005
0
        xmlChar start[4];
7006
0
  xmlCharEncoding enc;
7007
7008
0
  start[0] = RAW;
7009
0
  start[1] = NXT(1);
7010
0
  start[2] = NXT(2);
7011
0
  start[3] = NXT(3);
7012
0
  enc = xmlDetectCharEncoding(start, 4);
7013
0
  if (enc != XML_CHAR_ENCODING_NONE)
7014
0
      xmlSwitchEncoding(ctxt, enc);
7015
0
    }
7016
7017
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7018
0
  xmlParseTextDecl(ctxt);
7019
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7020
      /*
7021
       * The XML REC instructs us to stop parsing right here
7022
       */
7023
0
      xmlHaltParser(ctxt);
7024
0
      return;
7025
0
  }
7026
0
    }
7027
0
    if (ctxt->myDoc == NULL) {
7028
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7029
0
  if (ctxt->myDoc == NULL) {
7030
0
      xmlErrMemory(ctxt, "New Doc failed");
7031
0
      return;
7032
0
  }
7033
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7034
0
    }
7035
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7036
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7037
7038
0
    ctxt->instate = XML_PARSER_DTD;
7039
0
    ctxt->external = 1;
7040
0
    SKIP_BLANKS;
7041
0
    while (((RAW == '<') && (NXT(1) == '?')) ||
7042
0
           ((RAW == '<') && (NXT(1) == '!')) ||
7043
0
     (RAW == '%')) {
7044
0
  int id = ctxt->input->id;
7045
0
  unsigned long cons = CUR_CONSUMED;
7046
7047
0
  GROW;
7048
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7049
0
      xmlParseConditionalSections(ctxt);
7050
0
  } else
7051
0
      xmlParseMarkupDecl(ctxt);
7052
0
        SKIP_BLANKS;
7053
7054
0
  if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
7055
0
      xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7056
0
      break;
7057
0
  }
7058
0
    }
7059
7060
0
    if (RAW != 0) {
7061
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7062
0
    }
7063
7064
0
}
7065
7066
/**
7067
 * xmlParseReference:
7068
 * @ctxt:  an XML parser context
7069
 *
7070
 * parse and handle entity references in content, depending on the SAX
7071
 * interface, this may end up in a call to characters() if this is a
7072
 * CharRef, a predefined entity, if there is no reference() callback,
7073
 * or if the parser was asked to switch to that mode.
7074
 *
7075
 * [67] Reference ::= EntityRef | CharRef
7076
 */
7077
void
7078
2.91k
xmlParseReference(xmlParserCtxtPtr ctxt) {
    /*
     * Overview of the steps below:
     *  1. "&#..." character references are decoded and delivered directly.
     *  2. Otherwise the entity is looked up with xmlParseEntityRef().
     *  3. Predefined entities are delivered through characters().
     *  4. On the first reference (ent->checked == 0) the entity content is
     *     parsed and, if nodes were produced, attached to the entity.
     *  5. The result is handed to the application: reference() callback,
     *     or the nodes are copied/moved under ctxt->node.
     */
7079
2.91k
    xmlEntityPtr ent;
7080
2.91k
    xmlChar *val;
7081
2.91k
    int was_checked;
7082
2.91k
    xmlNodePtr list = NULL;
7083
2.91k
    xmlParserErrors ret = XML_ERR_OK;
7084
7085
7086
2.91k
    if (RAW != '&')
7087
0
        return;
7088
7089
    /*
7090
     * Simple case of a CharRef
7091
     */
7092
2.91k
    if (NXT(1) == '#') {
7093
485
  int i = 0;
7094
485
  xmlChar out[16];
7095
485
  /* Sampled before xmlParseCharRef() is called; used below to choose
   * between the "#x%X" and "#%d" spellings of the regenerated reference. */
  int hex = NXT(2);
7096
485
  int value = xmlParseCharRef(ctxt);
7097
7098
485
  /* A result of 0 delivers nothing (presumably the failure value of
   * xmlParseCharRef() - confirm against its definition). */
  if (value == 0)
7099
136
      return;
7100
349
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7101
      /*
7102
       * So we are using non-UTF-8 buffers
7103
       * Check that the char fit on 8bits, if not
7104
       * generate a CharRef.
7105
       */
7106
0
      if (value <= 0xFF) {
7107
0
    out[0] = value;
7108
0
    out[1] = 0;
7109
0
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7110
0
        (!ctxt->disableSAX))
7111
0
        ctxt->sax->characters(ctxt->userData, out, 1);
7112
0
      } else {
7113
0
    if ((hex == 'x') || (hex == 'X'))
7114
0
        snprintf((char *)out, sizeof(out), "#x%X", value);
7115
0
    else
7116
0
        snprintf((char *)out, sizeof(out), "#%d", value);
7117
0
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7118
0
        (!ctxt->disableSAX))
7119
0
        ctxt->sax->reference(ctxt->userData, out);
7120
0
      }
7121
349
  } else {
7122
      /*
7123
       * Just encode the value in UTF-8
7124
       */
7125
349
      COPY_BUF(0 ,out, i, value);
7126
349
      out[i] = 0;
7127
349
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7128
349
    (!ctxt->disableSAX))
7129
349
    ctxt->sax->characters(ctxt->userData, out, i);
7130
349
  }
7131
349
  return;
7132
485
    }
7133
7134
    /*
7135
     * We are seeing an entity reference
7136
     */
7137
2.43k
    ent = xmlParseEntityRef(ctxt);
7138
2.43k
    if (ent == NULL) return;
7139
2.35k
    if (!ctxt->wellFormed)
7140
1
  return;
7141
2.35k
    /* Snapshot of the "already checked" state on entry. It is forced to 0
     * further down when the content gets parsed in this call, which then
     * skips the second parse in the "ent->children == NULL" path. */
    was_checked = ent->checked;
7142
7143
    /* special case of predefined entities */
7144
2.35k
    if ((ent->name == NULL) ||
7145
2.35k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7146
2.35k
  val = ent->content;
7147
2.35k
  if (val == NULL) return;
7148
  /*
7149
   * inline the entity.
7150
   */
7151
933
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7152
933
      (!ctxt->disableSAX))
7153
933
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7154
933
  return;
7155
2.35k
    }
7156
7157
    /*
7158
     * The first reference to the entity trigger a parsing phase
7159
     * where the ent->children is filled with the result from
7160
     * the parsing.
7161
     * Note: external parsed entities will not be loaded, it is not
7162
     * required for a non-validating parser, unless the parsing option
7163
     * of validating, or substituting entities were given. Doing so is
7164
     * far more secure as the parser will only process data coming from
7165
     * the document entity by default.
7166
     */
7167
0
    if (((ent->checked == 0) ||
7168
0
         ((ent->children == NULL) && (ctxt->options & XML_PARSE_NOENT))) &&
7169
0
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7170
0
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7171
0
  unsigned long oldnbent = ctxt->nbentities, diff;
7172
7173
  /*
7174
   * This is a bit hackish but this seems the best
7175
   * way to make sure both SAX and DOM entity support
7176
   * behaves okay.
7177
   */
7178
0
  void *user_data;
7179
0
  if (ctxt->userData == ctxt)
7180
0
      user_data = NULL;
7181
0
  else
7182
0
      user_data = ctxt->userData;
7183
7184
  /*
7185
   * Check that this entity is well formed
7186
   * 4.3.2: An internal general parsed entity is well-formed
7187
   * if its replacement text matches the production labeled
7188
   * content.
7189
   */
7190
0
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7191
0
      ctxt->depth++;
7192
0
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7193
0
                                                user_data, &list);
7194
0
      ctxt->depth--;
7195
7196
0
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7197
0
      ctxt->depth++;
7198
0
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7199
0
                                     user_data, ctxt->depth, ent->URI,
7200
0
             ent->ExternalID, &list);
7201
0
      ctxt->depth--;
7202
0
  } else {
7203
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7204
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7205
0
       "invalid entity type found\n", NULL);
7206
0
  }
7207
7208
  /*
7209
   * Store the number of entities needing parsing for this entity
7210
   * content and do checkings
7211
   */
7212
0
        /* ent->checked ends up as (count * 2), with bit 0 set just below
         * when the content contains '<'. The count is clamped so that
         * the doubled value does not exceed INT_MAX. */
        diff = ctxt->nbentities - oldnbent + 1;
7213
0
        if (diff > INT_MAX / 2)
7214
0
            diff = INT_MAX / 2;
7215
0
        ent->checked = diff * 2;
7216
0
  if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7217
0
      ent->checked |= 1;
7218
0
  if (ret == XML_ERR_ENTITY_LOOP) {
7219
0
      xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7220
0
            xmlHaltParser(ctxt);
7221
0
      xmlFreeNodeList(list);
7222
0
      return;
7223
0
  }
7224
0
  if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
7225
0
      xmlFreeNodeList(list);
7226
0
      return;
7227
0
  }
7228
7229
0
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7230
0
      if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7231
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7232
0
    (ent->children == NULL)) {
7233
0
    ent->children = list;
7234
                /*
7235
                 * Prune it directly in the generated document
7236
                 * except for single text nodes.
7237
                 */
7238
0
                if ((ctxt->replaceEntities == 0) ||
7239
0
                    (ctxt->parseMode == XML_PARSE_READER) ||
7240
0
                    ((list->type == XML_TEXT_NODE) &&
7241
0
                     (list->next == NULL))) {
7242
0
                    /* The entity keeps the parsed nodes: reparent each one
                     * to the entity and clear 'list' so the delivery code
                     * below treats this as "no fresh list". */
                    ent->owner = 1;
7243
0
                    while (list != NULL) {
7244
0
                        list->parent = (xmlNodePtr) ent;
7245
0
                        if (list->doc != ent->doc)
7246
0
                            xmlSetTreeDoc(list, ent->doc);
7247
0
                        if (list->next == NULL)
7248
0
                            ent->last = list;
7249
0
                        list = list->next;
7250
0
                    }
7251
0
                    list = NULL;
7252
0
                } else {
7253
0
                    /* The nodes are pointed at the current node/document;
                     * 'list' is restored to the head afterwards so it stays
                     * non-NULL for the delivery code below. */
                    ent->owner = 0;
7254
0
                    while (list != NULL) {
7255
0
                        list->parent = (xmlNodePtr) ctxt->node;
7256
0
                        list->doc = ctxt->myDoc;
7257
0
                        if (list->next == NULL)
7258
0
                            ent->last = list;
7259
0
                        list = list->next;
7260
0
                    }
7261
0
                    list = ent->children;
7262
#ifdef LIBXML_LEGACY_ENABLED
7263
                    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7264
                        xmlAddEntityReference(ent, list, NULL);
7265
#endif /* LIBXML_LEGACY_ENABLED */
7266
0
                }
7267
0
      } else {
7268
0
    xmlFreeNodeList(list);
7269
0
    list = NULL;
7270
0
      }
7271
0
  } else if ((ret != XML_ERR_OK) &&
7272
0
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7273
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7274
0
         "Entity '%s' failed to parse\n", ent->name);
7275
0
            /* Truncate the stored content to an empty string after the
             * parse failure. */
            if (ent->content != NULL)
7276
0
                ent->content[0] = 0;
7277
0
      xmlParserEntityCheck(ctxt, 0, ent, 0);
7278
0
  } else if (list != NULL) {
7279
0
      xmlFreeNodeList(list);
7280
0
      list = NULL;
7281
0
  }
7282
0
  if (ent->checked == 0)
7283
0
      ent->checked = 2;
7284
7285
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7286
0
        was_checked = 0;
7287
0
    } else if (ent->checked != 1) {
7288
0
  /* Entity already checked: add its recorded count (checked / 2) to
   * the running total instead of parsing it again. */
  ctxt->nbentities += ent->checked / 2;
7289
0
    }
7290
7291
    /*
7292
     * Now that the entity content has been gathered
7293
     * provide it to the application, this can take different forms based
7294
     * on the parsing modes.
7295
     */
7296
0
    if (ent->children == NULL) {
7297
  /*
7298
   * Probably running in SAX mode and the callbacks don't
7299
   * build the entity content. So unless we already went
7300
   * though parsing for first checking go though the entity
7301
   * content to generate callbacks associated to the entity
7302
   */
7303
0
  if (was_checked != 0) {
7304
0
      void *user_data;
7305
      /*
7306
       * This is a bit hackish but this seems the best
7307
       * way to make sure both SAX and DOM entity support
7308
       * behaves okay.
7309
       */
7310
0
      if (ctxt->userData == ctxt)
7311
0
    user_data = NULL;
7312
0
      else
7313
0
    user_data = ctxt->userData;
7314
7315
0
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7316
0
    ctxt->depth++;
7317
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7318
0
           ent->content, user_data, NULL);
7319
0
    ctxt->depth--;
7320
0
      } else if (ent->etype ==
7321
0
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7322
0
    ctxt->depth++;
7323
0
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7324
0
         ctxt->sax, user_data, ctxt->depth,
7325
0
         ent->URI, ent->ExternalID, NULL);
7326
0
    ctxt->depth--;
7327
0
      } else {
7328
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7329
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7330
0
           "invalid entity type found\n", NULL);
7331
0
      }
7332
0
      /* NOTE(review): unlike the first-parse path above, this loop
       * error path does not call xmlHaltParser() - confirm that the
       * difference is intended. */
      if (ret == XML_ERR_ENTITY_LOOP) {
7333
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7334
0
    return;
7335
0
      }
7336
0
  }
7337
0
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7338
0
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7339
      /*
7340
       * Entity reference callback comes second, it's somewhat
7341
       * superfluous but a compatibility to historical behaviour
7342
       */
7343
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7344
0
  }
7345
0
  return;
7346
0
    }
7347
7348
    /*
7349
     * ent->children is non-NULL from here on (the NULL case returned above).
7350
     * When entities are not substituted, only reference() is called here.
     */
7351
0
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7352
0
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7353
  /*
7354
   * Create a node.
7355
   */
7356
0
  ctxt->sax->reference(ctxt->userData, ent->name);
7357
0
  return;
7358
0
    }
7359
7360
0
    /* NOTE(review): ent->children was already found non-NULL above, so the
     * second operand of this test looks redundant - confirm. */
    if ((ctxt->replaceEntities) || (ent->children == NULL))  {
7361
  /*
7362
   * There is a problem on the handling of _private for entities
7363
   * (bug 155816): Should we copy the content of the field from
7364
   * the entity (possibly overwriting some value set by the user
7365
   * when a copy is created), should we leave it alone, or should
7366
   * we try to take care of different situations?  The problem
7367
   * is exacerbated by the usage of this field by the xmlReader.
7368
   * To fix this bug, we look at _private on the created node
7369
   * and, if it's NULL, we copy in whatever was in the entity.
7370
   * If it's not NULL we leave it alone.  This is somewhat of a
7371
   * hack - maybe we should have further tests to determine
7372
   * what to do.
7373
   */
7374
0
  if ((ctxt->node != NULL) && (ent->children != NULL)) {
7375
      /*
7376
       * Seems we are generating the DOM content, do
7377
       * a simple tree copy for all references except the first
7378
       * In the first occurrence list contains the replacement.
7379
       */
7380
0
      if (((list == NULL) && (ent->owner == 0)) ||
7381
0
    (ctxt->parseMode == XML_PARSE_READER)) {
7382
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7383
7384
    /*
7385
     * We are copying here, make sure there is no abuse
7386
     */
7387
0
    ctxt->sizeentcopy += ent->length + 5;
7388
0
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7389
0
        return;
7390
7391
    /*
7392
     * when operating on a reader, the entities definitions
7393
     * are always owning the entities subtree.
7394
    if (ctxt->parseMode == XML_PARSE_READER)
7395
        ent->owner = 1;
7396
     */
7397
7398
0
    /* Append a copy of each entity child under ctxt->node; the
     * entity's own child list is left untouched. */
    cur = ent->children;
7399
0
    while (cur != NULL) {
7400
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7401
0
        if (nw != NULL) {
7402
0
      if (nw->_private == NULL)
7403
0
          nw->_private = cur->_private;
7404
0
      if (firstChild == NULL){
7405
0
          firstChild = nw;
7406
0
      }
7407
0
      nw = xmlAddChild(ctxt->node, nw);
7408
0
        }
7409
0
        if (cur == ent->last) {
7410
      /*
7411
       * needed to detect some strange empty
7412
       * node cases in the reader tests
7413
       */
7414
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7415
0
          (nw != NULL) &&
7416
0
          (nw->type == XML_ELEMENT_NODE) &&
7417
0
          (nw->children == NULL))
7418
0
          nw->extra = 1;
7419
7420
0
      break;
7421
0
        }
7422
0
        cur = cur->next;
7423
0
    }
7424
#ifdef LIBXML_LEGACY_ENABLED
7425
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7426
      xmlAddEntityReference(ent, firstChild, nw);
7427
#endif /* LIBXML_LEGACY_ENABLED */
7428
0
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7429
0
    xmlNodePtr nw = NULL, cur, next, last,
7430
0
         firstChild = NULL;
7431
7432
    /*
7433
     * We are copying here, make sure there is no abuse
7434
     */
7435
0
    ctxt->sizeentcopy += ent->length + 5;
7436
0
    if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7437
0
        return;
7438
7439
    /*
7440
     * Copy the entity child list and make it the new
7441
     * entity child list. The goal is to make sure any
7442
     * ID or REF referenced will be the one from the
7443
     * document content and not the entity copy.
7444
     */
7445
0
    /* The entity's child list is detached first; in the loop each
     * original node (cur) is added under ctxt->node while its copy
     * (nw) is added back under the entity. */
    cur = ent->children;
7446
0
    ent->children = NULL;
7447
0
    last = ent->last;
7448
0
    ent->last = NULL;
7449
0
    while (cur != NULL) {
7450
0
        next = cur->next;
7451
0
        cur->next = NULL;
7452
0
        cur->parent = NULL;
7453
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7454
0
        if (nw != NULL) {
7455
0
      if (nw->_private == NULL)
7456
0
          nw->_private = cur->_private;
7457
0
      if (firstChild == NULL){
7458
0
          firstChild = cur;
7459
0
      }
7460
0
      xmlAddChild((xmlNodePtr) ent, nw);
7461
0
      xmlAddChild(ctxt->node, cur);
7462
0
        }
7463
0
        if (cur == last)
7464
0
      break;
7465
0
        cur = next;
7466
0
    }
7467
0
    if (ent->owner == 0)
7468
0
        ent->owner = 1;
7469
#ifdef LIBXML_LEGACY_ENABLED
7470
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7471
      xmlAddEntityReference(ent, firstChild, nw);
7472
#endif /* LIBXML_LEGACY_ENABLED */
7473
0
      } else {
7474
0
    const xmlChar *nbktext;
7475
7476
    /*
7477
     * the name change is to avoid coalescing of the
7478
     * node with a possible previous text one which
7479
     * would make ent->children a dangling pointer
7480
     */
7481
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7482
0
          -1);
7483
0
    if (ent->children->type == XML_TEXT_NODE)
7484
0
        ent->children->name = nbktext;
7485
0
    if ((ent->last != ent->children) &&
7486
0
        (ent->last->type == XML_TEXT_NODE))
7487
0
        ent->last->name = nbktext;
7488
0
    xmlAddChildList(ctxt->node, ent->children);
7489
0
      }
7490
7491
      /*
7492
       * This is to avoid a nasty side effect, see
7493
       * characters() in SAX.c
7494
       */
7495
0
      ctxt->nodemem = 0;
7496
0
      ctxt->nodelen = 0;
7497
0
      return;
7498
0
  }
7499
0
    }
7500
0
}
7501
7502
/**
7503
 * xmlParseEntityRef:
7504
 * @ctxt:  an XML parser context
7505
 *
7506
 * parse an entity reference
7507
 *
7508
 * [68] EntityRef ::= '&' Name ';'
7509
 *
7510
 * [ WFC: Entity Declared ]
7511
 * In a document without any DTD, a document with only an internal DTD
7512
 * subset which contains no parameter entity references, or a document
7513
 * with "standalone='yes'", the Name given in the entity reference
7514
 * must match that in an entity declaration, except that well-formed
7515
 * documents need not declare any of the following entities: amp, lt,
7516
 * gt, apos, quot.  The declaration of a parameter entity must precede
7517
 * any reference to it.  Similarly, the declaration of a general entity
7518
 * must precede any reference to it which appears in a default value in an
7519
 * attribute-list declaration. Note that if entities are declared in the
7520
 * external subset or in external parameter entities, a non-validating
7521
 * processor is not obligated to read and process their declarations;
7522
 * for such documents, the rule that an entity must be declared is a
7523
 * well-formedness constraint only if standalone='yes'.
7524
 *
7525
 * [ WFC: Parsed Entity ]
7526
 * An entity reference must not contain the name of an unparsed entity
7527
 *
7528
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7529
 */
7530
xmlEntityPtr
7531
276k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7532
276k
    const xmlChar *name;
7533
276k
    xmlEntityPtr ent = NULL;
7534
7535
276k
    GROW;
7536
276k
    if (ctxt->instate == XML_PARSER_EOF)
7537
0
        return(NULL);
7538
7539
276k
    if (RAW != '&')
7540
0
        return(NULL);
7541
276k
    NEXT;
7542
276k
    name = xmlParseName(ctxt);
7543
276k
    if (name == NULL) {
7544
176k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7545
176k
           "xmlParseEntityRef: no name\n");
7546
176k
        return(NULL);
7547
176k
    }
7548
99.1k
    if (RAW != ';') {
7549
52.4k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7550
52.4k
  return(NULL);
7551
52.4k
    }
7552
46.6k
    NEXT;
7553
7554
    /*
7555
     * Predefined entities override any extra definition
7556
     */
7557
46.6k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7558
46.6k
        ent = xmlGetPredefinedEntity(name);
7559
46.6k
        if (ent != NULL)
7560
31.1k
            return(ent);
7561
46.6k
    }
7562
7563
    /*
7564
     * Increase the number of entity references parsed
7565
     */
7566
15.5k
    ctxt->nbentities++;
7567
7568
    /*
7569
     * Ask first SAX for entity resolution, otherwise try the
7570
     * entities which may have stored in the parser context.
7571
     */
7572
15.5k
    if (ctxt->sax != NULL) {
7573
15.5k
  if (ctxt->sax->getEntity != NULL)
7574
15.5k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7575
15.5k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7576
15.5k
      (ctxt->options & XML_PARSE_OLDSAX))
7577
0
      ent = xmlGetPredefinedEntity(name);
7578
15.5k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7579
15.5k
      (ctxt->userData==ctxt)) {
7580
0
      ent = xmlSAX2GetEntity(ctxt, name);
7581
0
  }
7582
15.5k
    }
7583
15.5k
    if (ctxt->instate == XML_PARSER_EOF)
7584
0
  return(NULL);
7585
    /*
7586
     * [ WFC: Entity Declared ]
7587
     * In a document without any DTD, a document with only an
7588
     * internal DTD subset which contains no parameter entity
7589
     * references, or a document with "standalone='yes'", the
7590
     * Name given in the entity reference must match that in an
7591
     * entity declaration, except that well-formed documents
7592
     * need not declare any of the following entities: amp, lt,
7593
     * gt, apos, quot.
7594
     * The declaration of a parameter entity must precede any
7595
     * reference to it.
7596
     * Similarly, the declaration of a general entity must
7597
     * precede any reference to it which appears in a default
7598
     * value in an attribute-list declaration. Note that if
7599
     * entities are declared in the external subset or in
7600
     * external parameter entities, a non-validating processor
7601
     * is not obligated to read and process their declarations;
7602
     * for such documents, the rule that an entity must be
7603
     * declared is a well-formedness constraint only if
7604
     * standalone='yes'.
7605
     */
7606
15.5k
    if (ent == NULL) {
7607
0
  if ((ctxt->standalone == 1) ||
7608
0
      ((ctxt->hasExternalSubset == 0) &&
7609
0
       (ctxt->hasPErefs == 0))) {
7610
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7611
0
         "Entity '%s' not defined\n", name);
7612
0
  } else {
7613
0
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7614
0
         "Entity '%s' not defined\n", name);
7615
0
      if ((ctxt->inSubset == 0) &&
7616
0
    (ctxt->sax != NULL) &&
7617
0
    (ctxt->sax->reference != NULL)) {
7618
0
    ctxt->sax->reference(ctxt->userData, name);
7619
0
      }
7620
0
  }
7621
0
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7622
0
  ctxt->valid = 0;
7623
0
    }
7624
7625
    /*
7626
     * [ WFC: Parsed Entity ]
7627
     * An entity reference must not contain the name of an
7628
     * unparsed entity
7629
     */
7630
15.5k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7631
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7632
0
     "Entity reference to unparsed entity %s\n", name);
7633
0
    }
7634
7635
    /*
7636
     * [ WFC: No External Entity References ]
7637
     * Attribute values cannot contain direct or indirect
7638
     * entity references to external entities.
7639
     */
7640
15.5k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7641
15.5k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7642
14.1k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7643
14.1k
       "Attribute references external entity '%s'\n", name);
7644
14.1k
    }
7645
    /*
7646
     * [ WFC: No < in Attribute Values ]
7647
     * The replacement text of any entity referred to directly or
7648
     * indirectly in an attribute value (other than "&lt;") must
7649
     * not contain a <.
7650
     */
7651
1.41k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7652
1.41k
       (ent != NULL) && 
7653
1.41k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7654
0
  if (((ent->checked & 1) || (ent->checked == 0)) &&
7655
0
       (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
7656
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7657
0
  "'<' in entity '%s' is not allowed in attributes values\n", name);
7658
0
        }
7659
0
    }
7660
7661
    /*
7662
     * Internal check, no parameter entities here ...
7663
     */
7664
1.41k
    else {
7665
1.41k
  switch (ent->etype) {
7666
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7667
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7668
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7669
0
       "Attempt to reference the parameter entity '%s'\n",
7670
0
            name);
7671
0
      break;
7672
1.41k
      default:
7673
1.41k
      break;
7674
1.41k
  }
7675
1.41k
    }
7676
7677
    /*
7678
     * [ WFC: No Recursion ]
7679
     * A parsed entity must not contain a recursive reference
7680
     * to itself, either directly or indirectly.
7681
     * Done somewhere else
7682
     */
7683
15.5k
    return(ent);
7684
15.5k
}
7685
7686
/**
7687
 * xmlParseStringEntityRef:
7688
 * @ctxt:  an XML parser context
7689
 * @str:  a pointer to an index in the string
7690
 *
7691
 * parse an entity reference, but this version parses it from
7692
 * a string value.
7693
 *
7694
 * [68] EntityRef ::= '&' Name ';'
7695
 *
7696
 * [ WFC: Entity Declared ]
7697
 * In a document without any DTD, a document with only an internal DTD
7698
 * subset which contains no parameter entity references, or a document
7699
 * with "standalone='yes'", the Name given in the entity reference
7700
 * must match that in an entity declaration, except that well-formed
7701
 * documents need not declare any of the following entities: amp, lt,
7702
 * gt, apos, quot.  The declaration of a parameter entity must precede
7703
 * any reference to it.  Similarly, the declaration of a general entity
7704
 * must precede any reference to it which appears in a default value in an
7705
 * attribute-list declaration. Note that if entities are declared in the
7706
 * external subset or in external parameter entities, a non-validating
7707
 * processor is not obligated to read and process their declarations;
7708
 * for such documents, the rule that an entity must be declared is a
7709
 * well-formedness constraint only if standalone='yes'.
7710
 *
7711
 * [ WFC: Parsed Entity ]
7712
 * An entity reference must not contain the name of an unparsed entity
7713
 *
7714
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7715
 * is updated to the current location in the string.
7716
 */
7717
static xmlEntityPtr
7718
0
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7719
0
    xmlChar *name;
7720
0
    const xmlChar *ptr;
7721
0
    xmlChar cur;
7722
0
    xmlEntityPtr ent = NULL;
7723
7724
0
    if ((str == NULL) || (*str == NULL))
7725
0
        return(NULL);
7726
0
    ptr = *str;
7727
0
    cur = *ptr;
7728
0
    if (cur != '&')
7729
0
  return(NULL);
7730
7731
0
    ptr++;
7732
0
    name = xmlParseStringName(ctxt, &ptr);
7733
0
    if (name == NULL) {
7734
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7735
0
           "xmlParseStringEntityRef: no name\n");
7736
0
  *str = ptr;
7737
0
  return(NULL);
7738
0
    }
7739
0
    if (*ptr != ';') {
7740
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7741
0
        xmlFree(name);
7742
0
  *str = ptr;
7743
0
  return(NULL);
7744
0
    }
7745
0
    ptr++;
7746
7747
7748
    /*
7749
     * Predefined entities override any extra definition
7750
     */
7751
0
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7752
0
        ent = xmlGetPredefinedEntity(name);
7753
0
        if (ent != NULL) {
7754
0
            xmlFree(name);
7755
0
            *str = ptr;
7756
0
            return(ent);
7757
0
        }
7758
0
    }
7759
7760
    /*
7761
     * Increase the number of entity references parsed
7762
     */
7763
0
    ctxt->nbentities++;
7764
7765
    /*
7766
     * Ask first SAX for entity resolution, otherwise try the
7767
     * entities which may have stored in the parser context.
7768
     */
7769
0
    if (ctxt->sax != NULL) {
7770
0
  if (ctxt->sax->getEntity != NULL)
7771
0
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7772
0
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7773
0
      ent = xmlGetPredefinedEntity(name);
7774
0
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7775
0
      ent = xmlSAX2GetEntity(ctxt, name);
7776
0
  }
7777
0
    }
7778
0
    if (ctxt->instate == XML_PARSER_EOF) {
7779
0
  xmlFree(name);
7780
0
  return(NULL);
7781
0
    }
7782
7783
    /*
7784
     * [ WFC: Entity Declared ]
7785
     * In a document without any DTD, a document with only an
7786
     * internal DTD subset which contains no parameter entity
7787
     * references, or a document with "standalone='yes'", the
7788
     * Name given in the entity reference must match that in an
7789
     * entity declaration, except that well-formed documents
7790
     * need not declare any of the following entities: amp, lt,
7791
     * gt, apos, quot.
7792
     * The declaration of a parameter entity must precede any
7793
     * reference to it.
7794
     * Similarly, the declaration of a general entity must
7795
     * precede any reference to it which appears in a default
7796
     * value in an attribute-list declaration. Note that if
7797
     * entities are declared in the external subset or in
7798
     * external parameter entities, a non-validating processor
7799
     * is not obligated to read and process their declarations;
7800
     * for such documents, the rule that an entity must be
7801
     * declared is a well-formedness constraint only if
7802
     * standalone='yes'.
7803
     */
7804
0
    if (ent == NULL) {
7805
0
  if ((ctxt->standalone == 1) ||
7806
0
      ((ctxt->hasExternalSubset == 0) &&
7807
0
       (ctxt->hasPErefs == 0))) {
7808
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7809
0
         "Entity '%s' not defined\n", name);
7810
0
  } else {
7811
0
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7812
0
        "Entity '%s' not defined\n",
7813
0
        name);
7814
0
  }
7815
0
  xmlParserEntityCheck(ctxt, 0, ent, 0);
7816
  /* TODO ? check regressions ctxt->valid = 0; */
7817
0
    }
7818
7819
    /*
7820
     * [ WFC: Parsed Entity ]
7821
     * An entity reference must not contain the name of an
7822
     * unparsed entity
7823
     */
7824
0
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7825
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7826
0
     "Entity reference to unparsed entity %s\n", name);
7827
0
    }
7828
7829
    /*
7830
     * [ WFC: No External Entity References ]
7831
     * Attribute values cannot contain direct or indirect
7832
     * entity references to external entities.
7833
     */
7834
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7835
0
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7836
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7837
0
   "Attribute references external entity '%s'\n", name);
7838
0
    }
7839
    /*
7840
     * [ WFC: No < in Attribute Values ]
7841
     * The replacement text of any entity referred to directly or
7842
     * indirectly in an attribute value (other than "&lt;") must
7843
     * not contain a <.
7844
     */
7845
0
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7846
0
       (ent != NULL) && (ent->content != NULL) &&
7847
0
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
7848
0
       (xmlStrchr(ent->content, '<'))) {
7849
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7850
0
     "'<' in entity '%s' is not allowed in attributes values\n",
7851
0
        name);
7852
0
    }
7853
7854
    /*
7855
     * Internal check, no parameter entities here ...
7856
     */
7857
0
    else {
7858
0
  switch (ent->etype) {
7859
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7860
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7861
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7862
0
       "Attempt to reference the parameter entity '%s'\n",
7863
0
          name);
7864
0
      break;
7865
0
      default:
7866
0
      break;
7867
0
  }
7868
0
    }
7869
7870
    /*
7871
     * [ WFC: No Recursion ]
7872
     * A parsed entity must not contain a recursive reference
7873
     * to itself, either directly or indirectly.
7874
     * Done somewhere else
7875
     */
7876
7877
0
    xmlFree(name);
7878
0
    *str = ptr;
7879
0
    return(ent);
7880
0
}
7881
7882
/**
7883
 * xmlParsePEReference:
7884
 * @ctxt:  an XML parser context
7885
 *
7886
 * parse PEReference declarations
7887
 * The entity content is handled directly by pushing its content as
7888
 * a new input stream.
7889
 *
7890
 * [69] PEReference ::= '%' Name ';'
7891
 *
7892
 * [ WFC: No Recursion ]
7893
 * A parsed entity must not contain a recursive
7894
 * reference to itself, either directly or indirectly.
7895
 *
7896
 * [ WFC: Entity Declared ]
7897
 * In a document without any DTD, a document with only an internal DTD
7898
 * subset which contains no parameter entity references, or a document
7899
 * with "standalone='yes'", ...  ... The declaration of a parameter
7900
 * entity must precede any reference to it...
7901
 *
7902
 * [ VC: Entity Declared ]
7903
 * In a document with an external subset or external parameter entities
7904
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7905
 * must precede any reference to it...
7906
 *
7907
 * [ WFC: In DTD ]
7908
 * Parameter-entity references may only appear in the DTD.
7909
 * NOTE: misleading but this is handled.
7910
 */
7911
void
7912
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7913
43.4k
{
7914
43.4k
    const xmlChar *name;
7915
43.4k
    xmlEntityPtr entity = NULL;
7916
43.4k
    xmlParserInputPtr input;
7917
7918
43.4k
    if (RAW != '%')
7919
40.7k
        return;
7920
2.66k
    NEXT;
7921
2.66k
    name = xmlParseName(ctxt);
7922
2.66k
    if (name == NULL) {
7923
1.49k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7924
1.49k
  return;
7925
1.49k
    }
7926
1.16k
    if (xmlParserDebugEntities)
7927
0
  xmlGenericError(xmlGenericErrorContext,
7928
0
    "PEReference: %s\n", name);
7929
1.16k
    if (RAW != ';') {
7930
720
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7931
720
        return;
7932
720
    }
7933
7934
448
    NEXT;
7935
7936
    /*
7937
     * Increase the number of entity references parsed
7938
     */
7939
448
    ctxt->nbentities++;
7940
7941
    /*
7942
     * Request the entity from SAX
7943
     */
7944
448
    if ((ctxt->sax != NULL) &&
7945
448
  (ctxt->sax->getParameterEntity != NULL))
7946
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7947
448
    if (ctxt->instate == XML_PARSER_EOF)
7948
0
  return;
7949
448
    if (entity == NULL) {
7950
  /*
7951
   * [ WFC: Entity Declared ]
7952
   * In a document without any DTD, a document with only an
7953
   * internal DTD subset which contains no parameter entity
7954
   * references, or a document with "standalone='yes'", ...
7955
   * ... The declaration of a parameter entity must precede
7956
   * any reference to it...
7957
   */
7958
448
  if ((ctxt->standalone == 1) ||
7959
448
      ((ctxt->hasExternalSubset == 0) &&
7960
448
       (ctxt->hasPErefs == 0))) {
7961
14
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7962
14
            "PEReference: %%%s; not found\n",
7963
14
            name);
7964
434
  } else {
7965
      /*
7966
       * [ VC: Entity Declared ]
7967
       * In a document with an external subset or external
7968
       * parameter entities with "standalone='no'", ...
7969
       * ... The declaration of a parameter entity must
7970
       * precede any reference to it...
7971
       */
7972
434
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7973
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7974
0
                                 "PEReference: %%%s; not found\n",
7975
0
                                 name, NULL);
7976
0
            } else
7977
434
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7978
434
                              "PEReference: %%%s; not found\n",
7979
434
                              name, NULL);
7980
434
            ctxt->valid = 0;
7981
434
  }
7982
448
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
7983
448
    } else {
7984
  /*
7985
   * Internal checking in case the entity quest barfed
7986
   */
7987
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7988
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7989
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7990
0
      "Internal: %%%s; is not a parameter entity\n",
7991
0
        name, NULL);
7992
0
  } else {
7993
0
            xmlChar start[4];
7994
0
            xmlCharEncoding enc;
7995
7996
0
      if (xmlParserEntityCheck(ctxt, 0, entity, 0))
7997
0
          return;
7998
7999
0
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8000
0
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8001
0
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8002
0
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8003
0
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8004
0
    (ctxt->replaceEntities == 0) &&
8005
0
    (ctxt->validate == 0))
8006
0
    return;
8007
8008
0
      input = xmlNewEntityInputStream(ctxt, entity);
8009
0
      if (xmlPushInput(ctxt, input) < 0) {
8010
0
                xmlFreeInputStream(input);
8011
0
    return;
8012
0
            }
8013
8014
0
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8015
                /*
8016
                 * Get the 4 first bytes and decode the charset
8017
                 * if enc != XML_CHAR_ENCODING_NONE
8018
                 * plug some encoding conversion routines.
8019
                 * Note that, since we may have some non-UTF8
8020
                 * encoding (like UTF16, bug 135229), the 'length'
8021
                 * is not known, but we can calculate based upon
8022
                 * the amount of data in the buffer.
8023
                 */
8024
0
                GROW
8025
0
                if (ctxt->instate == XML_PARSER_EOF)
8026
0
                    return;
8027
0
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8028
0
                    start[0] = RAW;
8029
0
                    start[1] = NXT(1);
8030
0
                    start[2] = NXT(2);
8031
0
                    start[3] = NXT(3);
8032
0
                    enc = xmlDetectCharEncoding(start, 4);
8033
0
                    if (enc != XML_CHAR_ENCODING_NONE) {
8034
0
                        xmlSwitchEncoding(ctxt, enc);
8035
0
                    }
8036
0
                }
8037
8038
0
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8039
0
                    (IS_BLANK_CH(NXT(5)))) {
8040
0
                    xmlParseTextDecl(ctxt);
8041
0
                }
8042
0
            }
8043
0
  }
8044
0
    }
8045
448
    ctxt->hasPErefs = 1;
8046
448
}
8047
8048
/**
8049
 * xmlLoadEntityContent:
8050
 * @ctxt:  an XML parser context
8051
 * @entity: an unloaded system entity
8052
 *
8053
 * Load the original content of the given system entity from the
8054
 * ExternalID/SystemID given. This is to be used for Included in Literal
8055
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8056
 *
8057
 * Returns 0 in case of success and -1 in case of failure
8058
 */
8059
static int
8060
0
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8061
0
    xmlParserInputPtr input;
8062
0
    xmlBufferPtr buf;
8063
0
    int l, c;
8064
0
    int count = 0;
8065
8066
0
    if ((ctxt == NULL) || (entity == NULL) ||
8067
0
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8068
0
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8069
0
  (entity->content != NULL)) {
8070
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8071
0
              "xmlLoadEntityContent parameter error");
8072
0
        return(-1);
8073
0
    }
8074
8075
0
    if (xmlParserDebugEntities)
8076
0
  xmlGenericError(xmlGenericErrorContext,
8077
0
    "Reading %s entity content input\n", entity->name);
8078
8079
0
    buf = xmlBufferCreate();
8080
0
    if (buf == NULL) {
8081
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8082
0
              "xmlLoadEntityContent parameter error");
8083
0
        return(-1);
8084
0
    }
8085
0
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8086
8087
0
    input = xmlNewEntityInputStream(ctxt, entity);
8088
0
    if (input == NULL) {
8089
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8090
0
              "xmlLoadEntityContent input error");
8091
0
  xmlBufferFree(buf);
8092
0
        return(-1);
8093
0
    }
8094
8095
    /*
8096
     * Push the entity as the current input, read char by char
8097
     * saving to the buffer until the end of the entity or an error
8098
     */
8099
0
    if (xmlPushInput(ctxt, input) < 0) {
8100
0
        xmlBufferFree(buf);
8101
0
  xmlFreeInputStream(input);
8102
0
  return(-1);
8103
0
    }
8104
8105
0
    GROW;
8106
0
    c = CUR_CHAR(l);
8107
0
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8108
0
           (IS_CHAR(c))) {
8109
0
        xmlBufferAdd(buf, ctxt->input->cur, l);
8110
0
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8111
0
      count = 0;
8112
0
      GROW;
8113
0
            if (ctxt->instate == XML_PARSER_EOF) {
8114
0
                xmlBufferFree(buf);
8115
0
                return(-1);
8116
0
            }
8117
0
  }
8118
0
  NEXTL(l);
8119
0
  c = CUR_CHAR(l);
8120
0
  if (c == 0) {
8121
0
      count = 0;
8122
0
      GROW;
8123
0
            if (ctxt->instate == XML_PARSER_EOF) {
8124
0
                xmlBufferFree(buf);
8125
0
                return(-1);
8126
0
            }
8127
0
      c = CUR_CHAR(l);
8128
0
  }
8129
0
    }
8130
8131
0
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8132
0
        xmlPopInput(ctxt);
8133
0
    } else if (!IS_CHAR(c)) {
8134
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8135
0
                          "xmlLoadEntityContent: invalid char value %d\n",
8136
0
                    c);
8137
0
  xmlBufferFree(buf);
8138
0
  return(-1);
8139
0
    }
8140
0
    entity->content = buf->content;
8141
0
    buf->content = NULL;
8142
0
    xmlBufferFree(buf);
8143
8144
0
    return(0);
8145
0
}
8146
8147
/**
8148
 * xmlParseStringPEReference:
8149
 * @ctxt:  an XML parser context
8150
 * @str:  a pointer to an index in the string
8151
 *
8152
 * parse PEReference declarations
8153
 *
8154
 * [69] PEReference ::= '%' Name ';'
8155
 *
8156
 * [ WFC: No Recursion ]
8157
 * A parsed entity must not contain a recursive
8158
 * reference to itself, either directly or indirectly.
8159
 *
8160
 * [ WFC: Entity Declared ]
8161
 * In a document without any DTD, a document with only an internal DTD
8162
 * subset which contains no parameter entity references, or a document
8163
 * with "standalone='yes'", ...  ... The declaration of a parameter
8164
 * entity must precede any reference to it...
8165
 *
8166
 * [ VC: Entity Declared ]
8167
 * In a document with an external subset or external parameter entities
8168
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8169
 * must precede any reference to it...
8170
 *
8171
 * [ WFC: In DTD ]
8172
 * Parameter-entity references may only appear in the DTD.
8173
 * NOTE: misleading but this is handled.
8174
 *
8175
 * Returns the string of the entity content.
8176
 *         str is updated to the current value of the index
8177
 */
8178
static xmlEntityPtr
8179
0
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8180
0
    const xmlChar *ptr;
8181
0
    xmlChar cur;
8182
0
    xmlChar *name;
8183
0
    xmlEntityPtr entity = NULL;
8184
8185
0
    if ((str == NULL) || (*str == NULL)) return(NULL);
8186
0
    ptr = *str;
8187
0
    cur = *ptr;
8188
0
    if (cur != '%')
8189
0
        return(NULL);
8190
0
    ptr++;
8191
0
    name = xmlParseStringName(ctxt, &ptr);
8192
0
    if (name == NULL) {
8193
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8194
0
           "xmlParseStringPEReference: no name\n");
8195
0
  *str = ptr;
8196
0
  return(NULL);
8197
0
    }
8198
0
    cur = *ptr;
8199
0
    if (cur != ';') {
8200
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8201
0
  xmlFree(name);
8202
0
  *str = ptr;
8203
0
  return(NULL);
8204
0
    }
8205
0
    ptr++;
8206
8207
    /*
8208
     * Increase the number of entity references parsed
8209
     */
8210
0
    ctxt->nbentities++;
8211
8212
    /*
8213
     * Request the entity from SAX
8214
     */
8215
0
    if ((ctxt->sax != NULL) &&
8216
0
  (ctxt->sax->getParameterEntity != NULL))
8217
0
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8218
0
    if (ctxt->instate == XML_PARSER_EOF) {
8219
0
  xmlFree(name);
8220
0
  *str = ptr;
8221
0
  return(NULL);
8222
0
    }
8223
0
    if (entity == NULL) {
8224
  /*
8225
   * [ WFC: Entity Declared ]
8226
   * In a document without any DTD, a document with only an
8227
   * internal DTD subset which contains no parameter entity
8228
   * references, or a document with "standalone='yes'", ...
8229
   * ... The declaration of a parameter entity must precede
8230
   * any reference to it...
8231
   */
8232
0
  if ((ctxt->standalone == 1) ||
8233
0
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8234
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8235
0
     "PEReference: %%%s; not found\n", name);
8236
0
  } else {
8237
      /*
8238
       * [ VC: Entity Declared ]
8239
       * In a document with an external subset or external
8240
       * parameter entities with "standalone='no'", ...
8241
       * ... The declaration of a parameter entity must
8242
       * precede any reference to it...
8243
       */
8244
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8245
0
        "PEReference: %%%s; not found\n",
8246
0
        name, NULL);
8247
0
      ctxt->valid = 0;
8248
0
  }
8249
0
  xmlParserEntityCheck(ctxt, 0, NULL, 0);
8250
0
    } else {
8251
  /*
8252
   * Internal checking in case the entity quest barfed
8253
   */
8254
0
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8255
0
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8256
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8257
0
        "%%%s; is not a parameter entity\n",
8258
0
        name, NULL);
8259
0
  }
8260
0
    }
8261
0
    ctxt->hasPErefs = 1;
8262
0
    xmlFree(name);
8263
0
    *str = ptr;
8264
0
    return(entity);
8265
0
}
8266
8267
/**
8268
 * xmlParseDocTypeDecl:
8269
 * @ctxt:  an XML parser context
8270
 *
8271
 * parse a DOCTYPE declaration
8272
 *
8273
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8274
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8275
 *
8276
 * [ VC: Root Element Type ]
8277
 * The Name in the document type declaration must match the element
8278
 * type of the root element.
8279
 */
8280
8281
void
8282
5.50k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8283
5.50k
    const xmlChar *name = NULL;
8284
5.50k
    xmlChar *ExternalID = NULL;
8285
5.50k
    xmlChar *URI = NULL;
8286
8287
    /*
8288
     * We know that '<!DOCTYPE' has been detected.
8289
     */
8290
5.50k
    SKIP(9);
8291
8292
5.50k
    SKIP_BLANKS;
8293
8294
    /*
8295
     * Parse the DOCTYPE name.
8296
     */
8297
5.50k
    name = xmlParseName(ctxt);
8298
5.50k
    if (name == NULL) {
8299
52
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8300
52
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8301
52
    }
8302
5.50k
    ctxt->intSubName = name;
8303
8304
5.50k
    SKIP_BLANKS;
8305
8306
    /*
8307
     * Check for SystemID and ExternalID
8308
     */
8309
5.50k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8310
8311
5.50k
    if ((URI != NULL) || (ExternalID != NULL)) {
8312
299
        ctxt->hasExternalSubset = 1;
8313
299
    }
8314
5.50k
    ctxt->extSubURI = URI;
8315
5.50k
    ctxt->extSubSystem = ExternalID;
8316
8317
5.50k
    SKIP_BLANKS;
8318
8319
    /*
8320
     * Create and update the internal subset.
8321
     */
8322
5.50k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8323
5.50k
  (!ctxt->disableSAX))
8324
0
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8325
5.50k
    if (ctxt->instate == XML_PARSER_EOF)
8326
0
  return;
8327
8328
    /*
8329
     * Is there any internal subset declarations ?
8330
     * they are handled separately in xmlParseInternalSubset()
8331
     */
8332
5.50k
    if (RAW == '[')
8333
4.84k
  return;
8334
8335
    /*
8336
     * We should be at the end of the DOCTYPE declaration.
8337
     */
8338
656
    if (RAW != '>') {
8339
423
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8340
423
    }
8341
656
    NEXT;
8342
656
}
8343
8344
/**
8345
 * xmlParseInternalSubset:
8346
 * @ctxt:  an XML parser context
8347
 *
8348
 * parse the internal subset declaration
8349
 *
8350
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8351
 */
8352
8353
static void
8354
4.50k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8355
    /*
8356
     * Is there any DTD definition ?
8357
     */
8358
4.50k
    if (RAW == '[') {
8359
4.50k
        int baseInputNr = ctxt->inputNr;
8360
4.50k
        ctxt->instate = XML_PARSER_DTD;
8361
4.50k
        NEXT;
8362
  /*
8363
   * Parse the succession of Markup declarations and
8364
   * PEReferences.
8365
   * Subsequence (markupdecl | PEReference | S)*
8366
   */
8367
46.2k
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8368
46.2k
               (ctxt->instate != XML_PARSER_EOF)) {
8369
43.4k
      int id = ctxt->input->id;
8370
43.4k
      unsigned long cons = CUR_CONSUMED;
8371
8372
43.4k
      SKIP_BLANKS;
8373
43.4k
      xmlParseMarkupDecl(ctxt);
8374
43.4k
      xmlParsePEReference(ctxt);
8375
8376
            /*
8377
             * Conditional sections are allowed from external entities included
8378
             * by PE References in the internal subset.
8379
             */
8380
43.4k
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8381
43.4k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8382
0
                xmlParseConditionalSections(ctxt);
8383
0
            }
8384
8385
43.4k
      if ((id == ctxt->input->id) && (cons == CUR_CONSUMED)) {
8386
1.72k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8387
1.72k
       "xmlParseInternalSubset: error detected in Markup declaration\n");
8388
1.72k
                if (ctxt->inputNr > baseInputNr)
8389
0
                    xmlPopInput(ctxt);
8390
1.72k
                else
8391
1.72k
        break;
8392
1.72k
      }
8393
43.4k
  }
8394
4.50k
  if (RAW == ']') {
8395
1.78k
      NEXT;
8396
1.78k
      SKIP_BLANKS;
8397
1.78k
  }
8398
4.50k
    }
8399
8400
    /*
8401
     * We should be at the end of the DOCTYPE declaration.
8402
     */
8403
4.50k
    if (RAW != '>') {
8404
2.71k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8405
2.71k
  return;
8406
2.71k
    }
8407
1.79k
    NEXT;
8408
1.79k
}
8409
8410
#ifdef LIBXML_SAX1_ENABLED
8411
/**
8412
 * xmlParseAttribute:
8413
 * @ctxt:  an XML parser context
8414
 * @value:  a xmlChar ** used to store the value of the attribute
8415
 *
8416
 * parse an attribute
8417
 *
8418
 * [41] Attribute ::= Name Eq AttValue
8419
 *
8420
 * [ WFC: No External Entity References ]
8421
 * Attribute values cannot contain direct or indirect entity references
8422
 * to external entities.
8423
 *
8424
 * [ WFC: No < in Attribute Values ]
8425
 * The replacement text of any entity referred to directly or indirectly in
8426
 * an attribute value (other than "&lt;") must not contain a <.
8427
 *
8428
 * [ VC: Attribute Value Type ]
8429
 * The attribute must have been declared; the value must be of the type
8430
 * declared for it.
8431
 *
8432
 * [25] Eq ::= S? '=' S?
8433
 *
8434
 * With namespace:
8435
 *
8436
 * [NS 11] Attribute ::= QName Eq AttValue
8437
 *
8438
 * Also the case QName == xmlns:??? is handled independently as a namespace
8439
 * definition.
8440
 *
8441
 * Returns the attribute name, and the value in *value.
8442
 */
8443
8444
const xmlChar *
8445
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8446
0
    const xmlChar *name;
8447
0
    xmlChar *val;
8448
8449
0
    *value = NULL;
8450
0
    GROW;
8451
0
    name = xmlParseName(ctxt);
8452
0
    if (name == NULL) {
8453
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8454
0
                 "error parsing attribute name\n");
8455
0
        return(NULL);
8456
0
    }
8457
8458
    /*
8459
     * read the value
8460
     */
8461
0
    SKIP_BLANKS;
8462
0
    if (RAW == '=') {
8463
0
        NEXT;
8464
0
  SKIP_BLANKS;
8465
0
  val = xmlParseAttValue(ctxt);
8466
0
  ctxt->instate = XML_PARSER_CONTENT;
8467
0
    } else {
8468
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8469
0
         "Specification mandates value for attribute %s\n", name);
8470
0
  return(NULL);
8471
0
    }
8472
8473
    /*
8474
     * Check that xml:lang conforms to the specification
8475
     * No more registered as an error, just generate a warning now
8476
     * since this was deprecated in XML second edition
8477
     */
8478
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8479
0
  if (!xmlCheckLanguageID(val)) {
8480
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8481
0
              "Malformed value for xml:lang : %s\n",
8482
0
        val, NULL);
8483
0
  }
8484
0
    }
8485
8486
    /*
8487
     * Check that xml:space conforms to the specification
8488
     */
8489
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8490
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8491
0
      *(ctxt->space) = 0;
8492
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8493
0
      *(ctxt->space) = 1;
8494
0
  else {
8495
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8496
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8497
0
                                 val, NULL);
8498
0
  }
8499
0
    }
8500
8501
0
    *value = val;
8502
0
    return(name);
8503
0
}
8504
8505
/**
8506
 * xmlParseStartTag:
8507
 * @ctxt:  an XML parser context
8508
 *
8509
 * parse a start of tag either for rule element or
8510
 * EmptyElement. In both case we don't parse the tag closing chars.
8511
 *
8512
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8513
 *
8514
 * [ WFC: Unique Att Spec ]
8515
 * No attribute name may appear more than once in the same start-tag or
8516
 * empty-element tag.
8517
 *
8518
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8519
 *
8520
 * [ WFC: Unique Att Spec ]
8521
 * No attribute name may appear more than once in the same start-tag or
8522
 * empty-element tag.
8523
 *
8524
 * With namespace:
8525
 *
8526
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8527
 *
8528
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8529
 *
8530
 * Returns the element name parsed
8531
 */
8532
8533
const xmlChar *
8534
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8535
0
    const xmlChar *name;
8536
0
    const xmlChar *attname;
8537
0
    xmlChar *attvalue;
8538
0
    const xmlChar **atts = ctxt->atts;
8539
0
    int nbatts = 0;
8540
0
    int maxatts = ctxt->maxatts;
8541
0
    int i;
8542
8543
0
    if (RAW != '<') return(NULL);
8544
0
    NEXT1;
8545
8546
0
    name = xmlParseName(ctxt);
8547
0
    if (name == NULL) {
8548
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8549
0
       "xmlParseStartTag: invalid element name\n");
8550
0
        return(NULL);
8551
0
    }
8552
8553
    /*
8554
     * Now parse the attributes, it ends up with the ending
8555
     *
8556
     * (S Attribute)* S?
8557
     */
8558
0
    SKIP_BLANKS;
8559
0
    GROW;
8560
8561
0
    while (((RAW != '>') &&
8562
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8563
0
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8564
0
        int id = ctxt->input->id;
8565
0
  unsigned long cons = CUR_CONSUMED;
8566
8567
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8568
0
        if ((attname != NULL) && (attvalue != NULL)) {
8569
      /*
8570
       * [ WFC: Unique Att Spec ]
8571
       * No attribute name may appear more than once in the same
8572
       * start-tag or empty-element tag.
8573
       */
8574
0
      for (i = 0; i < nbatts;i += 2) {
8575
0
          if (xmlStrEqual(atts[i], attname)) {
8576
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8577
0
        xmlFree(attvalue);
8578
0
        goto failed;
8579
0
    }
8580
0
      }
8581
      /*
8582
       * Add the pair to atts
8583
       */
8584
0
      if (atts == NULL) {
8585
0
          maxatts = 22; /* allow for 10 attrs by default */
8586
0
          atts = (const xmlChar **)
8587
0
           xmlMalloc(maxatts * sizeof(xmlChar *));
8588
0
    if (atts == NULL) {
8589
0
        xmlErrMemory(ctxt, NULL);
8590
0
        if (attvalue != NULL)
8591
0
      xmlFree(attvalue);
8592
0
        goto failed;
8593
0
    }
8594
0
    ctxt->atts = atts;
8595
0
    ctxt->maxatts = maxatts;
8596
0
      } else if (nbatts + 4 > maxatts) {
8597
0
          const xmlChar **n;
8598
8599
0
          maxatts *= 2;
8600
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8601
0
               maxatts * sizeof(const xmlChar *));
8602
0
    if (n == NULL) {
8603
0
        xmlErrMemory(ctxt, NULL);
8604
0
        if (attvalue != NULL)
8605
0
      xmlFree(attvalue);
8606
0
        goto failed;
8607
0
    }
8608
0
    atts = n;
8609
0
    ctxt->atts = atts;
8610
0
    ctxt->maxatts = maxatts;
8611
0
      }
8612
0
      atts[nbatts++] = attname;
8613
0
      atts[nbatts++] = attvalue;
8614
0
      atts[nbatts] = NULL;
8615
0
      atts[nbatts + 1] = NULL;
8616
0
  } else {
8617
0
      if (attvalue != NULL)
8618
0
    xmlFree(attvalue);
8619
0
  }
8620
8621
0
failed:
8622
8623
0
  GROW
8624
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8625
0
      break;
8626
0
  if (SKIP_BLANKS == 0) {
8627
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8628
0
         "attributes construct error\n");
8629
0
  }
8630
0
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
8631
0
            (attname == NULL) && (attvalue == NULL)) {
8632
0
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8633
0
         "xmlParseStartTag: problem parsing attributes\n");
8634
0
      break;
8635
0
  }
8636
0
  SHRINK;
8637
0
        GROW;
8638
0
    }
8639
8640
    /*
8641
     * SAX: Start of Element !
8642
     */
8643
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8644
0
  (!ctxt->disableSAX)) {
8645
0
  if (nbatts > 0)
8646
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8647
0
  else
8648
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8649
0
    }
8650
8651
0
    if (atts != NULL) {
8652
        /* Free only the content strings */
8653
0
        for (i = 1;i < nbatts;i+=2)
8654
0
      if (atts[i] != NULL)
8655
0
         xmlFree((xmlChar *) atts[i]);
8656
0
    }
8657
0
    return(name);
8658
0
}
8659
8660
/**
8661
 * xmlParseEndTag1:
8662
 * @ctxt:  an XML parser context
8663
 * @line:  line of the start tag
8664
 * @nsNr:  number of namespaces on the start tag
8665
 *
8666
 * parse an end of tag
8667
 *
8668
 * [42] ETag ::= '</' Name S? '>'
8669
 *
8670
 * With namespace
8671
 *
8672
 * [NS 9] ETag ::= '</' QName S? '>'
8673
 */
8674
8675
static void
8676
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8677
0
    const xmlChar *name;
8678
8679
0
    GROW;
8680
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8681
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8682
0
           "xmlParseEndTag: '</' not found\n");
8683
0
  return;
8684
0
    }
8685
0
    SKIP(2);
8686
8687
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8688
8689
    /*
8690
     * We should definitely be at the ending "S? '>'" part
8691
     */
8692
0
    GROW;
8693
0
    SKIP_BLANKS;
8694
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8695
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8696
0
    } else
8697
0
  NEXT1;
8698
8699
    /*
8700
     * [ WFC: Element Type Match ]
8701
     * The Name in an element's end-tag must match the element type in the
8702
     * start-tag.
8703
     *
8704
     */
8705
0
    if (name != (xmlChar*)1) {
8706
0
        if (name == NULL) name = BAD_CAST "unparsable";
8707
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8708
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8709
0
                    ctxt->name, line, name);
8710
0
    }
8711
8712
    /*
8713
     * SAX: End of Tag
8714
     */
8715
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8716
0
  (!ctxt->disableSAX))
8717
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8718
8719
0
    namePop(ctxt);
8720
0
    spacePop(ctxt);
8721
0
    return;
8722
0
}
8723
8724
/**
8725
 * xmlParseEndTag:
8726
 * @ctxt:  an XML parser context
8727
 *
8728
 * parse an end of tag
8729
 *
8730
 * [42] ETag ::= '</' Name S? '>'
8731
 *
8732
 * With namespace
8733
 *
8734
 * [NS 9] ETag ::= '</' QName S? '>'
8735
 */
8736
8737
void
8738
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8739
0
    xmlParseEndTag1(ctxt, 0);
8740
0
}
8741
#endif /* LIBXML_SAX1_ENABLED */
8742
8743
/************************************************************************
8744
 *                  *
8745
 *          SAX 2 specific operations       *
8746
 *                  *
8747
 ************************************************************************/
8748
8749
/*
8750
 * xmlGetNamespace:
8751
 * @ctxt:  an XML parser context
8752
 * @prefix:  the prefix to lookup
8753
 *
8754
 * Lookup the namespace name for the @prefix (which ca be NULL)
8755
 * The prefix must come from the @ctxt->dict dictionary
8756
 *
8757
 * Returns the namespace name or NULL if not bound
8758
 */
8759
static const xmlChar *
8760
1.76M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8761
1.76M
    int i;
8762
8763
1.76M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8764
2.46M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8765
722k
        if (ctxt->nsTab[i] == prefix) {
8766
22.7k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8767
2.40k
          return(NULL);
8768
20.3k
      return(ctxt->nsTab[i + 1]);
8769
22.7k
  }
8770
1.74M
    return(NULL);
8771
1.76M
}
8772
8773
/**
8774
 * xmlParseQName:
8775
 * @ctxt:  an XML parser context
8776
 * @prefix:  pointer to store the prefix part
8777
 *
8778
 * parse an XML Namespace QName
8779
 *
8780
 * [6]  QName  ::= (Prefix ':')? LocalPart
8781
 * [7]  Prefix  ::= NCName
8782
 * [8]  LocalPart  ::= NCName
8783
 *
8784
 * Returns the Name parsed or NULL
8785
 */
8786
8787
static const xmlChar *
8788
2.48M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8789
2.48M
    const xmlChar *l, *p;
8790
8791
2.48M
    GROW;
8792
8793
2.48M
    l = xmlParseNCName(ctxt);
8794
2.48M
    if (l == NULL) {
8795
15.5k
        if (CUR == ':') {
8796
14.5k
      l = xmlParseName(ctxt);
8797
14.5k
      if (l != NULL) {
8798
14.5k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8799
14.5k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8800
14.5k
    *prefix = NULL;
8801
14.5k
    return(l);
8802
14.5k
      }
8803
14.5k
  }
8804
995
        return(NULL);
8805
15.5k
    }
8806
2.46M
    if (CUR == ':') {
8807
21.2k
        NEXT;
8808
21.2k
  p = l;
8809
21.2k
  l = xmlParseNCName(ctxt);
8810
21.2k
  if (l == NULL) {
8811
2.05k
      xmlChar *tmp;
8812
8813
2.05k
            if (ctxt->instate == XML_PARSER_EOF)
8814
0
                return(NULL);
8815
2.05k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8816
2.05k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8817
2.05k
      l = xmlParseNmtoken(ctxt);
8818
2.05k
      if (l == NULL) {
8819
1.09k
                if (ctxt->instate == XML_PARSER_EOF)
8820
0
                    return(NULL);
8821
1.09k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8822
1.09k
            } else {
8823
961
    tmp = xmlBuildQName(l, p, NULL, 0);
8824
961
    xmlFree((char *)l);
8825
961
      }
8826
2.05k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8827
2.05k
      if (tmp != NULL) xmlFree(tmp);
8828
2.05k
      *prefix = NULL;
8829
2.05k
      return(p);
8830
2.05k
  }
8831
19.2k
  if (CUR == ':') {
8832
2.55k
      xmlChar *tmp;
8833
8834
2.55k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8835
2.55k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8836
2.55k
      NEXT;
8837
2.55k
      tmp = (xmlChar *) xmlParseName(ctxt);
8838
2.55k
      if (tmp != NULL) {
8839
1.55k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8840
1.55k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8841
1.55k
    if (tmp != NULL) xmlFree(tmp);
8842
1.55k
    *prefix = p;
8843
1.55k
    return(l);
8844
1.55k
      }
8845
1.00k
            if (ctxt->instate == XML_PARSER_EOF)
8846
0
                return(NULL);
8847
1.00k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8848
1.00k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8849
1.00k
      if (tmp != NULL) xmlFree(tmp);
8850
1.00k
      *prefix = p;
8851
1.00k
      return(l);
8852
1.00k
  }
8853
16.6k
  *prefix = p;
8854
16.6k
    } else
8855
2.44M
        *prefix = NULL;
8856
2.46M
    return(l);
8857
2.46M
}
8858
8859
/**
8860
 * xmlParseQNameAndCompare:
8861
 * @ctxt:  an XML parser context
8862
 * @name:  the localname
8863
 * @prefix:  the prefix, if any.
8864
 *
8865
 * parse an XML name and compares for match
8866
 * (specialized for endtag parsing)
8867
 *
8868
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8869
 * and the name for mismatch
8870
 */
8871
8872
static const xmlChar *
8873
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8874
742
                        xmlChar const *prefix) {
8875
742
    const xmlChar *cmp;
8876
742
    const xmlChar *in;
8877
742
    const xmlChar *ret;
8878
742
    const xmlChar *prefix2;
8879
8880
742
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8881
8882
742
    GROW;
8883
742
    in = ctxt->input->cur;
8884
8885
742
    cmp = prefix;
8886
1.74k
    while (*in != 0 && *in == *cmp) {
8887
1.00k
  ++in;
8888
1.00k
  ++cmp;
8889
1.00k
    }
8890
742
    if ((*cmp == 0) && (*in == ':')) {
8891
718
        in++;
8892
718
  cmp = name;
8893
1.81k
  while (*in != 0 && *in == *cmp) {
8894
1.09k
      ++in;
8895
1.09k
      ++cmp;
8896
1.09k
  }
8897
718
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8898
      /* success */
8899
703
            ctxt->input->col += in - ctxt->input->cur;
8900
703
      ctxt->input->cur = in;
8901
703
      return((const xmlChar*) 1);
8902
703
  }
8903
718
    }
8904
    /*
8905
     * all strings coms from the dictionary, equality can be done directly
8906
     */
8907
39
    ret = xmlParseQName (ctxt, &prefix2);
8908
39
    if ((ret == name) && (prefix == prefix2))
8909
9
  return((const xmlChar*) 1);
8910
30
    return ret;
8911
39
}
8912
8913
/**
8914
 * xmlParseAttValueInternal:
8915
 * @ctxt:  an XML parser context
8916
 * @len:  attribute len result
8917
 * @alloc:  whether the attribute was reallocated as a new string
8918
 * @normalize:  if 1 then further non-CDATA normalization must be done
8919
 *
8920
 * parse a value for an attribute.
8921
 * NOTE: if no normalization is needed, the routine will return pointers
8922
 *       directly from the data buffer.
8923
 *
8924
 * 3.3.3 Attribute-Value Normalization:
8925
 * Before the value of an attribute is passed to the application or
8926
 * checked for validity, the XML processor must normalize it as follows:
8927
 * - a character reference is processed by appending the referenced
8928
 *   character to the attribute value
8929
 * - an entity reference is processed by recursively processing the
8930
 *   replacement text of the entity
8931
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8932
 *   appending #x20 to the normalized value, except that only a single
8933
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8934
 *   parsed entity or the literal entity value of an internal parsed entity
8935
 * - other characters are processed by appending them to the normalized value
8936
 * If the declared value is not CDATA, then the XML processor must further
8937
 * process the normalized attribute value by discarding any leading and
8938
 * trailing space (#x20) characters, and by replacing sequences of space
8939
 * (#x20) characters by a single space (#x20) character.
8940
 * All attributes for which no declaration has been read should be treated
8941
 * by a non-validating parser as if declared CDATA.
8942
 *
8943
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8944
 *     caller if it was copied, this can be detected by val[*len] == 0.
8945
 */
8946
8947
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8948
2.04k
    const xmlChar *oldbase = ctxt->input->base;\
8949
2.04k
    GROW;\
8950
2.04k
    if (ctxt->instate == XML_PARSER_EOF)\
8951
2.04k
        return(NULL);\
8952
2.04k
    if (oldbase != ctxt->input->base) {\
8953
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
8954
0
        start = start + delta;\
8955
0
        in = in + delta;\
8956
0
    }\
8957
2.04k
    end = ctxt->input->end;
8958
8959
static xmlChar *
8960
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8961
                         int normalize)
8962
762k
{
8963
762k
    xmlChar limit = 0;
8964
762k
    const xmlChar *in = NULL, *start, *end, *last;
8965
762k
    xmlChar *ret = NULL;
8966
762k
    int line, col;
8967
762k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8968
762k
                    XML_MAX_HUGE_LENGTH :
8969
762k
                    XML_MAX_TEXT_LENGTH;
8970
8971
762k
    GROW;
8972
762k
    in = (xmlChar *) CUR_PTR;
8973
762k
    line = ctxt->input->line;
8974
762k
    col = ctxt->input->col;
8975
762k
    if (*in != '"' && *in != '\'') {
8976
942
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8977
942
        return (NULL);
8978
942
    }
8979
761k
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8980
8981
    /*
8982
     * try to handle in this routine the most common case where no
8983
     * allocation of a new string is required and where content is
8984
     * pure ASCII.
8985
     */
8986
761k
    limit = *in++;
8987
761k
    col++;
8988
761k
    end = ctxt->input->end;
8989
761k
    start = in;
8990
761k
    if (in >= end) {
8991
134
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8992
134
    }
8993
761k
    if (normalize) {
8994
        /*
8995
   * Skip any leading spaces
8996
   */
8997
8.44k
  while ((in < end) && (*in != limit) &&
8998
8.44k
         ((*in == 0x20) || (*in == 0x9) ||
8999
7.09k
          (*in == 0xA) || (*in == 0xD))) {
9000
2.83k
      if (*in == 0xA) {
9001
553
          line++; col = 1;
9002
2.27k
      } else {
9003
2.27k
          col++;
9004
2.27k
      }
9005
2.83k
      in++;
9006
2.83k
      start = in;
9007
2.83k
      if (in >= end) {
9008
22
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9009
22
                if ((in - start) > maxLength) {
9010
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9011
0
                                   "AttValue length too long\n");
9012
0
                    return(NULL);
9013
0
                }
9014
22
      }
9015
2.83k
  }
9016
112k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9017
112k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9018
107k
      col++;
9019
107k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9020
107k
      if (in >= end) {
9021
49
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9022
49
                if ((in - start) > maxLength) {
9023
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9024
0
                                   "AttValue length too long\n");
9025
0
                    return(NULL);
9026
0
                }
9027
49
      }
9028
107k
  }
9029
5.61k
  last = in;
9030
  /*
9031
   * skip the trailing blanks
9032
   */
9033
6.42k
  while ((last[-1] == 0x20) && (last > start)) last--;
9034
13.7k
  while ((in < end) && (*in != limit) &&
9035
13.7k
         ((*in == 0x20) || (*in == 0x9) ||
9036
10.4k
          (*in == 0xA) || (*in == 0xD))) {
9037
8.09k
      if (*in == 0xA) {
9038
206
          line++, col = 1;
9039
7.88k
      } else {
9040
7.88k
          col++;
9041
7.88k
      }
9042
8.09k
      in++;
9043
8.09k
      if (in >= end) {
9044
34
    const xmlChar *oldbase = ctxt->input->base;
9045
34
    GROW;
9046
34
                if (ctxt->instate == XML_PARSER_EOF)
9047
0
                    return(NULL);
9048
34
    if (oldbase != ctxt->input->base) {
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9050
0
        start = start + delta;
9051
0
        in = in + delta;
9052
0
        last = last + delta;
9053
0
    }
9054
34
    end = ctxt->input->end;
9055
34
                if ((in - start) > maxLength) {
9056
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9057
0
                                   "AttValue length too long\n");
9058
0
                    return(NULL);
9059
0
                }
9060
34
      }
9061
8.09k
  }
9062
5.61k
        if ((in - start) > maxLength) {
9063
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9064
0
                           "AttValue length too long\n");
9065
0
            return(NULL);
9066
0
        }
9067
5.61k
  if (*in != limit) goto need_complex;
9068
756k
    } else {
9069
3.35M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9070
3.35M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9071
2.59M
      in++;
9072
2.59M
      col++;
9073
2.59M
      if (in >= end) {
9074
1.83k
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9075
1.83k
                if ((in - start) > maxLength) {
9076
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9077
0
                                   "AttValue length too long\n");
9078
0
                    return(NULL);
9079
0
                }
9080
1.83k
      }
9081
2.59M
  }
9082
756k
  last = in;
9083
756k
        if ((in - start) > maxLength) {
9084
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9085
0
                           "AttValue length too long\n");
9086
0
            return(NULL);
9087
0
        }
9088
756k
  if (*in != limit) goto need_complex;
9089
756k
    }
9090
605k
    in++;
9091
605k
    col++;
9092
605k
    if (len != NULL) {
9093
585k
        *len = last - start;
9094
585k
        ret = (xmlChar *) start;
9095
585k
    } else {
9096
19.7k
        if (alloc) *alloc = 1;
9097
19.7k
        ret = xmlStrndup(start, last - start);
9098
19.7k
    }
9099
605k
    CUR_PTR = in;
9100
605k
    ctxt->input->line = line;
9101
605k
    ctxt->input->col = col;
9102
605k
    if (alloc) *alloc = 0;
9103
605k
    return ret;
9104
156k
need_complex:
9105
156k
    if (alloc) *alloc = 1;
9106
156k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9107
761k
}
9108
9109
/**
9110
 * xmlParseAttribute2:
9111
 * @ctxt:  an XML parser context
9112
 * @pref:  the element prefix
9113
 * @elem:  the element name
9114
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9115
 * @value:  a xmlChar ** used to store the value of the attribute
9116
 * @len:  an int * to save the length of the attribute
9117
 * @alloc:  an int * to indicate if the attribute was allocated
9118
 *
9119
 * parse an attribute in the new SAX2 framework.
9120
 *
9121
 * Returns the attribute name, and the value in *value, .
9122
 */
9123
9124
static const xmlChar *
9125
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9126
                   const xmlChar * pref, const xmlChar * elem,
9127
                   const xmlChar ** prefix, xmlChar ** value,
9128
                   int *len, int *alloc)
9129
741k
{
9130
741k
    const xmlChar *name;
9131
741k
    xmlChar *val, *internal_val = NULL;
9132
741k
    int normalize = 0;
9133
9134
741k
    *value = NULL;
9135
741k
    GROW;
9136
741k
    name = xmlParseQName(ctxt, prefix);
9137
741k
    if (name == NULL) {
9138
623
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9139
623
                       "error parsing attribute name\n");
9140
623
        return (NULL);
9141
623
    }
9142
9143
    /*
9144
     * get the type if needed
9145
     */
9146
741k
    if (ctxt->attsSpecial != NULL) {
9147
153k
        int type;
9148
9149
153k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9150
153k
                                                 pref, elem, *prefix, name);
9151
153k
        if (type != 0)
9152
5.65k
            normalize = 1;
9153
153k
    }
9154
9155
    /*
9156
     * read the value
9157
     */
9158
741k
    SKIP_BLANKS;
9159
741k
    if (RAW == '=') {
9160
740k
        NEXT;
9161
740k
        SKIP_BLANKS;
9162
740k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9163
740k
  if (normalize) {
9164
      /*
9165
       * Sometimes a second normalisation pass for spaces is needed
9166
       * but that only happens if charrefs or entities references
9167
       * have been used in the attribute value, i.e. the attribute
9168
       * value have been extracted in an allocated string already.
9169
       */
9170
5.63k
      if (*alloc) {
9171
2.44k
          const xmlChar *val2;
9172
9173
2.44k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9174
2.44k
    if ((val2 != NULL) && (val2 != val)) {
9175
647
        xmlFree(val);
9176
647
        val = (xmlChar *) val2;
9177
647
    }
9178
2.44k
      }
9179
5.63k
  }
9180
740k
        ctxt->instate = XML_PARSER_CONTENT;
9181
740k
    } else {
9182
478
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9183
478
                          "Specification mandates value for attribute %s\n",
9184
478
                          name);
9185
478
        return (NULL);
9186
478
    }
9187
9188
740k
    if (*prefix == ctxt->str_xml) {
9189
        /*
9190
         * Check that xml:lang conforms to the specification
9191
         * No more registered as an error, just generate a warning now
9192
         * since this was deprecated in XML second edition
9193
         */
9194
1.40k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9195
0
            internal_val = xmlStrndup(val, *len);
9196
0
            if (!xmlCheckLanguageID(internal_val)) {
9197
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9198
0
                              "Malformed value for xml:lang : %s\n",
9199
0
                              internal_val, NULL);
9200
0
            }
9201
0
        }
9202
9203
        /*
9204
         * Check that xml:space conforms to the specification
9205
         */
9206
1.40k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9207
529
            internal_val = xmlStrndup(val, *len);
9208
529
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9209
37
                *(ctxt->space) = 0;
9210
492
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9211
328
                *(ctxt->space) = 1;
9212
164
            else {
9213
164
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9214
164
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9215
164
                              internal_val, NULL);
9216
164
            }
9217
529
        }
9218
1.40k
        if (internal_val) {
9219
528
            xmlFree(internal_val);
9220
528
        }
9221
1.40k
    }
9222
9223
740k
    *value = val;
9224
740k
    return (name);
9225
741k
}
9226
/**
9227
 * xmlParseStartTag2:
9228
 * @ctxt:  an XML parser context
9229
 *
9230
 * parse a start of tag either for rule element or
9231
 * EmptyElement. In both case we don't parse the tag closing chars.
9232
 * This routine is called when running SAX2 parsing
9233
 *
9234
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9235
 *
9236
 * [ WFC: Unique Att Spec ]
9237
 * No attribute name may appear more than once in the same start-tag or
9238
 * empty-element tag.
9239
 *
9240
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9241
 *
9242
 * [ WFC: Unique Att Spec ]
9243
 * No attribute name may appear more than once in the same start-tag or
9244
 * empty-element tag.
9245
 *
9246
 * With namespace:
9247
 *
9248
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9249
 *
9250
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9251
 *
9252
 * Returns the element name parsed
9253
 */
9254
9255
static const xmlChar *
9256
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9257
1.73M
                  const xmlChar **URI, int *tlen) {
9258
1.73M
    const xmlChar *localname;
9259
1.73M
    const xmlChar *prefix;
9260
1.73M
    const xmlChar *attname;
9261
1.73M
    const xmlChar *aprefix;
9262
1.73M
    const xmlChar *nsname;
9263
1.73M
    xmlChar *attvalue;
9264
1.73M
    const xmlChar **atts = ctxt->atts;
9265
1.73M
    int maxatts = ctxt->maxatts;
9266
1.73M
    int nratts, nbatts, nbdef, inputid;
9267
1.73M
    int i, j, nbNs, attval;
9268
1.73M
    unsigned long cur;
9269
1.73M
    int nsNr = ctxt->nsNr;
9270
9271
1.73M
    if (RAW != '<') return(NULL);
9272
1.73M
    NEXT1;
9273
9274
    /*
9275
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9276
     *       point since the attribute values may be stored as pointers to
9277
     *       the buffer and calling SHRINK would destroy them !
9278
     *       The Shrinking is only possible once the full set of attribute
9279
     *       callbacks have been done.
9280
     */
9281
1.73M
    SHRINK;
9282
1.73M
    cur = ctxt->input->cur - ctxt->input->base;
9283
1.73M
    inputid = ctxt->input->id;
9284
1.73M
    nbatts = 0;
9285
1.73M
    nratts = 0;
9286
1.73M
    nbdef = 0;
9287
1.73M
    nbNs = 0;
9288
1.73M
    attval = 0;
9289
    /* Forget any namespaces added during an earlier parse of this element. */
9290
1.73M
    ctxt->nsNr = nsNr;
9291
9292
1.73M
    localname = xmlParseQName(ctxt, &prefix);
9293
1.73M
    if (localname == NULL) {
9294
364
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9295
364
           "StartTag: invalid element name\n");
9296
364
        return(NULL);
9297
364
    }
9298
1.73M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9299
9300
    /*
9301
     * Now parse the attributes, it ends up with the ending
9302
     *
9303
     * (S Attribute)* S?
9304
     */
9305
1.73M
    SKIP_BLANKS;
9306
1.73M
    GROW;
9307
9308
2.38M
    while (((RAW != '>') &&
9309
2.38M
     ((RAW != '/') || (NXT(1) != '>')) &&
9310
2.38M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9311
741k
  int id = ctxt->input->id;
9312
741k
  unsigned long cons = CUR_CONSUMED;
9313
741k
  int len = -1, alloc = 0;
9314
9315
741k
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9316
741k
                               &aprefix, &attvalue, &len, &alloc);
9317
741k
        if ((attname == NULL) || (attvalue == NULL))
9318
1.17k
            goto next_attr;
9319
740k
  if (len < 0) len = xmlStrlen(attvalue);
9320
9321
740k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9322
41.2k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9323
41.2k
            xmlURIPtr uri;
9324
9325
41.2k
            if (URL == NULL) {
9326
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9327
0
                if ((attvalue != NULL) && (alloc != 0))
9328
0
                    xmlFree(attvalue);
9329
0
                localname = NULL;
9330
0
                goto done;
9331
0
            }
9332
41.2k
            if (*URL != 0) {
9333
40.0k
                uri = xmlParseURI((const char *) URL);
9334
40.0k
                if (uri == NULL) {
9335
25.4k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9336
25.4k
                             "xmlns: '%s' is not a valid URI\n",
9337
25.4k
                                       URL, NULL, NULL);
9338
25.4k
                } else {
9339
14.5k
                    if (uri->scheme == NULL) {
9340
10.0k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9341
10.0k
                                  "xmlns: URI %s is not absolute\n",
9342
10.0k
                                  URL, NULL, NULL);
9343
10.0k
                    }
9344
14.5k
                    xmlFreeURI(uri);
9345
14.5k
                }
9346
40.0k
                if (URL == ctxt->str_xml_ns) {
9347
196
                    if (attname != ctxt->str_xml) {
9348
196
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9349
196
                     "xml namespace URI cannot be the default namespace\n",
9350
196
                                 NULL, NULL, NULL);
9351
196
                    }
9352
196
                    goto next_attr;
9353
196
                }
9354
39.8k
                if ((len == 29) &&
9355
39.8k
                    (xmlStrEqual(URL,
9356
496
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9357
196
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9358
196
                         "reuse of the xmlns namespace name is forbidden\n",
9359
196
                             NULL, NULL, NULL);
9360
196
                    goto next_attr;
9361
196
                }
9362
39.8k
            }
9363
            /*
9364
             * check that it's not a defined namespace
9365
             */
9366
44.3k
            for (j = 1;j <= nbNs;j++)
9367
33.8k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9368
30.3k
                    break;
9369
40.8k
            if (j <= nbNs)
9370
30.3k
                xmlErrAttributeDup(ctxt, NULL, attname);
9371
10.5k
            else
9372
10.5k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9373
9374
699k
        } else if (aprefix == ctxt->str_xmlns) {
9375
10.4k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9376
10.4k
            xmlURIPtr uri;
9377
9378
10.4k
            if (attname == ctxt->str_xml) {
9379
233
                if (URL != ctxt->str_xml_ns) {
9380
167
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9381
167
                             "xml namespace prefix mapped to wrong URI\n",
9382
167
                             NULL, NULL, NULL);
9383
167
                }
9384
                /*
9385
                 * Do not keep a namespace definition node
9386
                 */
9387
233
                goto next_attr;
9388
233
            }
9389
10.1k
            if (URL == ctxt->str_xml_ns) {
9390
228
                if (attname != ctxt->str_xml) {
9391
228
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9392
228
                             "xml namespace URI mapped to wrong prefix\n",
9393
228
                             NULL, NULL, NULL);
9394
228
                }
9395
228
                goto next_attr;
9396
228
            }
9397
9.93k
            if (attname == ctxt->str_xmlns) {
9398
459
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9399
459
                         "redefinition of the xmlns prefix is forbidden\n",
9400
459
                         NULL, NULL, NULL);
9401
459
                goto next_attr;
9402
459
            }
9403
9.48k
            if ((len == 29) &&
9404
9.48k
                (xmlStrEqual(URL,
9405
2.23k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9406
1.78k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9407
1.78k
                         "reuse of the xmlns namespace name is forbidden\n",
9408
1.78k
                         NULL, NULL, NULL);
9409
1.78k
                goto next_attr;
9410
1.78k
            }
9411
7.69k
            if ((URL == NULL) || (URL[0] == 0)) {
9412
473
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9413
473
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9414
473
                              attname, NULL, NULL);
9415
473
                goto next_attr;
9416
7.22k
            } else {
9417
7.22k
                uri = xmlParseURI((const char *) URL);
9418
7.22k
                if (uri == NULL) {
9419
3.93k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9420
3.93k
                         "xmlns:%s: '%s' is not a valid URI\n",
9421
3.93k
                                       attname, URL, NULL);
9422
3.93k
                } else {
9423
3.28k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9424
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9425
0
                                  "xmlns:%s: URI %s is not absolute\n",
9426
0
                                  attname, URL, NULL);
9427
0
                    }
9428
3.28k
                    xmlFreeURI(uri);
9429
3.28k
                }
9430
7.22k
            }
9431
9432
            /*
9433
             * check that it's not a defined namespace
9434
             */
9435
16.0k
            for (j = 1;j <= nbNs;j++)
9436
13.9k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9437
5.09k
                    break;
9438
7.22k
            if (j <= nbNs)
9439
5.09k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9440
2.13k
            else
9441
2.13k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9442
9443
688k
        } else {
9444
            /*
9445
             * Add the pair to atts
9446
             */
9447
688k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9448
4.08k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9449
0
                    goto next_attr;
9450
0
                }
9451
4.08k
                maxatts = ctxt->maxatts;
9452
4.08k
                atts = ctxt->atts;
9453
4.08k
            }
9454
688k
            ctxt->attallocs[nratts++] = alloc;
9455
688k
            atts[nbatts++] = attname;
9456
688k
            atts[nbatts++] = aprefix;
9457
            /*
9458
             * The namespace URI field is used temporarily to point at the
9459
             * base of the current input buffer for non-alloced attributes.
9460
             * When the input buffer is reallocated, all the pointers become
9461
             * invalid, but they can be reconstructed later.
9462
             */
9463
688k
            if (alloc)
9464
135k
                atts[nbatts++] = NULL;
9465
553k
            else
9466
553k
                atts[nbatts++] = ctxt->input->base;
9467
688k
            atts[nbatts++] = attvalue;
9468
688k
            attvalue += len;
9469
688k
            atts[nbatts++] = attvalue;
9470
            /*
9471
             * tag if some deallocation is needed
9472
             */
9473
688k
            if (alloc != 0) attval = 1;
9474
688k
            attvalue = NULL; /* moved into atts */
9475
688k
        }
9476
9477
741k
next_attr:
9478
741k
        if ((attvalue != NULL) && (alloc != 0)) {
9479
19.3k
            xmlFree(attvalue);
9480
19.3k
            attvalue = NULL;
9481
19.3k
        }
9482
9483
741k
  GROW
9484
741k
        if (ctxt->instate == XML_PARSER_EOF)
9485
0
            break;
9486
741k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9487
90.5k
      break;
9488
651k
  if (SKIP_BLANKS == 0) {
9489
4.73k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9490
4.73k
         "attributes construct error\n");
9491
4.73k
      break;
9492
4.73k
  }
9493
646k
        if ((cons == CUR_CONSUMED) && (id == ctxt->input->id) &&
9494
646k
            (attname == NULL) && (attvalue == NULL)) {
9495
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9496
0
           "xmlParseStartTag: problem parsing attributes\n");
9497
0
      break;
9498
0
  }
9499
646k
        GROW;
9500
646k
    }
9501
9502
1.73M
    if (ctxt->input->id != inputid) {
9503
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9504
0
                    "Unexpected change of input\n");
9505
0
        localname = NULL;
9506
0
        goto done;
9507
0
    }
9508
9509
    /* Reconstruct attribute value pointers. */
9510
2.42M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9511
688k
        if (atts[i+2] != NULL) {
9512
            /*
9513
             * Arithmetic on dangling pointers is technically undefined
9514
             * behavior, but well...
9515
             */
9516
553k
            ptrdiff_t offset = ctxt->input->base - atts[i+2];
9517
553k
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9518
553k
            atts[i+3] += offset;  /* value */
9519
553k
            atts[i+4] += offset;  /* valuend */
9520
553k
        }
9521
688k
    }
9522
9523
    /*
9524
     * The attributes defaulting
9525
     */
9526
1.73M
    if (ctxt->attsDefault != NULL) {
9527
9.01k
        xmlDefAttrsPtr defaults;
9528
9529
9.01k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9530
9.01k
  if (defaults != NULL) {
9531
32.5k
      for (i = 0;i < defaults->nbAttrs;i++) {
9532
25.1k
          attname = defaults->values[5 * i];
9533
25.1k
    aprefix = defaults->values[5 * i + 1];
9534
9535
                /*
9536
     * special work for namespaces defaulted defs
9537
     */
9538
25.1k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9539
        /*
9540
         * check that it's not a defined namespace
9541
         */
9542
4.53k
        for (j = 1;j <= nbNs;j++)
9543
2.54k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9544
1.59k
          break;
9545
3.58k
              if (j <= nbNs) continue;
9546
9547
1.98k
        nsname = xmlGetNamespace(ctxt, NULL);
9548
1.98k
        if (nsname != defaults->values[5 * i + 2]) {
9549
1.51k
      if (nsPush(ctxt, NULL,
9550
1.51k
                 defaults->values[5 * i + 2]) > 0)
9551
1.17k
          nbNs++;
9552
1.51k
        }
9553
21.5k
    } else if (aprefix == ctxt->str_xmlns) {
9554
        /*
9555
         * check that it's not a defined namespace
9556
         */
9557
7.39k
        for (j = 1;j <= nbNs;j++)
9558
2.24k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9559
726
          break;
9560
5.87k
              if (j <= nbNs) continue;
9561
9562
5.14k
        nsname = xmlGetNamespace(ctxt, attname);
9563
5.14k
        if (nsname != defaults->values[2]) {
9564
4.32k
      if (nsPush(ctxt, attname,
9565
4.32k
                 defaults->values[5 * i + 2]) > 0)
9566
1.64k
          nbNs++;
9567
4.32k
        }
9568
15.6k
    } else {
9569
        /*
9570
         * check that it's not a defined attribute
9571
         */
9572
485k
        for (j = 0;j < nbatts;j+=5) {
9573
470k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9574
357
          break;
9575
470k
        }
9576
15.6k
        if (j < nbatts) continue;
9577
9578
15.2k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9579
193
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9580
0
                            localname = NULL;
9581
0
                            goto done;
9582
0
      }
9583
193
      maxatts = ctxt->maxatts;
9584
193
      atts = ctxt->atts;
9585
193
        }
9586
15.2k
        atts[nbatts++] = attname;
9587
15.2k
        atts[nbatts++] = aprefix;
9588
15.2k
        if (aprefix == NULL)
9589
6.31k
      atts[nbatts++] = NULL;
9590
8.98k
        else
9591
8.98k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9592
15.2k
        atts[nbatts++] = defaults->values[5 * i + 2];
9593
15.2k
        atts[nbatts++] = defaults->values[5 * i + 3];
9594
15.2k
        if ((ctxt->standalone == 1) &&
9595
15.2k
            (defaults->values[5 * i + 4] != NULL)) {
9596
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9597
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9598
0
                                   attname, localname);
9599
0
        }
9600
15.2k
        nbdef++;
9601
15.2k
    }
9602
25.1k
      }
9603
7.47k
  }
9604
9.01k
    }
9605
9606
    /*
9607
     * The attributes checkings
9608
     */
9609
2.44M
    for (i = 0; i < nbatts;i += 5) {
9610
        /*
9611
  * The default namespace does not apply to attribute names.
9612
  */
9613
704k
  if (atts[i + 1] != NULL) {
9614
14.3k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9615
14.3k
      if (nsname == NULL) {
9616
10.7k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9617
10.7k
        "Namespace prefix %s for %s on %s is not defined\n",
9618
10.7k
        atts[i + 1], atts[i], localname);
9619
10.7k
      }
9620
14.3k
      atts[i + 2] = nsname;
9621
14.3k
  } else
9622
689k
      nsname = NULL;
9623
  /*
9624
   * [ WFC: Unique Att Spec ]
9625
   * No attribute name may appear more than once in the same
9626
   * start-tag or empty-element tag.
9627
   * As extended by the Namespace in XML REC.
9628
   */
9629
2.79M
        for (j = 0; j < i;j += 5) {
9630
2.23M
      if (atts[i] == atts[j]) {
9631
200k
          if (atts[i+1] == atts[j+1]) {
9632
144k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9633
144k
        break;
9634
144k
    }
9635
55.5k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9636
402
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9637
402
           "Namespaced Attribute %s in '%s' redefined\n",
9638
402
           atts[i], nsname, NULL);
9639
402
        break;
9640
402
    }
9641
55.5k
      }
9642
2.23M
  }
9643
704k
    }
9644
9645
1.73M
    nsname = xmlGetNamespace(ctxt, prefix);
9646
1.73M
    if ((prefix != NULL) && (nsname == NULL)) {
9647
3.01k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9648
3.01k
           "Namespace prefix %s on %s is not defined\n",
9649
3.01k
     prefix, localname, NULL);
9650
3.01k
    }
9651
1.73M
    *pref = prefix;
9652
1.73M
    *URI = nsname;
9653
9654
    /*
9655
     * SAX: Start of Element !
9656
     */
9657
1.73M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9658
1.73M
  (!ctxt->disableSAX)) {
9659
1.73M
  if (nbNs > 0)
9660
8.83k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9661
8.83k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9662
8.83k
        nbatts / 5, nbdef, atts);
9663
1.72M
  else
9664
1.72M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9665
1.72M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9666
1.73M
    }
9667
9668
1.73M
done:
9669
    /*
9670
     * Free up attribute allocated strings if needed
9671
     */
9672
1.73M
    if (attval != 0) {
9673
147k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9674
145k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9675
135k
          xmlFree((xmlChar *) atts[i]);
9676
1.71k
    }
9677
9678
1.73M
    return(localname);
9679
1.73M
}
9680
9681
/**
9682
 * xmlParseEndTag2:
9683
 * @ctxt:  an XML parser context
9684
 * @line:  line of the start tag
9685
 * @nsNr:  number of namespaces on the start tag
9686
 *
9687
 * parse an end of tag
9688
 *
9689
 * [42] ETag ::= '</' Name S? '>'
9690
 *
9691
 * With namespace
9692
 *
9693
 * [NS 9] ETag ::= '</' QName S? '>'
9694
 */
9695
9696
static void
9697
30.7k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9698
30.7k
    const xmlChar *name;
9699
9700
30.7k
    GROW;
9701
30.7k
    if ((RAW != '<') || (NXT(1) != '/')) {
9702
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9703
0
  return;
9704
0
    }
9705
30.7k
    SKIP(2);
9706
9707
30.7k
    if (tag->prefix == NULL)
9708
29.9k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9709
742
    else
9710
742
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9711
9712
    /*
9713
     * We should definitely be at the ending "S? '>'" part
9714
     */
9715
30.7k
    GROW;
9716
30.7k
    if (ctxt->instate == XML_PARSER_EOF)
9717
0
        return;
9718
30.7k
    SKIP_BLANKS;
9719
30.7k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9720
186
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9721
186
    } else
9722
30.5k
  NEXT1;
9723
9724
    /*
9725
     * [ WFC: Element Type Match ]
9726
     * The Name in an element's end-tag must match the element type in the
9727
     * start-tag.
9728
     *
9729
     */
9730
30.7k
    if (name != (xmlChar*)1) {
9731
163
        if (name == NULL) name = BAD_CAST "unparsable";
9732
163
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9733
163
         "Opening and ending tag mismatch: %s line %d and %s\n",
9734
163
                    ctxt->name, tag->line, name);
9735
163
    }
9736
9737
    /*
9738
     * SAX: End of Tag
9739
     */
9740
30.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9741
30.7k
  (!ctxt->disableSAX))
9742
30.4k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9743
30.4k
                                tag->URI);
9744
9745
30.7k
    spacePop(ctxt);
9746
30.7k
    if (tag->nsNr != 0)
9747
196
  nsPop(ctxt, tag->nsNr);
9748
30.7k
}
9749
9750
/**
9751
 * xmlParseCDSect:
9752
 * @ctxt:  an XML parser context
9753
 *
9754
 * Parse escaped pure raw content.
9755
 *
9756
 * [18] CDSect ::= CDStart CData CDEnd
9757
 *
9758
 * [19] CDStart ::= '<![CDATA['
9759
 *
9760
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9761
 *
9762
 * [21] CDEnd ::= ']]>'
9763
 */
9764
void
9765
0
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9766
0
    xmlChar *buf = NULL;
9767
0
    int len = 0;
9768
0
    int size = XML_PARSER_BUFFER_SIZE;
9769
0
    int r, rl;
9770
0
    int s, sl;
9771
0
    int cur, l;
9772
0
    int count = 0;
9773
0
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9774
0
                    XML_MAX_HUGE_LENGTH :
9775
0
                    XML_MAX_TEXT_LENGTH;
9776
9777
    /* Check 2.6.0 was NXT(0) not RAW */
9778
0
    if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9779
0
  SKIP(9);
9780
0
    } else
9781
0
        return;
9782
9783
0
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9784
0
    r = CUR_CHAR(rl);
9785
0
    if (!IS_CHAR(r)) {
9786
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9787
0
  ctxt->instate = XML_PARSER_CONTENT;
9788
0
        return;
9789
0
    }
9790
0
    NEXTL(rl);
9791
0
    s = CUR_CHAR(sl);
9792
0
    if (!IS_CHAR(s)) {
9793
0
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9794
0
  ctxt->instate = XML_PARSER_CONTENT;
9795
0
        return;
9796
0
    }
9797
0
    NEXTL(sl);
9798
0
    cur = CUR_CHAR(l);
9799
0
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
9800
0
    if (buf == NULL) {
9801
0
  xmlErrMemory(ctxt, NULL);
9802
0
  return;
9803
0
    }
9804
0
    while (IS_CHAR(cur) &&
9805
0
           ((r != ']') || (s != ']') || (cur != '>'))) {
9806
0
  if (len + 5 >= size) {
9807
0
      xmlChar *tmp;
9808
9809
0
      tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
9810
0
      if (tmp == NULL) {
9811
0
          xmlFree(buf);
9812
0
    xmlErrMemory(ctxt, NULL);
9813
0
    return;
9814
0
      }
9815
0
      buf = tmp;
9816
0
      size *= 2;
9817
0
  }
9818
0
  COPY_BUF(rl,buf,len,r);
9819
0
  r = s;
9820
0
  rl = sl;
9821
0
  s = cur;
9822
0
  sl = l;
9823
0
  count++;
9824
0
  if (count > 50) {
9825
0
      SHRINK;
9826
0
      GROW;
9827
0
            if (ctxt->instate == XML_PARSER_EOF) {
9828
0
    xmlFree(buf);
9829
0
    return;
9830
0
            }
9831
0
      count = 0;
9832
0
  }
9833
0
  NEXTL(l);
9834
0
  cur = CUR_CHAR(l);
9835
0
        if (len > maxLength) {
9836
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9837
0
                           "CData section too big found\n");
9838
0
            xmlFree(buf);
9839
0
            return;
9840
0
        }
9841
0
    }
9842
0
    buf[len] = 0;
9843
0
    ctxt->instate = XML_PARSER_CONTENT;
9844
0
    if (cur != '>') {
9845
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9846
0
                       "CData section not finished\n%.50s\n", buf);
9847
0
  xmlFree(buf);
9848
0
        return;
9849
0
    }
9850
0
    NEXTL(l);
9851
9852
    /*
9853
     * OK the buffer is to be consumed as cdata.
9854
     */
9855
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9856
0
  if (ctxt->sax->cdataBlock != NULL)
9857
0
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9858
0
  else if (ctxt->sax->characters != NULL)
9859
0
      ctxt->sax->characters(ctxt->userData, buf, len);
9860
0
    }
9861
0
    xmlFree(buf);
9862
0
}
9863
9864
/**
9865
 * xmlParseContentInternal:
9866
 * @ctxt:  an XML parser context
9867
 *
9868
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9869
 * unexpected EOF to the caller.
9870
 */
9871
9872
static void
9873
0
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9874
0
    int nameNr = ctxt->nameNr;
9875
9876
0
    GROW;
9877
0
    while ((RAW != 0) &&
9878
0
     (ctxt->instate != XML_PARSER_EOF)) {
9879
0
        int id = ctxt->input->id;
9880
0
  unsigned long cons = CUR_CONSUMED;
9881
0
  const xmlChar *cur = ctxt->input->cur;
9882
9883
  /*
9884
   * First case : a Processing Instruction.
9885
   */
9886
0
  if ((*cur == '<') && (cur[1] == '?')) {
9887
0
      xmlParsePI(ctxt);
9888
0
  }
9889
9890
  /*
9891
   * Second case : a CDSection
9892
   */
9893
  /* 2.6.0 test was *cur not RAW */
9894
0
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9895
0
      xmlParseCDSect(ctxt);
9896
0
  }
9897
9898
  /*
9899
   * Third case :  a comment
9900
   */
9901
0
  else if ((*cur == '<') && (NXT(1) == '!') &&
9902
0
     (NXT(2) == '-') && (NXT(3) == '-')) {
9903
0
      xmlParseComment(ctxt);
9904
0
      ctxt->instate = XML_PARSER_CONTENT;
9905
0
  }
9906
9907
  /*
9908
   * Fourth case :  a sub-element.
9909
   */
9910
0
  else if (*cur == '<') {
9911
0
            if (NXT(1) == '/') {
9912
0
                if (ctxt->nameNr <= nameNr)
9913
0
                    break;
9914
0
          xmlParseElementEnd(ctxt);
9915
0
            } else {
9916
0
          xmlParseElementStart(ctxt);
9917
0
            }
9918
0
  }
9919
9920
  /*
9921
   * Fifth case : a reference. If if has not been resolved,
9922
   *    parsing returns it's Name, create the node
9923
   */
9924
9925
0
  else if (*cur == '&') {
9926
0
      xmlParseReference(ctxt);
9927
0
  }
9928
9929
  /*
9930
   * Last case, text. Note that References are handled directly.
9931
   */
9932
0
  else {
9933
0
      xmlParseCharData(ctxt, 0);
9934
0
  }
9935
9936
0
  GROW;
9937
0
  SHRINK;
9938
9939
0
  if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
9940
0
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9941
0
                  "detected an error in element content\n");
9942
0
      xmlHaltParser(ctxt);
9943
0
            break;
9944
0
  }
9945
0
    }
9946
0
}
9947
9948
/**
9949
 * xmlParseContent:
9950
 * @ctxt:  an XML parser context
9951
 *
9952
 * Parse a content sequence. Stops at EOF or '</'.
9953
 *
9954
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9955
 */
9956
9957
void
9958
0
xmlParseContent(xmlParserCtxtPtr ctxt) {
9959
0
    int nameNr = ctxt->nameNr;
9960
9961
0
    xmlParseContentInternal(ctxt);
9962
9963
0
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9964
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9965
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9966
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9967
0
                "Premature end of data in tag %s line %d\n",
9968
0
    name, line, NULL);
9969
0
    }
9970
0
}
9971
9972
/**
9973
 * xmlParseElement:
9974
 * @ctxt:  an XML parser context
9975
 *
9976
 * parse an XML element
9977
 *
9978
 * [39] element ::= EmptyElemTag | STag content ETag
9979
 *
9980
 * [ WFC: Element Type Match ]
9981
 * The Name in an element's end-tag must match the element type in the
9982
 * start-tag.
9983
 *
9984
 */
9985
9986
void
9987
0
xmlParseElement(xmlParserCtxtPtr ctxt) {
9988
0
    if (xmlParseElementStart(ctxt) != 0)
9989
0
        return;
9990
9991
0
    xmlParseContentInternal(ctxt);
9992
0
    if (ctxt->instate == XML_PARSER_EOF)
9993
0
  return;
9994
9995
0
    if (CUR == 0) {
9996
0
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9997
0
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9998
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9999
0
                "Premature end of data in tag %s line %d\n",
10000
0
    name, line, NULL);
10001
0
        return;
10002
0
    }
10003
10004
0
    xmlParseElementEnd(ctxt);
10005
0
}
10006
10007
/**
10008
 * xmlParseElementStart:
10009
 * @ctxt:  an XML parser context
10010
 *
10011
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10012
 * opening tag was parsed, 1 if an empty element was parsed.
10013
 */
10014
static int
10015
0
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10016
0
    const xmlChar *name;
10017
0
    const xmlChar *prefix = NULL;
10018
0
    const xmlChar *URI = NULL;
10019
0
    xmlParserNodeInfo node_info;
10020
0
    int line, tlen = 0;
10021
0
    xmlNodePtr ret;
10022
0
    int nsNr = ctxt->nsNr;
10023
10024
0
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10025
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10026
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10027
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10028
0
        xmlParserMaxDepth);
10029
0
  xmlHaltParser(ctxt);
10030
0
  return(-1);
10031
0
    }
10032
10033
    /* Capture start position */
10034
0
    if (ctxt->record_info) {
10035
0
        node_info.begin_pos = ctxt->input->consumed +
10036
0
                          (CUR_PTR - ctxt->input->base);
10037
0
  node_info.begin_line = ctxt->input->line;
10038
0
    }
10039
10040
0
    if (ctxt->spaceNr == 0)
10041
0
  spacePush(ctxt, -1);
10042
0
    else if (*ctxt->space == -2)
10043
0
  spacePush(ctxt, -1);
10044
0
    else
10045
0
  spacePush(ctxt, *ctxt->space);
10046
10047
0
    line = ctxt->input->line;
10048
0
#ifdef LIBXML_SAX1_ENABLED
10049
0
    if (ctxt->sax2)
10050
0
#endif /* LIBXML_SAX1_ENABLED */
10051
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10052
0
#ifdef LIBXML_SAX1_ENABLED
10053
0
    else
10054
0
  name = xmlParseStartTag(ctxt);
10055
0
#endif /* LIBXML_SAX1_ENABLED */
10056
0
    if (ctxt->instate == XML_PARSER_EOF)
10057
0
  return(-1);
10058
0
    if (name == NULL) {
10059
0
  spacePop(ctxt);
10060
0
        return(-1);
10061
0
    }
10062
0
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10063
0
    ret = ctxt->node;
10064
10065
0
#ifdef LIBXML_VALID_ENABLED
10066
    /*
10067
     * [ VC: Root Element Type ]
10068
     * The Name in the document type declaration must match the element
10069
     * type of the root element.
10070
     */
10071
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10072
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10073
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10074
0
#endif /* LIBXML_VALID_ENABLED */
10075
10076
    /*
10077
     * Check for an Empty Element.
10078
     */
10079
0
    if ((RAW == '/') && (NXT(1) == '>')) {
10080
0
        SKIP(2);
10081
0
  if (ctxt->sax2) {
10082
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10083
0
    (!ctxt->disableSAX))
10084
0
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10085
0
#ifdef LIBXML_SAX1_ENABLED
10086
0
  } else {
10087
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10088
0
    (!ctxt->disableSAX))
10089
0
    ctxt->sax->endElement(ctxt->userData, name);
10090
0
#endif /* LIBXML_SAX1_ENABLED */
10091
0
  }
10092
0
  namePop(ctxt);
10093
0
  spacePop(ctxt);
10094
0
  if (nsNr != ctxt->nsNr)
10095
0
      nsPop(ctxt, ctxt->nsNr - nsNr);
10096
0
  if ( ret != NULL && ctxt->record_info ) {
10097
0
     node_info.end_pos = ctxt->input->consumed +
10098
0
            (CUR_PTR - ctxt->input->base);
10099
0
     node_info.end_line = ctxt->input->line;
10100
0
     node_info.node = ret;
10101
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10102
0
  }
10103
0
  return(1);
10104
0
    }
10105
0
    if (RAW == '>') {
10106
0
        NEXT1;
10107
0
    } else {
10108
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10109
0
         "Couldn't find end of Start Tag %s line %d\n",
10110
0
                    name, line, NULL);
10111
10112
  /*
10113
   * end of parsing of this node.
10114
   */
10115
0
  nodePop(ctxt);
10116
0
  namePop(ctxt);
10117
0
  spacePop(ctxt);
10118
0
  if (nsNr != ctxt->nsNr)
10119
0
      nsPop(ctxt, ctxt->nsNr - nsNr);
10120
10121
  /*
10122
   * Capture end position and add node
10123
   */
10124
0
  if ( ret != NULL && ctxt->record_info ) {
10125
0
     node_info.end_pos = ctxt->input->consumed +
10126
0
            (CUR_PTR - ctxt->input->base);
10127
0
     node_info.end_line = ctxt->input->line;
10128
0
     node_info.node = ret;
10129
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10130
0
  }
10131
0
  return(-1);
10132
0
    }
10133
10134
0
    return(0);
10135
0
}
10136
10137
/**
10138
 * xmlParseElementEnd:
10139
 * @ctxt:  an XML parser context
10140
 *
10141
 * Parse the end of an XML element.
10142
 */
10143
static void
10144
0
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10145
0
    xmlParserNodeInfo node_info;
10146
0
    xmlNodePtr ret = ctxt->node;
10147
10148
0
    if (ctxt->nameNr <= 0)
10149
0
        return;
10150
10151
    /*
10152
     * parse the end of tag: '</' should be here.
10153
     */
10154
0
    if (ctxt->sax2) {
10155
0
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10156
0
  namePop(ctxt);
10157
0
    }
10158
0
#ifdef LIBXML_SAX1_ENABLED
10159
0
    else
10160
0
  xmlParseEndTag1(ctxt, 0);
10161
0
#endif /* LIBXML_SAX1_ENABLED */
10162
10163
    /*
10164
     * Capture end position and add node
10165
     */
10166
0
    if ( ret != NULL && ctxt->record_info ) {
10167
0
       node_info.end_pos = ctxt->input->consumed +
10168
0
                          (CUR_PTR - ctxt->input->base);
10169
0
       node_info.end_line = ctxt->input->line;
10170
0
       node_info.node = ret;
10171
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10172
0
    }
10173
0
}
10174
10175
/**
10176
 * xmlParseVersionNum:
10177
 * @ctxt:  an XML parser context
10178
 *
10179
 * parse the XML version value.
10180
 *
10181
 * [26] VersionNum ::= '1.' [0-9]+
10182
 *
10183
 * In practice allow [0-9].[0-9]+ at that level
10184
 *
10185
 * Returns the string giving the XML version number, or NULL
10186
 */
10187
xmlChar *
10188
2.97k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10189
2.97k
    xmlChar *buf = NULL;
10190
2.97k
    int len = 0;
10191
2.97k
    int size = 10;
10192
2.97k
    xmlChar cur;
10193
10194
2.97k
    buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10195
2.97k
    if (buf == NULL) {
10196
0
  xmlErrMemory(ctxt, NULL);
10197
0
  return(NULL);
10198
0
    }
10199
2.97k
    cur = CUR;
10200
2.97k
    if (!((cur >= '0') && (cur <= '9'))) {
10201
17
  xmlFree(buf);
10202
17
  return(NULL);
10203
17
    }
10204
2.95k
    buf[len++] = cur;
10205
2.95k
    NEXT;
10206
2.95k
    cur=CUR;
10207
2.95k
    if (cur != '.') {
10208
12
  xmlFree(buf);
10209
12
  return(NULL);
10210
12
    }
10211
2.94k
    buf[len++] = cur;
10212
2.94k
    NEXT;
10213
2.94k
    cur=CUR;
10214
2.77M
    while ((cur >= '0') && (cur <= '9')) {
10215
2.76M
  if (len + 1 >= size) {
10216
325
      xmlChar *tmp;
10217
10218
325
      size *= 2;
10219
325
      tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10220
325
      if (tmp == NULL) {
10221
0
          xmlFree(buf);
10222
0
    xmlErrMemory(ctxt, NULL);
10223
0
    return(NULL);
10224
0
      }
10225
325
      buf = tmp;
10226
325
  }
10227
2.76M
  buf[len++] = cur;
10228
2.76M
  NEXT;
10229
2.76M
  cur=CUR;
10230
2.76M
    }
10231
2.94k
    buf[len] = 0;
10232
2.94k
    return(buf);
10233
2.94k
}
10234
10235
/**
10236
 * xmlParseVersionInfo:
10237
 * @ctxt:  an XML parser context
10238
 *
10239
 * parse the XML version.
10240
 *
10241
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10242
 *
10243
 * [25] Eq ::= S? '=' S?
10244
 *
10245
 * Returns the version string, e.g. "1.0"
10246
 */
10247
10248
xmlChar *
10249
3.73k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10250
3.73k
    xmlChar *version = NULL;
10251
10252
3.73k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10253
2.99k
  SKIP(7);
10254
2.99k
  SKIP_BLANKS;
10255
2.99k
  if (RAW != '=') {
10256
11
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10257
11
      return(NULL);
10258
11
        }
10259
2.98k
  NEXT;
10260
2.98k
  SKIP_BLANKS;
10261
2.98k
  if (RAW == '"') {
10262
2.96k
      NEXT;
10263
2.96k
      version = xmlParseVersionNum(ctxt);
10264
2.96k
      if (RAW != '"') {
10265
63
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10266
63
      } else
10267
2.90k
          NEXT;
10268
2.96k
  } else if (RAW == '\''){
10269
8
      NEXT;
10270
8
      version = xmlParseVersionNum(ctxt);
10271
8
      if (RAW != '\'') {
10272
6
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10273
6
      } else
10274
2
          NEXT;
10275
12
  } else {
10276
12
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10277
12
  }
10278
2.98k
    }
10279
3.72k
    return(version);
10280
3.73k
}
10281
10282
/**
10283
 * xmlParseEncName:
10284
 * @ctxt:  an XML parser context
10285
 *
10286
 * parse the XML encoding name
10287
 *
10288
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10289
 *
10290
 * Returns the encoding name value or NULL
10291
 */
10292
xmlChar *
10293
3.50k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10294
3.50k
    xmlChar *buf = NULL;
10295
3.50k
    int len = 0;
10296
3.50k
    int size = 10;
10297
3.50k
    xmlChar cur;
10298
10299
3.50k
    cur = CUR;
10300
3.50k
    if (((cur >= 'a') && (cur <= 'z')) ||
10301
3.50k
        ((cur >= 'A') && (cur <= 'Z'))) {
10302
3.48k
  buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
10303
3.48k
  if (buf == NULL) {
10304
0
      xmlErrMemory(ctxt, NULL);
10305
0
      return(NULL);
10306
0
  }
10307
10308
3.48k
  buf[len++] = cur;
10309
3.48k
  NEXT;
10310
3.48k
  cur = CUR;
10311
1.09M
  while (((cur >= 'a') && (cur <= 'z')) ||
10312
1.09M
         ((cur >= 'A') && (cur <= 'Z')) ||
10313
1.09M
         ((cur >= '0') && (cur <= '9')) ||
10314
1.09M
         (cur == '.') || (cur == '_') ||
10315
1.09M
         (cur == '-')) {
10316
1.08M
      if (len + 1 >= size) {
10317
408
          xmlChar *tmp;
10318
10319
408
    size *= 2;
10320
408
    tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10321
408
    if (tmp == NULL) {
10322
0
        xmlErrMemory(ctxt, NULL);
10323
0
        xmlFree(buf);
10324
0
        return(NULL);
10325
0
    }
10326
408
    buf = tmp;
10327
408
      }
10328
1.08M
      buf[len++] = cur;
10329
1.08M
      NEXT;
10330
1.08M
      cur = CUR;
10331
1.08M
      if (cur == 0) {
10332
71
          SHRINK;
10333
71
    GROW;
10334
71
    cur = CUR;
10335
71
      }
10336
1.08M
        }
10337
3.48k
  buf[len] = 0;
10338
3.48k
    } else {
10339
12
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10340
12
    }
10341
3.50k
    return(buf);
10342
3.50k
}
10343
10344
/**
10345
 * xmlParseEncodingDecl:
10346
 * @ctxt:  an XML parser context
10347
 *
10348
 * parse the XML encoding declaration
10349
 *
10350
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10351
 *
10352
 * this setups the conversion filters.
10353
 *
10354
 * Returns the encoding value or NULL
10355
 */
10356
10357
const xmlChar *
10358
3.73k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10359
3.73k
    xmlChar *encoding = NULL;
10360
10361
3.73k
    SKIP_BLANKS;
10362
3.73k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10363
3.50k
  SKIP(8);
10364
3.50k
  SKIP_BLANKS;
10365
3.50k
  if (RAW != '=') {
10366
5
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10367
5
      return(NULL);
10368
5
        }
10369
3.50k
  NEXT;
10370
3.50k
  SKIP_BLANKS;
10371
3.50k
  if (RAW == '"') {
10372
3.48k
      NEXT;
10373
3.48k
      encoding = xmlParseEncName(ctxt);
10374
3.48k
      if (RAW != '"') {
10375
97
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10376
97
    xmlFree((xmlChar *) encoding);
10377
97
    return(NULL);
10378
97
      } else
10379
3.39k
          NEXT;
10380
3.48k
  } else if (RAW == '\''){
10381
12
      NEXT;
10382
12
      encoding = xmlParseEncName(ctxt);
10383
12
      if (RAW != '\'') {
10384
8
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10385
8
    xmlFree((xmlChar *) encoding);
10386
8
    return(NULL);
10387
8
      } else
10388
4
          NEXT;
10389
12
  } else {
10390
3
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10391
3
  }
10392
10393
        /*
10394
         * Non standard parsing, allowing the user to ignore encoding
10395
         */
10396
3.39k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10397
0
      xmlFree((xmlChar *) encoding);
10398
0
            return(NULL);
10399
0
  }
10400
10401
  /*
10402
   * UTF-16 encoding switch has already taken place at this stage,
10403
   * more over the little-endian/big-endian selection is already done
10404
   */
10405
3.39k
        if ((encoding != NULL) &&
10406
3.39k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10407
3.39k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10408
      /*
10409
       * If no encoding was passed to the parser, that we are
10410
       * using UTF-16 and no decoder is present i.e. the
10411
       * document is apparently UTF-8 compatible, then raise an
10412
       * encoding mismatch fatal error
10413
       */
10414
3
      if ((ctxt->encoding == NULL) &&
10415
3
          (ctxt->input->buf != NULL) &&
10416
3
          (ctxt->input->buf->encoder == NULL)) {
10417
2
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10418
2
      "Document labelled UTF-16 but has UTF-8 content\n");
10419
2
      }
10420
3
      if (ctxt->encoding != NULL)
10421
0
    xmlFree((xmlChar *) ctxt->encoding);
10422
3
      ctxt->encoding = encoding;
10423
3
  }
10424
  /*
10425
   * UTF-8 encoding is handled natively
10426
   */
10427
3.39k
        else if ((encoding != NULL) &&
10428
3.39k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10429
3.39k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10430
35
      if (ctxt->encoding != NULL)
10431
0
    xmlFree((xmlChar *) ctxt->encoding);
10432
35
      ctxt->encoding = encoding;
10433
35
  }
10434
3.36k
  else if (encoding != NULL) {
10435
3.35k
      xmlCharEncodingHandlerPtr handler;
10436
10437
3.35k
      if (ctxt->input->encoding != NULL)
10438
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10439
3.35k
      ctxt->input->encoding = encoding;
10440
10441
3.35k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10442
3.35k
      if (handler != NULL) {
10443
2.93k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10444
        /* failed to convert */
10445
5
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10446
5
        return(NULL);
10447
5
    }
10448
2.93k
      } else {
10449
424
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10450
424
      "Unsupported encoding %s\n", encoding);
10451
424
    return(NULL);
10452
424
      }
10453
3.35k
  }
10454
3.39k
    }
10455
3.19k
    return(encoding);
10456
3.73k
}
10457
10458
/**
10459
 * xmlParseSDDecl:
10460
 * @ctxt:  an XML parser context
10461
 *
10462
 * parse the XML standalone declaration
10463
 *
10464
 * [32] SDDecl ::= S 'standalone' Eq
10465
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10466
 *
10467
 * [ VC: Standalone Document Declaration ]
10468
 * TODO The standalone document declaration must have the value "no"
10469
 * if any external markup declarations contain declarations of:
10470
 *  - attributes with default values, if elements to which these
10471
 *    attributes apply appear in the document without specifications
10472
 *    of values for these attributes, or
10473
 *  - entities (other than amp, lt, gt, apos, quot), if references
10474
 *    to those entities appear in the document, or
10475
 *  - attributes with values subject to normalization, where the
10476
 *    attribute appears in the document with a value which will change
10477
 *    as a result of normalization, or
10478
 *  - element types with element content, if white space occurs directly
10479
 *    within any instance of those types.
10480
 *
10481
 * Returns:
10482
 *   1 if standalone="yes"
10483
 *   0 if standalone="no"
10484
 *  -2 if standalone attribute is missing or invalid
10485
 *    (A standalone value of -2 means that the XML declaration was found,
10486
 *     but no value was specified for the standalone attribute).
10487
 */
10488
10489
int
10490
460
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10491
460
    int standalone = -2;
10492
10493
460
    SKIP_BLANKS;
10494
460
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10495
33
  SKIP(10);
10496
33
        SKIP_BLANKS;
10497
33
  if (RAW != '=') {
10498
3
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10499
3
      return(standalone);
10500
3
        }
10501
30
  NEXT;
10502
30
  SKIP_BLANKS;
10503
30
        if (RAW == '\''){
10504
14
      NEXT;
10505
14
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10506
2
          standalone = 0;
10507
2
                SKIP(2);
10508
12
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10509
12
                 (NXT(2) == 's')) {
10510
2
          standalone = 1;
10511
2
    SKIP(3);
10512
10
            } else {
10513
10
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10514
10
      }
10515
14
      if (RAW != '\'') {
10516
13
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10517
13
      } else
10518
1
          NEXT;
10519
16
  } else if (RAW == '"'){
10520
12
      NEXT;
10521
12
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10522
2
          standalone = 0;
10523
2
    SKIP(2);
10524
10
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10525
10
                 (NXT(2) == 's')) {
10526
2
          standalone = 1;
10527
2
                SKIP(3);
10528
8
            } else {
10529
8
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10530
8
      }
10531
12
      if (RAW != '"') {
10532
10
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10533
10
      } else
10534
2
          NEXT;
10535
12
  } else {
10536
4
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10537
4
        }
10538
30
    }
10539
457
    return(standalone);
10540
460
}
10541
10542
/**
10543
 * xmlParseXMLDecl:
10544
 * @ctxt:  an XML parser context
10545
 *
10546
 * parse an XML declaration header
10547
 *
10548
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10549
 */
10550
10551
void
10552
3.73k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10553
3.73k
    xmlChar *version;
10554
10555
    /*
10556
     * This value for standalone indicates that the document has an
10557
     * XML declaration but it does not have a standalone attribute.
10558
     * It will be overwritten later if a standalone attribute is found.
10559
     */
10560
3.73k
    ctxt->input->standalone = -2;
10561
10562
    /*
10563
     * We know that '<?xml' is here.
10564
     */
10565
3.73k
    SKIP(5);
10566
10567
3.73k
    if (!IS_BLANK_CH(RAW)) {
10568
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10569
0
                 "Blank needed after '<?xml'\n");
10570
0
    }
10571
3.73k
    SKIP_BLANKS;
10572
10573
    /*
10574
     * We must have the VersionInfo here.
10575
     */
10576
3.73k
    version = xmlParseVersionInfo(ctxt);
10577
3.73k
    if (version == NULL) {
10578
788
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10579
2.94k
    } else {
10580
2.94k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10581
      /*
10582
       * Changed here for XML-1.0 5th edition
10583
       */
10584
1.44k
      if (ctxt->options & XML_PARSE_OLD10) {
10585
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10586
0
                "Unsupported version '%s'\n",
10587
0
                version);
10588
1.44k
      } else {
10589
1.44k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10590
1.41k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10591
1.41k
                      "Unsupported version '%s'\n",
10592
1.41k
          version, NULL);
10593
1.41k
    } else {
10594
25
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10595
25
              "Unsupported version '%s'\n",
10596
25
              version);
10597
25
    }
10598
1.44k
      }
10599
1.44k
  }
10600
2.94k
  if (ctxt->version != NULL)
10601
0
      xmlFree((void *) ctxt->version);
10602
2.94k
  ctxt->version = version;
10603
2.94k
    }
10604
10605
    /*
10606
     * We may have the encoding declaration
10607
     */
10608
3.73k
    if (!IS_BLANK_CH(RAW)) {
10609
828
        if ((RAW == '?') && (NXT(1) == '>')) {
10610
4
      SKIP(2);
10611
4
      return;
10612
4
  }
10613
824
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10614
824
    }
10615
3.73k
    xmlParseEncodingDecl(ctxt);
10616
3.73k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10617
3.73k
         (ctxt->instate == XML_PARSER_EOF)) {
10618
  /*
10619
   * The XML REC instructs us to stop parsing right here
10620
   */
10621
429
        return;
10622
429
    }
10623
10624
    /*
10625
     * We may have the standalone status.
10626
     */
10627
3.30k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10628
2.92k
        if ((RAW == '?') && (NXT(1) == '>')) {
10629
2.84k
      SKIP(2);
10630
2.84k
      return;
10631
2.84k
  }
10632
79
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10633
79
    }
10634
10635
    /*
10636
     * We can grow the input buffer freely at that point
10637
     */
10638
460
    GROW;
10639
10640
460
    SKIP_BLANKS;
10641
460
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10642
10643
460
    SKIP_BLANKS;
10644
460
    if ((RAW == '?') && (NXT(1) == '>')) {
10645
35
        SKIP(2);
10646
425
    } else if (RAW == '>') {
10647
        /* Deprecated old WD ... */
10648
5
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10649
5
  NEXT;
10650
420
    } else {
10651
420
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10652
420
  MOVETO_ENDTAG(CUR_PTR);
10653
420
  NEXT;
10654
420
    }
10655
460
}
10656
10657
/**
10658
 * xmlParseMisc:
10659
 * @ctxt:  an XML parser context
10660
 *
10661
 * parse an XML Misc* optional field.
10662
 *
10663
 * [27] Misc ::= Comment | PI |  S
10664
 */
10665
10666
void
10667
0
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10668
0
    while (ctxt->instate != XML_PARSER_EOF) {
10669
0
        SKIP_BLANKS;
10670
0
        GROW;
10671
0
        if ((RAW == '<') && (NXT(1) == '?')) {
10672
0
      xmlParsePI(ctxt);
10673
0
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10674
0
      xmlParseComment(ctxt);
10675
0
        } else {
10676
0
            break;
10677
0
        }
10678
0
    }
10679
0
}
10680
10681
/**
10682
 * xmlParseDocument:
10683
 * @ctxt:  an XML parser context
10684
 *
10685
 * parse an XML document (and build a tree if using the standard SAX
10686
 * interface).
10687
 *
10688
 * [1] document ::= prolog element Misc*
10689
 *
10690
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10691
 *
10692
 * Returns 0, -1 in case of error. the parser context is augmented
10693
 *                as a result of the parsing.
10694
 */
10695
10696
int
10697
0
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10698
0
    xmlChar start[4];
10699
0
    xmlCharEncoding enc;
10700
10701
0
    xmlInitParser();
10702
10703
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10704
0
        return(-1);
10705
10706
0
    GROW;
10707
10708
    /*
10709
     * SAX: detecting the level.
10710
     */
10711
0
    xmlDetectSAX2(ctxt);
10712
10713
    /*
10714
     * SAX: beginning of the document processing.
10715
     */
10716
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10717
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10718
0
    if (ctxt->instate == XML_PARSER_EOF)
10719
0
  return(-1);
10720
10721
0
    if ((ctxt->encoding == NULL) &&
10722
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10723
  /*
10724
   * Get the 4 first bytes and decode the charset
10725
   * if enc != XML_CHAR_ENCODING_NONE
10726
   * plug some encoding conversion routines.
10727
   */
10728
0
  start[0] = RAW;
10729
0
  start[1] = NXT(1);
10730
0
  start[2] = NXT(2);
10731
0
  start[3] = NXT(3);
10732
0
  enc = xmlDetectCharEncoding(&start[0], 4);
10733
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10734
0
      xmlSwitchEncoding(ctxt, enc);
10735
0
  }
10736
0
    }
10737
10738
10739
0
    if (CUR == 0) {
10740
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10741
0
  return(-1);
10742
0
    }
10743
10744
    /*
10745
     * Check for the XMLDecl in the Prolog.
10746
     * do not GROW here to avoid the detected encoder to decode more
10747
     * than just the first line, unless the amount of data is really
10748
     * too small to hold "<?xml version="1.0" encoding="foo"
10749
     */
10750
0
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10751
0
       GROW;
10752
0
    }
10753
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10754
10755
  /*
10756
   * Note that we will switch encoding on the fly.
10757
   */
10758
0
  xmlParseXMLDecl(ctxt);
10759
0
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10760
0
      (ctxt->instate == XML_PARSER_EOF)) {
10761
      /*
10762
       * The XML REC instructs us to stop parsing right here
10763
       */
10764
0
      return(-1);
10765
0
  }
10766
0
  ctxt->standalone = ctxt->input->standalone;
10767
0
  SKIP_BLANKS;
10768
0
    } else {
10769
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10770
0
    }
10771
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10772
0
        ctxt->sax->startDocument(ctxt->userData);
10773
0
    if (ctxt->instate == XML_PARSER_EOF)
10774
0
  return(-1);
10775
0
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10776
0
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10777
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10778
0
    }
10779
10780
    /*
10781
     * The Misc part of the Prolog
10782
     */
10783
0
    xmlParseMisc(ctxt);
10784
10785
    /*
10786
     * Then possibly doc type declaration(s) and more Misc
10787
     * (doctypedecl Misc*)?
10788
     */
10789
0
    GROW;
10790
0
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10791
10792
0
  ctxt->inSubset = 1;
10793
0
  xmlParseDocTypeDecl(ctxt);
10794
0
  if (RAW == '[') {
10795
0
      ctxt->instate = XML_PARSER_DTD;
10796
0
      xmlParseInternalSubset(ctxt);
10797
0
      if (ctxt->instate == XML_PARSER_EOF)
10798
0
    return(-1);
10799
0
  }
10800
10801
  /*
10802
   * Create and update the external subset.
10803
   */
10804
0
  ctxt->inSubset = 2;
10805
0
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10806
0
      (!ctxt->disableSAX))
10807
0
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10808
0
                                ctxt->extSubSystem, ctxt->extSubURI);
10809
0
  if (ctxt->instate == XML_PARSER_EOF)
10810
0
      return(-1);
10811
0
  ctxt->inSubset = 0;
10812
10813
0
        xmlCleanSpecialAttr(ctxt);
10814
10815
0
  ctxt->instate = XML_PARSER_PROLOG;
10816
0
  xmlParseMisc(ctxt);
10817
0
    }
10818
10819
    /*
10820
     * Time to start parsing the tree itself
10821
     */
10822
0
    GROW;
10823
0
    if (RAW != '<') {
10824
0
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10825
0
           "Start tag expected, '<' not found\n");
10826
0
    } else {
10827
0
  ctxt->instate = XML_PARSER_CONTENT;
10828
0
  xmlParseElement(ctxt);
10829
0
  ctxt->instate = XML_PARSER_EPILOG;
10830
10831
10832
  /*
10833
   * The Misc part at the end
10834
   */
10835
0
  xmlParseMisc(ctxt);
10836
10837
0
  if (RAW != 0) {
10838
0
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10839
0
  }
10840
0
  ctxt->instate = XML_PARSER_EOF;
10841
0
    }
10842
10843
    /*
10844
     * SAX: end of the document processing.
10845
     */
10846
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10847
0
        ctxt->sax->endDocument(ctxt->userData);
10848
10849
    /*
10850
     * Remove locally kept entity definitions if the tree was not built
10851
     */
10852
0
    if ((ctxt->myDoc != NULL) &&
10853
0
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10854
0
  xmlFreeDoc(ctxt->myDoc);
10855
0
  ctxt->myDoc = NULL;
10856
0
    }
10857
10858
0
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10859
0
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10860
0
  if (ctxt->valid)
10861
0
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10862
0
  if (ctxt->nsWellFormed)
10863
0
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10864
0
  if (ctxt->options & XML_PARSE_OLD10)
10865
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10866
0
    }
10867
0
    if (! ctxt->wellFormed) {
10868
0
  ctxt->valid = 0;
10869
0
  return(-1);
10870
0
    }
10871
0
    return(0);
10872
0
}
10873
10874
/**
10875
 * xmlParseExtParsedEnt:
10876
 * @ctxt:  an XML parser context
10877
 *
10878
 * parse a general parsed entity
10879
 * An external general parsed entity is well-formed if it matches the
10880
 * production labeled extParsedEnt.
10881
 *
10882
 * [78] extParsedEnt ::= TextDecl? content
10883
 *
10884
 * Returns 0, -1 in case of error. the parser context is augmented
10885
 *                as a result of the parsing.
10886
 */
10887
10888
int
10889
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10890
0
    xmlChar start[4];
10891
0
    xmlCharEncoding enc;
10892
10893
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10894
0
        return(-1);
10895
10896
0
    xmlDetectSAX2(ctxt);
10897
10898
0
    GROW;
10899
10900
    /*
10901
     * SAX: beginning of the document processing.
10902
     */
10903
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10904
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10905
10906
    /*
10907
     * Get the 4 first bytes and decode the charset
10908
     * if enc != XML_CHAR_ENCODING_NONE
10909
     * plug some encoding conversion routines.
10910
     */
10911
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10912
0
  start[0] = RAW;
10913
0
  start[1] = NXT(1);
10914
0
  start[2] = NXT(2);
10915
0
  start[3] = NXT(3);
10916
0
  enc = xmlDetectCharEncoding(start, 4);
10917
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10918
0
      xmlSwitchEncoding(ctxt, enc);
10919
0
  }
10920
0
    }
10921
10922
10923
0
    if (CUR == 0) {
10924
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10925
0
    }
10926
10927
    /*
10928
     * Check for the XMLDecl in the Prolog.
10929
     */
10930
0
    GROW;
10931
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10932
10933
  /*
10934
   * Note that we will switch encoding on the fly.
10935
   */
10936
0
  xmlParseXMLDecl(ctxt);
10937
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10938
      /*
10939
       * The XML REC instructs us to stop parsing right here
10940
       */
10941
0
      return(-1);
10942
0
  }
10943
0
  SKIP_BLANKS;
10944
0
    } else {
10945
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10946
0
    }
10947
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10948
0
        ctxt->sax->startDocument(ctxt->userData);
10949
0
    if (ctxt->instate == XML_PARSER_EOF)
10950
0
  return(-1);
10951
10952
    /*
10953
     * Doing validity checking on chunk doesn't make sense
10954
     */
10955
0
    ctxt->instate = XML_PARSER_CONTENT;
10956
0
    ctxt->validate = 0;
10957
0
    ctxt->loadsubset = 0;
10958
0
    ctxt->depth = 0;
10959
10960
0
    xmlParseContent(ctxt);
10961
0
    if (ctxt->instate == XML_PARSER_EOF)
10962
0
  return(-1);
10963
10964
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10965
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10966
0
    } else if (RAW != 0) {
10967
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10968
0
    }
10969
10970
    /*
10971
     * SAX: end of the document processing.
10972
     */
10973
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10974
0
        ctxt->sax->endDocument(ctxt->userData);
10975
10976
0
    if (! ctxt->wellFormed) return(-1);
10977
0
    return(0);
10978
0
}
10979
10980
#ifdef LIBXML_PUSH_ENABLED
10981
/************************************************************************
10982
 *                  *
10983
 *    Progressive parsing interfaces        *
10984
 *                  *
10985
 ************************************************************************/
10986
10987
/**
10988
 * xmlParseLookupSequence:
10989
 * @ctxt:  an XML parser context
10990
 * @first:  the first char to lookup
10991
 * @next:  the next char to lookup or zero
10992
 * @third:  the next char to lookup or zero
10993
 *
10994
 * Try to find if a sequence (first, next, third) or  just (first next) or
10995
 * (first) is available in the input stream.
10996
 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10997
 * to avoid rescanning sequences of bytes, it DOES change the state of the
10998
 * parser, do not use liberally.
10999
 *
11000
 * Returns the index to the current parsing point if the full sequence
11001
 *      is available, -1 otherwise.
11002
 */
11003
static int
11004
xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
11005
214k
                       xmlChar next, xmlChar third) {
11006
214k
    int base, len;
11007
214k
    xmlParserInputPtr in;
11008
214k
    const xmlChar *buf;
11009
11010
214k
    in = ctxt->input;
11011
214k
    if (in == NULL) return(-1);
11012
214k
    base = in->cur - in->base;
11013
214k
    if (base < 0) return(-1);
11014
214k
    if (ctxt->checkIndex > base)
11015
64.7k
        base = ctxt->checkIndex;
11016
214k
    if (in->buf == NULL) {
11017
0
  buf = in->base;
11018
0
  len = in->length;
11019
214k
    } else {
11020
214k
  buf = xmlBufContent(in->buf->buffer);
11021
214k
  len = xmlBufUse(in->buf->buffer);
11022
214k
    }
11023
    /* take into account the sequence length */
11024
214k
    if (third) len -= 2;
11025
78.1k
    else if (next) len --;
11026
2.22G
    for (;base < len;base++) {
11027
2.22G
        if (buf[base] == first) {
11028
1.40M
      if (third != 0) {
11029
1.38M
    if ((buf[base + 1] != next) ||
11030
1.38M
        (buf[base + 2] != third)) continue;
11031
1.38M
      } else if (next != 0) {
11032
27.9k
    if (buf[base + 1] != next) continue;
11033
27.9k
      }
11034
144k
      ctxt->checkIndex = 0;
11035
#ifdef DEBUG_PUSH
11036
      if (next == 0)
11037
    xmlGenericError(xmlGenericErrorContext,
11038
      "PP: lookup '%c' found at %d\n",
11039
      first, base);
11040
      else if (third == 0)
11041
    xmlGenericError(xmlGenericErrorContext,
11042
      "PP: lookup '%c%c' found at %d\n",
11043
      first, next, base);
11044
      else
11045
    xmlGenericError(xmlGenericErrorContext,
11046
      "PP: lookup '%c%c%c' found at %d\n",
11047
      first, next, third, base);
11048
#endif
11049
144k
      return(base - (in->cur - in->base));
11050
1.40M
  }
11051
2.22G
    }
11052
70.0k
    ctxt->checkIndex = base;
11053
#ifdef DEBUG_PUSH
11054
    if (next == 0)
11055
  xmlGenericError(xmlGenericErrorContext,
11056
    "PP: lookup '%c' failed\n", first);
11057
    else if (third == 0)
11058
  xmlGenericError(xmlGenericErrorContext,
11059
    "PP: lookup '%c%c' failed\n", first, next);
11060
    else
11061
  xmlGenericError(xmlGenericErrorContext,
11062
    "PP: lookup '%c%c%c' failed\n", first, next, third);
11063
#endif
11064
70.0k
    return(-1);
11065
214k
}
11066
11067
/**
11068
 * xmlParseGetLasts:
11069
 * @ctxt:  an XML parser context
11070
 * @lastlt:  pointer to store the last '<' from the input
11071
 * @lastgt:  pointer to store the last '>' from the input
11072
 *
11073
 * Lookup the last < and > in the current chunk
11074
 */
11075
static void
11076
xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11077
115k
                 const xmlChar **lastgt) {
11078
115k
    const xmlChar *tmp;
11079
11080
115k
    if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11081
0
  xmlGenericError(xmlGenericErrorContext,
11082
0
        "Internal error: xmlParseGetLasts\n");
11083
0
  return;
11084
0
    }
11085
115k
    if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
11086
30.9k
        tmp = ctxt->input->end;
11087
30.9k
  tmp--;
11088
2.76G
  while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
11089
30.9k
  if (tmp < ctxt->input->base) {
11090
2.03k
      *lastlt = NULL;
11091
2.03k
      *lastgt = NULL;
11092
28.9k
  } else {
11093
28.9k
      *lastlt = tmp;
11094
28.9k
      tmp++;
11095
1.51G
      while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11096
1.51G
          if (*tmp == '\'') {
11097
9.07k
        tmp++;
11098
515M
        while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11099
9.07k
        if (tmp < ctxt->input->end) tmp++;
11100
1.51G
    } else if (*tmp == '"') {
11101
437k
        tmp++;
11102
516M
        while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11103
437k
        if (tmp < ctxt->input->end) tmp++;
11104
437k
    } else
11105
1.51G
        tmp++;
11106
1.51G
      }
11107
28.9k
      if (tmp < ctxt->input->end)
11108
8.70k
          *lastgt = tmp;
11109
20.2k
      else {
11110
20.2k
          tmp = *lastlt;
11111
20.2k
    tmp--;
11112
1.15G
    while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11113
20.2k
    if (tmp >= ctxt->input->base)
11114
14.6k
        *lastgt = tmp;
11115
5.63k
    else
11116
5.63k
        *lastgt = NULL;
11117
20.2k
      }
11118
28.9k
  }
11119
84.2k
    } else {
11120
84.2k
        *lastlt = NULL;
11121
84.2k
  *lastgt = NULL;
11122
84.2k
    }
11123
115k
}
11124
/**
11125
 * xmlCheckCdataPush:
11126
 * @cur: pointer to the block of characters
11127
 * @len: length of the block in bytes
11128
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11129
 *
11130
 * Check that the block of characters is okay as SCdata content [20]
11131
 *
11132
 * Returns the number of bytes to pass if okay, a negative index where an
11133
 *         UTF-8 error occurred otherwise
11134
 */
11135
static int
11136
126k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11137
126k
    int ix;
11138
126k
    unsigned char c;
11139
126k
    int codepoint;
11140
11141
126k
    if ((utf == NULL) || (len <= 0))
11142
26.5k
        return(0);
11143
11144
13.8M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11145
13.7M
        c = utf[ix];
11146
13.7M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11147
2.12M
      if (c >= 0x20)
11148
2.06M
    ix++;
11149
65.5k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11150
65.1k
          ix++;
11151
427
      else
11152
427
          return(-ix);
11153
11.6M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11154
1.25M
      if (ix + 2 > len) return(complete ? -ix : ix);
11155
1.25M
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11156
212
          return(-ix);
11157
1.25M
      codepoint = (utf[ix] & 0x1f) << 6;
11158
1.25M
      codepoint |= utf[ix+1] & 0x3f;
11159
1.25M
      if (!xmlIsCharQ(codepoint))
11160
197
          return(-ix);
11161
1.25M
      ix += 2;
11162
10.3M
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11163
10.3M
      if (ix + 3 > len) return(complete ? -ix : ix);
11164
10.3M
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11165
10.3M
          ((utf[ix+2] & 0xc0) != 0x80))
11166
553
        return(-ix);
11167
10.3M
      codepoint = (utf[ix] & 0xf) << 12;
11168
10.3M
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11169
10.3M
      codepoint |= utf[ix+2] & 0x3f;
11170
10.3M
      if (!xmlIsCharQ(codepoint))
11171
598
          return(-ix);
11172
10.3M
      ix += 3;
11173
10.3M
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11174
2.31k
      if (ix + 4 > len) return(complete ? -ix : ix);
11175
2.29k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11176
2.29k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11177
2.29k
    ((utf[ix+3] & 0xc0) != 0x80))
11178
584
        return(-ix);
11179
1.71k
      codepoint = (utf[ix] & 0x7) << 18;
11180
1.71k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11181
1.71k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11182
1.71k
      codepoint |= utf[ix+3] & 0x3f;
11183
1.71k
      if (!xmlIsCharQ(codepoint))
11184
599
          return(-ix);
11185
1.11k
      ix += 4;
11186
1.11k
  } else       /* unknown encoding */
11187
212
      return(-ix);
11188
13.7M
      }
11189
95.9k
      return(ix);
11190
99.9k
}
11191
11192
/**
11193
 * xmlParseTryOrFinish:
11194
 * @ctxt:  an XML parser context
11195
 * @terminate:  last chunk indicator
11196
 *
11197
 * Try to progress on parsing
11198
 *
11199
 * Returns zero if no parsing was possible
11200
 */
11201
static int
11202
104k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11203
104k
    int ret = 0;
11204
104k
    int avail, tlen;
11205
104k
    xmlChar cur, next;
11206
104k
    const xmlChar *lastlt, *lastgt;
11207
11208
104k
    if (ctxt->input == NULL)
11209
0
        return(0);
11210
11211
#ifdef DEBUG_PUSH
11212
    switch (ctxt->instate) {
11213
  case XML_PARSER_EOF:
11214
      xmlGenericError(xmlGenericErrorContext,
11215
        "PP: try EOF\n"); break;
11216
  case XML_PARSER_START:
11217
      xmlGenericError(xmlGenericErrorContext,
11218
        "PP: try START\n"); break;
11219
  case XML_PARSER_MISC:
11220
      xmlGenericError(xmlGenericErrorContext,
11221
        "PP: try MISC\n");break;
11222
  case XML_PARSER_COMMENT:
11223
      xmlGenericError(xmlGenericErrorContext,
11224
        "PP: try COMMENT\n");break;
11225
  case XML_PARSER_PROLOG:
11226
      xmlGenericError(xmlGenericErrorContext,
11227
        "PP: try PROLOG\n");break;
11228
  case XML_PARSER_START_TAG:
11229
      xmlGenericError(xmlGenericErrorContext,
11230
        "PP: try START_TAG\n");break;
11231
  case XML_PARSER_CONTENT:
11232
      xmlGenericError(xmlGenericErrorContext,
11233
        "PP: try CONTENT\n");break;
11234
  case XML_PARSER_CDATA_SECTION:
11235
      xmlGenericError(xmlGenericErrorContext,
11236
        "PP: try CDATA_SECTION\n");break;
11237
  case XML_PARSER_END_TAG:
11238
      xmlGenericError(xmlGenericErrorContext,
11239
        "PP: try END_TAG\n");break;
11240
  case XML_PARSER_ENTITY_DECL:
11241
      xmlGenericError(xmlGenericErrorContext,
11242
        "PP: try ENTITY_DECL\n");break;
11243
  case XML_PARSER_ENTITY_VALUE:
11244
      xmlGenericError(xmlGenericErrorContext,
11245
        "PP: try ENTITY_VALUE\n");break;
11246
  case XML_PARSER_ATTRIBUTE_VALUE:
11247
      xmlGenericError(xmlGenericErrorContext,
11248
        "PP: try ATTRIBUTE_VALUE\n");break;
11249
  case XML_PARSER_DTD:
11250
      xmlGenericError(xmlGenericErrorContext,
11251
        "PP: try DTD\n");break;
11252
  case XML_PARSER_EPILOG:
11253
      xmlGenericError(xmlGenericErrorContext,
11254
        "PP: try EPILOG\n");break;
11255
  case XML_PARSER_PI:
11256
      xmlGenericError(xmlGenericErrorContext,
11257
        "PP: try PI\n");break;
11258
        case XML_PARSER_IGNORE:
11259
            xmlGenericError(xmlGenericErrorContext,
11260
        "PP: try IGNORE\n");break;
11261
    }
11262
#endif
11263
11264
104k
    if ((ctxt->input != NULL) &&
11265
104k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11266
6.49k
  xmlSHRINK(ctxt);
11267
6.49k
  ctxt->checkIndex = 0;
11268
6.49k
    }
11269
104k
    xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11270
11271
4.30M
    while (ctxt->instate != XML_PARSER_EOF) {
11272
4.30M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11273
12.5k
      return(0);
11274
11275
4.29M
  if (ctxt->input == NULL) break;
11276
4.29M
  if (ctxt->input->buf == NULL)
11277
0
      avail = ctxt->input->length -
11278
0
              (ctxt->input->cur - ctxt->input->base);
11279
4.29M
  else {
11280
      /*
11281
       * If we are operating on converted input, try to flush
11282
       * remaining chars to avoid them stalling in the non-converted
11283
       * buffer. But do not do this in document start where
11284
       * encoding="..." may not have been read and we work on a
11285
       * guessed encoding.
11286
       */
11287
4.29M
      if ((ctxt->instate != XML_PARSER_START) &&
11288
4.29M
          (ctxt->input->buf->raw != NULL) &&
11289
4.29M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11290
18.9k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11291
18.9k
                                                 ctxt->input);
11292
18.9k
    size_t current = ctxt->input->cur - ctxt->input->base;
11293
11294
18.9k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11295
18.9k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11296
18.9k
                                      base, current);
11297
18.9k
      }
11298
4.29M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11299
4.29M
        (ctxt->input->cur - ctxt->input->base);
11300
4.29M
  }
11301
4.29M
        if (avail < 1)
11302
3.83k
      goto done;
11303
4.28M
        switch (ctxt->instate) {
11304
0
            case XML_PARSER_EOF:
11305
          /*
11306
     * Document parsing is done !
11307
     */
11308
0
          goto done;
11309
79.9k
            case XML_PARSER_START:
11310
79.9k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11311
0
        xmlChar start[4];
11312
0
        xmlCharEncoding enc;
11313
11314
        /*
11315
         * Very first chars read from the document flow.
11316
         */
11317
0
        if (avail < 4)
11318
0
      goto done;
11319
11320
        /*
11321
         * Get the 4 first bytes and decode the charset
11322
         * if enc != XML_CHAR_ENCODING_NONE
11323
         * plug some encoding conversion routines,
11324
         * else xmlSwitchEncoding will set to (default)
11325
         * UTF8.
11326
         */
11327
0
        start[0] = RAW;
11328
0
        start[1] = NXT(1);
11329
0
        start[2] = NXT(2);
11330
0
        start[3] = NXT(3);
11331
0
        enc = xmlDetectCharEncoding(start, 4);
11332
0
        xmlSwitchEncoding(ctxt, enc);
11333
0
        break;
11334
0
    }
11335
11336
79.9k
    if (avail < 2)
11337
11
        goto done;
11338
79.9k
    cur = ctxt->input->cur[0];
11339
79.9k
    next = ctxt->input->cur[1];
11340
79.9k
    if (cur == 0) {
11341
46
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11342
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11343
0
                  &xmlDefaultSAXLocator);
11344
46
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11345
46
        xmlHaltParser(ctxt);
11346
#ifdef DEBUG_PUSH
11347
        xmlGenericError(xmlGenericErrorContext,
11348
          "PP: entering EOF\n");
11349
#endif
11350
46
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11351
0
      ctxt->sax->endDocument(ctxt->userData);
11352
46
        goto done;
11353
46
    }
11354
79.8k
          if ((cur == '<') && (next == '?')) {
11355
        /* PI or XML decl */
11356
66.5k
        if (avail < 5) return(ret);
11357
66.5k
        if ((!terminate) &&
11358
66.5k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11359
61.6k
      return(ret);
11360
4.81k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11361
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11362
0
                  &xmlDefaultSAXLocator);
11363
4.81k
        if ((ctxt->input->cur[2] == 'x') &&
11364
4.81k
      (ctxt->input->cur[3] == 'm') &&
11365
4.81k
      (ctxt->input->cur[4] == 'l') &&
11366
4.81k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11367
3.73k
      ret += 5;
11368
#ifdef DEBUG_PUSH
11369
      xmlGenericError(xmlGenericErrorContext,
11370
        "PP: Parsing XML Decl\n");
11371
#endif
11372
3.73k
      xmlParseXMLDecl(ctxt);
11373
3.73k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11374
          /*
11375
           * The XML REC instructs us to stop parsing right
11376
           * here
11377
           */
11378
429
          xmlHaltParser(ctxt);
11379
429
          return(0);
11380
429
      }
11381
3.30k
      ctxt->standalone = ctxt->input->standalone;
11382
3.30k
      if ((ctxt->encoding == NULL) &&
11383
3.30k
          (ctxt->input->encoding != NULL))
11384
2.92k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11385
3.30k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11386
3.30k
          (!ctxt->disableSAX))
11387
2.86k
          ctxt->sax->startDocument(ctxt->userData);
11388
3.30k
      ctxt->instate = XML_PARSER_MISC;
11389
#ifdef DEBUG_PUSH
11390
      xmlGenericError(xmlGenericErrorContext,
11391
        "PP: entering MISC\n");
11392
#endif
11393
3.30k
        } else {
11394
1.07k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11395
1.07k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11396
1.07k
          (!ctxt->disableSAX))
11397
1.07k
          ctxt->sax->startDocument(ctxt->userData);
11398
1.07k
      ctxt->instate = XML_PARSER_MISC;
11399
#ifdef DEBUG_PUSH
11400
      xmlGenericError(xmlGenericErrorContext,
11401
        "PP: entering MISC\n");
11402
#endif
11403
1.07k
        }
11404
13.3k
    } else {
11405
13.3k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11406
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11407
0
                  &xmlDefaultSAXLocator);
11408
13.3k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11409
13.3k
        if (ctxt->version == NULL) {
11410
0
            xmlErrMemory(ctxt, NULL);
11411
0
      break;
11412
0
        }
11413
13.3k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11414
13.3k
            (!ctxt->disableSAX))
11415
13.3k
      ctxt->sax->startDocument(ctxt->userData);
11416
13.3k
        ctxt->instate = XML_PARSER_MISC;
11417
#ifdef DEBUG_PUSH
11418
        xmlGenericError(xmlGenericErrorContext,
11419
          "PP: entering MISC\n");
11420
#endif
11421
13.3k
    }
11422
17.7k
    break;
11423
1.74M
            case XML_PARSER_START_TAG: {
11424
1.74M
          const xmlChar *name;
11425
1.74M
    const xmlChar *prefix = NULL;
11426
1.74M
    const xmlChar *URI = NULL;
11427
1.74M
                int line = ctxt->input->line;
11428
1.74M
    int nsNr = ctxt->nsNr;
11429
11430
1.74M
    if ((avail < 2) && (ctxt->inputNr == 1))
11431
0
        goto done;
11432
1.74M
    cur = ctxt->input->cur[0];
11433
1.74M
          if (cur != '<') {
11434
254
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11435
254
        xmlHaltParser(ctxt);
11436
254
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11437
0
      ctxt->sax->endDocument(ctxt->userData);
11438
254
        goto done;
11439
254
    }
11440
1.74M
    if (!terminate) {
11441
1.67M
        if (ctxt->progressive) {
11442
            /* > can be found unescaped in attribute values */
11443
1.67M
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11444
3.37k
          goto done;
11445
1.67M
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11446
0
      goto done;
11447
0
        }
11448
1.67M
    }
11449
1.73M
    if (ctxt->spaceNr == 0)
11450
0
        spacePush(ctxt, -1);
11451
1.73M
    else if (*ctxt->space == -2)
11452
198k
        spacePush(ctxt, -1);
11453
1.54M
    else
11454
1.54M
        spacePush(ctxt, *ctxt->space);
11455
1.73M
#ifdef LIBXML_SAX1_ENABLED
11456
1.73M
    if (ctxt->sax2)
11457
1.73M
#endif /* LIBXML_SAX1_ENABLED */
11458
1.73M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11459
0
#ifdef LIBXML_SAX1_ENABLED
11460
0
    else
11461
0
        name = xmlParseStartTag(ctxt);
11462
1.73M
#endif /* LIBXML_SAX1_ENABLED */
11463
1.73M
    if (ctxt->instate == XML_PARSER_EOF)
11464
0
        goto done;
11465
1.73M
    if (name == NULL) {
11466
364
        spacePop(ctxt);
11467
364
        xmlHaltParser(ctxt);
11468
364
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11469
0
      ctxt->sax->endDocument(ctxt->userData);
11470
364
        goto done;
11471
364
    }
11472
1.73M
#ifdef LIBXML_VALID_ENABLED
11473
    /*
11474
     * [ VC: Root Element Type ]
11475
     * The Name in the document type declaration must match
11476
     * the element type of the root element.
11477
     */
11478
1.73M
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11479
1.73M
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11480
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11481
1.73M
#endif /* LIBXML_VALID_ENABLED */
11482
11483
    /*
11484
     * Check for an Empty Element.
11485
     */
11486
1.73M
    if ((RAW == '/') && (NXT(1) == '>')) {
11487
258k
        SKIP(2);
11488
11489
258k
        if (ctxt->sax2) {
11490
258k
      if ((ctxt->sax != NULL) &&
11491
258k
          (ctxt->sax->endElementNs != NULL) &&
11492
258k
          (!ctxt->disableSAX))
11493
258k
          ctxt->sax->endElementNs(ctxt->userData, name,
11494
258k
                                  prefix, URI);
11495
258k
      if (ctxt->nsNr - nsNr > 0)
11496
1.22k
          nsPop(ctxt, ctxt->nsNr - nsNr);
11497
258k
#ifdef LIBXML_SAX1_ENABLED
11498
258k
        } else {
11499
0
      if ((ctxt->sax != NULL) &&
11500
0
          (ctxt->sax->endElement != NULL) &&
11501
0
          (!ctxt->disableSAX))
11502
0
          ctxt->sax->endElement(ctxt->userData, name);
11503
0
#endif /* LIBXML_SAX1_ENABLED */
11504
0
        }
11505
258k
        if (ctxt->instate == XML_PARSER_EOF)
11506
0
      goto done;
11507
258k
        spacePop(ctxt);
11508
258k
        if (ctxt->nameNr == 0) {
11509
112
      ctxt->instate = XML_PARSER_EPILOG;
11510
258k
        } else {
11511
258k
      ctxt->instate = XML_PARSER_CONTENT;
11512
258k
        }
11513
258k
                    ctxt->progressive = 1;
11514
258k
        break;
11515
258k
    }
11516
1.47M
    if (RAW == '>') {
11517
1.47M
        NEXT;
11518
1.47M
    } else {
11519
6.34k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11520
6.34k
           "Couldn't find end of Start Tag %s\n",
11521
6.34k
           name);
11522
6.34k
        nodePop(ctxt);
11523
6.34k
        spacePop(ctxt);
11524
6.34k
    }
11525
1.47M
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11526
11527
1.47M
    ctxt->instate = XML_PARSER_CONTENT;
11528
1.47M
                ctxt->progressive = 1;
11529
1.47M
                break;
11530
1.73M
      }
11531
2.25M
            case XML_PARSER_CONTENT: {
11532
2.25M
    int id;
11533
2.25M
    unsigned long cons;
11534
2.25M
    if ((avail < 2) && (ctxt->inputNr == 1))
11535
911
        goto done;
11536
2.25M
    cur = ctxt->input->cur[0];
11537
2.25M
    next = ctxt->input->cur[1];
11538
11539
2.25M
    id = ctxt->input->id;
11540
2.25M
          cons = CUR_CONSUMED;
11541
2.25M
    if ((cur == '<') && (next == '/')) {
11542
30.7k
        ctxt->instate = XML_PARSER_END_TAG;
11543
30.7k
        break;
11544
2.22M
          } else if ((cur == '<') && (next == '?')) {
11545
1.61k
        if ((!terminate) &&
11546
1.61k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11547
328
                        ctxt->progressive = XML_PARSER_PI;
11548
328
      goto done;
11549
328
                    }
11550
1.28k
        xmlParsePI(ctxt);
11551
1.28k
        ctxt->instate = XML_PARSER_CONTENT;
11552
1.28k
                    ctxt->progressive = 1;
11553
2.22M
    } else if ((cur == '<') && (next != '!')) {
11554
1.72M
        ctxt->instate = XML_PARSER_START_TAG;
11555
1.72M
        break;
11556
1.72M
    } else if ((cur == '<') && (next == '!') &&
11557
494k
               (ctxt->input->cur[2] == '-') &&
11558
494k
         (ctxt->input->cur[3] == '-')) {
11559
7.28k
        int term;
11560
11561
7.28k
              if (avail < 4)
11562
0
            goto done;
11563
7.28k
        ctxt->input->cur += 4;
11564
7.28k
        term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11565
7.28k
        ctxt->input->cur -= 4;
11566
7.28k
        if ((!terminate) && (term < 0)) {
11567
402
                        ctxt->progressive = XML_PARSER_COMMENT;
11568
402
      goto done;
11569
402
                    }
11570
6.88k
        xmlParseComment(ctxt);
11571
6.88k
        ctxt->instate = XML_PARSER_CONTENT;
11572
6.88k
                    ctxt->progressive = 1;
11573
486k
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11574
486k
        (ctxt->input->cur[2] == '[') &&
11575
486k
        (ctxt->input->cur[3] == 'C') &&
11576
486k
        (ctxt->input->cur[4] == 'D') &&
11577
486k
        (ctxt->input->cur[5] == 'A') &&
11578
486k
        (ctxt->input->cur[6] == 'T') &&
11579
486k
        (ctxt->input->cur[7] == 'A') &&
11580
486k
        (ctxt->input->cur[8] == '[')) {
11581
122k
        SKIP(9);
11582
122k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11583
122k
        break;
11584
364k
    } else if ((cur == '<') && (next == '!') &&
11585
364k
               (avail < 9)) {
11586
1.74k
        goto done;
11587
362k
    } else if (cur == '&') {
11588
3.71k
        if ((!terminate) &&
11589
3.71k
            (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11590
795
      goto done;
11591
2.91k
        xmlParseReference(ctxt);
11592
358k
    } else {
11593
        /* TODO Avoid the extra copy, handle directly !!! */
11594
        /*
11595
         * Goal of the following test is:
11596
         *  - minimize calls to the SAX 'character' callback
11597
         *    when they are mergeable
11598
         *  - handle a problem for isBlank when we only parse
11599
         *    a sequence of blank chars and the next one is
11600
         *    not available to check against '<' presence.
11601
         *  - tries to homogenize the differences in SAX
11602
         *    callbacks between the push and pull versions
11603
         *    of the parser.
11604
         */
11605
358k
        if ((ctxt->inputNr == 1) &&
11606
358k
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11607
33.9k
      if (!terminate) {
11608
24.3k
          if (ctxt->progressive) {
11609
24.3k
        if ((lastlt == NULL) ||
11610
24.3k
            (ctxt->input->cur > lastlt))
11611
1.87k
            goto done;
11612
24.3k
          } else if (xmlParseLookupSequence(ctxt,
11613
0
                                            '<', 0, 0) < 0) {
11614
0
        goto done;
11615
0
          }
11616
24.3k
      }
11617
33.9k
                    }
11618
356k
        ctxt->checkIndex = 0;
11619
356k
        xmlParseCharData(ctxt, 0);
11620
356k
    }
11621
367k
    if ((cons == CUR_CONSUMED) && (id == ctxt->input->id)) {
11622
54
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11623
54
                    "detected an error in element content\n");
11624
54
        xmlHaltParser(ctxt);
11625
54
        break;
11626
54
    }
11627
367k
    break;
11628
367k
      }
11629
367k
            case XML_PARSER_END_TAG:
11630
31.1k
    if (avail < 2)
11631
0
        goto done;
11632
31.1k
    if (!terminate) {
11633
24.5k
        if (ctxt->progressive) {
11634
            /* > can be found unescaped in attribute values */
11635
24.5k
            if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
11636
450
          goto done;
11637
24.5k
        } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11638
0
      goto done;
11639
0
        }
11640
24.5k
    }
11641
30.7k
    if (ctxt->sax2) {
11642
30.7k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11643
30.7k
        nameNsPop(ctxt);
11644
30.7k
    }
11645
0
#ifdef LIBXML_SAX1_ENABLED
11646
0
      else
11647
0
        xmlParseEndTag1(ctxt, 0);
11648
30.7k
#endif /* LIBXML_SAX1_ENABLED */
11649
30.7k
    if (ctxt->instate == XML_PARSER_EOF) {
11650
        /* Nothing */
11651
30.7k
    } else if (ctxt->nameNr == 0) {
11652
870
        ctxt->instate = XML_PARSER_EPILOG;
11653
29.8k
    } else {
11654
29.8k
        ctxt->instate = XML_PARSER_CONTENT;
11655
29.8k
    }
11656
30.7k
    break;
11657
127k
            case XML_PARSER_CDATA_SECTION: {
11658
          /*
11659
     * The Push mode need to have the SAX callback for
11660
     * cdataBlock merge back contiguous callbacks.
11661
     */
11662
127k
    int base;
11663
11664
127k
    base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11665
127k
    if (base < 0) {
11666
4.60k
        if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
11667
4.12k
            int tmp;
11668
11669
4.12k
      tmp = xmlCheckCdataPush(ctxt->input->cur,
11670
4.12k
                              XML_PARSER_BIG_BUFFER_SIZE, 0);
11671
4.12k
      if (tmp < 0) {
11672
12
          tmp = -tmp;
11673
12
          ctxt->input->cur += tmp;
11674
12
          goto encoding_error;
11675
12
      }
11676
4.11k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11677
4.11k
          if (ctxt->sax->cdataBlock != NULL)
11678
0
        ctxt->sax->cdataBlock(ctxt->userData,
11679
0
                              ctxt->input->cur, tmp);
11680
4.11k
          else if (ctxt->sax->characters != NULL)
11681
4.11k
        ctxt->sax->characters(ctxt->userData,
11682
4.11k
                              ctxt->input->cur, tmp);
11683
4.11k
      }
11684
4.11k
      if (ctxt->instate == XML_PARSER_EOF)
11685
0
          goto done;
11686
4.11k
      SKIPL(tmp);
11687
4.11k
      ctxt->checkIndex = 0;
11688
4.11k
        }
11689
4.59k
        goto done;
11690
122k
    } else {
11691
122k
        int tmp;
11692
11693
122k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11694
122k
        if ((tmp < 0) || (tmp != base)) {
11695
104
      tmp = -tmp;
11696
104
      ctxt->input->cur += tmp;
11697
104
      goto encoding_error;
11698
104
        }
11699
122k
        if ((ctxt->sax != NULL) && (base == 0) &&
11700
122k
            (ctxt->sax->cdataBlock != NULL) &&
11701
122k
            (!ctxt->disableSAX)) {
11702
      /*
11703
       * Special case to provide identical behaviour
11704
       * between pull and push parsers on empty CDATA
11705
       * sections
11706
       */
11707
0
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11708
0
           (!strncmp((const char *)&ctxt->input->cur[-9],
11709
0
                     "<![CDATA[", 9)))
11710
0
           ctxt->sax->cdataBlock(ctxt->userData,
11711
0
                                 BAD_CAST "", 0);
11712
122k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11713
122k
      (!ctxt->disableSAX)) {
11714
95.7k
      if (ctxt->sax->cdataBlock != NULL)
11715
0
          ctxt->sax->cdataBlock(ctxt->userData,
11716
0
              ctxt->input->cur, base);
11717
95.7k
      else if (ctxt->sax->characters != NULL)
11718
95.7k
          ctxt->sax->characters(ctxt->userData,
11719
95.7k
              ctxt->input->cur, base);
11720
95.7k
        }
11721
122k
        if (ctxt->instate == XML_PARSER_EOF)
11722
0
      goto done;
11723
122k
        SKIPL(base + 3);
11724
122k
        ctxt->checkIndex = 0;
11725
122k
        ctxt->instate = XML_PARSER_CONTENT;
11726
#ifdef DEBUG_PUSH
11727
        xmlGenericError(xmlGenericErrorContext,
11728
          "PP: entering CONTENT\n");
11729
#endif
11730
122k
    }
11731
122k
    break;
11732
127k
      }
11733
122k
            case XML_PARSER_MISC:
11734
36.0k
    SKIP_BLANKS;
11735
36.0k
    if (ctxt->input->buf == NULL)
11736
0
        avail = ctxt->input->length -
11737
0
                (ctxt->input->cur - ctxt->input->base);
11738
36.0k
    else
11739
36.0k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11740
36.0k
                (ctxt->input->cur - ctxt->input->base);
11741
36.0k
    if (avail < 2)
11742
363
        goto done;
11743
35.7k
    cur = ctxt->input->cur[0];
11744
35.7k
    next = ctxt->input->cur[1];
11745
35.7k
          if ((cur == '<') && (next == '?')) {
11746
14.7k
        if ((!terminate) &&
11747
14.7k
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11748
362
                        ctxt->progressive = XML_PARSER_PI;
11749
362
      goto done;
11750
362
                    }
11751
#ifdef DEBUG_PUSH
11752
        xmlGenericError(xmlGenericErrorContext,
11753
          "PP: Parsing PI\n");
11754
#endif
11755
14.3k
        xmlParsePI(ctxt);
11756
14.3k
        if (ctxt->instate == XML_PARSER_EOF)
11757
0
      goto done;
11758
14.3k
        ctxt->instate = XML_PARSER_MISC;
11759
14.3k
                    ctxt->progressive = 1;
11760
14.3k
        ctxt->checkIndex = 0;
11761
20.9k
    } else if ((cur == '<') && (next == '!') &&
11762
20.9k
        (ctxt->input->cur[2] == '-') &&
11763
20.9k
        (ctxt->input->cur[3] == '-')) {
11764
2.33k
        if ((!terminate) &&
11765
2.33k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11766
289
                        ctxt->progressive = XML_PARSER_COMMENT;
11767
289
      goto done;
11768
289
                    }
11769
#ifdef DEBUG_PUSH
11770
        xmlGenericError(xmlGenericErrorContext,
11771
          "PP: Parsing Comment\n");
11772
#endif
11773
2.04k
        xmlParseComment(ctxt);
11774
2.04k
        if (ctxt->instate == XML_PARSER_EOF)
11775
0
      goto done;
11776
2.04k
        ctxt->instate = XML_PARSER_MISC;
11777
2.04k
                    ctxt->progressive = 1;
11778
2.04k
        ctxt->checkIndex = 0;
11779
18.6k
    } else if ((cur == '<') && (next == '!') &&
11780
18.6k
        (ctxt->input->cur[2] == 'D') &&
11781
18.6k
        (ctxt->input->cur[3] == 'O') &&
11782
18.6k
        (ctxt->input->cur[4] == 'C') &&
11783
18.6k
        (ctxt->input->cur[5] == 'T') &&
11784
18.6k
        (ctxt->input->cur[6] == 'Y') &&
11785
18.6k
        (ctxt->input->cur[7] == 'P') &&
11786
18.6k
        (ctxt->input->cur[8] == 'E')) {
11787
6.05k
        if ((!terminate) &&
11788
6.05k
            (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11789
551
                        ctxt->progressive = XML_PARSER_DTD;
11790
551
      goto done;
11791
551
                    }
11792
#ifdef DEBUG_PUSH
11793
        xmlGenericError(xmlGenericErrorContext,
11794
          "PP: Parsing internal subset\n");
11795
#endif
11796
5.50k
        ctxt->inSubset = 1;
11797
5.50k
                    ctxt->progressive = 0;
11798
5.50k
        ctxt->checkIndex = 0;
11799
5.50k
        xmlParseDocTypeDecl(ctxt);
11800
5.50k
        if (ctxt->instate == XML_PARSER_EOF)
11801
0
      goto done;
11802
5.50k
        if (RAW == '[') {
11803
4.84k
      ctxt->instate = XML_PARSER_DTD;
11804
#ifdef DEBUG_PUSH
11805
      xmlGenericError(xmlGenericErrorContext,
11806
        "PP: entering DTD\n");
11807
#endif
11808
4.84k
        } else {
11809
      /*
11810
       * Create and update the external subset.
11811
       */
11812
656
      ctxt->inSubset = 2;
11813
656
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11814
656
          (ctxt->sax->externalSubset != NULL))
11815
0
          ctxt->sax->externalSubset(ctxt->userData,
11816
0
            ctxt->intSubName, ctxt->extSubSystem,
11817
0
            ctxt->extSubURI);
11818
656
      ctxt->inSubset = 0;
11819
656
      xmlCleanSpecialAttr(ctxt);
11820
656
      ctxt->instate = XML_PARSER_PROLOG;
11821
#ifdef DEBUG_PUSH
11822
      xmlGenericError(xmlGenericErrorContext,
11823
        "PP: entering PROLOG\n");
11824
#endif
11825
656
        }
11826
12.5k
    } else if ((cur == '<') && (next == '!') &&
11827
12.5k
               (avail < 9)) {
11828
2.76k
        goto done;
11829
9.82k
    } else {
11830
9.82k
        ctxt->instate = XML_PARSER_START_TAG;
11831
9.82k
        ctxt->progressive = XML_PARSER_START_TAG;
11832
9.82k
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11833
#ifdef DEBUG_PUSH
11834
        xmlGenericError(xmlGenericErrorContext,
11835
          "PP: entering START_TAG\n");
11836
#endif
11837
9.82k
    }
11838
31.7k
    break;
11839
31.7k
            case XML_PARSER_PROLOG:
11840
3.73k
    SKIP_BLANKS;
11841
3.73k
    if (ctxt->input->buf == NULL)
11842
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11843
3.73k
    else
11844
3.73k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11845
3.73k
                            (ctxt->input->cur - ctxt->input->base);
11846
3.73k
    if (avail < 2)
11847
324
        goto done;
11848
3.41k
    cur = ctxt->input->cur[0];
11849
3.41k
    next = ctxt->input->cur[1];
11850
3.41k
          if ((cur == '<') && (next == '?')) {
11851
871
        if ((!terminate) &&
11852
871
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11853
264
                        ctxt->progressive = XML_PARSER_PI;
11854
264
      goto done;
11855
264
                    }
11856
#ifdef DEBUG_PUSH
11857
        xmlGenericError(xmlGenericErrorContext,
11858
          "PP: Parsing PI\n");
11859
#endif
11860
607
        xmlParsePI(ctxt);
11861
607
        if (ctxt->instate == XML_PARSER_EOF)
11862
0
      goto done;
11863
607
        ctxt->instate = XML_PARSER_PROLOG;
11864
607
                    ctxt->progressive = 1;
11865
2.54k
    } else if ((cur == '<') && (next == '!') &&
11866
2.54k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11867
1.18k
        if ((!terminate) &&
11868
1.18k
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11869
228
                        ctxt->progressive = XML_PARSER_COMMENT;
11870
228
      goto done;
11871
228
                    }
11872
#ifdef DEBUG_PUSH
11873
        xmlGenericError(xmlGenericErrorContext,
11874
          "PP: Parsing Comment\n");
11875
#endif
11876
953
        xmlParseComment(ctxt);
11877
953
        if (ctxt->instate == XML_PARSER_EOF)
11878
0
      goto done;
11879
953
        ctxt->instate = XML_PARSER_PROLOG;
11880
953
                    ctxt->progressive = 1;
11881
1.36k
    } else if ((cur == '<') && (next == '!') &&
11882
1.36k
               (avail < 4)) {
11883
536
        goto done;
11884
824
    } else {
11885
824
        ctxt->instate = XML_PARSER_START_TAG;
11886
824
        if (ctxt->progressive == 0)
11887
773
      ctxt->progressive = XML_PARSER_START_TAG;
11888
824
        xmlParseGetLasts(ctxt, &lastlt, &lastgt);
11889
#ifdef DEBUG_PUSH
11890
        xmlGenericError(xmlGenericErrorContext,
11891
          "PP: entering START_TAG\n");
11892
#endif
11893
824
    }
11894
2.38k
    break;
11895
2.47k
            case XML_PARSER_EPILOG:
11896
2.47k
    SKIP_BLANKS;
11897
2.47k
    if (ctxt->input->buf == NULL)
11898
0
        avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11899
2.47k
    else
11900
2.47k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11901
2.47k
                            (ctxt->input->cur - ctxt->input->base);
11902
2.47k
    if (avail < 2)
11903
588
        goto done;
11904
1.88k
    cur = ctxt->input->cur[0];
11905
1.88k
    next = ctxt->input->cur[1];
11906
1.88k
          if ((cur == '<') && (next == '?')) {
11907
753
        if ((!terminate) &&
11908
753
            (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11909
227
                        ctxt->progressive = XML_PARSER_PI;
11910
227
      goto done;
11911
227
                    }
11912
#ifdef DEBUG_PUSH
11913
        xmlGenericError(xmlGenericErrorContext,
11914
          "PP: Parsing PI\n");
11915
#endif
11916
526
        xmlParsePI(ctxt);
11917
526
        if (ctxt->instate == XML_PARSER_EOF)
11918
0
      goto done;
11919
526
        ctxt->instate = XML_PARSER_EPILOG;
11920
526
                    ctxt->progressive = 1;
11921
1.13k
    } else if ((cur == '<') && (next == '!') &&
11922
1.13k
        (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11923
691
        if ((!terminate) &&
11924
691
            (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11925
194
                        ctxt->progressive = XML_PARSER_COMMENT;
11926
194
      goto done;
11927
194
                    }
11928
#ifdef DEBUG_PUSH
11929
        xmlGenericError(xmlGenericErrorContext,
11930
          "PP: Parsing Comment\n");
11931
#endif
11932
497
        xmlParseComment(ctxt);
11933
497
        if (ctxt->instate == XML_PARSER_EOF)
11934
0
      goto done;
11935
497
        ctxt->instate = XML_PARSER_EPILOG;
11936
497
                    ctxt->progressive = 1;
11937
497
    } else if ((cur == '<') && (next == '!') &&
11938
445
               (avail < 4)) {
11939
412
        goto done;
11940
412
    } else {
11941
33
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11942
33
        xmlHaltParser(ctxt);
11943
#ifdef DEBUG_PUSH
11944
        xmlGenericError(xmlGenericErrorContext,
11945
          "PP: entering EOF\n");
11946
#endif
11947
33
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11948
0
      ctxt->sax->endDocument(ctxt->userData);
11949
33
        goto done;
11950
33
    }
11951
1.02k
    break;
11952
7.13k
            case XML_PARSER_DTD: {
11953
          /*
11954
     * Sorry but progressive parsing of the internal subset
11955
     * is not expected to be supported. We first check that
11956
     * the full content of the internal subset is available and
11957
     * the parsing is launched only at that point.
11958
     * Internal subset ends up with "']' S? '>'" in an unescaped
11959
     * section and not in a ']]>' sequence which are conditional
11960
     * sections (whoever argued to keep that crap in XML deserve
11961
     * a place in hell !).
11962
     */
11963
7.13k
    int base, i;
11964
7.13k
    xmlChar *buf;
11965
7.13k
          xmlChar quote = 0;
11966
7.13k
                size_t use;
11967
11968
7.13k
    base = ctxt->input->cur - ctxt->input->base;
11969
7.13k
    if (base < 0) return(0);
11970
7.13k
    if (ctxt->checkIndex > base)
11971
1.41k
        base = ctxt->checkIndex;
11972
7.13k
    buf = xmlBufContent(ctxt->input->buf->buffer);
11973
7.13k
                use = xmlBufUse(ctxt->input->buf->buffer);
11974
2.51G
    for (;(unsigned int) base < use; base++) {
11975
2.51G
        if (quote != 0) {
11976
1.56G
            if (buf[base] == quote)
11977
58.4k
          quote = 0;
11978
1.56G
      continue;
11979
1.56G
        }
11980
952M
        if ((quote == 0) && (buf[base] == '<')) {
11981
383k
            int found  = 0;
11982
      /* special handling of comments */
11983
383k
            if (((unsigned int) base + 4 < use) &&
11984
383k
          (buf[base + 1] == '!') &&
11985
383k
          (buf[base + 2] == '-') &&
11986
383k
          (buf[base + 3] == '-')) {
11987
297M
          for (;(unsigned int) base + 3 < use; base++) {
11988
297M
        if ((buf[base] == '-') &&
11989
297M
            (buf[base + 1] == '-') &&
11990
297M
            (buf[base + 2] == '>')) {
11991
9.16k
            found = 1;
11992
9.16k
            base += 2;
11993
9.16k
            break;
11994
9.16k
        }
11995
297M
                }
11996
9.58k
          if (!found) {
11997
#if 0
11998
              fprintf(stderr, "unfinished comment\n");
11999
#endif
12000
412
              break; /* for */
12001
412
                }
12002
9.16k
                continue;
12003
9.58k
      }
12004
383k
        }
12005
952M
        if (buf[base] == '"') {
12006
15.8k
            quote = '"';
12007
15.8k
      continue;
12008
15.8k
        }
12009
952M
        if (buf[base] == '\'') {
12010
43.5k
            quote = '\'';
12011
43.5k
      continue;
12012
43.5k
        }
12013
952M
        if (buf[base] == ']') {
12014
#if 0
12015
            fprintf(stderr, "%c%c%c%c: ", buf[base],
12016
              buf[base + 1], buf[base + 2], buf[base + 3]);
12017
#endif
12018
130k
            if ((unsigned int) base +1 >= use)
12019
70
          break;
12020
130k
      if (buf[base + 1] == ']') {
12021
          /* conditional crap, skip both ']' ! */
12022
113k
          base++;
12023
113k
          continue;
12024
113k
      }
12025
1.11M
            for (i = 1; (unsigned int) base + i < use; i++) {
12026
1.11M
          if (buf[base + i] == '>') {
12027
#if 0
12028
              fprintf(stderr, "found\n");
12029
#endif
12030
4.50k
              goto found_end_int_subset;
12031
4.50k
          }
12032
1.11M
          if (!IS_BLANK_CH(buf[base + i])) {
12033
#if 0
12034
              fprintf(stderr, "not found\n");
12035
#endif
12036
12.4k
              goto not_end_of_int_subset;
12037
12.4k
          }
12038
1.11M
      }
12039
#if 0
12040
      fprintf(stderr, "end of stream\n");
12041
#endif
12042
214
            break;
12043
12044
17.1k
        }
12045
951M
not_end_of_int_subset:
12046
951M
                    continue; /* for */
12047
952M
    }
12048
    /*
12049
     * We didn't find the end of the Internal subset
12050
     */
12051
2.62k
                if (quote == 0)
12052
1.68k
                    ctxt->checkIndex = base;
12053
940
                else
12054
940
                    ctxt->checkIndex = 0;
12055
#ifdef DEBUG_PUSH
12056
    if (next == 0)
12057
        xmlGenericError(xmlGenericErrorContext,
12058
          "PP: lookup of int subset end filed\n");
12059
#endif
12060
2.62k
          goto done;
12061
12062
4.50k
found_end_int_subset:
12063
4.50k
                ctxt->checkIndex = 0;
12064
4.50k
    xmlParseInternalSubset(ctxt);
12065
4.50k
    if (ctxt->instate == XML_PARSER_EOF)
12066
992
        goto done;
12067
3.51k
    ctxt->inSubset = 2;
12068
3.51k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12069
3.51k
        (ctxt->sax->externalSubset != NULL))
12070
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12071
0
          ctxt->extSubSystem, ctxt->extSubURI);
12072
3.51k
    ctxt->inSubset = 0;
12073
3.51k
    xmlCleanSpecialAttr(ctxt);
12074
3.51k
    if (ctxt->instate == XML_PARSER_EOF)
12075
0
        goto done;
12076
3.51k
    ctxt->instate = XML_PARSER_PROLOG;
12077
3.51k
    ctxt->checkIndex = 0;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering PROLOG\n");
12081
#endif
12082
3.51k
                break;
12083
3.51k
      }
12084
0
            case XML_PARSER_COMMENT:
12085
0
    xmlGenericError(xmlGenericErrorContext,
12086
0
      "PP: internal error, state == COMMENT\n");
12087
0
    ctxt->instate = XML_PARSER_CONTENT;
12088
#ifdef DEBUG_PUSH
12089
    xmlGenericError(xmlGenericErrorContext,
12090
      "PP: entering CONTENT\n");
12091
#endif
12092
0
    break;
12093
0
            case XML_PARSER_IGNORE:
12094
0
    xmlGenericError(xmlGenericErrorContext,
12095
0
      "PP: internal error, state == IGNORE");
12096
0
          ctxt->instate = XML_PARSER_DTD;
12097
#ifdef DEBUG_PUSH
12098
    xmlGenericError(xmlGenericErrorContext,
12099
      "PP: entering DTD\n");
12100
#endif
12101
0
          break;
12102
0
            case XML_PARSER_PI:
12103
0
    xmlGenericError(xmlGenericErrorContext,
12104
0
      "PP: internal error, state == PI\n");
12105
0
    ctxt->instate = XML_PARSER_CONTENT;
12106
#ifdef DEBUG_PUSH
12107
    xmlGenericError(xmlGenericErrorContext,
12108
      "PP: entering CONTENT\n");
12109
#endif
12110
0
    break;
12111
0
            case XML_PARSER_ENTITY_DECL:
12112
0
    xmlGenericError(xmlGenericErrorContext,
12113
0
      "PP: internal error, state == ENTITY_DECL\n");
12114
0
    ctxt->instate = XML_PARSER_DTD;
12115
#ifdef DEBUG_PUSH
12116
    xmlGenericError(xmlGenericErrorContext,
12117
      "PP: entering DTD\n");
12118
#endif
12119
0
    break;
12120
0
            case XML_PARSER_ENTITY_VALUE:
12121
0
    xmlGenericError(xmlGenericErrorContext,
12122
0
      "PP: internal error, state == ENTITY_VALUE\n");
12123
0
    ctxt->instate = XML_PARSER_CONTENT;
12124
#ifdef DEBUG_PUSH
12125
    xmlGenericError(xmlGenericErrorContext,
12126
      "PP: entering DTD\n");
12127
#endif
12128
0
    break;
12129
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12130
0
    xmlGenericError(xmlGenericErrorContext,
12131
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12132
0
    ctxt->instate = XML_PARSER_START_TAG;
12133
#ifdef DEBUG_PUSH
12134
    xmlGenericError(xmlGenericErrorContext,
12135
      "PP: entering START_TAG\n");
12136
#endif
12137
0
    break;
12138
0
            case XML_PARSER_SYSTEM_LITERAL:
12139
0
    xmlGenericError(xmlGenericErrorContext,
12140
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12141
0
    ctxt->instate = XML_PARSER_START_TAG;
12142
#ifdef DEBUG_PUSH
12143
    xmlGenericError(xmlGenericErrorContext,
12144
      "PP: entering START_TAG\n");
12145
#endif
12146
0
    break;
12147
0
            case XML_PARSER_PUBLIC_LITERAL:
12148
0
    xmlGenericError(xmlGenericErrorContext,
12149
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12150
0
    ctxt->instate = XML_PARSER_START_TAG;
12151
#ifdef DEBUG_PUSH
12152
    xmlGenericError(xmlGenericErrorContext,
12153
      "PP: entering START_TAG\n");
12154
#endif
12155
0
    break;
12156
4.28M
  }
12157
4.28M
    }
12158
29.7k
done:
12159
#ifdef DEBUG_PUSH
12160
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12161
#endif
12162
29.7k
    return(ret);
12163
116
encoding_error:
12164
116
    {
12165
116
        char buffer[150];
12166
12167
116
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12168
116
      ctxt->input->cur[0], ctxt->input->cur[1],
12169
116
      ctxt->input->cur[2], ctxt->input->cur[3]);
12170
116
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12171
116
         "Input is not proper UTF-8, indicate encoding !\n%s",
12172
116
         BAD_CAST buffer, NULL);
12173
116
    }
12174
116
    return(0);
12175
104k
}
12176
12177
/**
12178
 * xmlParseCheckTransition:
12179
 * @ctxt:  an XML parser context
12180
 * @chunk:  a char array
12181
 * @size:  the size in byte of the chunk
12182
 *
12183
 * Check depending on the current parser state if the chunk given must be
12184
 * processed immediately or one need more data to advance on parsing.
12185
 *
12186
 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12187
 */
12188
static int
12189
84.3k
xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12190
84.3k
    if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12191
0
        return(-1);
12192
84.3k
    if (ctxt->instate == XML_PARSER_START_TAG) {
12193
14.0k
        if (memchr(chunk, '>', size) != NULL)
12194
3.01k
            return(1);
12195
10.9k
        return(0);
12196
14.0k
    }
12197
70.3k
    if (ctxt->progressive == XML_PARSER_COMMENT) {
12198
3.87k
        if (memchr(chunk, '>', size) != NULL)
12199
1.01k
            return(1);
12200
2.85k
        return(0);
12201
3.87k
    }
12202
66.4k
    if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12203
5.27k
        if (memchr(chunk, '>', size) != NULL)
12204
4.24k
            return(1);
12205
1.02k
        return(0);
12206
5.27k
    }
12207
61.1k
    if (ctxt->progressive == XML_PARSER_PI) {
12208
3.78k
        if (memchr(chunk, '>', size) != NULL)
12209
1.04k
            return(1);
12210
2.73k
        return(0);
12211
3.78k
    }
12212
57.3k
    if (ctxt->instate == XML_PARSER_END_TAG) {
12213
650
        if (memchr(chunk, '>', size) != NULL)
12214
418
            return(1);
12215
232
        return(0);
12216
650
    }
12217
56.7k
    if ((ctxt->progressive == XML_PARSER_DTD) ||
12218
56.7k
        (ctxt->instate == XML_PARSER_DTD)) {
12219
40.1k
        if (memchr(chunk, '>', size) != NULL)
12220
2.39k
            return(1);
12221
37.7k
        return(0);
12222
40.1k
    }
12223
16.6k
    return(1);
12224
56.7k
}
12225
12226
/**
12227
 * xmlParseChunk:
12228
 * @ctxt:  an XML parser context
12229
 * @chunk:  a char array
12230
 * @size:  the size in bytes of the chunk
12231
 * @terminate:  last chunk indicator
12232
 *
12233
 * Parse a Chunk of memory
12234
 *
12235
 * Returns zero if no error, the xmlParserErrors otherwise.
12236
 */
12237
int
12238
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12239
102k
              int terminate) {
12240
102k
    int end_in_lf = 0;
12241
102k
    int remain = 0;
12242
102k
    size_t old_avail = 0;
12243
102k
    size_t avail = 0;
12244
12245
102k
    if (ctxt == NULL)
12246
0
        return(XML_ERR_INTERNAL_ERROR);
12247
102k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12248
718
        return(ctxt->errNo);
12249
102k
    if (ctxt->instate == XML_PARSER_EOF)
12250
1
        return(-1);
12251
102k
    if (ctxt->instate == XML_PARSER_START)
12252
22.1k
        xmlDetectSAX2(ctxt);
12253
102k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12254
102k
        (chunk[size - 1] == '\r')) {
12255
2.49k
  end_in_lf = 1;
12256
2.49k
  size--;
12257
2.49k
    }
12258
12259
160k
xmldecl_done:
12260
12261
160k
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12262
160k
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12263
142k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12264
142k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12265
142k
  int res;
12266
12267
142k
        old_avail = xmlBufUse(ctxt->input->buf->buffer);
12268
        /*
12269
         * Specific handling if we autodetected an encoding, we should not
12270
         * push more than the first line ... which depend on the encoding
12271
         * And only push the rest once the final encoding was detected
12272
         */
12273
142k
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12274
142k
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12275
59.4k
            unsigned int len = 45;
12276
12277
59.4k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12278
59.4k
                               BAD_CAST "UTF-16")) ||
12279
59.4k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12280
1.49k
                               BAD_CAST "UTF16")))
12281
57.9k
                len = 90;
12282
1.49k
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12283
1.49k
                                    BAD_CAST "UCS-4")) ||
12284
1.49k
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12285
1.25k
                                    BAD_CAST "UCS4")))
12286
237
                len = 180;
12287
12288
59.4k
            if (ctxt->input->buf->rawconsumed < len)
12289
10
                len -= ctxt->input->buf->rawconsumed;
12290
12291
            /*
12292
             * Change size for reading the initial declaration only
12293
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12294
             * will blindly copy extra bytes from memory.
12295
             */
12296
59.4k
            if ((unsigned int) size > len) {
12297
58.0k
                remain = size - len;
12298
58.0k
                size = len;
12299
58.0k
            } else {
12300
1.35k
                remain = 0;
12301
1.35k
            }
12302
59.4k
        }
12303
142k
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12304
142k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12305
142k
  if (res < 0) {
12306
91
      ctxt->errNo = XML_PARSER_EOF;
12307
91
      xmlHaltParser(ctxt);
12308
91
      return (XML_PARSER_EOF);
12309
91
  }
12310
#ifdef DEBUG_PUSH
12311
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12312
#endif
12313
12314
142k
    } else if (ctxt->instate != XML_PARSER_EOF) {
12315
17.3k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12316
17.3k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12317
17.3k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12318
17.3k
        (in->raw != NULL)) {
12319
1.36k
    int nbchars;
12320
1.36k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12321
1.36k
    size_t current = ctxt->input->cur - ctxt->input->base;
12322
12323
1.36k
    nbchars = xmlCharEncInput(in, terminate);
12324
1.36k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12325
1.36k
    if (nbchars < 0) {
12326
        /* TODO 2.6.0 */
12327
26
        xmlGenericError(xmlGenericErrorContext,
12328
26
            "xmlParseChunk: encoder error\n");
12329
26
                    xmlHaltParser(ctxt);
12330
26
        return(XML_ERR_INVALID_ENCODING);
12331
26
    }
12332
1.36k
      }
12333
17.3k
  }
12334
17.3k
    }
12335
160k
    if (remain != 0) {
12336
58.0k
        xmlParseTryOrFinish(ctxt, 0);
12337
102k
    } else {
12338
102k
        if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12339
102k
            avail = xmlBufUse(ctxt->input->buf->buffer);
12340
        /*
12341
         * Depending on the current state it may not be such
12342
         * a good idea to try parsing if there is nothing in the chunk
12343
         * which would be worth doing a parser state transition and we
12344
         * need to wait for more data
12345
         */
12346
102k
        if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12347
102k
            (old_avail == 0) || (avail == 0) ||
12348
102k
            (xmlParseCheckTransition(ctxt,
12349
84.3k
                       (const char *)&ctxt->input->base[old_avail],
12350
84.3k
                                     avail - old_avail)))
12351
46.5k
            xmlParseTryOrFinish(ctxt, terminate);
12352
102k
    }
12353
160k
    if (ctxt->instate == XML_PARSER_EOF)
12354
2.17k
        return(ctxt->errNo);
12355
12356
158k
    if ((ctxt->input != NULL) &&
12357
158k
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12358
158k
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12359
158k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12360
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12361
0
        xmlHaltParser(ctxt);
12362
0
    }
12363
158k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12364
12.6k
        return(ctxt->errNo);
12365
12366
145k
    if (remain != 0) {
12367
58.0k
        chunk += size;
12368
58.0k
        size = remain;
12369
58.0k
        remain = 0;
12370
58.0k
        goto xmldecl_done;
12371
58.0k
    }
12372
87.2k
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12373
87.2k
        (ctxt->input->buf != NULL)) {
12374
2.48k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12375
2.48k
           ctxt->input);
12376
2.48k
  size_t current = ctxt->input->cur - ctxt->input->base;
12377
12378
2.48k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12379
12380
2.48k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12381
2.48k
            base, current);
12382
2.48k
    }
12383
87.2k
    if (terminate) {
12384
  /*
12385
   * Check for termination
12386
   */
12387
3.33k
  int cur_avail = 0;
12388
12389
3.33k
  if (ctxt->input != NULL) {
12390
3.33k
      if (ctxt->input->buf == NULL)
12391
0
    cur_avail = ctxt->input->length -
12392
0
          (ctxt->input->cur - ctxt->input->base);
12393
3.33k
      else
12394
3.33k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12395
3.33k
                    (ctxt->input->cur - ctxt->input->base);
12396
3.33k
  }
12397
12398
3.33k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12399
3.33k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12400
2.55k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12401
2.55k
  }
12402
3.33k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12403
40
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12404
40
  }
12405
3.33k
  if (ctxt->instate != XML_PARSER_EOF) {
12406
3.33k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12407
0
    ctxt->sax->endDocument(ctxt->userData);
12408
3.33k
  }
12409
3.33k
  ctxt->instate = XML_PARSER_EOF;
12410
3.33k
    }
12411
87.2k
    if (ctxt->wellFormed == 0)
12412
2.59k
  return((xmlParserErrors) ctxt->errNo);
12413
84.6k
    else
12414
84.6k
        return(0);
12415
87.2k
}
12416
12417
/************************************************************************
12418
 *                  *
12419
 *    I/O front end functions to the parser     *
12420
 *                  *
12421
 ************************************************************************/
12422
12423
/**
12424
 * xmlCreatePushParserCtxt:
12425
 * @sax:  a SAX handler
12426
 * @user_data:  The user data returned on SAX callbacks
12427
 * @chunk:  a pointer to an array of chars
12428
 * @size:  number of chars in the array
12429
 * @filename:  an optional file name or URI
12430
 *
12431
 * Create a parser context for using the XML parser in push mode.
12432
 * If @buffer and @size are non-NULL, the data is used to detect
12433
 * the encoding.  The remaining characters will be parsed so they
12434
 * don't need to be fed in again through xmlParseChunk.
12435
 * To allow content encoding detection, @size should be >= 4
12436
 * The value of @filename is used for fetching external entities
12437
 * and error/warning reports.
12438
 *
12439
 * Returns the new parser context or NULL
12440
 */
12441
12442
xmlParserCtxtPtr
12443
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12444
18.3k
                        const char *chunk, int size, const char *filename) {
12445
18.3k
    xmlParserCtxtPtr ctxt;
12446
18.3k
    xmlParserInputPtr inputStream;
12447
18.3k
    xmlParserInputBufferPtr buf;
12448
18.3k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12449
12450
    /*
12451
     * plug some encoding conversion routines
12452
     */
12453
18.3k
    if ((chunk != NULL) && (size >= 4))
12454
17.9k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12455
12456
18.3k
    buf = xmlAllocParserInputBuffer(enc);
12457
18.3k
    if (buf == NULL) return(NULL);
12458
12459
18.3k
    ctxt = xmlNewParserCtxt();
12460
18.3k
    if (ctxt == NULL) {
12461
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12462
0
  xmlFreeParserInputBuffer(buf);
12463
0
  return(NULL);
12464
0
    }
12465
18.3k
    ctxt->dictNames = 1;
12466
18.3k
    if (sax != NULL) {
12467
18.3k
#ifdef LIBXML_SAX1_ENABLED
12468
18.3k
  if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12469
18.3k
#endif /* LIBXML_SAX1_ENABLED */
12470
18.3k
      xmlFree(ctxt->sax);
12471
18.3k
  ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12472
18.3k
  if (ctxt->sax == NULL) {
12473
0
      xmlErrMemory(ctxt, NULL);
12474
0
      xmlFreeParserInputBuffer(buf);
12475
0
      xmlFreeParserCtxt(ctxt);
12476
0
      return(NULL);
12477
0
  }
12478
18.3k
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12479
18.3k
  if (sax->initialized == XML_SAX2_MAGIC)
12480
18.3k
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12481
0
  else
12482
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12483
18.3k
  if (user_data != NULL)
12484
18.3k
      ctxt->userData = user_data;
12485
18.3k
    }
12486
18.3k
    if (filename == NULL) {
12487
18.3k
  ctxt->directory = NULL;
12488
18.3k
    } else {
12489
0
        ctxt->directory = xmlParserGetDirectory(filename);
12490
0
    }
12491
12492
18.3k
    inputStream = xmlNewInputStream(ctxt);
12493
18.3k
    if (inputStream == NULL) {
12494
0
  xmlFreeParserCtxt(ctxt);
12495
0
  xmlFreeParserInputBuffer(buf);
12496
0
  return(NULL);
12497
0
    }
12498
12499
18.3k
    if (filename == NULL)
12500
18.3k
  inputStream->filename = NULL;
12501
0
    else {
12502
0
  inputStream->filename = (char *)
12503
0
      xmlCanonicPath((const xmlChar *) filename);
12504
0
  if (inputStream->filename == NULL) {
12505
0
      xmlFreeParserCtxt(ctxt);
12506
0
      xmlFreeParserInputBuffer(buf);
12507
0
      return(NULL);
12508
0
  }
12509
0
    }
12510
18.3k
    inputStream->buf = buf;
12511
18.3k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12512
18.3k
    inputPush(ctxt, inputStream);
12513
12514
    /*
12515
     * If the caller didn't provide an initial 'chunk' for determining
12516
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12517
     * that it can be automatically determined later
12518
     */
12519
18.3k
    if ((size == 0) || (chunk == NULL)) {
12520
0
  ctxt->charset = XML_CHAR_ENCODING_NONE;
12521
18.3k
    } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12522
18.3k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12523
18.3k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12524
12525
18.3k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12526
12527
18.3k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12528
#ifdef DEBUG_PUSH
12529
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12530
#endif
12531
18.3k
    }
12532
12533
18.3k
    if (enc != XML_CHAR_ENCODING_NONE) {
12534
4.66k
        xmlSwitchEncoding(ctxt, enc);
12535
4.66k
    }
12536
12537
18.3k
    return(ctxt);
12538
18.3k
}
12539
#endif /* LIBXML_PUSH_ENABLED */
12540
12541
/**
12542
 * xmlHaltParser:
12543
 * @ctxt:  an XML parser context
12544
 *
12545
 * Blocks further parser processing don't override error
12546
 * for internal use
12547
 */
12548
static void
12549
2.29k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12550
2.29k
    if (ctxt == NULL)
12551
0
        return;
12552
2.29k
    ctxt->instate = XML_PARSER_EOF;
12553
2.29k
    ctxt->disableSAX = 1;
12554
2.29k
    while (ctxt->inputNr > 1)
12555
0
        xmlFreeInputStream(inputPop(ctxt));
12556
2.29k
    if (ctxt->input != NULL) {
12557
        /*
12558
   * in case there was a specific allocation deallocate before
12559
   * overriding base
12560
   */
12561
2.29k
        if (ctxt->input->free != NULL) {
12562
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12563
0
      ctxt->input->free = NULL;
12564
0
  }
12565
2.29k
        if (ctxt->input->buf != NULL) {
12566
2.29k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12567
2.29k
            ctxt->input->buf = NULL;
12568
2.29k
        }
12569
2.29k
  ctxt->input->cur = BAD_CAST"";
12570
2.29k
        ctxt->input->length = 0;
12571
2.29k
  ctxt->input->base = ctxt->input->cur;
12572
2.29k
        ctxt->input->end = ctxt->input->cur;
12573
2.29k
    }
12574
2.29k
}
12575
12576
/**
12577
 * xmlStopParser:
12578
 * @ctxt:  an XML parser context
12579
 *
12580
 * Blocks further parser processing
12581
 */
12582
void
12583
3
xmlStopParser(xmlParserCtxtPtr ctxt) {
12584
3
    if (ctxt == NULL)
12585
0
        return;
12586
3
    xmlHaltParser(ctxt);
12587
3
    ctxt->errNo = XML_ERR_USER_STOP;
12588
3
}
12589
12590
/**
12591
 * xmlCreateIOParserCtxt:
12592
 * @sax:  a SAX handler
12593
 * @user_data:  The user data returned on SAX callbacks
12594
 * @ioread:  an I/O read function
12595
 * @ioclose:  an I/O close function
12596
 * @ioctx:  an I/O handler
12597
 * @enc:  the charset encoding if known
12598
 *
12599
 * Create a parser context for using the XML parser with an existing
12600
 * I/O stream
12601
 *
12602
 * Returns the new parser context or NULL
12603
 */
12604
xmlParserCtxtPtr
12605
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12606
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12607
0
  void *ioctx, xmlCharEncoding enc) {
12608
0
    xmlParserCtxtPtr ctxt;
12609
0
    xmlParserInputPtr inputStream;
12610
0
    xmlParserInputBufferPtr buf;
12611
12612
0
    if (ioread == NULL) return(NULL);
12613
12614
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12615
0
    if (buf == NULL) {
12616
0
        if (ioclose != NULL)
12617
0
            ioclose(ioctx);
12618
0
        return (NULL);
12619
0
    }
12620
12621
0
    ctxt = xmlNewParserCtxt();
12622
0
    if (ctxt == NULL) {
12623
0
  xmlFreeParserInputBuffer(buf);
12624
0
  return(NULL);
12625
0
    }
12626
0
    if (sax != NULL) {
12627
0
#ifdef LIBXML_SAX1_ENABLED
12628
0
  if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
12629
0
#endif /* LIBXML_SAX1_ENABLED */
12630
0
      xmlFree(ctxt->sax);
12631
0
  ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12632
0
  if (ctxt->sax == NULL) {
12633
0
      xmlFreeParserInputBuffer(buf);
12634
0
      xmlErrMemory(ctxt, NULL);
12635
0
      xmlFreeParserCtxt(ctxt);
12636
0
      return(NULL);
12637
0
  }
12638
0
  memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12639
0
  if (sax->initialized == XML_SAX2_MAGIC)
12640
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12641
0
  else
12642
0
      memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
12643
0
  if (user_data != NULL)
12644
0
      ctxt->userData = user_data;
12645
0
    }
12646
12647
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12648
0
    if (inputStream == NULL) {
12649
0
  xmlFreeParserCtxt(ctxt);
12650
0
  return(NULL);
12651
0
    }
12652
0
    inputPush(ctxt, inputStream);
12653
12654
0
    return(ctxt);
12655
0
}
12656
12657
#ifdef LIBXML_VALID_ENABLED
12658
/************************************************************************
12659
 *                  *
12660
 *    Front ends when parsing a DTD       *
12661
 *                  *
12662
 ************************************************************************/
12663
12664
/**
12665
 * xmlIOParseDTD:
12666
 * @sax:  the SAX handler block or NULL
12667
 * @input:  an Input Buffer
12668
 * @enc:  the charset encoding if known
12669
 *
12670
 * Load and parse a DTD
12671
 *
12672
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12673
 * @input will be freed by the function in any case.
12674
 */
12675
12676
xmlDtdPtr
12677
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12678
0
        xmlCharEncoding enc) {
12679
0
    xmlDtdPtr ret = NULL;
12680
0
    xmlParserCtxtPtr ctxt;
12681
0
    xmlParserInputPtr pinput = NULL;
12682
0
    xmlChar start[4];
12683
12684
0
    if (input == NULL)
12685
0
  return(NULL);
12686
12687
0
    ctxt = xmlNewParserCtxt();
12688
0
    if (ctxt == NULL) {
12689
0
        xmlFreeParserInputBuffer(input);
12690
0
  return(NULL);
12691
0
    }
12692
12693
    /* We are loading a DTD */
12694
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12695
12696
    /*
12697
     * Set-up the SAX context
12698
     */
12699
0
    if (sax != NULL) {
12700
0
  if (ctxt->sax != NULL)
12701
0
      xmlFree(ctxt->sax);
12702
0
        ctxt->sax = sax;
12703
0
        ctxt->userData = ctxt;
12704
0
    }
12705
0
    xmlDetectSAX2(ctxt);
12706
12707
    /*
12708
     * generate a parser input from the I/O handler
12709
     */
12710
12711
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12712
0
    if (pinput == NULL) {
12713
0
        if (sax != NULL) ctxt->sax = NULL;
12714
0
        xmlFreeParserInputBuffer(input);
12715
0
  xmlFreeParserCtxt(ctxt);
12716
0
  return(NULL);
12717
0
    }
12718
12719
    /*
12720
     * plug some encoding conversion routines here.
12721
     */
12722
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12723
0
        if (sax != NULL) ctxt->sax = NULL;
12724
0
  xmlFreeParserCtxt(ctxt);
12725
0
  return(NULL);
12726
0
    }
12727
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12728
0
        xmlSwitchEncoding(ctxt, enc);
12729
0
    }
12730
12731
0
    pinput->filename = NULL;
12732
0
    pinput->line = 1;
12733
0
    pinput->col = 1;
12734
0
    pinput->base = ctxt->input->cur;
12735
0
    pinput->cur = ctxt->input->cur;
12736
0
    pinput->free = NULL;
12737
12738
    /*
12739
     * let's parse that entity knowing it's an external subset.
12740
     */
12741
0
    ctxt->inSubset = 2;
12742
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12743
0
    if (ctxt->myDoc == NULL) {
12744
0
  xmlErrMemory(ctxt, "New Doc failed");
12745
0
  return(NULL);
12746
0
    }
12747
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12748
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12749
0
                                 BAD_CAST "none", BAD_CAST "none");
12750
12751
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12752
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12753
  /*
12754
   * Get the 4 first bytes and decode the charset
12755
   * if enc != XML_CHAR_ENCODING_NONE
12756
   * plug some encoding conversion routines.
12757
   */
12758
0
  start[0] = RAW;
12759
0
  start[1] = NXT(1);
12760
0
  start[2] = NXT(2);
12761
0
  start[3] = NXT(3);
12762
0
  enc = xmlDetectCharEncoding(start, 4);
12763
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12764
0
      xmlSwitchEncoding(ctxt, enc);
12765
0
  }
12766
0
    }
12767
12768
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12769
12770
0
    if (ctxt->myDoc != NULL) {
12771
0
  if (ctxt->wellFormed) {
12772
0
      ret = ctxt->myDoc->extSubset;
12773
0
      ctxt->myDoc->extSubset = NULL;
12774
0
      if (ret != NULL) {
12775
0
    xmlNodePtr tmp;
12776
12777
0
    ret->doc = NULL;
12778
0
    tmp = ret->children;
12779
0
    while (tmp != NULL) {
12780
0
        tmp->doc = NULL;
12781
0
        tmp = tmp->next;
12782
0
    }
12783
0
      }
12784
0
  } else {
12785
0
      ret = NULL;
12786
0
  }
12787
0
        xmlFreeDoc(ctxt->myDoc);
12788
0
        ctxt->myDoc = NULL;
12789
0
    }
12790
0
    if (sax != NULL) ctxt->sax = NULL;
12791
0
    xmlFreeParserCtxt(ctxt);
12792
12793
0
    return(ret);
12794
0
}
12795
12796
/**
12797
 * xmlSAXParseDTD:
12798
 * @sax:  the SAX handler block
12799
 * @ExternalID:  a NAME* containing the External ID of the DTD
12800
 * @SystemID:  a NAME* containing the URL to the DTD
12801
 *
12802
 * Load and parse an external subset.
12803
 *
12804
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12805
 */
12806
12807
xmlDtdPtr
12808
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12809
0
                          const xmlChar *SystemID) {
12810
0
    xmlDtdPtr ret = NULL;
12811
0
    xmlParserCtxtPtr ctxt;
12812
0
    xmlParserInputPtr input = NULL;
12813
0
    xmlCharEncoding enc;
12814
0
    xmlChar* systemIdCanonic;
12815
12816
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12817
12818
0
    ctxt = xmlNewParserCtxt();
12819
0
    if (ctxt == NULL) {
12820
0
  return(NULL);
12821
0
    }
12822
12823
    /* We are loading a DTD */
12824
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12825
12826
    /*
12827
     * Set-up the SAX context
12828
     */
12829
0
    if (sax != NULL) {
12830
0
  if (ctxt->sax != NULL)
12831
0
      xmlFree(ctxt->sax);
12832
0
        ctxt->sax = sax;
12833
0
        ctxt->userData = ctxt;
12834
0
    }
12835
12836
    /*
12837
     * Canonicalise the system ID
12838
     */
12839
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12840
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12841
0
  xmlFreeParserCtxt(ctxt);
12842
0
  return(NULL);
12843
0
    }
12844
12845
    /*
12846
     * Ask the Entity resolver to load the damn thing
12847
     */
12848
12849
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12850
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12851
0
                                   systemIdCanonic);
12852
0
    if (input == NULL) {
12853
0
        if (sax != NULL) ctxt->sax = NULL;
12854
0
  xmlFreeParserCtxt(ctxt);
12855
0
  if (systemIdCanonic != NULL)
12856
0
      xmlFree(systemIdCanonic);
12857
0
  return(NULL);
12858
0
    }
12859
12860
    /*
12861
     * plug some encoding conversion routines here.
12862
     */
12863
0
    if (xmlPushInput(ctxt, input) < 0) {
12864
0
        if (sax != NULL) ctxt->sax = NULL;
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  if (systemIdCanonic != NULL)
12867
0
      xmlFree(systemIdCanonic);
12868
0
  return(NULL);
12869
0
    }
12870
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12871
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12872
0
  xmlSwitchEncoding(ctxt, enc);
12873
0
    }
12874
12875
0
    if (input->filename == NULL)
12876
0
  input->filename = (char *) systemIdCanonic;
12877
0
    else
12878
0
  xmlFree(systemIdCanonic);
12879
0
    input->line = 1;
12880
0
    input->col = 1;
12881
0
    input->base = ctxt->input->cur;
12882
0
    input->cur = ctxt->input->cur;
12883
0
    input->free = NULL;
12884
12885
    /*
12886
     * let's parse that entity knowing it's an external subset.
12887
     */
12888
0
    ctxt->inSubset = 2;
12889
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12890
0
    if (ctxt->myDoc == NULL) {
12891
0
  xmlErrMemory(ctxt, "New Doc failed");
12892
0
        if (sax != NULL) ctxt->sax = NULL;
12893
0
  xmlFreeParserCtxt(ctxt);
12894
0
  return(NULL);
12895
0
    }
12896
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12897
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12898
0
                                 ExternalID, SystemID);
12899
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12900
12901
0
    if (ctxt->myDoc != NULL) {
12902
0
  if (ctxt->wellFormed) {
12903
0
      ret = ctxt->myDoc->extSubset;
12904
0
      ctxt->myDoc->extSubset = NULL;
12905
0
      if (ret != NULL) {
12906
0
    xmlNodePtr tmp;
12907
12908
0
    ret->doc = NULL;
12909
0
    tmp = ret->children;
12910
0
    while (tmp != NULL) {
12911
0
        tmp->doc = NULL;
12912
0
        tmp = tmp->next;
12913
0
    }
12914
0
      }
12915
0
  } else {
12916
0
      ret = NULL;
12917
0
  }
12918
0
        xmlFreeDoc(ctxt->myDoc);
12919
0
        ctxt->myDoc = NULL;
12920
0
    }
12921
0
    if (sax != NULL) ctxt->sax = NULL;
12922
0
    xmlFreeParserCtxt(ctxt);
12923
12924
0
    return(ret);
12925
0
}
12926
12927
12928
/**
12929
 * xmlParseDTD:
12930
 * @ExternalID:  a NAME* containing the External ID of the DTD
12931
 * @SystemID:  a NAME* containing the URL to the DTD
12932
 *
12933
 * Load and parse an external subset.
12934
 *
12935
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12936
 */
12937
12938
xmlDtdPtr
12939
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12940
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12941
0
}
12942
#endif /* LIBXML_VALID_ENABLED */
12943
12944
/************************************************************************
12945
 *                  *
12946
 *    Front ends when parsing an Entity     *
12947
 *                  *
12948
 ************************************************************************/
12949
12950
/**
12951
 * xmlParseCtxtExternalEntity:
12952
 * @ctx:  the existing parsing context
12953
 * @URL:  the URL for the entity to load
12954
 * @ID:  the System ID for the entity to load
12955
 * @lst:  the return value for the set of parsed nodes
12956
 *
12957
 * Parse an external general entity within an existing parsing context
12958
 * An external general parsed entity is well-formed if it matches the
12959
 * production labeled extParsedEnt.
12960
 *
12961
 * [78] extParsedEnt ::= TextDecl? content
12962
 *
12963
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12964
 *    the parser error code otherwise
12965
 */
12966
12967
int
12968
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12969
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12970
0
    void *userData;
12971
12972
0
    if (ctx == NULL) return(-1);
12973
    /*
12974
     * If the user provided their own SAX callbacks, then reuse the
12975
     * userData callback field, otherwise the expected setup in a
12976
     * DOM builder is to have userData == ctxt
12977
     */
12978
0
    if (ctx->userData == ctx)
12979
0
        userData = NULL;
12980
0
    else
12981
0
        userData = ctx->userData;
12982
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12983
0
                                         userData, ctx->depth + 1,
12984
0
                                         URL, ID, lst);
12985
0
}
12986
12987
/**
12988
 * xmlParseExternalEntityPrivate:
12989
 * @doc:  the document the chunk pertains to
12990
 * @oldctxt:  the previous parser context if available
12991
 * @sax:  the SAX handler block (possibly NULL)
12992
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12993
 * @depth:  Used for loop detection, use 0
12994
 * @URL:  the URL for the entity to load
12995
 * @ID:  the System ID for the entity to load
12996
 * @list:  the return value for the set of parsed nodes
12997
 *
12998
 * Private version of xmlParseExternalEntity()
12999
 *
13000
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13001
 *    the parser error code otherwise
13002
 */
13003
13004
static xmlParserErrors
13005
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13006
                xmlSAXHandlerPtr sax,
13007
          void *user_data, int depth, const xmlChar *URL,
13008
0
          const xmlChar *ID, xmlNodePtr *list) {
13009
0
    xmlParserCtxtPtr ctxt;
13010
0
    xmlDocPtr newDoc;
13011
0
    xmlNodePtr newRoot;
13012
0
    xmlSAXHandlerPtr oldsax = NULL;
13013
0
    xmlParserErrors ret = XML_ERR_OK;
13014
0
    xmlChar start[4];
13015
0
    xmlCharEncoding enc;
13016
13017
0
    if (((depth > 40) &&
13018
0
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13019
0
  (depth > 1024)) {
13020
0
  return(XML_ERR_ENTITY_LOOP);
13021
0
    }
13022
13023
0
    if (list != NULL)
13024
0
        *list = NULL;
13025
0
    if ((URL == NULL) && (ID == NULL))
13026
0
  return(XML_ERR_INTERNAL_ERROR);
13027
0
    if (doc == NULL)
13028
0
  return(XML_ERR_INTERNAL_ERROR);
13029
13030
13031
0
    ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
13032
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13033
0
    ctxt->userData = ctxt;
13034
0
    if (sax != NULL) {
13035
0
  oldsax = ctxt->sax;
13036
0
        ctxt->sax = sax;
13037
0
  if (user_data != NULL)
13038
0
      ctxt->userData = user_data;
13039
0
    }
13040
0
    xmlDetectSAX2(ctxt);
13041
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13042
0
    if (newDoc == NULL) {
13043
0
  xmlFreeParserCtxt(ctxt);
13044
0
  return(XML_ERR_INTERNAL_ERROR);
13045
0
    }
13046
0
    newDoc->properties = XML_DOC_INTERNAL;
13047
0
    if (doc) {
13048
0
        newDoc->intSubset = doc->intSubset;
13049
0
        newDoc->extSubset = doc->extSubset;
13050
0
        if (doc->dict) {
13051
0
            newDoc->dict = doc->dict;
13052
0
            xmlDictReference(newDoc->dict);
13053
0
        }
13054
0
        if (doc->URL != NULL) {
13055
0
            newDoc->URL = xmlStrdup(doc->URL);
13056
0
        }
13057
0
    }
13058
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13059
0
    if (newRoot == NULL) {
13060
0
  if (sax != NULL)
13061
0
      ctxt->sax = oldsax;
13062
0
  xmlFreeParserCtxt(ctxt);
13063
0
  newDoc->intSubset = NULL;
13064
0
  newDoc->extSubset = NULL;
13065
0
        xmlFreeDoc(newDoc);
13066
0
  return(XML_ERR_INTERNAL_ERROR);
13067
0
    }
13068
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13069
0
    nodePush(ctxt, newDoc->children);
13070
0
    if (doc == NULL) {
13071
0
        ctxt->myDoc = newDoc;
13072
0
    } else {
13073
0
        ctxt->myDoc = doc;
13074
0
        newRoot->doc = doc;
13075
0
    }
13076
13077
    /*
13078
     * Get the 4 first bytes and decode the charset
13079
     * if enc != XML_CHAR_ENCODING_NONE
13080
     * plug some encoding conversion routines.
13081
     */
13082
0
    GROW;
13083
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13084
0
  start[0] = RAW;
13085
0
  start[1] = NXT(1);
13086
0
  start[2] = NXT(2);
13087
0
  start[3] = NXT(3);
13088
0
  enc = xmlDetectCharEncoding(start, 4);
13089
0
  if (enc != XML_CHAR_ENCODING_NONE) {
13090
0
      xmlSwitchEncoding(ctxt, enc);
13091
0
  }
13092
0
    }
13093
13094
    /*
13095
     * Parse a possible text declaration first
13096
     */
13097
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
13098
0
  xmlParseTextDecl(ctxt);
13099
        /*
13100
         * An XML-1.0 document can't reference an entity not XML-1.0
13101
         */
13102
0
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
13103
0
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
13104
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
13105
0
                           "Version mismatch between document and entity\n");
13106
0
        }
13107
0
    }
13108
13109
0
    ctxt->instate = XML_PARSER_CONTENT;
13110
0
    ctxt->depth = depth;
13111
0
    if (oldctxt != NULL) {
13112
0
  ctxt->_private = oldctxt->_private;
13113
0
  ctxt->loadsubset = oldctxt->loadsubset;
13114
0
  ctxt->validate = oldctxt->validate;
13115
0
  ctxt->valid = oldctxt->valid;
13116
0
  ctxt->replaceEntities = oldctxt->replaceEntities;
13117
0
        if (oldctxt->validate) {
13118
0
            ctxt->vctxt.error = oldctxt->vctxt.error;
13119
0
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
13120
0
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
13121
0
        }
13122
0
  ctxt->external = oldctxt->external;
13123
0
        if (ctxt->dict) xmlDictFree(ctxt->dict);
13124
0
        ctxt->dict = oldctxt->dict;
13125
0
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13126
0
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13127
0
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13128
0
        ctxt->dictNames = oldctxt->dictNames;
13129
0
        ctxt->attsDefault = oldctxt->attsDefault;
13130
0
        ctxt->attsSpecial = oldctxt->attsSpecial;
13131
0
        ctxt->linenumbers = oldctxt->linenumbers;
13132
0
  ctxt->record_info = oldctxt->record_info;
13133
0
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13134
0
  ctxt->node_seq.length = oldctxt->node_seq.length;
13135
0
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
13136
0
    } else {
13137
  /*
13138
   * Doing validity checking on chunk without context
13139
   * doesn't make sense
13140
   */
13141
0
  ctxt->_private = NULL;
13142
0
  ctxt->validate = 0;
13143
0
  ctxt->external = 2;
13144
0
  ctxt->loadsubset = 0;
13145
0
    }
13146
13147
0
    xmlParseContent(ctxt);
13148
13149
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13150
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13151
0
    } else if (RAW != 0) {
13152
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13153
0
    }
13154
0
    if (ctxt->node != newDoc->children) {
13155
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13156
0
    }
13157
13158
0
    if (!ctxt->wellFormed) {
13159
0
        if (ctxt->errNo == 0)
13160
0
      ret = XML_ERR_INTERNAL_ERROR;
13161
0
  else
13162
0
      ret = (xmlParserErrors)ctxt->errNo;
13163
0
    } else {
13164
0
  if (list != NULL) {
13165
0
      xmlNodePtr cur;
13166
13167
      /*
13168
       * Return the newly created nodeset after unlinking it from
13169
       * they pseudo parent.
13170
       */
13171
0
      cur = newDoc->children->children;
13172
0
      *list = cur;
13173
0
      while (cur != NULL) {
13174
0
    cur->parent = NULL;
13175
0
    cur = cur->next;
13176
0
      }
13177
0
            newDoc->children->children = NULL;
13178
0
  }
13179
0
  ret = XML_ERR_OK;
13180
0
    }
13181
13182
    /*
13183
     * Record in the parent context the number of entities replacement
13184
     * done when parsing that reference.
13185
     */
13186
0
    if (oldctxt != NULL)
13187
0
        oldctxt->nbentities += ctxt->nbentities;
13188
13189
    /*
13190
     * Also record the size of the entity parsed
13191
     */
13192
0
    if (ctxt->input != NULL && oldctxt != NULL) {
13193
0
  oldctxt->sizeentities += ctxt->input->consumed;
13194
0
  oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13195
0
    }
13196
    /*
13197
     * And record the last error if any
13198
     */
13199
0
    if ((oldctxt != NULL) && (ctxt->lastError.code != XML_ERR_OK))
13200
0
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13201
13202
0
    if (sax != NULL)
13203
0
  ctxt->sax = oldsax;
13204
0
    if (oldctxt != NULL) {
13205
0
        ctxt->dict = NULL;
13206
0
        ctxt->attsDefault = NULL;
13207
0
        ctxt->attsSpecial = NULL;
13208
0
        oldctxt->validate = ctxt->validate;
13209
0
        oldctxt->valid = ctxt->valid;
13210
0
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13211
0
        oldctxt->node_seq.length = ctxt->node_seq.length;
13212
0
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13213
0
    }
13214
0
    ctxt->node_seq.maximum = 0;
13215
0
    ctxt->node_seq.length = 0;
13216
0
    ctxt->node_seq.buffer = NULL;
13217
0
    xmlFreeParserCtxt(ctxt);
13218
0
    newDoc->intSubset = NULL;
13219
0
    newDoc->extSubset = NULL;
13220
0
    xmlFreeDoc(newDoc);
13221
13222
0
    return(ret);
13223
0
}
13224
13225
#ifdef LIBXML_SAX1_ENABLED
13226
/**
13227
 * xmlParseExternalEntity:
13228
 * @doc:  the document the chunk pertains to
13229
 * @sax:  the SAX handler block (possibly NULL)
13230
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13231
 * @depth:  Used for loop detection, use 0
13232
 * @URL:  the URL for the entity to load
13233
 * @ID:  the System ID for the entity to load
13234
 * @lst:  the return value for the set of parsed nodes
13235
 *
13236
 * Parse an external general entity
13237
 * An external general parsed entity is well-formed if it matches the
13238
 * production labeled extParsedEnt.
13239
 *
13240
 * [78] extParsedEnt ::= TextDecl? content
13241
 *
13242
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13243
 *    the parser error code otherwise
13244
 */
13245
13246
int
13247
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13248
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13249
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13250
0
                           ID, lst));
13251
0
}
13252
13253
/**
13254
 * xmlParseBalancedChunkMemory:
13255
 * @doc:  the document the chunk pertains to (must not be NULL)
13256
 * @sax:  the SAX handler block (possibly NULL)
13257
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13258
 * @depth:  Used for loop detection, use 0
13259
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13260
 * @lst:  the return value for the set of parsed nodes
13261
 *
13262
 * Parse a well-balanced chunk of an XML document
13263
 * called by the parser
13264
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13265
 * the content production in the XML grammar:
13266
 *
13267
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13268
 *
13269
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13270
 *    the parser error code otherwise
13271
 */
13272
13273
int
13274
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13275
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13276
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13277
0
                                                depth, string, lst, 0 );
13278
0
}
13279
#endif /* LIBXML_SAX1_ENABLED */
13280
13281
/**
13282
 * xmlParseBalancedChunkMemoryInternal:
13283
 * @oldctxt:  the existing parsing context
13284
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13285
 * @user_data:  the user data field for the parser context
13286
 * @lst:  the return value for the set of parsed nodes
13287
 *
13288
 *
13289
 * Parse a well-balanced chunk of an XML document
13290
 * called by the parser
13291
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13292
 * the content production in the XML grammar:
13293
 *
13294
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13295
 *
13296
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13297
 * error code otherwise
13298
 *
13299
 * In case recover is set to 1, the nodelist will not be empty even if
13300
 * the parsed chunk is not well balanced.
13301
 */
13302
static xmlParserErrors
13303
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13304
0
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13305
0
    xmlParserCtxtPtr ctxt;
13306
0
    xmlDocPtr newDoc = NULL;
13307
0
    xmlNodePtr newRoot;
13308
0
    xmlSAXHandlerPtr oldsax = NULL;
13309
0
    xmlNodePtr content = NULL;
13310
0
    xmlNodePtr last = NULL;
13311
0
    int size;
13312
0
    xmlParserErrors ret = XML_ERR_OK;
13313
0
#ifdef SAX2
13314
0
    int i;
13315
0
#endif
13316
13317
0
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13318
0
        (oldctxt->depth >  1024)) {
13319
0
  return(XML_ERR_ENTITY_LOOP);
13320
0
    }
13321
13322
13323
0
    if (lst != NULL)
13324
0
        *lst = NULL;
13325
0
    if (string == NULL)
13326
0
        return(XML_ERR_INTERNAL_ERROR);
13327
13328
0
    size = xmlStrlen(string);
13329
13330
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13331
0
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13332
0
    if (user_data != NULL)
13333
0
  ctxt->userData = user_data;
13334
0
    else
13335
0
  ctxt->userData = ctxt;
13336
0
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13337
0
    ctxt->dict = oldctxt->dict;
13338
0
    ctxt->input_id = oldctxt->input_id + 1;
13339
0
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13340
0
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13341
0
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13342
13343
0
#ifdef SAX2
13344
    /* propagate namespaces down the entity */
13345
0
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13346
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13347
0
    }
13348
0
#endif
13349
13350
0
    oldsax = ctxt->sax;
13351
0
    ctxt->sax = oldctxt->sax;
13352
0
    xmlDetectSAX2(ctxt);
13353
0
    ctxt->replaceEntities = oldctxt->replaceEntities;
13354
0
    ctxt->options = oldctxt->options;
13355
13356
0
    ctxt->_private = oldctxt->_private;
13357
0
    if (oldctxt->myDoc == NULL) {
13358
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13359
0
  if (newDoc == NULL) {
13360
0
      ctxt->sax = oldsax;
13361
0
      ctxt->dict = NULL;
13362
0
      xmlFreeParserCtxt(ctxt);
13363
0
      return(XML_ERR_INTERNAL_ERROR);
13364
0
  }
13365
0
  newDoc->properties = XML_DOC_INTERNAL;
13366
0
  newDoc->dict = ctxt->dict;
13367
0
  xmlDictReference(newDoc->dict);
13368
0
  ctxt->myDoc = newDoc;
13369
0
    } else {
13370
0
  ctxt->myDoc = oldctxt->myDoc;
13371
0
        content = ctxt->myDoc->children;
13372
0
  last = ctxt->myDoc->last;
13373
0
    }
13374
0
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13375
0
    if (newRoot == NULL) {
13376
0
  ctxt->sax = oldsax;
13377
0
  ctxt->dict = NULL;
13378
0
  xmlFreeParserCtxt(ctxt);
13379
0
  if (newDoc != NULL) {
13380
0
      xmlFreeDoc(newDoc);
13381
0
  }
13382
0
  return(XML_ERR_INTERNAL_ERROR);
13383
0
    }
13384
0
    ctxt->myDoc->children = NULL;
13385
0
    ctxt->myDoc->last = NULL;
13386
0
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13387
0
    nodePush(ctxt, ctxt->myDoc->children);
13388
0
    ctxt->instate = XML_PARSER_CONTENT;
13389
0
    ctxt->depth = oldctxt->depth + 1;
13390
13391
0
    ctxt->validate = 0;
13392
0
    ctxt->loadsubset = oldctxt->loadsubset;
13393
0
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13394
  /*
13395
   * ID/IDREF registration will be done in xmlValidateElement below
13396
   */
13397
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13398
0
    }
13399
0
    ctxt->dictNames = oldctxt->dictNames;
13400
0
    ctxt->attsDefault = oldctxt->attsDefault;
13401
0
    ctxt->attsSpecial = oldctxt->attsSpecial;
13402
13403
0
    xmlParseContent(ctxt);
13404
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13405
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13406
0
    } else if (RAW != 0) {
13407
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13408
0
    }
13409
0
    if (ctxt->node != ctxt->myDoc->children) {
13410
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13411
0
    }
13412
13413
0
    if (!ctxt->wellFormed) {
13414
0
        if (ctxt->errNo == 0)
13415
0
      ret = XML_ERR_INTERNAL_ERROR;
13416
0
  else
13417
0
      ret = (xmlParserErrors)ctxt->errNo;
13418
0
    } else {
13419
0
      ret = XML_ERR_OK;
13420
0
    }
13421
13422
0
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13423
0
  xmlNodePtr cur;
13424
13425
  /*
13426
   * Return the newly created nodeset after unlinking it from
13427
   * they pseudo parent.
13428
   */
13429
0
  cur = ctxt->myDoc->children->children;
13430
0
  *lst = cur;
13431
0
  while (cur != NULL) {
13432
0
#ifdef LIBXML_VALID_ENABLED
13433
0
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13434
0
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13435
0
    (cur->type == XML_ELEMENT_NODE)) {
13436
0
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13437
0
      oldctxt->myDoc, cur);
13438
0
      }
13439
0
#endif /* LIBXML_VALID_ENABLED */
13440
0
      cur->parent = NULL;
13441
0
      cur = cur->next;
13442
0
  }
13443
0
  ctxt->myDoc->children->children = NULL;
13444
0
    }
13445
0
    if (ctxt->myDoc != NULL) {
13446
0
  xmlFreeNode(ctxt->myDoc->children);
13447
0
        ctxt->myDoc->children = content;
13448
0
        ctxt->myDoc->last = last;
13449
0
    }
13450
13451
    /*
13452
     * Record in the parent context the number of entities replacement
13453
     * done when parsing that reference.
13454
     */
13455
0
    if (oldctxt != NULL)
13456
0
        oldctxt->nbentities += ctxt->nbentities;
13457
13458
    /*
13459
     * Also record the last error if any
13460
     */
13461
0
    if (ctxt->lastError.code != XML_ERR_OK)
13462
0
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13463
13464
0
    ctxt->sax = oldsax;
13465
0
    ctxt->dict = NULL;
13466
0
    ctxt->attsDefault = NULL;
13467
0
    ctxt->attsSpecial = NULL;
13468
0
    xmlFreeParserCtxt(ctxt);
13469
0
    if (newDoc != NULL) {
13470
0
  xmlFreeDoc(newDoc);
13471
0
    }
13472
13473
0
    return(ret);
13474
0
}
13475
13476
/**
13477
 * xmlParseInNodeContext:
13478
 * @node:  the context node
13479
 * @data:  the input string
13480
 * @datalen:  the input string length in bytes
13481
 * @options:  a combination of xmlParserOption
13482
 * @lst:  the return value for the set of parsed nodes
13483
 *
13484
 * Parse a well-balanced chunk of an XML document
13485
 * within the context (DTD, namespaces, etc ...) of the given node.
13486
 *
13487
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13488
 * the content production in the XML grammar:
13489
 *
13490
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13491
 *
13492
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13493
 * error code otherwise
13494
 */
13495
xmlParserErrors
13496
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13497
0
                      int options, xmlNodePtr *lst) {
13498
0
#ifdef SAX2
13499
0
    xmlParserCtxtPtr ctxt;
13500
0
    xmlDocPtr doc = NULL;
13501
0
    xmlNodePtr fake, cur;
13502
0
    int nsnr = 0;
13503
13504
0
    xmlParserErrors ret = XML_ERR_OK;
13505
13506
    /*
13507
     * check all input parameters, grab the document
13508
     */
13509
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13510
0
        return(XML_ERR_INTERNAL_ERROR);
13511
0
    switch (node->type) {
13512
0
        case XML_ELEMENT_NODE:
13513
0
        case XML_ATTRIBUTE_NODE:
13514
0
        case XML_TEXT_NODE:
13515
0
        case XML_CDATA_SECTION_NODE:
13516
0
        case XML_ENTITY_REF_NODE:
13517
0
        case XML_PI_NODE:
13518
0
        case XML_COMMENT_NODE:
13519
0
        case XML_DOCUMENT_NODE:
13520
0
        case XML_HTML_DOCUMENT_NODE:
13521
0
      break;
13522
0
  default:
13523
0
      return(XML_ERR_INTERNAL_ERROR);
13524
13525
0
    }
13526
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13527
0
           (node->type != XML_DOCUMENT_NODE) &&
13528
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13529
0
  node = node->parent;
13530
0
    if (node == NULL)
13531
0
  return(XML_ERR_INTERNAL_ERROR);
13532
0
    if (node->type == XML_ELEMENT_NODE)
13533
0
  doc = node->doc;
13534
0
    else
13535
0
        doc = (xmlDocPtr) node;
13536
0
    if (doc == NULL)
13537
0
  return(XML_ERR_INTERNAL_ERROR);
13538
13539
    /*
13540
     * allocate a context and set-up everything not related to the
13541
     * node position in the tree
13542
     */
13543
0
    if (doc->type == XML_DOCUMENT_NODE)
13544
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13545
0
#ifdef LIBXML_HTML_ENABLED
13546
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13547
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13548
        /*
13549
         * When parsing in context, it makes no sense to add implied
13550
         * elements like html/body/etc...
13551
         */
13552
0
        options |= HTML_PARSE_NOIMPLIED;
13553
0
    }
13554
0
#endif
13555
0
    else
13556
0
        return(XML_ERR_INTERNAL_ERROR);
13557
13558
0
    if (ctxt == NULL)
13559
0
        return(XML_ERR_NO_MEMORY);
13560
13561
    /*
13562
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13563
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13564
     * we must wait until the last moment to free the original one.
13565
     */
13566
0
    if (doc->dict != NULL) {
13567
0
        if (ctxt->dict != NULL)
13568
0
      xmlDictFree(ctxt->dict);
13569
0
  ctxt->dict = doc->dict;
13570
0
    } else
13571
0
        options |= XML_PARSE_NODICT;
13572
13573
0
    if (doc->encoding != NULL) {
13574
0
        xmlCharEncodingHandlerPtr hdlr;
13575
13576
0
        if (ctxt->encoding != NULL)
13577
0
      xmlFree((xmlChar *) ctxt->encoding);
13578
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13579
13580
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13581
0
        if (hdlr != NULL) {
13582
0
            xmlSwitchToEncoding(ctxt, hdlr);
13583
0
  } else {
13584
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13585
0
        }
13586
0
    }
13587
13588
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13589
0
    xmlDetectSAX2(ctxt);
13590
0
    ctxt->myDoc = doc;
13591
    /* parsing in context, i.e. as within existing content */
13592
0
    ctxt->input_id = 2;
13593
0
    ctxt->instate = XML_PARSER_CONTENT;
13594
13595
0
    fake = xmlNewDocComment(node->doc, NULL);
13596
0
    if (fake == NULL) {
13597
0
        xmlFreeParserCtxt(ctxt);
13598
0
  return(XML_ERR_NO_MEMORY);
13599
0
    }
13600
0
    xmlAddChild(node, fake);
13601
13602
0
    if (node->type == XML_ELEMENT_NODE) {
13603
0
  nodePush(ctxt, node);
13604
  /*
13605
   * initialize the SAX2 namespaces stack
13606
   */
13607
0
  cur = node;
13608
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13609
0
      xmlNsPtr ns = cur->nsDef;
13610
0
      const xmlChar *iprefix, *ihref;
13611
13612
0
      while (ns != NULL) {
13613
0
    if (ctxt->dict) {
13614
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13615
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13616
0
    } else {
13617
0
        iprefix = ns->prefix;
13618
0
        ihref = ns->href;
13619
0
    }
13620
13621
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13622
0
        nsPush(ctxt, iprefix, ihref);
13623
0
        nsnr++;
13624
0
    }
13625
0
    ns = ns->next;
13626
0
      }
13627
0
      cur = cur->parent;
13628
0
  }
13629
0
    }
13630
13631
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13632
  /*
13633
   * ID/IDREF registration will be done in xmlValidateElement below
13634
   */
13635
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13636
0
    }
13637
13638
0
#ifdef LIBXML_HTML_ENABLED
13639
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13640
0
        __htmlParseContent(ctxt);
13641
0
    else
13642
0
#endif
13643
0
  xmlParseContent(ctxt);
13644
13645
0
    nsPop(ctxt, nsnr);
13646
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13647
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13648
0
    } else if (RAW != 0) {
13649
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13650
0
    }
13651
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13652
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13653
0
  ctxt->wellFormed = 0;
13654
0
    }
13655
13656
0
    if (!ctxt->wellFormed) {
13657
0
        if (ctxt->errNo == 0)
13658
0
      ret = XML_ERR_INTERNAL_ERROR;
13659
0
  else
13660
0
      ret = (xmlParserErrors)ctxt->errNo;
13661
0
    } else {
13662
0
        ret = XML_ERR_OK;
13663
0
    }
13664
13665
    /*
13666
     * Return the newly created nodeset after unlinking it from
13667
     * the pseudo sibling.
13668
     */
13669
13670
0
    cur = fake->next;
13671
0
    fake->next = NULL;
13672
0
    node->last = fake;
13673
13674
0
    if (cur != NULL) {
13675
0
  cur->prev = NULL;
13676
0
    }
13677
13678
0
    *lst = cur;
13679
13680
0
    while (cur != NULL) {
13681
0
  cur->parent = NULL;
13682
0
  cur = cur->next;
13683
0
    }
13684
13685
0
    xmlUnlinkNode(fake);
13686
0
    xmlFreeNode(fake);
13687
13688
13689
0
    if (ret != XML_ERR_OK) {
13690
0
        xmlFreeNodeList(*lst);
13691
0
  *lst = NULL;
13692
0
    }
13693
13694
0
    if (doc->dict != NULL)
13695
0
        ctxt->dict = NULL;
13696
0
    xmlFreeParserCtxt(ctxt);
13697
13698
0
    return(ret);
13699
#else /* !SAX2 */
13700
    return(XML_ERR_INTERNAL_ERROR);
13701
#endif
13702
0
}
13703
13704
#ifdef LIBXML_SAX1_ENABLED
13705
/**
13706
 * xmlParseBalancedChunkMemoryRecover:
13707
 * @doc:  the document the chunk pertains to (must not be NULL)
13708
 * @sax:  the SAX handler block (possibly NULL)
13709
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13710
 * @depth:  Used for loop detection, use 0
13711
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13712
 * @lst:  the return value for the set of parsed nodes
13713
 * @recover: return nodes even if the data is broken (use 0)
13714
 *
13715
 *
13716
 * Parse a well-balanced chunk of an XML document
13717
 * called by the parser
13718
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13719
 * the content production in the XML grammar:
13720
 *
13721
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13722
 *
13723
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13724
 *    the parser error code otherwise
13725
 *
13726
 * In case recover is set to 1, the nodelist will not be empty even if
13727
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13728
 * some extent.
13729
 */
13730
int
13731
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13732
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13733
0
     int recover) {
13734
0
    xmlParserCtxtPtr ctxt;
13735
0
    xmlDocPtr newDoc;
13736
0
    xmlSAXHandlerPtr oldsax = NULL;
13737
0
    xmlNodePtr content, newRoot;
13738
0
    int size;
13739
0
    int ret = 0;
13740
13741
0
    if (depth > 40) {
13742
0
  return(XML_ERR_ENTITY_LOOP);
13743
0
    }
13744
13745
13746
0
    if (lst != NULL)
13747
0
        *lst = NULL;
13748
0
    if (string == NULL)
13749
0
        return(-1);
13750
13751
0
    size = xmlStrlen(string);
13752
13753
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13754
0
    if (ctxt == NULL) return(-1);
13755
0
    ctxt->userData = ctxt;
13756
0
    if (sax != NULL) {
13757
0
  oldsax = ctxt->sax;
13758
0
        ctxt->sax = sax;
13759
0
  if (user_data != NULL)
13760
0
      ctxt->userData = user_data;
13761
0
    }
13762
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13763
0
    if (newDoc == NULL) {
13764
0
  xmlFreeParserCtxt(ctxt);
13765
0
  return(-1);
13766
0
    }
13767
0
    newDoc->properties = XML_DOC_INTERNAL;
13768
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13769
0
        xmlDictFree(ctxt->dict);
13770
0
  ctxt->dict = doc->dict;
13771
0
  xmlDictReference(ctxt->dict);
13772
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13773
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13774
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13775
0
  ctxt->dictNames = 1;
13776
0
    } else {
13777
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13778
0
    }
13779
    /* doc == NULL is only supported for historic reasons */
13780
0
    if (doc != NULL) {
13781
0
  newDoc->intSubset = doc->intSubset;
13782
0
  newDoc->extSubset = doc->extSubset;
13783
0
    }
13784
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13785
0
    if (newRoot == NULL) {
13786
0
  if (sax != NULL)
13787
0
      ctxt->sax = oldsax;
13788
0
  xmlFreeParserCtxt(ctxt);
13789
0
  newDoc->intSubset = NULL;
13790
0
  newDoc->extSubset = NULL;
13791
0
        xmlFreeDoc(newDoc);
13792
0
  return(-1);
13793
0
    }
13794
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13795
0
    nodePush(ctxt, newRoot);
13796
    /* doc == NULL is only supported for historic reasons */
13797
0
    if (doc == NULL) {
13798
0
  ctxt->myDoc = newDoc;
13799
0
    } else {
13800
0
  ctxt->myDoc = newDoc;
13801
0
  newDoc->children->doc = doc;
13802
  /* Ensure that doc has XML spec namespace */
13803
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13804
0
  newDoc->oldNs = doc->oldNs;
13805
0
    }
13806
0
    ctxt->instate = XML_PARSER_CONTENT;
13807
0
    ctxt->input_id = 2;
13808
0
    ctxt->depth = depth;
13809
13810
    /*
13811
     * Doing validity checking on chunk doesn't make sense
13812
     */
13813
0
    ctxt->validate = 0;
13814
0
    ctxt->loadsubset = 0;
13815
0
    xmlDetectSAX2(ctxt);
13816
13817
0
    if ( doc != NULL ){
13818
0
        content = doc->children;
13819
0
        doc->children = NULL;
13820
0
        xmlParseContent(ctxt);
13821
0
        doc->children = content;
13822
0
    }
13823
0
    else {
13824
0
        xmlParseContent(ctxt);
13825
0
    }
13826
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13827
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13828
0
    } else if (RAW != 0) {
13829
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13830
0
    }
13831
0
    if (ctxt->node != newDoc->children) {
13832
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13833
0
    }
13834
13835
0
    if (!ctxt->wellFormed) {
13836
0
        if (ctxt->errNo == 0)
13837
0
      ret = 1;
13838
0
  else
13839
0
      ret = ctxt->errNo;
13840
0
    } else {
13841
0
      ret = 0;
13842
0
    }
13843
13844
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13845
0
  xmlNodePtr cur;
13846
13847
  /*
13848
   * Return the newly created nodeset after unlinking it from
13849
   * they pseudo parent.
13850
   */
13851
0
  cur = newDoc->children->children;
13852
0
  *lst = cur;
13853
0
  while (cur != NULL) {
13854
0
      xmlSetTreeDoc(cur, doc);
13855
0
      cur->parent = NULL;
13856
0
      cur = cur->next;
13857
0
  }
13858
0
  newDoc->children->children = NULL;
13859
0
    }
13860
13861
0
    if (sax != NULL)
13862
0
  ctxt->sax = oldsax;
13863
0
    xmlFreeParserCtxt(ctxt);
13864
0
    newDoc->intSubset = NULL;
13865
0
    newDoc->extSubset = NULL;
13866
    /* This leaks the namespace list if doc == NULL */
13867
0
    newDoc->oldNs = NULL;
13868
0
    xmlFreeDoc(newDoc);
13869
13870
0
    return(ret);
13871
0
}
13872
13873
/**
13874
 * xmlSAXParseEntity:
13875
 * @sax:  the SAX handler block
13876
 * @filename:  the filename
13877
 *
13878
 * parse an XML external entity out of context and build a tree.
13879
 * It use the given SAX function block to handle the parsing callback.
13880
 * If sax is NULL, fallback to the default DOM tree building routines.
13881
 *
13882
 * [78] extParsedEnt ::= TextDecl? content
13883
 *
13884
 * This correspond to a "Well Balanced" chunk
13885
 *
13886
 * Returns the resulting document tree
13887
 */
13888
13889
xmlDocPtr
13890
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13891
0
    xmlDocPtr ret;
13892
0
    xmlParserCtxtPtr ctxt;
13893
13894
0
    ctxt = xmlCreateFileParserCtxt(filename);
13895
0
    if (ctxt == NULL) {
13896
0
  return(NULL);
13897
0
    }
13898
0
    if (sax != NULL) {
13899
0
  if (ctxt->sax != NULL)
13900
0
      xmlFree(ctxt->sax);
13901
0
        ctxt->sax = sax;
13902
0
        ctxt->userData = NULL;
13903
0
    }
13904
13905
0
    xmlParseExtParsedEnt(ctxt);
13906
13907
0
    if (ctxt->wellFormed)
13908
0
  ret = ctxt->myDoc;
13909
0
    else {
13910
0
        ret = NULL;
13911
0
        xmlFreeDoc(ctxt->myDoc);
13912
0
        ctxt->myDoc = NULL;
13913
0
    }
13914
0
    if (sax != NULL)
13915
0
        ctxt->sax = NULL;
13916
0
    xmlFreeParserCtxt(ctxt);
13917
13918
0
    return(ret);
13919
0
}
13920
13921
/**
13922
 * xmlParseEntity:
13923
 * @filename:  the filename
13924
 *
13925
 * parse an XML external entity out of context and build a tree.
13926
 *
13927
 * [78] extParsedEnt ::= TextDecl? content
13928
 *
13929
 * This correspond to a "Well Balanced" chunk
13930
 *
13931
 * Returns the resulting document tree
13932
 */
13933
13934
xmlDocPtr
13935
0
xmlParseEntity(const char *filename) {
13936
0
    return(xmlSAXParseEntity(NULL, filename));
13937
0
}
13938
#endif /* LIBXML_SAX1_ENABLED */
13939
13940
/**
13941
 * xmlCreateEntityParserCtxtInternal:
13942
 * @URL:  the entity URL
13943
 * @ID:  the entity PUBLIC ID
13944
 * @base:  a possible base for the target URI
13945
 * @pctx:  parser context used to set options on new context
13946
 *
13947
 * Create a parser context for an external entity
13948
 * Automatic support for ZLIB/Compress compressed document is provided
13949
 * by default if found at compile-time.
13950
 *
13951
 * Returns the new parser context or NULL
13952
 */
13953
static xmlParserCtxtPtr
13954
xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13955
0
                    const xmlChar *base, xmlParserCtxtPtr pctx) {
13956
0
    xmlParserCtxtPtr ctxt;
13957
0
    xmlParserInputPtr inputStream;
13958
0
    char *directory = NULL;
13959
0
    xmlChar *uri;
13960
13961
0
    ctxt = xmlNewParserCtxt();
13962
0
    if (ctxt == NULL) {
13963
0
  return(NULL);
13964
0
    }
13965
13966
0
    if (pctx != NULL) {
13967
0
        ctxt->options = pctx->options;
13968
0
        ctxt->_private = pctx->_private;
13969
  /*
13970
   * this is a subparser of pctx, so the input_id should be
13971
   * incremented to distinguish from main entity
13972
   */
13973
0
  ctxt->input_id = pctx->input_id + 1;
13974
0
    }
13975
13976
    /* Don't read from stdin. */
13977
0
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13978
0
        URL = BAD_CAST "./-";
13979
13980
0
    uri = xmlBuildURI(URL, base);
13981
13982
0
    if (uri == NULL) {
13983
0
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13984
0
  if (inputStream == NULL) {
13985
0
      xmlFreeParserCtxt(ctxt);
13986
0
      return(NULL);
13987
0
  }
13988
13989
0
  inputPush(ctxt, inputStream);
13990
13991
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13992
0
      directory = xmlParserGetDirectory((char *)URL);
13993
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13994
0
      ctxt->directory = directory;
13995
0
    } else {
13996
0
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13997
0
  if (inputStream == NULL) {
13998
0
      xmlFree(uri);
13999
0
      xmlFreeParserCtxt(ctxt);
14000
0
      return(NULL);
14001
0
  }
14002
14003
0
  inputPush(ctxt, inputStream);
14004
14005
0
  if ((ctxt->directory == NULL) && (directory == NULL))
14006
0
      directory = xmlParserGetDirectory((char *)uri);
14007
0
  if ((ctxt->directory == NULL) && (directory != NULL))
14008
0
      ctxt->directory = directory;
14009
0
  xmlFree(uri);
14010
0
    }
14011
0
    return(ctxt);
14012
0
}
14013
14014
/**
14015
 * xmlCreateEntityParserCtxt:
14016
 * @URL:  the entity URL
14017
 * @ID:  the entity PUBLIC ID
14018
 * @base:  a possible base for the target URI
14019
 *
14020
 * Create a parser context for an external entity
14021
 * Automatic support for ZLIB/Compress compressed document is provided
14022
 * by default if found at compile-time.
14023
 *
14024
 * Returns the new parser context or NULL
14025
 */
14026
xmlParserCtxtPtr
14027
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14028
0
                    const xmlChar *base) {
14029
0
    return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14030
14031
0
}
14032
14033
/************************************************************************
14034
 *                  *
14035
 *    Front ends when parsing from a file     *
14036
 *                  *
14037
 ************************************************************************/
14038
14039
/**
14040
 * xmlCreateURLParserCtxt:
14041
 * @filename:  the filename or URL
14042
 * @options:  a combination of xmlParserOption
14043
 *
14044
 * Create a parser context for a file or URL content.
14045
 * Automatic support for ZLIB/Compress compressed document is provided
14046
 * by default if found at compile-time and for file accesses
14047
 *
14048
 * Returns the new parser context or NULL
14049
 */
14050
xmlParserCtxtPtr
14051
xmlCreateURLParserCtxt(const char *filename, int options)
14052
0
{
14053
0
    xmlParserCtxtPtr ctxt;
14054
0
    xmlParserInputPtr inputStream;
14055
0
    char *directory = NULL;
14056
14057
0
    ctxt = xmlNewParserCtxt();
14058
0
    if (ctxt == NULL) {
14059
0
  xmlErrMemory(NULL, "cannot allocate parser context");
14060
0
  return(NULL);
14061
0
    }
14062
14063
0
    if (options)
14064
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
14065
0
    ctxt->linenumbers = 1;
14066
14067
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
14068
0
    if (inputStream == NULL) {
14069
0
  xmlFreeParserCtxt(ctxt);
14070
0
  return(NULL);
14071
0
    }
14072
14073
0
    inputPush(ctxt, inputStream);
14074
0
    if ((ctxt->directory == NULL) && (directory == NULL))
14075
0
        directory = xmlParserGetDirectory(filename);
14076
0
    if ((ctxt->directory == NULL) && (directory != NULL))
14077
0
        ctxt->directory = directory;
14078
14079
0
    return(ctxt);
14080
0
}
14081
14082
/**
14083
 * xmlCreateFileParserCtxt:
14084
 * @filename:  the filename
14085
 *
14086
 * Create a parser context for a file content.
14087
 * Automatic support for ZLIB/Compress compressed document is provided
14088
 * by default if found at compile-time.
14089
 *
14090
 * Returns the new parser context or NULL
14091
 */
14092
xmlParserCtxtPtr
14093
xmlCreateFileParserCtxt(const char *filename)
14094
0
{
14095
0
    return(xmlCreateURLParserCtxt(filename, 0));
14096
0
}
14097
14098
#ifdef LIBXML_SAX1_ENABLED
14099
/**
14100
 * xmlSAXParseFileWithData:
14101
 * @sax:  the SAX handler block
14102
 * @filename:  the filename
14103
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14104
 *             documents
14105
 * @data:  the userdata
14106
 *
14107
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14108
 * compressed document is provided by default if found at compile-time.
14109
 * It use the given SAX function block to handle the parsing callback.
14110
 * If sax is NULL, fallback to the default DOM tree building routines.
14111
 *
14112
 * User data (void *) is stored within the parser context in the
14113
 * context's _private member, so it is available nearly everywhere in libxml
14114
 *
14115
 * Returns the resulting document tree
14116
 */
14117
14118
xmlDocPtr
14119
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14120
0
                        int recovery, void *data) {
14121
0
    xmlDocPtr ret;
14122
0
    xmlParserCtxtPtr ctxt;
14123
14124
0
    xmlInitParser();
14125
14126
0
    ctxt = xmlCreateFileParserCtxt(filename);
14127
0
    if (ctxt == NULL) {
14128
0
  return(NULL);
14129
0
    }
14130
0
    if (sax != NULL) {
14131
0
  if (ctxt->sax != NULL)
14132
0
      xmlFree(ctxt->sax);
14133
0
        ctxt->sax = sax;
14134
0
    }
14135
0
    xmlDetectSAX2(ctxt);
14136
0
    if (data!=NULL) {
14137
0
  ctxt->_private = data;
14138
0
    }
14139
14140
0
    if (ctxt->directory == NULL)
14141
0
        ctxt->directory = xmlParserGetDirectory(filename);
14142
14143
0
    ctxt->recovery = recovery;
14144
14145
0
    xmlParseDocument(ctxt);
14146
14147
0
    if ((ctxt->wellFormed) || recovery) {
14148
0
        ret = ctxt->myDoc;
14149
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
14150
0
      if (ctxt->input->buf->compressed > 0)
14151
0
    ret->compression = 9;
14152
0
      else
14153
0
    ret->compression = ctxt->input->buf->compressed;
14154
0
  }
14155
0
    }
14156
0
    else {
14157
0
       ret = NULL;
14158
0
       xmlFreeDoc(ctxt->myDoc);
14159
0
       ctxt->myDoc = NULL;
14160
0
    }
14161
0
    if (sax != NULL)
14162
0
        ctxt->sax = NULL;
14163
0
    xmlFreeParserCtxt(ctxt);
14164
14165
0
    return(ret);
14166
0
}
14167
14168
/**
14169
 * xmlSAXParseFile:
14170
 * @sax:  the SAX handler block
14171
 * @filename:  the filename
14172
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14173
 *             documents
14174
 *
14175
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14176
 * compressed document is provided by default if found at compile-time.
14177
 * It use the given SAX function block to handle the parsing callback.
14178
 * If sax is NULL, fallback to the default DOM tree building routines.
14179
 *
14180
 * Returns the resulting document tree
14181
 */
14182
14183
xmlDocPtr
14184
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14185
0
                          int recovery) {
14186
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14187
0
}
14188
14189
/**
14190
 * xmlRecoverDoc:
14191
 * @cur:  a pointer to an array of xmlChar
14192
 *
14193
 * parse an XML in-memory document and build a tree.
14194
 * In the case the document is not Well Formed, a attempt to build a
14195
 * tree is tried anyway
14196
 *
14197
 * Returns the resulting document tree or NULL in case of failure
14198
 */
14199
14200
xmlDocPtr
14201
0
xmlRecoverDoc(const xmlChar *cur) {
14202
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14203
0
}
14204
14205
/**
14206
 * xmlParseFile:
14207
 * @filename:  the filename
14208
 *
14209
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14210
 * compressed document is provided by default if found at compile-time.
14211
 *
14212
 * Returns the resulting document tree if the file was wellformed,
14213
 * NULL otherwise.
14214
 */
14215
14216
xmlDocPtr
14217
0
xmlParseFile(const char *filename) {
14218
0
    return(xmlSAXParseFile(NULL, filename, 0));
14219
0
}
14220
14221
/**
14222
 * xmlRecoverFile:
14223
 * @filename:  the filename
14224
 *
14225
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14226
 * compressed document is provided by default if found at compile-time.
14227
 * In the case the document is not Well Formed, it attempts to build
14228
 * a tree anyway
14229
 *
14230
 * Returns the resulting document tree or NULL in case of failure
14231
 */
14232
14233
xmlDocPtr
14234
0
xmlRecoverFile(const char *filename) {
14235
0
    return(xmlSAXParseFile(NULL, filename, 1));
14236
0
}
14237
14238
14239
/**
14240
 * xmlSetupParserForBuffer:
14241
 * @ctxt:  an XML parser context
14242
 * @buffer:  a xmlChar * buffer
14243
 * @filename:  a file name
14244
 *
14245
 * Setup the parser context to parse a new buffer; Clears any prior
14246
 * contents from the parser context. The buffer parameter must not be
14247
 * NULL, but the filename parameter can be
14248
 */
14249
void
14250
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14251
                             const char* filename)
14252
0
{
14253
0
    xmlParserInputPtr input;
14254
14255
0
    if ((ctxt == NULL) || (buffer == NULL))
14256
0
        return;
14257
14258
0
    input = xmlNewInputStream(ctxt);
14259
0
    if (input == NULL) {
14260
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14261
0
        xmlClearParserCtxt(ctxt);
14262
0
        return;
14263
0
    }
14264
14265
0
    xmlClearParserCtxt(ctxt);
14266
0
    if (filename != NULL)
14267
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14268
0
    input->base = buffer;
14269
0
    input->cur = buffer;
14270
0
    input->end = &buffer[xmlStrlen(buffer)];
14271
0
    inputPush(ctxt, input);
14272
0
}
14273
14274
/**
14275
 * xmlSAXUserParseFile:
14276
 * @sax:  a SAX handler
14277
 * @user_data:  The user data returned on SAX callbacks
14278
 * @filename:  a file name
14279
 *
14280
 * parse an XML file and call the given SAX handler routines.
14281
 * Automatic support for ZLIB/Compress compressed document is provided
14282
 *
14283
 * Returns 0 in case of success or a error number otherwise
14284
 */
14285
int
14286
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14287
0
                    const char *filename) {
14288
0
    int ret = 0;
14289
0
    xmlParserCtxtPtr ctxt;
14290
14291
0
    ctxt = xmlCreateFileParserCtxt(filename);
14292
0
    if (ctxt == NULL) return -1;
14293
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14294
0
  xmlFree(ctxt->sax);
14295
0
    ctxt->sax = sax;
14296
0
    xmlDetectSAX2(ctxt);
14297
14298
0
    if (user_data != NULL)
14299
0
  ctxt->userData = user_data;
14300
14301
0
    xmlParseDocument(ctxt);
14302
14303
0
    if (ctxt->wellFormed)
14304
0
  ret = 0;
14305
0
    else {
14306
0
        if (ctxt->errNo != 0)
14307
0
      ret = ctxt->errNo;
14308
0
  else
14309
0
      ret = -1;
14310
0
    }
14311
0
    if (sax != NULL)
14312
0
  ctxt->sax = NULL;
14313
0
    if (ctxt->myDoc != NULL) {
14314
0
        xmlFreeDoc(ctxt->myDoc);
14315
0
  ctxt->myDoc = NULL;
14316
0
    }
14317
0
    xmlFreeParserCtxt(ctxt);
14318
14319
0
    return ret;
14320
0
}
14321
#endif /* LIBXML_SAX1_ENABLED */
14322
14323
/************************************************************************
14324
 *                  *
14325
 *    Front ends when parsing from memory     *
14326
 *                  *
14327
 ************************************************************************/
14328
14329
/**
14330
 * xmlCreateMemoryParserCtxt:
14331
 * @buffer:  a pointer to a char array
14332
 * @size:  the size of the array
14333
 *
14334
 * Create a parser context for an XML in-memory document.
14335
 *
14336
 * Returns the new parser context or NULL
14337
 */
14338
xmlParserCtxtPtr
14339
0
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14340
0
    xmlParserCtxtPtr ctxt;
14341
0
    xmlParserInputPtr input;
14342
0
    xmlParserInputBufferPtr buf;
14343
14344
0
    if (buffer == NULL)
14345
0
  return(NULL);
14346
0
    if (size <= 0)
14347
0
  return(NULL);
14348
14349
0
    ctxt = xmlNewParserCtxt();
14350
0
    if (ctxt == NULL)
14351
0
  return(NULL);
14352
14353
    /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
14354
0
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14355
0
    if (buf == NULL) {
14356
0
  xmlFreeParserCtxt(ctxt);
14357
0
  return(NULL);
14358
0
    }
14359
14360
0
    input = xmlNewInputStream(ctxt);
14361
0
    if (input == NULL) {
14362
0
  xmlFreeParserInputBuffer(buf);
14363
0
  xmlFreeParserCtxt(ctxt);
14364
0
  return(NULL);
14365
0
    }
14366
14367
0
    input->filename = NULL;
14368
0
    input->buf = buf;
14369
0
    xmlBufResetInput(input->buf->buffer, input);
14370
14371
0
    inputPush(ctxt, input);
14372
0
    return(ctxt);
14373
0
}
14374
14375
#ifdef LIBXML_SAX1_ENABLED
14376
/**
14377
 * xmlSAXParseMemoryWithData:
14378
 * @sax:  the SAX handler block
14379
 * @buffer:  an pointer to a char array
14380
 * @size:  the size of the array
14381
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14382
 *             documents
14383
 * @data:  the userdata
14384
 *
14385
 * parse an XML in-memory block and use the given SAX function block
14386
 * to handle the parsing callback. If sax is NULL, fallback to the default
14387
 * DOM tree building routines.
14388
 *
14389
 * User data (void *) is stored within the parser context in the
14390
 * context's _private member, so it is available nearly everywhere in libxml
14391
 *
14392
 * Returns the resulting document tree
14393
 */
14394
14395
xmlDocPtr
14396
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14397
0
            int size, int recovery, void *data) {
14398
0
    xmlDocPtr ret;
14399
0
    xmlParserCtxtPtr ctxt;
14400
14401
0
    xmlInitParser();
14402
14403
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14404
0
    if (ctxt == NULL) return(NULL);
14405
0
    if (sax != NULL) {
14406
0
  if (ctxt->sax != NULL)
14407
0
      xmlFree(ctxt->sax);
14408
0
        ctxt->sax = sax;
14409
0
    }
14410
0
    xmlDetectSAX2(ctxt);
14411
0
    if (data!=NULL) {
14412
0
  ctxt->_private=data;
14413
0
    }
14414
14415
0
    ctxt->recovery = recovery;
14416
14417
0
    xmlParseDocument(ctxt);
14418
14419
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14420
0
    else {
14421
0
       ret = NULL;
14422
0
       xmlFreeDoc(ctxt->myDoc);
14423
0
       ctxt->myDoc = NULL;
14424
0
    }
14425
0
    if (sax != NULL)
14426
0
  ctxt->sax = NULL;
14427
0
    xmlFreeParserCtxt(ctxt);
14428
14429
0
    return(ret);
14430
0
}
14431
14432
/**
14433
 * xmlSAXParseMemory:
14434
 * @sax:  the SAX handler block
14435
 * @buffer:  an pointer to a char array
14436
 * @size:  the size of the array
14437
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14438
 *             documents
14439
 *
14440
 * parse an XML in-memory block and use the given SAX function block
14441
 * to handle the parsing callback. If sax is NULL, fallback to the default
14442
 * DOM tree building routines.
14443
 *
14444
 * Returns the resulting document tree
14445
 */
14446
xmlDocPtr
14447
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14448
0
            int size, int recovery) {
14449
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14450
0
}
14451
14452
/**
14453
 * xmlParseMemory:
14454
 * @buffer:  an pointer to a char array
14455
 * @size:  the size of the array
14456
 *
14457
 * parse an XML in-memory block and build a tree.
14458
 *
14459
 * Returns the resulting document tree
14460
 */
14461
14462
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14463
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14464
0
}
14465
14466
/**
14467
 * xmlRecoverMemory:
14468
 * @buffer:  an pointer to a char array
14469
 * @size:  the size of the array
14470
 *
14471
 * parse an XML in-memory block and build a tree.
14472
 * In the case the document is not Well Formed, an attempt to
14473
 * build a tree is tried anyway
14474
 *
14475
 * Returns the resulting document tree or NULL in case of error
14476
 */
14477
14478
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14479
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14480
0
}
14481
14482
/**
14483
 * xmlSAXUserParseMemory:
14484
 * @sax:  a SAX handler
14485
 * @user_data:  The user data returned on SAX callbacks
14486
 * @buffer:  an in-memory XML document input
14487
 * @size:  the length of the XML document in bytes
14488
 *
14489
 * A better SAX parsing routine.
14490
 * parse an XML in-memory buffer and call the given SAX handler routines.
14491
 *
14492
 * Returns 0 in case of success or a error number otherwise
14493
 */
14494
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14495
0
        const char *buffer, int size) {
14496
0
    int ret = 0;
14497
0
    xmlParserCtxtPtr ctxt;
14498
14499
0
    xmlInitParser();
14500
14501
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14502
0
    if (ctxt == NULL) return -1;
14503
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14504
0
        xmlFree(ctxt->sax);
14505
0
    ctxt->sax = sax;
14506
0
    xmlDetectSAX2(ctxt);
14507
14508
0
    if (user_data != NULL)
14509
0
  ctxt->userData = user_data;
14510
14511
0
    xmlParseDocument(ctxt);
14512
14513
0
    if (ctxt->wellFormed)
14514
0
  ret = 0;
14515
0
    else {
14516
0
        if (ctxt->errNo != 0)
14517
0
      ret = ctxt->errNo;
14518
0
  else
14519
0
      ret = -1;
14520
0
    }
14521
0
    if (sax != NULL)
14522
0
        ctxt->sax = NULL;
14523
0
    if (ctxt->myDoc != NULL) {
14524
0
        xmlFreeDoc(ctxt->myDoc);
14525
0
  ctxt->myDoc = NULL;
14526
0
    }
14527
0
    xmlFreeParserCtxt(ctxt);
14528
14529
0
    return ret;
14530
0
}
14531
#endif /* LIBXML_SAX1_ENABLED */
14532
14533
/**
14534
 * xmlCreateDocParserCtxt:
14535
 * @cur:  a pointer to an array of xmlChar
14536
 *
14537
 * Creates a parser context for an XML in-memory document.
14538
 *
14539
 * Returns the new parser context or NULL
14540
 */
14541
xmlParserCtxtPtr
14542
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14543
0
    int len;
14544
14545
0
    if (cur == NULL)
14546
0
  return(NULL);
14547
0
    len = xmlStrlen(cur);
14548
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14549
0
}
14550
14551
#ifdef LIBXML_SAX1_ENABLED
14552
/**
14553
 * xmlSAXParseDoc:
14554
 * @sax:  the SAX handler block
14555
 * @cur:  a pointer to an array of xmlChar
14556
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14557
 *             documents
14558
 *
14559
 * parse an XML in-memory document and build a tree.
14560
 * It use the given SAX function block to handle the parsing callback.
14561
 * If sax is NULL, fallback to the default DOM tree building routines.
14562
 *
14563
 * Returns the resulting document tree
14564
 */
14565
14566
xmlDocPtr
14567
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14568
0
    xmlDocPtr ret;
14569
0
    xmlParserCtxtPtr ctxt;
14570
0
    xmlSAXHandlerPtr oldsax = NULL;
14571
14572
0
    if (cur == NULL) return(NULL);
14573
14574
14575
0
    ctxt = xmlCreateDocParserCtxt(cur);
14576
0
    if (ctxt == NULL) return(NULL);
14577
0
    if (sax != NULL) {
14578
0
        oldsax = ctxt->sax;
14579
0
        ctxt->sax = sax;
14580
0
        ctxt->userData = NULL;
14581
0
    }
14582
0
    xmlDetectSAX2(ctxt);
14583
14584
0
    xmlParseDocument(ctxt);
14585
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14586
0
    else {
14587
0
       ret = NULL;
14588
0
       xmlFreeDoc(ctxt->myDoc);
14589
0
       ctxt->myDoc = NULL;
14590
0
    }
14591
0
    if (sax != NULL)
14592
0
  ctxt->sax = oldsax;
14593
0
    xmlFreeParserCtxt(ctxt);
14594
14595
0
    return(ret);
14596
0
}
14597
14598
/**
14599
 * xmlParseDoc:
14600
 * @cur:  a pointer to an array of xmlChar
14601
 *
14602
 * parse an XML in-memory document and build a tree.
14603
 *
14604
 * Returns the resulting document tree
14605
 */
14606
14607
xmlDocPtr
14608
0
xmlParseDoc(const xmlChar *cur) {
14609
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14610
0
}
14611
#endif /* LIBXML_SAX1_ENABLED */
14612
14613
#ifdef LIBXML_LEGACY_ENABLED
14614
/************************************************************************
14615
 *                  *
14616
 *  Specific function to keep track of entities references    *
14617
 *  and used by the XSLT debugger         *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/* Callback invoked by xmlAddEntityReference() when non-NULL; installed with xmlSetEntityReferenceFunc(). */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14622
14623
/**
14624
 * xmlAddEntityReference:
14625
 * @ent : A valid entity
14626
 * @firstNode : A valid first node for children of entity
14627
 * @lastNode : A valid last node of children entity
14628
 *
14629
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14630
 */
14631
static void
14632
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14633
                      xmlNodePtr lastNode)
14634
{
14635
    if (xmlEntityRefFunc != NULL) {
14636
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14637
    }
14638
}
14639
14640
14641
/**
14642
 * xmlSetEntityReferenceFunc:
14643
 * @func: A valid function
14644
 *
14645
 * Set the function to call call back when a xml reference has been made
14646
 */
14647
void
14648
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14649
{
14650
    xmlEntityRefFunc = func;
14651
}
14652
#endif /* LIBXML_LEGACY_ENABLED */
14653
14654
/************************************************************************
14655
 *                  *
14656
 *        Miscellaneous       *
14657
 *                  *
14658
 ************************************************************************/
14659
14660
#ifdef LIBXML_XPATH_ENABLED
14661
#include <libxml/xpath.h>
14662
#endif
14663
14664
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
14665
/* Nonzero once xmlInitParser() has run; xmlCleanupParser() resets it to 0. */
static int xmlParserInitialized = 0;
14666
14667
/**
14668
 * xmlInitParser:
14669
 *
14670
 * Initialization function for the XML parser.
14671
 * This is not reentrant. Call once before processing in case of
14672
 * use in multithreaded programs.
14673
 */
14674
14675
void
14676
36.6k
xmlInitParser(void) {
14677
36.6k
    if (xmlParserInitialized != 0)
14678
36.6k
  return;
14679
14680
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14681
    if (xmlFree == free)
14682
        atexit(xmlCleanupParser);
14683
#endif
14684
14685
1
#ifdef LIBXML_THREAD_ENABLED
14686
1
    __xmlGlobalInitMutexLock();
14687
1
    if (xmlParserInitialized == 0) {
14688
1
#endif
14689
1
  xmlInitThreads();
14690
1
  xmlInitGlobals();
14691
1
  if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14692
1
      (xmlGenericError == NULL))
14693
1
      initGenericErrorDefaultFunc(NULL);
14694
1
  xmlInitMemory();
14695
1
        xmlInitializeDict();
14696
1
  xmlInitCharEncodingHandlers();
14697
1
  xmlDefaultSAXHandlerInit();
14698
1
  xmlRegisterDefaultInputCallbacks();
14699
1
#ifdef LIBXML_OUTPUT_ENABLED
14700
1
  xmlRegisterDefaultOutputCallbacks();
14701
1
#endif /* LIBXML_OUTPUT_ENABLED */
14702
1
#ifdef LIBXML_HTML_ENABLED
14703
1
  htmlInitAutoClose();
14704
1
  htmlDefaultSAXHandlerInit();
14705
1
#endif
14706
1
#ifdef LIBXML_XPATH_ENABLED
14707
1
  xmlXPathInit();
14708
1
#endif
14709
1
  xmlParserInitialized = 1;
14710
1
#ifdef LIBXML_THREAD_ENABLED
14711
1
    }
14712
1
    __xmlGlobalInitMutexUnlock();
14713
1
#endif
14714
1
}
14715
14716
/**
14717
 * xmlCleanupParser:
14718
 *
14719
 * This function name is somewhat misleading. It does not clean up
14720
 * parser state, it cleans up memory allocated by the library itself.
14721
 * It is a cleanup function for the XML library. It tries to reclaim all
14722
 * related global memory allocated for the library processing.
14723
 * It doesn't deallocate any document related memory. One should
14724
 * call xmlCleanupParser() only when the process has finished using
14725
 * the library and all XML/HTML documents built with it.
14726
 * See also xmlInitParser() which has the opposite function of preparing
14727
 * the library for operations.
14728
 *
14729
 * WARNING: if your application is multithreaded or has plugin support
14730
 *          calling this may crash the application if another thread or
14731
 *          a plugin is still using libxml2. It's sometimes very hard to
14732
 *          guess if libxml2 is in use in the application, some libraries
14733
 *          or plugins may use it without notice. In case of doubt abstain
14734
 *          from calling this function or do it just before calling exit()
14735
 *          to avoid leak reports from valgrind !
14736
 */
14737
14738
void
14739
0
xmlCleanupParser(void) {
14740
0
    if (!xmlParserInitialized)
14741
0
  return;
14742
14743
0
    xmlCleanupCharEncodingHandlers();
14744
0
#ifdef LIBXML_CATALOG_ENABLED
14745
0
    xmlCatalogCleanup();
14746
0
#endif
14747
0
    xmlDictCleanup();
14748
0
    xmlCleanupInputCallbacks();
14749
0
#ifdef LIBXML_OUTPUT_ENABLED
14750
0
    xmlCleanupOutputCallbacks();
14751
0
#endif
14752
0
#ifdef LIBXML_SCHEMAS_ENABLED
14753
0
    xmlSchemaCleanupTypes();
14754
0
    xmlRelaxNGCleanupTypes();
14755
0
#endif
14756
0
    xmlCleanupGlobals();
14757
0
    xmlCleanupThreads(); /* must be last if called not from the main thread */
14758
0
    xmlCleanupMemory();
14759
0
    xmlParserInitialized = 0;
14760
0
}
14761
14762
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function marked with ATTRIBUTE_DESTRUCTOR that runs xmlCleanupParser(),
 * but only while xmlFree is still the standard free().
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14775
14776
/************************************************************************
14777
 *                  *
14778
 *  New set (2.6.0) of simpler and more flexible APIs   *
14779
 *                  *
14780
 ************************************************************************/
14781
14782
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed.
 *
 * Expands to a single statement with no trailing semicolon (the caller
 * writes it), so the macro is safe inside unbraced if/else bodies.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14793
14794
/**
14795
 * xmlCtxtReset:
14796
 * @ctxt: an XML parser context
14797
 *
14798
 * Reset a parser context
14799
 */
14800
void
14801
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14802
0
{
14803
0
    xmlParserInputPtr input;
14804
0
    xmlDictPtr dict;
14805
14806
0
    if (ctxt == NULL)
14807
0
        return;
14808
14809
0
    dict = ctxt->dict;
14810
14811
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14812
0
        xmlFreeInputStream(input);
14813
0
    }
14814
0
    ctxt->inputNr = 0;
14815
0
    ctxt->input = NULL;
14816
14817
0
    ctxt->spaceNr = 0;
14818
0
    if (ctxt->spaceTab != NULL) {
14819
0
  ctxt->spaceTab[0] = -1;
14820
0
  ctxt->space = &ctxt->spaceTab[0];
14821
0
    } else {
14822
0
        ctxt->space = NULL;
14823
0
    }
14824
14825
14826
0
    ctxt->nodeNr = 0;
14827
0
    ctxt->node = NULL;
14828
14829
0
    ctxt->nameNr = 0;
14830
0
    ctxt->name = NULL;
14831
14832
0
    ctxt->nsNr = 0;
14833
14834
0
    DICT_FREE(ctxt->version);
14835
0
    ctxt->version = NULL;
14836
0
    DICT_FREE(ctxt->encoding);
14837
0
    ctxt->encoding = NULL;
14838
0
    DICT_FREE(ctxt->directory);
14839
0
    ctxt->directory = NULL;
14840
0
    DICT_FREE(ctxt->extSubURI);
14841
0
    ctxt->extSubURI = NULL;
14842
0
    DICT_FREE(ctxt->extSubSystem);
14843
0
    ctxt->extSubSystem = NULL;
14844
0
    if (ctxt->myDoc != NULL)
14845
0
        xmlFreeDoc(ctxt->myDoc);
14846
0
    ctxt->myDoc = NULL;
14847
14848
0
    ctxt->standalone = -1;
14849
0
    ctxt->hasExternalSubset = 0;
14850
0
    ctxt->hasPErefs = 0;
14851
0
    ctxt->html = 0;
14852
0
    ctxt->external = 0;
14853
0
    ctxt->instate = XML_PARSER_START;
14854
0
    ctxt->token = 0;
14855
14856
0
    ctxt->wellFormed = 1;
14857
0
    ctxt->nsWellFormed = 1;
14858
0
    ctxt->disableSAX = 0;
14859
0
    ctxt->valid = 1;
14860
#if 0
14861
    ctxt->vctxt.userData = ctxt;
14862
    ctxt->vctxt.error = xmlParserValidityError;
14863
    ctxt->vctxt.warning = xmlParserValidityWarning;
14864
#endif
14865
0
    ctxt->record_info = 0;
14866
0
    ctxt->checkIndex = 0;
14867
0
    ctxt->inSubset = 0;
14868
0
    ctxt->errNo = XML_ERR_OK;
14869
0
    ctxt->depth = 0;
14870
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14871
0
    ctxt->catalogs = NULL;
14872
0
    ctxt->nbentities = 0;
14873
0
    ctxt->sizeentities = 0;
14874
0
    ctxt->sizeentcopy = 0;
14875
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14876
14877
0
    if (ctxt->attsDefault != NULL) {
14878
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14879
0
        ctxt->attsDefault = NULL;
14880
0
    }
14881
0
    if (ctxt->attsSpecial != NULL) {
14882
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14883
0
        ctxt->attsSpecial = NULL;
14884
0
    }
14885
14886
0
#ifdef LIBXML_CATALOG_ENABLED
14887
0
    if (ctxt->catalogs != NULL)
14888
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14889
0
#endif
14890
0
    if (ctxt->lastError.code != XML_ERR_OK)
14891
0
        xmlResetError(&ctxt->lastError);
14892
0
}
14893
14894
/**
14895
 * xmlCtxtResetPush:
14896
 * @ctxt: an XML parser context
14897
 * @chunk:  a pointer to an array of chars
14898
 * @size:  number of chars in the array
14899
 * @filename:  an optional file name or URI
14900
 * @encoding:  the document encoding, or NULL
14901
 *
14902
 * Reset a push parser context
14903
 *
14904
 * Returns 0 in case of success and 1 in case of error
14905
 */
14906
int
14907
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14908
                 int size, const char *filename, const char *encoding)
14909
0
{
14910
0
    xmlParserInputPtr inputStream;
14911
0
    xmlParserInputBufferPtr buf;
14912
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14913
14914
0
    if (ctxt == NULL)
14915
0
        return(1);
14916
14917
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14918
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14919
14920
0
    buf = xmlAllocParserInputBuffer(enc);
14921
0
    if (buf == NULL)
14922
0
        return(1);
14923
14924
0
    if (ctxt == NULL) {
14925
0
        xmlFreeParserInputBuffer(buf);
14926
0
        return(1);
14927
0
    }
14928
14929
0
    xmlCtxtReset(ctxt);
14930
14931
0
    if (filename == NULL) {
14932
0
        ctxt->directory = NULL;
14933
0
    } else {
14934
0
        ctxt->directory = xmlParserGetDirectory(filename);
14935
0
    }
14936
14937
0
    inputStream = xmlNewInputStream(ctxt);
14938
0
    if (inputStream == NULL) {
14939
0
        xmlFreeParserInputBuffer(buf);
14940
0
        return(1);
14941
0
    }
14942
14943
0
    if (filename == NULL)
14944
0
        inputStream->filename = NULL;
14945
0
    else
14946
0
        inputStream->filename = (char *)
14947
0
            xmlCanonicPath((const xmlChar *) filename);
14948
0
    inputStream->buf = buf;
14949
0
    xmlBufResetInput(buf->buffer, inputStream);
14950
14951
0
    inputPush(ctxt, inputStream);
14952
14953
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14954
0
        (ctxt->input->buf != NULL)) {
14955
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14956
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14957
14958
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14959
14960
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14961
#ifdef DEBUG_PUSH
14962
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14963
#endif
14964
0
    }
14965
14966
0
    if (encoding != NULL) {
14967
0
        xmlCharEncodingHandlerPtr hdlr;
14968
14969
0
        if (ctxt->encoding != NULL)
14970
0
      xmlFree((xmlChar *) ctxt->encoding);
14971
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14972
14973
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14974
0
        if (hdlr != NULL) {
14975
0
            xmlSwitchToEncoding(ctxt, hdlr);
14976
0
  } else {
14977
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14978
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14979
0
        }
14980
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14981
0
        xmlSwitchEncoding(ctxt, enc);
14982
0
    }
14983
14984
0
    return(0);
14985
0
}
14986
14987
14988
/**
14989
 * xmlCtxtUseOptionsInternal:
14990
 * @ctxt: an XML parser context
14991
 * @options:  a combination of xmlParserOption
14992
 * @encoding:  the user provided encoding to use
14993
 *
14994
 * Applies the options to the parser context
14995
 *
14996
 * Returns 0 in case of success, the set of unknown or unimplemented options
14997
 *         in case of error.
14998
 */
14999
static int
15000
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
15001
18.3k
{
15002
18.3k
    if (ctxt == NULL)
15003
0
        return(-1);
15004
18.3k
    if (encoding != NULL) {
15005
0
        if (ctxt->encoding != NULL)
15006
0
      xmlFree((xmlChar *) ctxt->encoding);
15007
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15008
0
    }
15009
18.3k
    if (options & XML_PARSE_RECOVER) {
15010
0
        ctxt->recovery = 1;
15011
0
        options -= XML_PARSE_RECOVER;
15012
0
  ctxt->options |= XML_PARSE_RECOVER;
15013
0
    } else
15014
18.3k
        ctxt->recovery = 0;
15015
18.3k
    if (options & XML_PARSE_DTDLOAD) {
15016
0
        ctxt->loadsubset = XML_DETECT_IDS;
15017
0
        options -= XML_PARSE_DTDLOAD;
15018
0
  ctxt->options |= XML_PARSE_DTDLOAD;
15019
0
    } else
15020
18.3k
        ctxt->loadsubset = 0;
15021
18.3k
    if (options & XML_PARSE_DTDATTR) {
15022
0
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15023
0
        options -= XML_PARSE_DTDATTR;
15024
0
  ctxt->options |= XML_PARSE_DTDATTR;
15025
0
    }
15026
18.3k
    if (options & XML_PARSE_NOENT) {
15027
0
        ctxt->replaceEntities = 1;
15028
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
15029
0
        options -= XML_PARSE_NOENT;
15030
0
  ctxt->options |= XML_PARSE_NOENT;
15031
0
    } else
15032
18.3k
        ctxt->replaceEntities = 0;
15033
18.3k
    if (options & XML_PARSE_PEDANTIC) {
15034
0
        ctxt->pedantic = 1;
15035
0
        options -= XML_PARSE_PEDANTIC;
15036
0
  ctxt->options |= XML_PARSE_PEDANTIC;
15037
0
    } else
15038
18.3k
        ctxt->pedantic = 0;
15039
18.3k
    if (options & XML_PARSE_NOBLANKS) {
15040
18.3k
        ctxt->keepBlanks = 0;
15041
18.3k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15042
18.3k
        options -= XML_PARSE_NOBLANKS;
15043
18.3k
  ctxt->options |= XML_PARSE_NOBLANKS;
15044
18.3k
    } else
15045
0
        ctxt->keepBlanks = 1;
15046
18.3k
    if (options & XML_PARSE_DTDVALID) {
15047
0
        ctxt->validate = 1;
15048
0
        if (options & XML_PARSE_NOWARNING)
15049
0
            ctxt->vctxt.warning = NULL;
15050
0
        if (options & XML_PARSE_NOERROR)
15051
0
            ctxt->vctxt.error = NULL;
15052
0
        options -= XML_PARSE_DTDVALID;
15053
0
  ctxt->options |= XML_PARSE_DTDVALID;
15054
0
    } else
15055
18.3k
        ctxt->validate = 0;
15056
18.3k
    if (options & XML_PARSE_NOWARNING) {
15057
0
        ctxt->sax->warning = NULL;
15058
0
        options -= XML_PARSE_NOWARNING;
15059
0
    }
15060
18.3k
    if (options & XML_PARSE_NOERROR) {
15061
0
        ctxt->sax->error = NULL;
15062
0
        ctxt->sax->fatalError = NULL;
15063
0
        options -= XML_PARSE_NOERROR;
15064
0
    }
15065
18.3k
#ifdef LIBXML_SAX1_ENABLED
15066
18.3k
    if (options & XML_PARSE_SAX1) {
15067
0
        ctxt->sax->startElement = xmlSAX2StartElement;
15068
0
        ctxt->sax->endElement = xmlSAX2EndElement;
15069
0
        ctxt->sax->startElementNs = NULL;
15070
0
        ctxt->sax->endElementNs = NULL;
15071
0
        ctxt->sax->initialized = 1;
15072
0
        options -= XML_PARSE_SAX1;
15073
0
  ctxt->options |= XML_PARSE_SAX1;
15074
0
    }
15075
18.3k
#endif /* LIBXML_SAX1_ENABLED */
15076
18.3k
    if (options & XML_PARSE_NODICT) {
15077
0
        ctxt->dictNames = 0;
15078
0
        options -= XML_PARSE_NODICT;
15079
0
  ctxt->options |= XML_PARSE_NODICT;
15080
18.3k
    } else {
15081
18.3k
        ctxt->dictNames = 1;
15082
18.3k
    }
15083
18.3k
    if (options & XML_PARSE_NOCDATA) {
15084
18.3k
        ctxt->sax->cdataBlock = NULL;
15085
18.3k
        options -= XML_PARSE_NOCDATA;
15086
18.3k
  ctxt->options |= XML_PARSE_NOCDATA;
15087
18.3k
    }
15088
18.3k
    if (options & XML_PARSE_NSCLEAN) {
15089
18.3k
  ctxt->options |= XML_PARSE_NSCLEAN;
15090
18.3k
        options -= XML_PARSE_NSCLEAN;
15091
18.3k
    }
15092
18.3k
    if (options & XML_PARSE_NONET) {
15093
18.3k
  ctxt->options |= XML_PARSE_NONET;
15094
18.3k
        options -= XML_PARSE_NONET;
15095
18.3k
    }
15096
18.3k
    if (options & XML_PARSE_COMPACT) {
15097
0
  ctxt->options |= XML_PARSE_COMPACT;
15098
0
        options -= XML_PARSE_COMPACT;
15099
0
    }
15100
18.3k
    if (options & XML_PARSE_OLD10) {
15101
0
  ctxt->options |= XML_PARSE_OLD10;
15102
0
        options -= XML_PARSE_OLD10;
15103
0
    }
15104
18.3k
    if (options & XML_PARSE_NOBASEFIX) {
15105
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
15106
0
        options -= XML_PARSE_NOBASEFIX;
15107
0
    }
15108
18.3k
    if (options & XML_PARSE_HUGE) {
15109
18.3k
  ctxt->options |= XML_PARSE_HUGE;
15110
18.3k
        options -= XML_PARSE_HUGE;
15111
18.3k
        if (ctxt->dict != NULL)
15112
18.3k
            xmlDictSetLimit(ctxt->dict, 0);
15113
18.3k
    }
15114
18.3k
    if (options & XML_PARSE_OLDSAX) {
15115
0
  ctxt->options |= XML_PARSE_OLDSAX;
15116
0
        options -= XML_PARSE_OLDSAX;
15117
0
    }
15118
18.3k
    if (options & XML_PARSE_IGNORE_ENC) {
15119
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
15120
0
        options -= XML_PARSE_IGNORE_ENC;
15121
0
    }
15122
18.3k
    if (options & XML_PARSE_BIG_LINES) {
15123
0
  ctxt->options |= XML_PARSE_BIG_LINES;
15124
0
        options -= XML_PARSE_BIG_LINES;
15125
0
    }
15126
18.3k
    ctxt->linenumbers = 1;
15127
18.3k
    return (options);
15128
18.3k
}
15129
15130
/**
15131
 * xmlCtxtUseOptions:
15132
 * @ctxt: an XML parser context
15133
 * @options:  a combination of xmlParserOption
15134
 *
15135
 * Applies the options to the parser context
15136
 *
15137
 * Returns 0 in case of success, the set of unknown or unimplemented options
15138
 *         in case of error.
15139
 */
15140
int
15141
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15142
18.3k
{
15143
18.3k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15144
18.3k
}
15145
15146
/**
15147
 * xmlDoRead:
15148
 * @ctxt:  an XML parser context
15149
 * @URL:  the base URL to use for the document
15150
 * @encoding:  the document encoding, or NULL
15151
 * @options:  a combination of xmlParserOption
15152
 * @reuse:  keep the context for reuse
15153
 *
15154
 * Common front-end for the xmlRead functions
15155
 *
15156
 * Returns the resulting document tree or NULL
15157
 */
15158
static xmlDocPtr
15159
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15160
          int options, int reuse)
15161
0
{
15162
0
    xmlDocPtr ret;
15163
15164
0
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15165
0
    if (encoding != NULL) {
15166
0
        xmlCharEncodingHandlerPtr hdlr;
15167
15168
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15169
0
  if (hdlr != NULL)
15170
0
      xmlSwitchToEncoding(ctxt, hdlr);
15171
0
    }
15172
0
    if ((URL != NULL) && (ctxt->input != NULL) &&
15173
0
        (ctxt->input->filename == NULL))
15174
0
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15175
0
    xmlParseDocument(ctxt);
15176
0
    if ((ctxt->wellFormed) || ctxt->recovery)
15177
0
        ret = ctxt->myDoc;
15178
0
    else {
15179
0
        ret = NULL;
15180
0
  if (ctxt->myDoc != NULL) {
15181
0
      xmlFreeDoc(ctxt->myDoc);
15182
0
  }
15183
0
    }
15184
0
    ctxt->myDoc = NULL;
15185
0
    if (!reuse) {
15186
0
  xmlFreeParserCtxt(ctxt);
15187
0
    }
15188
15189
0
    return (ret);
15190
0
}
15191
15192
/**
15193
 * xmlReadDoc:
15194
 * @cur:  a pointer to a zero terminated string
15195
 * @URL:  the base URL to use for the document
15196
 * @encoding:  the document encoding, or NULL
15197
 * @options:  a combination of xmlParserOption
15198
 *
15199
 * parse an XML in-memory document and build a tree.
15200
 *
15201
 * Returns the resulting document tree
15202
 */
15203
xmlDocPtr
15204
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15205
0
{
15206
0
    xmlParserCtxtPtr ctxt;
15207
15208
0
    if (cur == NULL)
15209
0
        return (NULL);
15210
0
    xmlInitParser();
15211
15212
0
    ctxt = xmlCreateDocParserCtxt(cur);
15213
0
    if (ctxt == NULL)
15214
0
        return (NULL);
15215
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15216
0
}
15217
15218
/**
15219
 * xmlReadFile:
15220
 * @filename:  a file or URL
15221
 * @encoding:  the document encoding, or NULL
15222
 * @options:  a combination of xmlParserOption
15223
 *
15224
 * parse an XML file from the filesystem or the network.
15225
 *
15226
 * Returns the resulting document tree
15227
 */
15228
xmlDocPtr
15229
xmlReadFile(const char *filename, const char *encoding, int options)
15230
0
{
15231
0
    xmlParserCtxtPtr ctxt;
15232
15233
0
    xmlInitParser();
15234
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15235
0
    if (ctxt == NULL)
15236
0
        return (NULL);
15237
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15238
0
}
15239
15240
/**
15241
 * xmlReadMemory:
15242
 * @buffer:  a pointer to a char array
15243
 * @size:  the size of the array
15244
 * @URL:  the base URL to use for the document
15245
 * @encoding:  the document encoding, or NULL
15246
 * @options:  a combination of xmlParserOption
15247
 *
15248
 * parse an XML in-memory document and build a tree.
15249
 *
15250
 * Returns the resulting document tree
15251
 */
15252
xmlDocPtr
15253
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15254
0
{
15255
0
    xmlParserCtxtPtr ctxt;
15256
15257
0
    xmlInitParser();
15258
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15259
0
    if (ctxt == NULL)
15260
0
        return (NULL);
15261
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15262
0
}
15263
15264
/**
15265
 * xmlReadFd:
15266
 * @fd:  an open file descriptor
15267
 * @URL:  the base URL to use for the document
15268
 * @encoding:  the document encoding, or NULL
15269
 * @options:  a combination of xmlParserOption
15270
 *
15271
 * parse an XML from a file descriptor and build a tree.
15272
 * NOTE that the file descriptor will not be closed when the
15273
 *      reader is closed or reset.
15274
 *
15275
 * Returns the resulting document tree
15276
 */
15277
xmlDocPtr
15278
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15279
0
{
15280
0
    xmlParserCtxtPtr ctxt;
15281
0
    xmlParserInputBufferPtr input;
15282
0
    xmlParserInputPtr stream;
15283
15284
0
    if (fd < 0)
15285
0
        return (NULL);
15286
0
    xmlInitParser();
15287
15288
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15289
0
    if (input == NULL)
15290
0
        return (NULL);
15291
0
    input->closecallback = NULL;
15292
0
    ctxt = xmlNewParserCtxt();
15293
0
    if (ctxt == NULL) {
15294
0
        xmlFreeParserInputBuffer(input);
15295
0
        return (NULL);
15296
0
    }
15297
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15298
0
    if (stream == NULL) {
15299
0
        xmlFreeParserInputBuffer(input);
15300
0
  xmlFreeParserCtxt(ctxt);
15301
0
        return (NULL);
15302
0
    }
15303
0
    inputPush(ctxt, stream);
15304
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15305
0
}
15306
15307
/**
15308
 * xmlReadIO:
15309
 * @ioread:  an I/O read function
15310
 * @ioclose:  an I/O close function
15311
 * @ioctx:  an I/O handler
15312
 * @URL:  the base URL to use for the document
15313
 * @encoding:  the document encoding, or NULL
15314
 * @options:  a combination of xmlParserOption
15315
 *
15316
 * parse an XML document from I/O functions and source and build a tree.
15317
 *
15318
 * Returns the resulting document tree
15319
 */
15320
xmlDocPtr
15321
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15322
          void *ioctx, const char *URL, const char *encoding, int options)
15323
0
{
15324
0
    xmlParserCtxtPtr ctxt;
15325
0
    xmlParserInputBufferPtr input;
15326
0
    xmlParserInputPtr stream;
15327
15328
0
    if (ioread == NULL)
15329
0
        return (NULL);
15330
0
    xmlInitParser();
15331
15332
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15333
0
                                         XML_CHAR_ENCODING_NONE);
15334
0
    if (input == NULL) {
15335
0
        if (ioclose != NULL)
15336
0
            ioclose(ioctx);
15337
0
        return (NULL);
15338
0
    }
15339
0
    ctxt = xmlNewParserCtxt();
15340
0
    if (ctxt == NULL) {
15341
0
        xmlFreeParserInputBuffer(input);
15342
0
        return (NULL);
15343
0
    }
15344
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15345
0
    if (stream == NULL) {
15346
0
        xmlFreeParserInputBuffer(input);
15347
0
  xmlFreeParserCtxt(ctxt);
15348
0
        return (NULL);
15349
0
    }
15350
0
    inputPush(ctxt, stream);
15351
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15352
0
}
15353
15354
/**
15355
 * xmlCtxtReadDoc:
15356
 * @ctxt:  an XML parser context
15357
 * @cur:  a pointer to a zero terminated string
15358
 * @URL:  the base URL to use for the document
15359
 * @encoding:  the document encoding, or NULL
15360
 * @options:  a combination of xmlParserOption
15361
 *
15362
 * parse an XML in-memory document and build a tree.
15363
 * This reuses the existing @ctxt parser context
15364
 *
15365
 * Returns the resulting document tree
15366
 */
15367
xmlDocPtr
15368
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15369
               const char *URL, const char *encoding, int options)
15370
0
{
15371
0
    if (cur == NULL)
15372
0
        return (NULL);
15373
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15374
0
                              encoding, options));
15375
0
}
15376
15377
/**
15378
 * xmlCtxtReadFile:
15379
 * @ctxt:  an XML parser context
15380
 * @filename:  a file or URL
15381
 * @encoding:  the document encoding, or NULL
15382
 * @options:  a combination of xmlParserOption
15383
 *
15384
 * parse an XML file from the filesystem or the network.
15385
 * This reuses the existing @ctxt parser context
15386
 *
15387
 * Returns the resulting document tree
15388
 */
15389
xmlDocPtr
15390
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15391
                const char *encoding, int options)
15392
0
{
15393
0
    xmlParserInputPtr stream;
15394
15395
0
    if (filename == NULL)
15396
0
        return (NULL);
15397
0
    if (ctxt == NULL)
15398
0
        return (NULL);
15399
0
    xmlInitParser();
15400
15401
0
    xmlCtxtReset(ctxt);
15402
15403
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15404
0
    if (stream == NULL) {
15405
0
        return (NULL);
15406
0
    }
15407
0
    inputPush(ctxt, stream);
15408
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15409
0
}
15410
15411
/**
15412
 * xmlCtxtReadMemory:
15413
 * @ctxt:  an XML parser context
15414
 * @buffer:  a pointer to a char array
15415
 * @size:  the size of the array
15416
 * @URL:  the base URL to use for the document
15417
 * @encoding:  the document encoding, or NULL
15418
 * @options:  a combination of xmlParserOption
15419
 *
15420
 * parse an XML in-memory document and build a tree.
15421
 * This reuses the existing @ctxt parser context
15422
 *
15423
 * Returns the resulting document tree
15424
 */
15425
xmlDocPtr
15426
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15427
                  const char *URL, const char *encoding, int options)
15428
0
{
15429
0
    xmlParserInputBufferPtr input;
15430
0
    xmlParserInputPtr stream;
15431
15432
0
    if (ctxt == NULL)
15433
0
        return (NULL);
15434
0
    if (buffer == NULL)
15435
0
        return (NULL);
15436
0
    xmlInitParser();
15437
15438
0
    xmlCtxtReset(ctxt);
15439
15440
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15441
0
    if (input == NULL) {
15442
0
  return(NULL);
15443
0
    }
15444
15445
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15446
0
    if (stream == NULL) {
15447
0
  xmlFreeParserInputBuffer(input);
15448
0
  return(NULL);
15449
0
    }
15450
15451
0
    inputPush(ctxt, stream);
15452
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15453
0
}
15454
15455
/**
15456
 * xmlCtxtReadFd:
15457
 * @ctxt:  an XML parser context
15458
 * @fd:  an open file descriptor
15459
 * @URL:  the base URL to use for the document
15460
 * @encoding:  the document encoding, or NULL
15461
 * @options:  a combination of xmlParserOption
15462
 *
15463
 * parse an XML from a file descriptor and build a tree.
15464
 * This reuses the existing @ctxt parser context
15465
 * NOTE that the file descriptor will not be closed when the
15466
 *      reader is closed or reset.
15467
 *
15468
 * Returns the resulting document tree
15469
 */
15470
xmlDocPtr
15471
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15472
              const char *URL, const char *encoding, int options)
15473
0
{
15474
0
    xmlParserInputBufferPtr input;
15475
0
    xmlParserInputPtr stream;
15476
15477
0
    if (fd < 0)
15478
0
        return (NULL);
15479
0
    if (ctxt == NULL)
15480
0
        return (NULL);
15481
0
    xmlInitParser();
15482
15483
0
    xmlCtxtReset(ctxt);
15484
15485
15486
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15487
0
    if (input == NULL)
15488
0
        return (NULL);
15489
0
    input->closecallback = NULL;
15490
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15491
0
    if (stream == NULL) {
15492
0
        xmlFreeParserInputBuffer(input);
15493
0
        return (NULL);
15494
0
    }
15495
0
    inputPush(ctxt, stream);
15496
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15497
0
}
15498
15499
/**
15500
 * xmlCtxtReadIO:
15501
 * @ctxt:  an XML parser context
15502
 * @ioread:  an I/O read function
15503
 * @ioclose:  an I/O close function
15504
 * @ioctx:  an I/O handler
15505
 * @URL:  the base URL to use for the document
15506
 * @encoding:  the document encoding, or NULL
15507
 * @options:  a combination of xmlParserOption
15508
 *
15509
 * parse an XML document from I/O functions and source and build a tree.
15510
 * This reuses the existing @ctxt parser context
15511
 *
15512
 * Returns the resulting document tree
15513
 */
15514
xmlDocPtr
15515
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15516
              xmlInputCloseCallback ioclose, void *ioctx,
15517
        const char *URL,
15518
              const char *encoding, int options)
15519
0
{
15520
0
    xmlParserInputBufferPtr input;
15521
0
    xmlParserInputPtr stream;
15522
15523
0
    if (ioread == NULL)
15524
0
        return (NULL);
15525
0
    if (ctxt == NULL)
15526
0
        return (NULL);
15527
0
    xmlInitParser();
15528
15529
0
    xmlCtxtReset(ctxt);
15530
15531
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15532
0
                                         XML_CHAR_ENCODING_NONE);
15533
0
    if (input == NULL) {
15534
0
        if (ioclose != NULL)
15535
0
            ioclose(ioctx);
15536
0
        return (NULL);
15537
0
    }
15538
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15539
0
    if (stream == NULL) {
15540
0
        xmlFreeParserInputBuffer(input);
15541
0
        return (NULL);
15542
0
    }
15543
0
    inputPush(ctxt, stream);
15544
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15545
0
}
15546