Coverage Report

Created: 2025-03-12 04:16

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
220k
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
144
#define XML_PARSER_NON_LINEAR 10
129
130
8.45M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
16.2M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
2.01G
#define XML_PARSER_BUFFER_SIZE 100
147
84.4k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as inputs available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
4.23M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
 * Processing-instruction targets starting with "xml" that W3C
 * specifications allow. The array is terminated by a NULL entry.
 */
static const char* const xmlW3CPIs[] = { "xml-stylesheet", "xml-model", NULL };
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
2.90k
{
215
2.90k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
2.90k
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
2.90k
    if (ctxt != NULL)
219
2.90k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
2.90k
    if (prefix == NULL)
222
1.48k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
1.48k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
1.48k
                        (const char *) localname, NULL, NULL, 0, 0,
225
1.48k
                        "Attribute %s redefined\n", localname);
226
1.42k
    else
227
1.42k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
1.42k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
1.42k
                        (const char *) prefix, (const char *) localname,
230
1.42k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
1.42k
                        localname);
232
2.90k
    if (ctxt != NULL) {
233
2.90k
  ctxt->wellFormed = 0;
234
2.90k
  if (ctxt->recovery == 0)
235
1.10k
      ctxt->disableSAX = 1;
236
2.90k
    }
237
2.90k
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
406k
{
250
406k
    const char *errmsg;
251
252
406k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
406k
        (ctxt->instate == XML_PARSER_EOF))
254
3.71k
  return;
255
402k
    switch (error) {
256
4.10k
        case XML_ERR_INVALID_HEX_CHARREF:
257
4.10k
            errmsg = "CharRef: invalid hexadecimal value";
258
4.10k
            break;
259
5.60k
        case XML_ERR_INVALID_DEC_CHARREF:
260
5.60k
            errmsg = "CharRef: invalid decimal value";
261
5.60k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
147k
        case XML_ERR_INTERNAL_ERROR:
266
147k
            errmsg = "internal error";
267
147k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
689
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
689
            errmsg = "PEReference: expecting ';'";
282
689
            break;
283
92
        case XML_ERR_ENTITY_LOOP:
284
92
            errmsg = "Detected an entity reference loop";
285
92
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
139
        case XML_ERR_ENTITY_PE_INTERNAL:
290
139
            errmsg = "PEReferences forbidden in internal subset";
291
139
            break;
292
791
        case XML_ERR_ENTITY_NOT_FINISHED:
293
791
            errmsg = "EntityValue: \" or ' expected";
294
791
            break;
295
5.98k
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
5.98k
            errmsg = "AttValue: \" or ' expected";
297
5.98k
            break;
298
27.5k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
27.5k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
27.5k
            break;
301
1.62k
        case XML_ERR_LITERAL_NOT_STARTED:
302
1.62k
            errmsg = "SystemLiteral \" or ' expected";
303
1.62k
            break;
304
1.86k
        case XML_ERR_LITERAL_NOT_FINISHED:
305
1.86k
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
1.86k
            break;
307
12.6k
        case XML_ERR_MISPLACED_CDATA_END:
308
12.6k
            errmsg = "Sequence ']]>' not allowed in content";
309
12.6k
            break;
310
1.44k
        case XML_ERR_URI_REQUIRED:
311
1.44k
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
1.44k
            break;
313
176
        case XML_ERR_PUBID_REQUIRED:
314
176
            errmsg = "PUBLIC, the Public Identifier is missing";
315
176
            break;
316
6.47k
        case XML_ERR_HYPHEN_IN_COMMENT:
317
6.47k
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
6.47k
            break;
319
5.58k
        case XML_ERR_PI_NOT_STARTED:
320
5.58k
            errmsg = "xmlParsePI : no target name";
321
5.58k
            break;
322
343
        case XML_ERR_RESERVED_XML_NAME:
323
343
            errmsg = "Invalid PI name";
324
343
            break;
325
100
        case XML_ERR_NOTATION_NOT_STARTED:
326
100
            errmsg = "NOTATION: Name expected here";
327
100
            break;
328
286
        case XML_ERR_NOTATION_NOT_FINISHED:
329
286
            errmsg = "'>' required to close NOTATION declaration";
330
286
            break;
331
1.04k
        case XML_ERR_VALUE_REQUIRED:
332
1.04k
            errmsg = "Entity value required";
333
1.04k
            break;
334
185
        case XML_ERR_URI_FRAGMENT:
335
185
            errmsg = "Fragment not allowed";
336
185
            break;
337
1.38k
        case XML_ERR_ATTLIST_NOT_STARTED:
338
1.38k
            errmsg = "'(' required to start ATTLIST enumeration";
339
1.38k
            break;
340
96
        case XML_ERR_NMTOKEN_REQUIRED:
341
96
            errmsg = "NmToken expected in ATTLIST enumeration";
342
96
            break;
343
292
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
292
            errmsg = "')' required to finish ATTLIST enumeration";
345
292
            break;
346
356
        case XML_ERR_MIXED_NOT_STARTED:
347
356
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
356
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
1.20k
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
1.20k
            errmsg = "ContentDecl : Name or '(' expected";
354
1.20k
            break;
355
2.32k
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
2.32k
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
2.32k
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
41.7k
        case XML_ERR_GT_REQUIRED:
363
41.7k
            errmsg = "expected '>'";
364
41.7k
            break;
365
0
        case XML_ERR_CONDSEC_INVALID:
366
0
            errmsg = "XML conditional section '[' expected";
367
0
            break;
368
1.38k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
1.38k
            errmsg = "Content error in the external subset";
370
1.38k
            break;
371
35
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
35
            errmsg =
373
35
                "conditional section INCLUDE or IGNORE keyword expected";
374
35
            break;
375
3
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
3
            errmsg = "XML conditional section not closed";
377
3
            break;
378
39
        case XML_ERR_XMLDECL_NOT_STARTED:
379
39
            errmsg = "Text declaration '<?xml' required";
380
39
            break;
381
30.3k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
30.3k
            errmsg = "parsing XML declaration: '?>' expected";
383
30.3k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
11.7k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
11.7k
            errmsg = "EntityRef: expecting ';'";
389
11.7k
            break;
390
8.41k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
8.41k
            errmsg = "DOCTYPE improperly terminated";
392
8.41k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
2.74k
        case XML_ERR_EQUAL_REQUIRED:
397
2.74k
            errmsg = "expected '='";
398
2.74k
            break;
399
7.89k
        case XML_ERR_STRING_NOT_CLOSED:
400
7.89k
            errmsg = "String not closed expecting \" or '";
401
7.89k
            break;
402
1.04k
        case XML_ERR_STRING_NOT_STARTED:
403
1.04k
            errmsg = "String not started expecting ' or \"";
404
1.04k
            break;
405
125
        case XML_ERR_ENCODING_NAME:
406
125
            errmsg = "Invalid XML encoding name";
407
125
            break;
408
447
        case XML_ERR_STANDALONE_VALUE:
409
447
            errmsg = "standalone accepts only 'yes' or 'no'";
410
447
            break;
411
6.40k
        case XML_ERR_DOCUMENT_EMPTY:
412
6.40k
            errmsg = "Document is empty";
413
6.40k
            break;
414
39.4k
        case XML_ERR_DOCUMENT_END:
415
39.4k
            errmsg = "Extra content at the end of the document";
416
39.4k
            break;
417
137
        case XML_ERR_NOT_WELL_BALANCED:
418
137
            errmsg = "chunk is not well balanced";
419
137
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
20.7k
        case XML_ERR_VERSION_MISSING:
424
20.7k
            errmsg = "Malformed declaration expecting version";
425
20.7k
            break;
426
0
        case XML_ERR_NAME_TOO_LONG:
427
0
            errmsg = "Name too long";
428
0
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
115
        default:
435
115
            errmsg = "Unregistered error message";
436
402k
    }
437
402k
    if (ctxt != NULL)
438
402k
  ctxt->errNo = error;
439
402k
    if (info == NULL) {
440
255k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
255k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
255k
                        errmsg);
443
255k
    } else {
444
147k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
147k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
147k
                        errmsg, info);
447
147k
    }
448
402k
    if (ctxt != NULL) {
449
402k
  ctxt->wellFormed = 0;
450
402k
  if (ctxt->recovery == 0)
451
106k
      ctxt->disableSAX = 1;
452
402k
    }
453
402k
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
497k
{
467
497k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
497k
        (ctxt->instate == XML_PARSER_EOF))
469
0
  return;
470
497k
    if (ctxt != NULL)
471
497k
  ctxt->errNo = error;
472
497k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
497k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
497k
    if (ctxt != NULL) {
475
497k
  ctxt->wellFormed = 0;
476
497k
  if (ctxt->recovery == 0)
477
104k
      ctxt->disableSAX = 1;
478
497k
    }
479
497k
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
9.42k
{
495
9.42k
    xmlStructuredErrorFunc schannel = NULL;
496
497
9.42k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
9.42k
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
9.42k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
9.42k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
6.20k
        schannel = ctxt->sax->serror;
503
9.42k
    if (ctxt != NULL) {
504
9.42k
        __xmlRaiseError(schannel,
505
9.42k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
9.42k
                    ctxt->userData,
507
9.42k
                    ctxt, NULL, XML_FROM_PARSER, error,
508
9.42k
                    XML_ERR_WARNING, NULL, 0,
509
9.42k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
9.42k
        msg, (const char *) str1, (const char *) str2);
511
9.42k
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
9.42k
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
252
{
533
252
    xmlStructuredErrorFunc schannel = NULL;
534
535
252
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
252
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
252
    if (ctxt != NULL) {
539
252
  ctxt->errNo = error;
540
252
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
103
      schannel = ctxt->sax->serror;
542
252
    }
543
252
    if (ctxt != NULL) {
544
252
        __xmlRaiseError(schannel,
545
252
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
252
                    ctxt, NULL, XML_FROM_DTD, error,
547
252
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
252
        (const char *) str2, NULL, 0, 0,
549
252
        msg, (const char *) str1, (const char *) str2);
550
252
  ctxt->valid = 0;
551
252
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
252
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
780k
{
573
780k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
780k
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
780k
    if (ctxt != NULL)
577
780k
  ctxt->errNo = error;
578
780k
    __xmlRaiseError(NULL, NULL, NULL,
579
780k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
780k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
780k
    if (ctxt != NULL) {
582
780k
  ctxt->wellFormed = 0;
583
780k
  if (ctxt->recovery == 0)
584
46.4k
      ctxt->disableSAX = 1;
585
780k
    }
586
780k
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
166k
{
604
166k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
166k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
166k
    if (ctxt != NULL)
608
166k
  ctxt->errNo = error;
609
166k
    __xmlRaiseError(NULL, NULL, NULL,
610
166k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
166k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
166k
        NULL, val, 0, msg, str1, val, str2);
613
166k
    if (ctxt != NULL) {
614
166k
  ctxt->wellFormed = 0;
615
166k
  if (ctxt->recovery == 0)
616
52.1k
      ctxt->disableSAX = 1;
617
166k
    }
618
166k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
766k
{
633
766k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
766k
        (ctxt->instate == XML_PARSER_EOF))
635
0
  return;
636
766k
    if (ctxt != NULL)
637
766k
  ctxt->errNo = error;
638
766k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
766k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
766k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
766k
                    val);
642
766k
    if (ctxt != NULL) {
643
766k
  ctxt->wellFormed = 0;
644
766k
  if (ctxt->recovery == 0)
645
524k
      ctxt->disableSAX = 1;
646
766k
    }
647
766k
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
32.6k
{
662
32.6k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
32.6k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
32.6k
    if (ctxt != NULL)
666
32.6k
  ctxt->errNo = error;
667
32.6k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
32.6k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
32.6k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
32.6k
                    val);
671
32.6k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
110k
{
689
110k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
110k
        (ctxt->instate == XML_PARSER_EOF))
691
0
  return;
692
110k
    if (ctxt != NULL)
693
110k
  ctxt->errNo = error;
694
110k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
110k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
110k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
110k
                    info1, info2, info3);
698
110k
    if (ctxt != NULL)
699
110k
  ctxt->nsWellFormed = 0;
700
110k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
2.90k
{
718
2.90k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
2.90k
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
2.90k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
2.90k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
2.90k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
2.90k
                    info1, info2, info3);
725
2.90k
}
726
727
/*
 * Add @val to *@dst, clamping the result at ULONG_MAX instead of
 * wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add the size_t quantity @val to *@dst, clamping the result at
 * ULONG_MAX instead of wrapping around.
 *
 * @val is declared as size_t so that, on platforms where size_t is wider
 * than unsigned long, the value is compared at full width against the
 * remaining room rather than being truncated at the call.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;    /* fits: val <= ULONG_MAX - *dst */
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
8.45M
{
770
8.45M
    unsigned long consumed;
771
8.45M
    xmlParserInputPtr input = ctxt->input;
772
8.45M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
8.45M
    consumed = input->parentConsumed;
779
8.45M
    if ((entity == NULL) ||
780
8.45M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
6.05M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
6.05M
        xmlSaturatedAdd(&consumed, input->consumed);
783
6.05M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
6.05M
    }
785
8.45M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
8.45M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
8.45M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
8.45M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
8.45M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
144
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
144
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
144
                       "Maximum entity amplification factor exceeded");
803
144
        xmlHaltParser(ctxt);
804
144
        return(1);
805
144
    }
806
807
8.45M
    return(0);
808
8.45M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exist, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
235k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
235k
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
235k
    (void) sax;
1048
1049
235k
    if (ctxt == NULL) return;
1050
235k
    sax = ctxt->sax;
1051
235k
#ifdef LIBXML_SAX1_ENABLED
1052
235k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
235k
        ((sax->startElementNs != NULL) ||
1054
160k
         (sax->endElementNs != NULL) ||
1055
160k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
160k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
235k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
235k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
235k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
235k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
235k
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
235k
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
4.58k
{
1103
4.58k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
5.21k
    while (*src == 0x20) src++;
1107
57.1k
    while (*src != 0) {
1108
52.5k
  if (*src == 0x20) {
1109
5.25k
      while (*src == 0x20) src++;
1110
1.85k
      if (*src != 0)
1111
1.30k
    *dst++ = 0x20;
1112
50.7k
  } else {
1113
50.7k
      *dst++ = *src++;
1114
50.7k
  }
1115
52.5k
    }
1116
4.58k
    *dst = 0;
1117
4.58k
    if (dst == src)
1118
3.76k
       return(NULL);
1119
824
    return(dst);
1120
4.58k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
10.5k
{
1136
10.5k
    int i;
1137
10.5k
    int remove_head = 0;
1138
10.5k
    int need_realloc = 0;
1139
10.5k
    const xmlChar *cur;
1140
1141
10.5k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
10.5k
    i = *len;
1144
10.5k
    if (i <= 0)
1145
284
        return(NULL);
1146
1147
10.3k
    cur = src;
1148
10.3k
    while (*cur == 0x20) {
1149
63
        cur++;
1150
63
  remove_head++;
1151
63
    }
1152
104k
    while (*cur != 0) {
1153
94.8k
  if (*cur == 0x20) {
1154
5.94k
      cur++;
1155
5.94k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
437
          need_realloc = 1;
1157
437
    break;
1158
437
      }
1159
5.94k
  } else
1160
88.9k
      cur++;
1161
94.8k
    }
1162
10.3k
    if (need_realloc) {
1163
437
        xmlChar *ret;
1164
1165
437
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
437
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
437
  xmlAttrNormalizeSpace(ret, ret);
1171
437
  *len = strlen((const char *)ret);
1172
437
        return(ret);
1173
9.86k
    } else if (remove_head) {
1174
3
        *len -= remove_head;
1175
3
        memmove(src, src + remove_head, 1 + *len);
1176
3
  return(src);
1177
3
    }
1178
9.86k
    return(NULL);
1179
10.3k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
7.96k
               const xmlChar *value) {
1195
7.96k
    xmlDefAttrsPtr defaults;
1196
7.96k
    int len;
1197
7.96k
    const xmlChar *name;
1198
7.96k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
7.96k
    if (ctxt->attsSpecial != NULL) {
1204
5.76k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
583
      return;
1206
5.76k
    }
1207
1208
7.37k
    if (ctxt->attsDefault == NULL) {
1209
2.40k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
2.40k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
2.40k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
7.37k
    name = xmlSplitQName3(fullname, &len);
1219
7.37k
    if (name == NULL) {
1220
7.00k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
7.00k
  prefix = NULL;
1222
7.00k
    } else {
1223
374
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
374
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
374
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
7.37k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
7.37k
    if (defaults == NULL) {
1232
4.17k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
4.17k
                     (4 * 5) * sizeof(const xmlChar *));
1234
4.17k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
4.17k
  defaults->nbAttrs = 0;
1237
4.17k
  defaults->maxAttrs = 4;
1238
4.17k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
4.17k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
4.17k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
18
        xmlDefAttrsPtr temp;
1245
1246
18
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
18
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
18
  if (temp == NULL)
1249
0
      goto mem_error;
1250
18
  defaults = temp;
1251
18
  defaults->maxAttrs *= 2;
1252
18
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
18
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
18
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
7.37k
    name = xmlSplitQName3(fullattr, &len);
1264
7.37k
    if (name == NULL) {
1265
6.26k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
6.26k
  prefix = NULL;
1267
6.26k
    } else {
1268
1.11k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
1.11k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
1.11k
    }
1271
1272
7.37k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
7.37k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
7.37k
    len = xmlStrlen(value);
1276
7.37k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
7.37k
    if (value == NULL)
1278
0
        goto mem_error;
1279
7.37k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
7.37k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
7.37k
    if (ctxt->external)
1282
4.06k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
3.31k
    else
1284
3.31k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
7.37k
    defaults->nbAttrs++;
1286
1287
7.37k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
7.37k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
59.5k
{
1309
59.5k
    if (ctxt->attsSpecial == NULL) {
1310
7.25k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
7.25k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
7.25k
    }
1314
1315
59.5k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
750
        return;
1317
1318
58.7k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
58.7k
                     (void *) (ptrdiff_t) type);
1320
58.7k
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
59.5k
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
53.3k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
53.3k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
53.3k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
16.6k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
16.6k
    }
1341
53.3k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
30.2k
{
1354
30.2k
    if (ctxt->attsSpecial == NULL)
1355
25.4k
        return;
1356
1357
4.87k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
4.87k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
839
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
839
        ctxt->attsSpecial = NULL;
1362
839
    }
1363
4.87k
    return;
1364
30.2k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
388
{
1427
388
    const xmlChar *cur = lang, *nxt;
1428
1429
388
    if (cur == NULL)
1430
22
        return (0);
1431
366
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
366
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
366
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
366
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
0
        cur += 2;
1441
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
0
            cur++;
1444
0
        return(cur[0] == 0);
1445
0
    }
1446
366
    nxt = cur;
1447
2.29k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
2.29k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
1.93k
           nxt++;
1450
366
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
86
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
75
            return(0);
1456
11
        return(1);
1457
86
    }
1458
280
    if (nxt - cur < 2)
1459
41
        return(0);
1460
    /* we got an ISO 639 code */
1461
239
    if (nxt[0] == 0)
1462
45
        return(1);
1463
194
    if (nxt[0] != '-')
1464
98
        return(0);
1465
1466
96
    nxt++;
1467
96
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
96
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
59
        goto region_m49;
1471
1472
99
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
99
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
62
           nxt++;
1475
37
    if (nxt - cur == 4)
1476
2
        goto script;
1477
35
    if (nxt - cur == 2)
1478
12
        goto region;
1479
23
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
2
        goto variant;
1481
21
    if (nxt - cur != 3)
1482
16
        return(0);
1483
    /* we parsed an extlang */
1484
5
    if (nxt[0] == 0)
1485
0
        return(1);
1486
5
    if (nxt[0] != '-')
1487
3
        return(0);
1488
1489
2
    nxt++;
1490
2
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
2
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
0
        goto region_m49;
1494
1495
6
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
6
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
4
           nxt++;
1498
2
    if (nxt - cur == 2)
1499
2
        goto region;
1500
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
0
        goto variant;
1502
0
    if (nxt - cur != 4)
1503
0
        return(0);
1504
    /* we parsed a script */
1505
2
script:
1506
2
    if (nxt[0] == 0)
1507
0
        return(1);
1508
2
    if (nxt[0] != '-')
1509
2
        return(0);
1510
1511
0
    nxt++;
1512
0
    cur = nxt;
1513
    /* now we can have region or variant */
1514
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
0
        goto region_m49;
1516
1517
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
0
           nxt++;
1520
1521
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
0
        goto variant;
1523
0
    if (nxt - cur != 2)
1524
0
        return(0);
1525
    /* we parsed a region */
1526
17
region:
1527
17
    if (nxt[0] == 0)
1528
10
        return(1);
1529
7
    if (nxt[0] != '-')
1530
7
        return(0);
1531
1532
0
    nxt++;
1533
0
    cur = nxt;
1534
    /* now we can just have a variant */
1535
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
0
           nxt++;
1538
1539
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
0
        return(0);
1541
1542
    /* we parsed a variant */
1543
2
variant:
1544
2
    if (nxt[0] == 0)
1545
0
        return(1);
1546
2
    if (nxt[0] != '-')
1547
2
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
0
    return (1);
1550
1551
59
region_m49:
1552
59
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
59
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
3
        nxt += 3;
1555
3
        goto region;
1556
3
    }
1557
56
    return(0);
1558
59
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
37.7k
{
1584
37.7k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
12.7k
        int i;
1586
16.9k
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
9.22k
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
5.00k
          if (ctxt->nsTab[i + 1] == URL)
1590
2.83k
        return(-2);
1591
    /* out of scope keep it */
1592
2.17k
    break;
1593
5.00k
      }
1594
9.22k
  }
1595
12.7k
    }
1596
34.9k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
17.1k
  ctxt->nsMax = 10;
1598
17.1k
  ctxt->nsNr = 0;
1599
17.1k
  ctxt->nsTab = (const xmlChar **)
1600
17.1k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
17.1k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
17.8k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
758
        const xmlChar ** tmp;
1608
758
        ctxt->nsMax *= 2;
1609
758
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
758
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
758
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
758
  ctxt->nsTab = tmp;
1617
758
    }
1618
34.9k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
34.9k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
34.9k
    return (ctxt->nsNr);
1621
34.9k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
10.3k
{
1634
10.3k
    int i;
1635
1636
10.3k
    if (ctxt->nsTab == NULL) return(0);
1637
10.3k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
10.3k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
34.2k
    for (i = 0;i < nr;i++) {
1645
23.9k
         ctxt->nsNr--;
1646
23.9k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
23.9k
    }
1648
10.3k
    return(nr);
1649
10.3k
}
1650
#endif
1651
1652
static int
1653
23.4k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
23.4k
    const xmlChar **atts;
1655
23.4k
    int *attallocs;
1656
23.4k
    int maxatts;
1657
1658
23.4k
    if (nr + 5 > ctxt->maxatts) {
1659
23.4k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
23.4k
  atts = (const xmlChar **) xmlMalloc(
1661
23.4k
             maxatts * sizeof(const xmlChar *));
1662
23.4k
  if (atts == NULL) goto mem_error;
1663
23.4k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
23.4k
                               (maxatts / 5) * sizeof(int));
1665
23.4k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
23.4k
        if (ctxt->maxatts > 0)
1670
39
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
23.4k
        xmlFree(ctxt->atts);
1672
23.4k
  ctxt->atts = atts;
1673
23.4k
  ctxt->attallocs = attallocs;
1674
23.4k
  ctxt->maxatts = maxatts;
1675
23.4k
    }
1676
23.4k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
23.4k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
2.57M
{
1694
2.57M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
2.57M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
45
        size_t newSize = ctxt->inputMax * 2;
1698
45
        xmlParserInputPtr *tmp;
1699
1700
45
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
45
                                               newSize * sizeof(*tmp));
1702
45
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
45
        ctxt->inputTab = tmp;
1707
45
        ctxt->inputMax = newSize;
1708
45
    }
1709
2.57M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
2.57M
    ctxt->input = value;
1711
2.57M
    return (ctxt->inputNr++);
1712
2.57M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
2.89M
{
1724
2.89M
    xmlParserInputPtr ret;
1725
1726
2.89M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
2.89M
    if (ctxt->inputNr <= 0)
1729
322k
        return (NULL);
1730
2.56M
    ctxt->inputNr--;
1731
2.56M
    if (ctxt->inputNr > 0)
1732
2.40M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
160k
    else
1734
160k
        ctxt->input = NULL;
1735
2.56M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
2.56M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
2.56M
    return (ret);
1738
2.89M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
1.02M
{
1751
1.02M
    if (ctxt == NULL) return(0);
1752
1.02M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
3.64k
        xmlNodePtr *tmp;
1754
1755
3.64k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
3.64k
                                      ctxt->nodeMax * 2 *
1757
3.64k
                                      sizeof(ctxt->nodeTab[0]));
1758
3.64k
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
3.64k
        ctxt->nodeTab = tmp;
1763
3.64k
  ctxt->nodeMax *= 2;
1764
3.64k
    }
1765
1.02M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
1.02M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
1.02M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
1.02M
    ctxt->node = value;
1775
1.02M
    return (ctxt->nodeNr++);
1776
1.02M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
935k
{
1789
935k
    xmlNodePtr ret;
1790
1791
935k
    if (ctxt == NULL) return(NULL);
1792
935k
    if (ctxt->nodeNr <= 0)
1793
42.3k
        return (NULL);
1794
893k
    ctxt->nodeNr--;
1795
893k
    if (ctxt->nodeNr > 0)
1796
777k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
115k
    else
1798
115k
        ctxt->node = NULL;
1799
893k
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
893k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
893k
    return (ret);
1802
935k
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
976k
{
1821
976k
    xmlStartTag *tag;
1822
1823
976k
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
9.66k
        const xmlChar * *tmp;
1825
9.66k
        xmlStartTag *tmp2;
1826
9.66k
        ctxt->nameMax *= 2;
1827
9.66k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
9.66k
                                    ctxt->nameMax *
1829
9.66k
                                    sizeof(ctxt->nameTab[0]));
1830
9.66k
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
9.66k
  ctxt->nameTab = tmp;
1835
9.66k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
9.66k
                                    ctxt->nameMax *
1837
9.66k
                                    sizeof(ctxt->pushTab[0]));
1838
9.66k
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
9.66k
  ctxt->pushTab = tmp2;
1843
967k
    } else if (ctxt->pushTab == NULL) {
1844
86.5k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
86.5k
                                            sizeof(ctxt->pushTab[0]));
1846
86.5k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
86.5k
    }
1849
976k
    ctxt->nameTab[ctxt->nameNr] = value;
1850
976k
    ctxt->name = value;
1851
976k
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
976k
    tag->prefix = prefix;
1853
976k
    tag->URI = URI;
1854
976k
    tag->line = line;
1855
976k
    tag->nsNr = nsNr;
1856
976k
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
976k
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the entry on top of the name stack. The entry below it, if
 * any, becomes the current name (ctxt->name); otherwise the current
 * name is reset to NULL. The vacated slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *top;
    int idx;

    if (ctxt->nameNr <= 0)
        return(NULL);

    idx = --ctxt->nameNr;
    ctxt->name = (idx > 0) ? ctxt->nameTab[idx - 1] : NULL;

    top = ctxt->nameTab[idx];
    ctxt->nameTab[idx] = NULL;
    return(top);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
468k
{
1931
468k
    const xmlChar *ret;
1932
1933
468k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
468k
    ctxt->nameNr--;
1936
468k
    if (ctxt->nameNr > 0)
1937
446k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
21.7k
    else
1939
21.7k
        ctxt->name = NULL;
1940
468k
    ret = ctxt->nameTab[ctxt->nameNr];
1941
468k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
468k
    return (ret);
1943
468k
}
1944
1945
1.21M
/*
 * spacePush:
 * Pushes val on top of the space stack, doubling the stack capacity first
 * when it is full, and points ctxt->space at the new top entry.
 *
 * Returns -1 on allocation failure, the index of the new entry otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int *top;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax *
                                   sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* undo the doubling: the old array is still in place */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    top = &ctxt->spaceTab[ctxt->spaceNr];
    *top = val;
    ctxt->space = top;
    return(ctxt->spaceNr++);
}
1963
1964
1.08M
/*
 * spacePop:
 * Removes the top entry of the space stack and overwrites the vacated
 * slot with -1. ctxt->space is moved to the entry below, or to slot 0
 * when the stack becomes empty.
 *
 * Returns the removed value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int *slot;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    ctxt->spaceNr--;
    slot = &ctxt->spaceTab[ctxt->spaceNr];
    ctxt->space = (ctxt->spaceNr > 0) ? (slot - 1) : &ctxt->spaceTab[0];

    popped = *slot;
    *slot = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often needs to make assumptions about the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
31.8M
/*
 * Raw accessors on the current input. They all expect a variable named
 * `ctxt` (the parser context) to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* parenthesized so the result composes safely with surrounding operators */
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char. Evaluation short-circuits on the
 * first mismatch; s is evaluated once per byte compared.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
9.13M
/*
 * SKIP(val): advance the input cursor and the column counter by val
 * bytes, then refill the input if the cursor now sits on a 0 byte.
 * val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance the input cursor by val bytes one at a time,
 * keeping line/col in sync when a '\n' is crossed, then refill the input
 * if the cursor now sits on a 0 byte.
 * val is parenthesized in the loop test so that an expression argument
 * (e.g. a conditional expression) is compared as a whole; it is evaluated
 * on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SHRINK: outside progressive mode, call xmlSHRINK() once more than
 * 2 * INPUT_CHUNK bytes lie behind the cursor and fewer than
 * 2 * INPUT_CHUNK bytes remain ahead of it.
 * NOTE: this expands to a bare `if` statement that already carries its
 * own trailing ';', so it must not be used as the unbraced body of an
 * if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&				\
		   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
		   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlSHRINK (ctxt);
2058
2059
330k
/*
 * xmlSHRINK:
 * Helper behind the SHRINK macro. Calls xmlParserInputShrink() on the
 * current input when it has a buffer with an encoder or a read callback
 * (other inputs are left alone), then refills the input if the cursor
 * sits on a 0 byte.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;

    /* Don't shrink memory buffers. */
    if (in->buf) {
        if ((in->buf->encoder) || (in->buf->readcallback))
            xmlParserInputShrink(in);
    }

    if (*in->cur == 0)
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2067
2068
37.2M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
37.2M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
37.2M
  xmlGROW (ctxt);
2071
2072
6.13M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
6.13M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
6.13M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
6.13M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
6.13M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
6.13M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
6.13M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
6.13M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
6.13M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
6.13M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
6.13M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
86.1k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
6.13M
}
2095
2096
8.55M
/* Skip blanks at the current position; see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance cursor and column by exactly one byte, then refill the
 * input if the cursor now sits on a 0 byte. Line accounting is not
 * updated, so it must not be used on a newline.
 * NOTE: expands to a brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating line/col (a '\n' starts a new line, anything else counts as
 * one column).
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

/* Decode the current character; l receives its length in bytes. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A one-byte character (l == 1) is stored directly; anything
 * else goes through xmlCopyCharMultiByte().
 * NOTE: expands to a bare if/else statement without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
8.55M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
8.55M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
8.55M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
8.55M
        (ctxt->instate == XML_PARSER_START)) {
2141
4.80M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
4.80M
  cur = ctxt->input->cur;
2146
4.80M
  while (IS_BLANK_CH(*cur)) {
2147
2.59M
      if (*cur == '\n') {
2148
182k
    ctxt->input->line++; ctxt->input->col = 1;
2149
2.40M
      } else {
2150
2.40M
    ctxt->input->col++;
2151
2.40M
      }
2152
2.59M
      cur++;
2153
2.59M
      if (res < INT_MAX)
2154
2.59M
    res++;
2155
2.59M
      if (*cur == 0) {
2156
9.09k
    ctxt->input->cur = cur;
2157
9.09k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
9.09k
    cur = ctxt->input->cur;
2159
9.09k
      }
2160
2.59M
  }
2161
4.80M
  ctxt->input->cur = cur;
2162
4.80M
    } else {
2163
3.74M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
11.9M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
11.9M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
3.37M
    NEXT;
2168
8.56M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
2.43M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
28.6k
                    break;
2174
2.40M
          xmlParsePEReference(ctxt);
2175
6.12M
            } else if (CUR == 0) {
2176
2.41M
                unsigned long consumed;
2177
2.41M
                xmlEntityPtr ent;
2178
2179
2.41M
                if (ctxt->inputNr <= 1)
2180
5.86k
                    break;
2181
2182
2.40M
                consumed = ctxt->input->consumed;
2183
2.40M
                xmlSaturatedAddSizeT(&consumed,
2184
2.40M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
2.40M
                ent = ctxt->input->entity;
2191
2.40M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
2.40M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
61
                    ent->flags |= XML_ENT_PARSED;
2194
2195
61
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
61
                }
2197
2198
2.40M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
2.40M
                xmlPopInput(ctxt);
2201
3.71M
            } else {
2202
3.71M
                break;
2203
3.71M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
8.19M
      if (res < INT_MAX)
2213
8.19M
    res++;
2214
8.19M
        }
2215
3.74M
    }
2216
8.55M
    return(res);
2217
8.55M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
2.40M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
2.40M
    xmlParserInputPtr input;
2237
2238
2.40M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
2.40M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
2.40M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
2.40M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
2.40M
    input = inputPop(ctxt);
2247
2.40M
    if (input->entity != NULL)
2248
2.40M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
2.40M
    xmlFreeInputStream(input);
2250
2.40M
    if (*ctxt->input->cur == 0)
2251
1.19M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
2.40M
    return(CUR);
2253
2.40M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
2.41M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
2.41M
    int ret;
2267
2.41M
    if (input == NULL) return(-1);
2268
2269
2.41M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
2.41M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
2.41M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
2.41M
    ret = inputPush(ctxt, input);
2285
2.41M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
2.41M
    GROW;
2288
2.41M
    return(ret);
2289
2.41M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
46.7k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
46.7k
    int val = 0;
2311
46.7k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
46.7k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
46.7k
        (NXT(2) == 'x')) {
2318
13.3k
  SKIP(3);
2319
13.3k
  GROW;
2320
52.1k
  while (RAW != ';') { /* loop blocked by count */
2321
42.8k
      if (count++ > 20) {
2322
1.61k
    count = 0;
2323
1.61k
    GROW;
2324
1.61k
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
1.61k
      }
2327
42.8k
      if ((RAW >= '0') && (RAW <= '9'))
2328
29.8k
          val = val * 16 + (CUR - '0');
2329
12.9k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
8.08k
          val = val * 16 + (CUR - 'a') + 10;
2331
4.87k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
858
          val = val * 16 + (CUR - 'A') + 10;
2333
4.02k
      else {
2334
4.02k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
4.02k
    val = 0;
2336
4.02k
    break;
2337
4.02k
      }
2338
38.7k
      if (val > 0x110000)
2339
17.7k
          val = 0x110000;
2340
2341
38.7k
      NEXT;
2342
38.7k
      count++;
2343
38.7k
  }
2344
13.3k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
9.35k
      ctxt->input->col++;
2347
9.35k
      ctxt->input->cur++;
2348
9.35k
  }
2349
33.3k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
33.3k
  SKIP(2);
2351
33.3k
  GROW;
2352
128k
  while (RAW != ';') { /* loop blocked by count */
2353
100k
      if (count++ > 20) {
2354
2.65k
    count = 0;
2355
2.65k
    GROW;
2356
2.65k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
2.65k
      }
2359
100k
      if ((RAW >= '0') && (RAW <= '9'))
2360
95.2k
          val = val * 10 + (CUR - '0');
2361
5.22k
      else {
2362
5.22k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
5.22k
    val = 0;
2364
5.22k
    break;
2365
5.22k
      }
2366
95.2k
      if (val > 0x110000)
2367
28.1k
          val = 0x110000;
2368
2369
95.2k
      NEXT;
2370
95.2k
      count++;
2371
95.2k
  }
2372
33.3k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
28.1k
      ctxt->input->col++;
2375
28.1k
      ctxt->input->cur++;
2376
28.1k
  }
2377
33.3k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
46.7k
    if (val >= 0x110000) {
2389
165
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
165
                "xmlParseCharRef: character reference out of bounds\n",
2391
165
          val);
2392
46.5k
    } else if (IS_CHAR(val)) {
2393
35.6k
        return(val);
2394
35.6k
    } else {
2395
10.9k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
10.9k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
10.9k
                    val);
2398
10.9k
    }
2399
11.0k
    return(0);
2400
46.7k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
20.7k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
20.7k
    const xmlChar *ptr;
2423
20.7k
    xmlChar cur;
2424
20.7k
    int val = 0;
2425
2426
20.7k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
20.7k
    ptr = *str;
2428
20.7k
    cur = *ptr;
2429
20.7k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
346
  ptr += 3;
2431
346
  cur = *ptr;
2432
991
  while (cur != ';') { /* Non input consuming loop */
2433
726
      if ((cur >= '0') && (cur <= '9'))
2434
428
          val = val * 16 + (cur - '0');
2435
298
      else if ((cur >= 'a') && (cur <= 'f'))
2436
143
          val = val * 16 + (cur - 'a') + 10;
2437
155
      else if ((cur >= 'A') && (cur <= 'F'))
2438
74
          val = val * 16 + (cur - 'A') + 10;
2439
81
      else {
2440
81
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
81
    val = 0;
2442
81
    break;
2443
81
      }
2444
645
      if (val > 0x110000)
2445
58
          val = 0x110000;
2446
2447
645
      ptr++;
2448
645
      cur = *ptr;
2449
645
  }
2450
346
  if (cur == ';')
2451
265
      ptr++;
2452
20.4k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
20.4k
  ptr += 2;
2454
20.4k
  cur = *ptr;
2455
70.7k
  while (cur != ';') { /* Non input consuming loops */
2456
50.6k
      if ((cur >= '0') && (cur <= '9'))
2457
50.2k
          val = val * 10 + (cur - '0');
2458
380
      else {
2459
380
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
380
    val = 0;
2461
380
    break;
2462
380
      }
2463
50.2k
      if (val > 0x110000)
2464
975
          val = 0x110000;
2465
2466
50.2k
      ptr++;
2467
50.2k
      cur = *ptr;
2468
50.2k
  }
2469
20.4k
  if (cur == ';')
2470
20.0k
      ptr++;
2471
20.4k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
20.7k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
20.7k
    if (val >= 0x110000) {
2483
20
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
20
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
20
                val);
2486
20.7k
    } else if (IS_CHAR(val)) {
2487
19.2k
        return(val);
2488
19.2k
    } else {
2489
1.46k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
1.46k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
1.46k
        val);
2492
1.46k
    }
2493
1.48k
    return(0);
2494
20.7k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n; the macro jumps to
 * mem_error when that computation wraps around or the reallocation
 * fails, leaving buffer and buffer##_size untouched.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 * n is parenthesized so that an expression argument is added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
5.80M
                           int check) {
2617
5.80M
    xmlChar *buffer = NULL;
2618
5.80M
    size_t buffer_size = 0;
2619
5.80M
    size_t nbchars = 0;
2620
2621
5.80M
    xmlChar *current = NULL;
2622
5.80M
    xmlChar *rep = NULL;
2623
5.80M
    const xmlChar *last;
2624
5.80M
    xmlEntityPtr ent;
2625
5.80M
    int c,l;
2626
2627
5.80M
    if (str == NULL)
2628
191
        return(NULL);
2629
5.79M
    last = str + len;
2630
2631
5.79M
    if (((ctxt->depth > 40) &&
2632
5.79M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
5.79M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
5.79M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
5.79M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
5.79M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
5.79M
    if (str < last)
2651
5.79M
  c = CUR_SCHAR(str, l);
2652
5.59k
    else
2653
5.59k
        c = 0;
2654
524M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
524M
           (c != end2) && (c != end3) &&
2656
524M
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
518M
  if (c == 0) break;
2659
518M
        if ((c == '&') && (str[1] == '#')) {
2660
20.7k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
20.7k
      if (val == 0)
2662
1.48k
                goto int_error;
2663
19.2k
      COPY_BUF(0,buffer,nbchars,val);
2664
19.2k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
24
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
24
      }
2667
518M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
6.04M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
6.04M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
6.04M
      if ((ent != NULL) &&
2674
6.04M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
124
    if (ent->content != NULL) {
2676
124
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
124
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
0
        }
2680
124
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
6.04M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
5.58M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
57
                    goto int_error;
2688
2689
5.58M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
18
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
18
                    xmlHaltParser(ctxt);
2692
18
                    ent->content[0] = 0;
2693
18
                    goto int_error;
2694
18
                }
2695
2696
5.58M
                ent->flags |= XML_ENT_EXPANDING;
2697
5.58M
    ctxt->depth++;
2698
5.58M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
5.58M
                        ent->length, what, 0, 0, 0, check);
2700
5.58M
    ctxt->depth--;
2701
5.58M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
5.58M
    if (rep == NULL) {
2704
924
                    ent->content[0] = 0;
2705
924
                    goto int_error;
2706
924
                }
2707
2708
5.58M
                current = rep;
2709
1.16G
                while (*current != 0) { /* non input consuming loop */
2710
1.16G
                    buffer[nbchars++] = *current++;
2711
1.16G
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
371k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
371k
                    }
2714
1.16G
                }
2715
5.58M
                xmlFree(rep);
2716
5.58M
                rep = NULL;
2717
5.58M
      } else if (ent != NULL) {
2718
344
    int i = xmlStrlen(ent->name);
2719
344
    const xmlChar *cur = ent->name;
2720
2721
344
    buffer[nbchars++] = '&';
2722
344
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
0
    }
2725
688
    for (;i > 0;i--)
2726
344
        buffer[nbchars++] = *cur++;
2727
344
    buffer[nbchars++] = ';';
2728
344
      }
2729
512M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
22.8k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
22.8k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
22.8k
      if (ent != NULL) {
2735
18.9k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
371
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
371
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
371
      (ctxt->validate != 0)) {
2745
360
      xmlLoadEntityContent(ctxt, ent);
2746
360
        } else {
2747
11
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
11
      "not validating will not read content for PE entity %s\n",
2749
11
                          ent->name, NULL);
2750
11
        }
2751
371
    }
2752
2753
18.9k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
36
                    goto int_error;
2755
2756
18.8k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
15
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
15
                    xmlHaltParser(ctxt);
2759
15
                    if (ent->content != NULL)
2760
15
                        ent->content[0] = 0;
2761
15
                    goto int_error;
2762
15
                }
2763
2764
18.8k
                ent->flags |= XML_ENT_EXPANDING;
2765
18.8k
    ctxt->depth++;
2766
18.8k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
18.8k
                        ent->length, what, 0, 0, 0, check);
2768
18.8k
    ctxt->depth--;
2769
18.8k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
18.8k
    if (rep == NULL) {
2772
179
                    if (ent->content != NULL)
2773
18
                        ent->content[0] = 0;
2774
179
                    goto int_error;
2775
179
                }
2776
18.6k
                current = rep;
2777
330M
                while (*current != 0) { /* non input consuming loop */
2778
330M
                    buffer[nbchars++] = *current++;
2779
330M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
8.28k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
8.28k
                    }
2782
330M
                }
2783
18.6k
                xmlFree(rep);
2784
18.6k
                rep = NULL;
2785
18.6k
      }
2786
512M
  } else {
2787
512M
      COPY_BUF(l,buffer,nbchars,c);
2788
512M
      str += l;
2789
512M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
96.3k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
96.3k
      }
2792
512M
  }
2793
518M
  if (str < last)
2794
512M
      c = CUR_SCHAR(str, l);
2795
5.79M
  else
2796
5.79M
      c = 0;
2797
518M
    }
2798
5.79M
    buffer[nbchars] = 0;
2799
5.79M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
2.71k
int_error:
2804
2.71k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
2.71k
    if (buffer != NULL)
2807
2.71k
        xmlFree(buffer);
2808
2.71k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
702
                           xmlChar end3) {
2836
702
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
702
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
702
                                      end, end2, end3, 0));
2840
702
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
14.3k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
14.3k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
14.3k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
14.3k
                                      end, end2, end3, 0));
2868
14.3k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
493k
                     int blank_chars) {
2890
493k
    int i, ret;
2891
493k
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
493k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
113k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
380k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
380k
        (*(ctxt->space) == -2))
2905
135k
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
244k
    if (blank_chars == 0) {
2911
178k
  for (i = 0;i < len;i++)
2912
167k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
72.6k
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
183k
    if (ctxt->node == NULL) return(0);
2919
166k
    if (ctxt->myDoc != NULL) {
2920
166k
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
166k
        if (ret == 0) return(1);
2922
160k
        if (ret == 1) return(0);
2923
160k
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
160k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
156k
    if ((ctxt->node->children == NULL) &&
2930
156k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
155k
    lastChild = xmlGetLastChild(ctxt->node);
2933
155k
    if (lastChild == NULL) {
2934
53.1k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
53.1k
            (ctxt->node->content != NULL)) return(0);
2936
102k
    } else if (xmlNodeIsText(lastChild))
2937
9.75k
        return(0);
2938
92.6k
    else if ((ctxt->node->children != NULL) &&
2939
92.6k
             (xmlNodeIsText(ctxt->node->children)))
2940
3.09k
        return(0);
2941
142k
    return(1);
2942
155k
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
503k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
503k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
503k
    xmlChar *buffer = NULL;
2973
503k
    int len = 0;
2974
503k
    int max = XML_MAX_NAMELEN;
2975
503k
    xmlChar *ret = NULL;
2976
503k
    const xmlChar *cur = name;
2977
503k
    int c;
2978
2979
503k
    if (prefix == NULL) return(NULL);
2980
503k
    *prefix = NULL;
2981
2982
503k
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
503k
    if (cur[0] == ':')
2993
2.53k
  return(xmlStrdup(name));
2994
2995
500k
    c = *cur++;
2996
2.64M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
2.14M
  buf[len++] = c;
2998
2.14M
  c = *cur++;
2999
2.14M
    }
3000
500k
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
1.36k
  max = len * 2;
3006
3007
1.36k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
1.36k
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
1.36k
  memcpy(buffer, buf, len);
3013
406k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
404k
      if (len + 10 > max) {
3015
427
          xmlChar *tmp;
3016
3017
427
    max *= 2;
3018
427
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
427
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
427
    buffer = tmp;
3025
427
      }
3026
404k
      buffer[len++] = c;
3027
404k
      c = *cur++;
3028
404k
  }
3029
1.36k
  buffer[len] = 0;
3030
1.36k
    }
3031
3032
500k
    if ((c == ':') && (*cur == 0)) {
3033
2.82k
        if (buffer != NULL)
3034
74
      xmlFree(buffer);
3035
2.82k
  *prefix = NULL;
3036
2.82k
  return(xmlStrdup(name));
3037
2.82k
    }
3038
3039
498k
    if (buffer == NULL)
3040
496k
  ret = xmlStrndup(buf, len);
3041
1.29k
    else {
3042
1.29k
  ret = buffer;
3043
1.29k
  buffer = NULL;
3044
1.29k
  max = XML_MAX_NAMELEN;
3045
1.29k
    }
3046
3047
3048
498k
    if (c == ':') {
3049
101k
  c = *cur;
3050
101k
        *prefix = ret;
3051
101k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
101k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
101k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
101k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
101k
        (c == '_') || (c == ':'))) {
3063
2.96k
      int l;
3064
2.96k
      int first = CUR_SCHAR(cur, l);
3065
3066
2.96k
      if (!IS_LETTER(first) && (first != '_')) {
3067
794
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
794
          "Name %s is not XML Namespace compliant\n",
3069
794
          name);
3070
794
      }
3071
2.96k
  }
3072
101k
  cur++;
3073
3074
686k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
585k
      buf[len++] = c;
3076
585k
      c = *cur++;
3077
585k
  }
3078
101k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
485
      max = len * 2;
3084
3085
485
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
485
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
485
      memcpy(buffer, buf, len);
3091
102k
      while (c != 0) { /* tested bigname2.xml */
3092
101k
    if (len + 10 > max) {
3093
159
        xmlChar *tmp;
3094
3095
159
        max *= 2;
3096
159
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
159
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
159
        buffer = tmp;
3103
159
    }
3104
101k
    buffer[len++] = c;
3105
101k
    c = *cur++;
3106
101k
      }
3107
485
      buffer[len] = 0;
3108
485
  }
3109
3110
101k
  if (buffer == NULL)
3111
100k
      ret = xmlStrndup(buf, len);
3112
485
  else {
3113
485
      ret = buffer;
3114
485
  }
3115
101k
    }
3116
3117
498k
    return(ret);
3118
498k
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
6.32M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
6.32M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
5.57M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
5.57M
      (((c >= 'a') && (c <= 'z')) ||
3160
5.57M
       ((c >= 'A') && (c <= 'Z')) ||
3161
5.57M
       (c == '_') || (c == ':') ||
3162
5.57M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
5.57M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
5.57M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
5.57M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
5.57M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
5.57M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
5.57M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
5.57M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
5.57M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
5.57M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
5.57M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
5.57M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
5.49M
      return(1);
3175
5.57M
    } else {
3176
753k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
727k
      return(1);
3178
753k
    }
3179
99.8k
    return(0);
3180
6.32M
}
3181
3182
static int
3183
166M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
166M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
159M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
159M
      (((c >= 'a') && (c <= 'z')) ||
3191
159M
       ((c >= 'A') && (c <= 'Z')) ||
3192
159M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
159M
       (c == '_') || (c == ':') ||
3194
159M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
159M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
159M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
159M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
159M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
159M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
159M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
159M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
159M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
159M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
159M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
159M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
159M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
159M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
159M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
154M
       return(1);
3210
159M
    } else {
3211
6.61M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
6.61M
            (c == '.') || (c == '-') ||
3213
6.61M
      (c == '_') || (c == ':') ||
3214
6.61M
      (IS_COMBINING(c)) ||
3215
6.61M
      (IS_EXTENDER(c)))
3216
5.88M
      return(1);
3217
6.61M
    }
3218
6.20M
    return(0);
3219
166M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
258k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
258k
    int len = 0, l;
3227
258k
    int c;
3228
258k
    int count = 0;
3229
258k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
90.1k
                    XML_MAX_TEXT_LENGTH :
3231
258k
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
258k
    GROW;
3241
258k
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
258k
    c = CUR_CHAR(l);
3244
258k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
154k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
154k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
143k
         ((c >= 'A') && (c <= 'Z')) ||
3252
143k
         (c == '_') || (c == ':') ||
3253
143k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
143k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
143k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
143k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
143k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
143k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
143k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
143k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
143k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
143k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
143k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
143k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
83.3k
      return(NULL);
3266
83.3k
  }
3267
71.3k
  len += l;
3268
71.3k
  NEXTL(l);
3269
71.3k
  c = CUR_CHAR(l);
3270
1.19M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
1.19M
         (((c >= 'a') && (c <= 'z')) ||
3272
1.17M
          ((c >= 'A') && (c <= 'Z')) ||
3273
1.17M
          ((c >= '0') && (c <= '9')) || /* !start */
3274
1.17M
          (c == '_') || (c == ':') ||
3275
1.17M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
1.17M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
1.17M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
1.17M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
1.17M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
1.17M
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
1.17M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
1.17M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
1.17M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
1.17M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
1.17M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
1.17M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
1.17M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
1.17M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
1.17M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
1.17M
    )) {
3291
1.12M
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
3.98k
    count = 0;
3293
3.98k
    GROW;
3294
3.98k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
3.98k
      }
3297
1.12M
            if (len <= INT_MAX - l)
3298
1.12M
          len += l;
3299
1.12M
      NEXTL(l);
3300
1.12M
      c = CUR_CHAR(l);
3301
1.12M
  }
3302
103k
    } else {
3303
103k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
103k
      (!IS_LETTER(c) && (c != '_') &&
3305
95.2k
       (c != ':'))) {
3306
60.9k
      return(NULL);
3307
60.9k
  }
3308
42.5k
  len += l;
3309
42.5k
  NEXTL(l);
3310
42.5k
  c = CUR_CHAR(l);
3311
3312
1.03M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
1.03M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
1.02M
    (c == '.') || (c == '-') ||
3315
1.02M
    (c == '_') || (c == ':') ||
3316
1.02M
    (IS_COMBINING(c)) ||
3317
1.02M
    (IS_EXTENDER(c)))) {
3318
993k
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
5.48k
    count = 0;
3320
5.48k
    GROW;
3321
5.48k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
5.48k
      }
3324
993k
            if (len <= INT_MAX - l)
3325
993k
          len += l;
3326
993k
      NEXTL(l);
3327
993k
      c = CUR_CHAR(l);
3328
993k
  }
3329
42.5k
    }
3330
113k
    if (len > maxLength) {
3331
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
        return(NULL);
3333
0
    }
3334
113k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
113k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
435
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
113k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
113k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
4.34M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
4.34M
    const xmlChar *in;
3370
4.34M
    const xmlChar *ret;
3371
4.34M
    size_t count = 0;
3372
4.34M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
502k
                       XML_MAX_TEXT_LENGTH :
3374
4.34M
                       XML_MAX_NAME_LENGTH;
3375
3376
4.34M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
4.34M
    in = ctxt->input->cur;
3386
4.34M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
4.34M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
4.34M
  (*in == '_') || (*in == ':')) {
3389
4.17M
  in++;
3390
16.5M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
16.5M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
16.5M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
16.5M
         (*in == '_') || (*in == '-') ||
3394
16.5M
         (*in == ':') || (*in == '.'))
3395
12.4M
      in++;
3396
4.17M
  if ((*in > 0) && (*in < 0x80)) {
3397
4.08M
      count = in - ctxt->input->cur;
3398
4.08M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
4.08M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
4.08M
      ctxt->input->cur = in;
3404
4.08M
      ctxt->input->col += count;
3405
4.08M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
4.08M
      return(ret);
3408
4.08M
  }
3409
4.17M
    }
3410
    /* accelerator for special cases */
3411
258k
    return(xmlParseNameComplex(ctxt));
3412
4.34M
}
3413
3414
static const xmlChar *
3415
214k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
214k
    int len = 0, l;
3417
214k
    int c;
3418
214k
    int count = 0;
3419
214k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
50.7k
                    XML_MAX_TEXT_LENGTH :
3421
214k
                    XML_MAX_NAME_LENGTH;
3422
214k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
214k
    GROW;
3432
214k
    startPosition = CUR_PTR - BASE_PTR;
3433
214k
    c = CUR_CHAR(l);
3434
214k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
214k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
126k
  return(NULL);
3437
126k
    }
3438
3439
1.75M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
1.75M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
1.66M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
6.92k
      count = 0;
3443
6.92k
      GROW;
3444
6.92k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
6.92k
  }
3447
1.66M
        if (len <= INT_MAX - l)
3448
1.66M
      len += l;
3449
1.66M
  NEXTL(l);
3450
1.66M
  c = CUR_CHAR(l);
3451
1.66M
  if (c == 0) {
3452
6.58k
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
6.58k
      ctxt->input->cur -= l;
3459
6.58k
      GROW;
3460
6.58k
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
6.58k
      ctxt->input->cur += l;
3463
6.58k
      c = CUR_CHAR(l);
3464
6.58k
  }
3465
1.66M
    }
3466
87.7k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
87.7k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
87.7k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
1.85M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
1.85M
    const xmlChar *in, *e;
3491
1.85M
    const xmlChar *ret;
3492
1.85M
    size_t count = 0;
3493
1.85M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
276k
                       XML_MAX_TEXT_LENGTH :
3495
1.85M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
1.85M
    in = ctxt->input->cur;
3505
1.85M
    e = ctxt->input->end;
3506
1.85M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
1.85M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
1.85M
   (*in == '_')) && (in < e)) {
3509
1.70M
  in++;
3510
8.39M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
8.39M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
8.39M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
8.39M
          (*in == '_') || (*in == '-') ||
3514
8.39M
          (*in == '.')) && (in < e))
3515
6.69M
      in++;
3516
1.70M
  if (in >= e)
3517
788
      goto complex;
3518
1.70M
  if ((*in > 0) && (*in < 0x80)) {
3519
1.64M
      count = in - ctxt->input->cur;
3520
1.64M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
1.64M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
1.64M
      ctxt->input->cur = in;
3526
1.64M
      ctxt->input->col += count;
3527
1.64M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
1.64M
      return(ret);
3531
1.64M
  }
3532
1.70M
    }
3533
214k
complex:
3534
214k
    return(xmlParseNCNameComplex(ctxt));
3535
1.85M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
441k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
441k
    register const xmlChar *cmp = other;
3551
441k
    register const xmlChar *in;
3552
441k
    const xmlChar *ret;
3553
3554
441k
    GROW;
3555
441k
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
441k
    in = ctxt->input->cur;
3559
2.09M
    while (*in != 0 && *in == *cmp) {
3560
1.65M
  ++in;
3561
1.65M
  ++cmp;
3562
1.65M
    }
3563
441k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
374k
  ctxt->input->col += in - ctxt->input->cur;
3566
374k
  ctxt->input->cur = in;
3567
374k
  return (const xmlChar*) 1;
3568
374k
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
67.0k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
67.0k
    if (ret == other) {
3573
5.19k
  return (const xmlChar*) 1;
3574
5.19k
    }
3575
61.8k
    return ret;
3576
67.0k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
6.13M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
6.13M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
6.13M
    const xmlChar *cur = *str;
3600
6.13M
    int len = 0, l;
3601
6.13M
    int c;
3602
6.13M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
17.3k
                    XML_MAX_TEXT_LENGTH :
3604
6.13M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
6.13M
    c = CUR_SCHAR(cur, l);
3611
6.13M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
684
  return(NULL);
3613
684
    }
3614
3615
6.13M
    COPY_BUF(l,buf,len,c);
3616
6.13M
    cur += l;
3617
6.13M
    c = CUR_SCHAR(cur, l);
3618
73.9M
    while (xmlIsNameChar(ctxt, c)) {
3619
68.2M
  COPY_BUF(l,buf,len,c);
3620
68.2M
  cur += l;
3621
68.2M
  c = CUR_SCHAR(cur, l);
3622
68.2M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
396k
      xmlChar *buffer;
3628
396k
      int max = len * 2;
3629
3630
396k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
396k
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
396k
      memcpy(buffer, buf, len);
3636
90.0M
      while (xmlIsNameChar(ctxt, c)) {
3637
89.6M
    if (len + 10 > max) {
3638
396k
        xmlChar *tmp;
3639
3640
396k
        max *= 2;
3641
396k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
396k
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
396k
        buffer = tmp;
3648
396k
    }
3649
89.6M
    COPY_BUF(l,buffer,len,c);
3650
89.6M
    cur += l;
3651
89.6M
    c = CUR_SCHAR(cur, l);
3652
89.6M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
89.6M
      }
3658
396k
      buffer[len] = 0;
3659
396k
      *str = cur;
3660
396k
      return(buffer);
3661
396k
  }
3662
68.2M
    }
3663
5.73M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
5.73M
    *str = cur;
3668
5.73M
    return(xmlStrndup(buf, len));
3669
5.73M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
25.8k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
25.8k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
25.8k
    int len = 0, l;
3690
25.8k
    int c;
3691
25.8k
    int count = 0;
3692
25.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
4.61k
                    XML_MAX_TEXT_LENGTH :
3694
25.8k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
25.8k
    GROW;
3701
25.8k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
25.8k
    c = CUR_CHAR(l);
3704
3705
148k
    while (xmlIsNameChar(ctxt, c)) {
3706
122k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
122k
  COPY_BUF(l,buf,len,c);
3711
122k
  NEXTL(l);
3712
122k
  c = CUR_CHAR(l);
3713
122k
  if (c == 0) {
3714
179
      count = 0;
3715
179
      GROW;
3716
179
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
179
            c = CUR_CHAR(l);
3719
179
  }
3720
122k
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
374
      xmlChar *buffer;
3726
374
      int max = len * 2;
3727
3728
374
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
374
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
374
      memcpy(buffer, buf, len);
3734
225k
      while (xmlIsNameChar(ctxt, c)) {
3735
225k
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
2.41k
        count = 0;
3737
2.41k
        GROW;
3738
2.41k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
2.41k
    }
3743
225k
    if (len + 10 > max) {
3744
396
        xmlChar *tmp;
3745
3746
396
        max *= 2;
3747
396
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
396
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
396
        buffer = tmp;
3754
396
    }
3755
225k
    COPY_BUF(l,buffer,len,c);
3756
225k
    NEXTL(l);
3757
225k
    c = CUR_CHAR(l);
3758
225k
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
225k
      }
3764
374
      buffer[len] = 0;
3765
374
      return(buffer);
3766
374
  }
3767
122k
    }
3768
25.4k
    if (len == 0)
3769
7.52k
        return(NULL);
3770
17.9k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
17.9k
    return(xmlStrndup(buf, len));
3775
17.9k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
63.2k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
63.2k
    xmlChar *buf = NULL;
3795
63.2k
    int len = 0;
3796
63.2k
    int size = XML_PARSER_BUFFER_SIZE;
3797
63.2k
    int c, l;
3798
63.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
11.2k
                    XML_MAX_HUGE_LENGTH :
3800
63.2k
                    XML_MAX_TEXT_LENGTH;
3801
63.2k
    xmlChar stop;
3802
63.2k
    xmlChar *ret = NULL;
3803
63.2k
    const xmlChar *cur = NULL;
3804
63.2k
    xmlParserInputPtr input;
3805
3806
63.2k
    if (RAW == '"') stop = '"';
3807
12.4k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
63.2k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
63.2k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
63.2k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
63.2k
    input = ctxt->input;
3824
63.2k
    GROW;
3825
63.2k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
63.2k
    NEXT;
3828
63.2k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
3.52M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
3.52M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
3.46M
  if (len + 5 >= size) {
3841
7.61k
      xmlChar *tmp;
3842
3843
7.61k
      size *= 2;
3844
7.61k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
7.61k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
7.61k
      buf = tmp;
3850
7.61k
  }
3851
3.46M
  COPY_BUF(l,buf,len,c);
3852
3.46M
  NEXTL(l);
3853
3854
3.46M
  GROW;
3855
3.46M
  c = CUR_CHAR(l);
3856
3.46M
  if (c == 0) {
3857
506
      GROW;
3858
506
      c = CUR_CHAR(l);
3859
506
  }
3860
3861
3.46M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
3.46M
    }
3867
63.2k
    buf[len] = 0;
3868
63.2k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
63.2k
    if (c != stop) {
3871
791
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
791
        goto error;
3873
791
    }
3874
62.4k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
62.4k
    cur = buf;
3882
2.01M
    while (*cur != 0) { /* non input consuming */
3883
1.95M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
66.3k
      xmlChar *name;
3885
66.3k
      xmlChar tmp = *cur;
3886
66.3k
            int nameOk = 0;
3887
3888
66.3k
      cur++;
3889
66.3k
      name = xmlParseStringName(ctxt, &cur);
3890
66.3k
            if (name != NULL) {
3891
65.8k
                nameOk = 1;
3892
65.8k
                xmlFree(name);
3893
65.8k
            }
3894
66.3k
            if ((nameOk == 0) || (*cur != ';')) {
3895
1.72k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
1.72k
      "EntityValue: '%c' forbidden except for entities references\n",
3897
1.72k
                            tmp);
3898
1.72k
                goto error;
3899
1.72k
      }
3900
64.6k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
64.6k
    (ctxt->inputNr == 1)) {
3902
139
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
139
                goto error;
3904
139
      }
3905
64.5k
      if (*cur == 0)
3906
0
          break;
3907
64.5k
  }
3908
1.95M
  cur++;
3909
1.95M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
60.5k
    ++ctxt->depth;
3920
60.5k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
60.5k
                                     0, 0, 0, /* check */ 1);
3922
60.5k
    --ctxt->depth;
3923
3924
60.5k
    if (orig != NULL) {
3925
60.5k
        *orig = buf;
3926
60.5k
        buf = NULL;
3927
60.5k
    }
3928
3929
63.2k
error:
3930
63.2k
    if (buf != NULL)
3931
2.65k
        xmlFree(buf);
3932
63.2k
    return(ret);
3933
60.5k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
119k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
119k
    xmlChar limit = 0;
3950
119k
    xmlChar *buf = NULL;
3951
119k
    xmlChar *rep = NULL;
3952
119k
    size_t len = 0;
3953
119k
    size_t buf_size = 0;
3954
119k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
32.7k
                       XML_MAX_HUGE_LENGTH :
3956
119k
                       XML_MAX_TEXT_LENGTH;
3957
119k
    int c, l, in_space = 0;
3958
119k
    xmlChar *current = NULL;
3959
119k
    xmlEntityPtr ent;
3960
3961
119k
    if (NXT(0) == '"') {
3962
49.0k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
49.0k
  limit = '"';
3964
49.0k
        NEXT;
3965
70.1k
    } else if (NXT(0) == '\'') {
3966
70.1k
  limit = '\'';
3967
70.1k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
70.1k
        NEXT;
3969
70.1k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
119k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
119k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
119k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
119k
    c = CUR_CHAR(l);
3985
3.17M
    while (((NXT(0) != limit) && /* checked */
3986
3.17M
            (IS_CHAR(c)) && (c != '<')) &&
3987
3.17M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
3.05M
  if (c == '&') {
3989
200k
      in_space = 0;
3990
200k
      if (NXT(1) == '#') {
3991
14.8k
    int val = xmlParseCharRef(ctxt);
3992
3993
14.8k
    if (val == '&') {
3994
1.30k
        if (ctxt->replaceEntities) {
3995
448
      if (len + 10 > buf_size) {
3996
0
          growBuffer(buf, 10);
3997
0
      }
3998
448
      buf[len++] = '&';
3999
854
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
854
      if (len + 10 > buf_size) {
4005
0
          growBuffer(buf, 10);
4006
0
      }
4007
854
      buf[len++] = '&';
4008
854
      buf[len++] = '#';
4009
854
      buf[len++] = '3';
4010
854
      buf[len++] = '8';
4011
854
      buf[len++] = ';';
4012
854
        }
4013
13.5k
    } else if (val != 0) {
4014
11.2k
        if (len + 10 > buf_size) {
4015
30
      growBuffer(buf, 10);
4016
30
        }
4017
11.2k
        len += xmlCopyChar(0, &buf[len], val);
4018
11.2k
    }
4019
185k
      } else {
4020
185k
    ent = xmlParseEntityRef(ctxt);
4021
185k
    if ((ent != NULL) &&
4022
185k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
35.8k
        if (len + 10 > buf_size) {
4024
0
      growBuffer(buf, 10);
4025
0
        }
4026
35.8k
        if ((ctxt->replaceEntities == 0) &&
4027
35.8k
            (ent->content[0] == '&')) {
4028
8.00k
      buf[len++] = '&';
4029
8.00k
      buf[len++] = '#';
4030
8.00k
      buf[len++] = '3';
4031
8.00k
      buf[len++] = '8';
4032
8.00k
      buf[len++] = ';';
4033
27.8k
        } else {
4034
27.8k
      buf[len++] = ent->content[0];
4035
27.8k
        }
4036
150k
    } else if ((ent != NULL) &&
4037
150k
               (ctxt->replaceEntities != 0)) {
4038
123k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
123k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
123k
      ++ctxt->depth;
4043
123k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
123k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
123k
                                /* check */ 1);
4046
123k
      --ctxt->depth;
4047
123k
      if (rep != NULL) {
4048
122k
          current = rep;
4049
25.4M
          while (*current != 0) { /* non input consuming */
4050
25.3M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
25.3M
                                    (*current == 0x9)) {
4052
231
                                    buf[len++] = 0x20;
4053
231
                                    current++;
4054
231
                                } else
4055
25.3M
                                    buf[len++] = *current++;
4056
25.3M
        if (len + 10 > buf_size) {
4057
770
            growBuffer(buf, 10);
4058
770
        }
4059
25.3M
          }
4060
122k
          xmlFree(rep);
4061
122k
          rep = NULL;
4062
122k
      }
4063
123k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
123k
    } else if (ent != NULL) {
4071
12.5k
        int i = xmlStrlen(ent->name);
4072
12.5k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
12.5k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
12.5k
      (ent->content != NULL)) {
4081
12.2k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
322
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
322
                            ctxt->sizeentcopy = ent->length;
4085
4086
322
                            ++ctxt->depth;
4087
322
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
322
                                    ent->content, ent->length,
4089
322
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
322
                                    /* check */ 1);
4091
322
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
322
                            if (ctxt->inSubset == 0) {
4100
313
                                ent->flags |= XML_ENT_CHECKED;
4101
313
                                ent->expandedSize = ctxt->sizeentcopy;
4102
313
                            }
4103
4104
322
                            if (rep != NULL) {
4105
322
                                xmlFree(rep);
4106
322
                                rep = NULL;
4107
322
                            } else {
4108
0
                                ent->content[0] = 0;
4109
0
                            }
4110
4111
322
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
0
                                goto error;
4113
11.9k
                        } else {
4114
11.9k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
11.9k
                        }
4117
12.2k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
12.5k
        buf[len++] = '&';
4123
12.6k
        while (len + i + 10 > buf_size) {
4124
254
      growBuffer(buf, i + 10);
4125
254
        }
4126
41.4k
        for (;i > 0;i--)
4127
28.9k
      buf[len++] = *cur++;
4128
12.5k
        buf[len++] = ';';
4129
12.5k
    }
4130
185k
      }
4131
2.85M
  } else {
4132
2.85M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
349k
          if ((len != 0) || (!normalize)) {
4134
338k
        if ((!normalize) || (!in_space)) {
4135
330k
      COPY_BUF(l,buf,len,0x20);
4136
331k
      while (len + 10 > buf_size) {
4137
1.08k
          growBuffer(buf, 10);
4138
1.08k
      }
4139
330k
        }
4140
338k
        in_space = 1;
4141
338k
    }
4142
2.50M
      } else {
4143
2.50M
          in_space = 0;
4144
2.50M
    COPY_BUF(l,buf,len,c);
4145
2.50M
    if (len + 10 > buf_size) {
4146
9.62k
        growBuffer(buf, 10);
4147
9.62k
    }
4148
2.50M
      }
4149
2.85M
      NEXTL(l);
4150
2.85M
  }
4151
3.05M
  GROW;
4152
3.05M
  c = CUR_CHAR(l);
4153
3.05M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
3.05M
    }
4159
119k
    if (ctxt->instate == XML_PARSER_EOF)
4160
75
        goto error;
4161
4162
119k
    if ((in_space) && (normalize)) {
4163
12.5k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
6.23k
    }
4165
119k
    buf[len] = 0;
4166
119k
    if (RAW == '<') {
4167
27.5k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
91.5k
    } else if (RAW != limit) {
4169
11.4k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
4.98k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
4.98k
         "invalid character in attribute value\n");
4172
6.44k
  } else {
4173
6.44k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
6.44k
         "AttValue: ' expected\n");
4175
6.44k
        }
4176
11.4k
    } else
4177
80.1k
  NEXT;
4178
4179
119k
    if (attlen != NULL) *attlen = len;
4180
119k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
75
error:
4185
75
    if (buf != NULL)
4186
75
        xmlFree(buf);
4187
75
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
75
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
185k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
185k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
185k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
185k
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
22.5k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
22.5k
    xmlChar *buf = NULL;
4250
22.5k
    int len = 0;
4251
22.5k
    int size = XML_PARSER_BUFFER_SIZE;
4252
22.5k
    int cur, l;
4253
22.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
5.84k
                    XML_MAX_TEXT_LENGTH :
4255
22.5k
                    XML_MAX_NAME_LENGTH;
4256
22.5k
    xmlChar stop;
4257
22.5k
    int state = ctxt->instate;
4258
22.5k
    int count = 0;
4259
4260
22.5k
    SHRINK;
4261
22.5k
    if (RAW == '"') {
4262
18.6k
        NEXT;
4263
18.6k
  stop = '"';
4264
18.6k
    } else if (RAW == '\'') {
4265
2.41k
        NEXT;
4266
2.41k
  stop = '\'';
4267
2.41k
    } else {
4268
1.44k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
1.44k
  return(NULL);
4270
1.44k
    }
4271
4272
21.1k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
21.1k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
21.1k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
21.1k
    cur = CUR_CHAR(l);
4279
846k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
825k
  if (len + 5 >= size) {
4281
1.82k
      xmlChar *tmp;
4282
4283
1.82k
      size *= 2;
4284
1.82k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
1.82k
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
1.82k
      buf = tmp;
4292
1.82k
  }
4293
825k
  count++;
4294
825k
  if (count > 50) {
4295
7.93k
      SHRINK;
4296
7.93k
      GROW;
4297
7.93k
      count = 0;
4298
7.93k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
7.93k
  }
4303
825k
  COPY_BUF(l,buf,len,cur);
4304
825k
  NEXTL(l);
4305
825k
  cur = CUR_CHAR(l);
4306
825k
  if (cur == 0) {
4307
452
      GROW;
4308
452
      SHRINK;
4309
452
      cur = CUR_CHAR(l);
4310
452
  }
4311
825k
        if (len > maxLength) {
4312
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
0
            xmlFree(buf);
4314
0
            ctxt->instate = (xmlParserInputState) state;
4315
0
            return(NULL);
4316
0
        }
4317
825k
    }
4318
21.1k
    buf[len] = 0;
4319
21.1k
    ctxt->instate = (xmlParserInputState) state;
4320
21.1k
    if (!IS_CHAR(cur)) {
4321
804
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
20.3k
    } else {
4323
20.3k
  NEXT;
4324
20.3k
    }
4325
21.1k
    return(buf);
4326
21.1k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
8.32k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
8.32k
    xmlChar *buf = NULL;
4344
8.32k
    int len = 0;
4345
8.32k
    int size = XML_PARSER_BUFFER_SIZE;
4346
8.32k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
1.73k
                    XML_MAX_TEXT_LENGTH :
4348
8.32k
                    XML_MAX_NAME_LENGTH;
4349
8.32k
    xmlChar cur;
4350
8.32k
    xmlChar stop;
4351
8.32k
    int count = 0;
4352
8.32k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
8.32k
    SHRINK;
4355
8.32k
    if (RAW == '"') {
4356
6.94k
        NEXT;
4357
6.94k
  stop = '"';
4358
6.94k
    } else if (RAW == '\'') {
4359
1.20k
        NEXT;
4360
1.20k
  stop = '\'';
4361
1.20k
    } else {
4362
176
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
176
  return(NULL);
4364
176
    }
4365
8.15k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
8.15k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
8.15k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
8.15k
    cur = CUR;
4372
326k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
318k
  if (len + 1 >= size) {
4374
531
      xmlChar *tmp;
4375
4376
531
      size *= 2;
4377
531
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
531
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
531
      buf = tmp;
4384
531
  }
4385
318k
  buf[len++] = cur;
4386
318k
  count++;
4387
318k
  if (count > 50) {
4388
2.48k
      SHRINK;
4389
2.48k
      GROW;
4390
2.48k
      count = 0;
4391
2.48k
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
2.48k
  }
4396
318k
  NEXT;
4397
318k
  cur = CUR;
4398
318k
  if (cur == 0) {
4399
105
      GROW;
4400
105
      SHRINK;
4401
105
      cur = CUR;
4402
105
  }
4403
318k
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
318k
    }
4409
8.15k
    buf[len] = 0;
4410
8.15k
    if (cur != stop) {
4411
1.06k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
7.09k
    } else {
4413
7.09k
  NEXT;
4414
7.09k
    }
4415
8.15k
    ctxt->instate = oldstate;
4416
8.15k
    return(buf);
4417
8.15k
}
4418
4419
/* Forward declaration: slow-path fallback invoked at the end of xmlParseCharData(). */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table driving the inner scanning loop of xmlParseCharData():
 * the loop keeps consuming input bytes for as long as the entry indexed
 * by the byte is non-zero.
 *
 * Zero (i.e. "stop scanning") entries are: every control character except
 * TAB (so LF and CR stop the scan), '&', '<', ']' and every byte >= 0x80.
 * All other entries hold the byte value itself. The entries for
 * 0x80..0xFF are not spelled out and are therefore zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80..0xFF: non-ASCII, implicitly zero */
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
2.45M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
2.45M
    const xmlChar *in;
4482
2.45M
    int nbchar = 0;
4483
2.45M
    int line = ctxt->input->line;
4484
2.45M
    int col = ctxt->input->col;
4485
2.45M
    int ccol;
4486
4487
2.45M
    SHRINK;
4488
2.45M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
2.45M
    in = ctxt->input->cur;
4494
2.84M
    do {
4495
3.38M
get_more_space:
4496
5.13M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
3.38M
        if (*in == 0xA) {
4498
554k
            do {
4499
554k
                ctxt->input->line++; ctxt->input->col = 1;
4500
554k
                in++;
4501
554k
            } while (*in == 0xA);
4502
537k
            goto get_more_space;
4503
537k
        }
4504
2.84M
        if (*in == '<') {
4505
644k
            nbchar = in - ctxt->input->cur;
4506
644k
            if (nbchar > 0) {
4507
644k
                const xmlChar *tmp = ctxt->input->cur;
4508
644k
                ctxt->input->cur = in;
4509
4510
644k
                if ((ctxt->sax != NULL) &&
4511
644k
                    (ctxt->sax->ignorableWhitespace !=
4512
644k
                     ctxt->sax->characters)) {
4513
227k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
142k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
142k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
142k
                                                   tmp, nbchar);
4517
142k
                    } else {
4518
84.5k
                        if (ctxt->sax->characters != NULL)
4519
84.5k
                            ctxt->sax->characters(ctxt->userData,
4520
84.5k
                                                  tmp, nbchar);
4521
84.5k
                        if (*ctxt->space == -1)
4522
29.0k
                            *ctxt->space = -2;
4523
84.5k
                    }
4524
417k
                } else if ((ctxt->sax != NULL) &&
4525
417k
                           (ctxt->sax->characters != NULL)) {
4526
417k
                    ctxt->sax->characters(ctxt->userData,
4527
417k
                                          tmp, nbchar);
4528
417k
                }
4529
644k
            }
4530
644k
            return;
4531
644k
        }
4532
4533
2.73M
get_more:
4534
2.73M
        ccol = ctxt->input->col;
4535
30.7M
        while (test_char_data[*in]) {
4536
28.0M
            in++;
4537
28.0M
            ccol++;
4538
28.0M
        }
4539
2.73M
        ctxt->input->col = ccol;
4540
2.73M
        if (*in == 0xA) {
4541
499k
            do {
4542
499k
                ctxt->input->line++; ctxt->input->col = 1;
4543
499k
                in++;
4544
499k
            } while (*in == 0xA);
4545
488k
            goto get_more;
4546
488k
        }
4547
2.24M
        if (*in == ']') {
4548
57.7k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
9.94k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
9.94k
                ctxt->input->cur = in + 1;
4551
9.94k
                return;
4552
9.94k
            }
4553
47.8k
            in++;
4554
47.8k
            ctxt->input->col++;
4555
47.8k
            goto get_more;
4556
57.7k
        }
4557
2.19M
        nbchar = in - ctxt->input->cur;
4558
2.19M
        if (nbchar > 0) {
4559
1.09M
            if ((ctxt->sax != NULL) &&
4560
1.09M
                (ctxt->sax->ignorableWhitespace !=
4561
1.09M
                 ctxt->sax->characters) &&
4562
1.09M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
59.4k
                const xmlChar *tmp = ctxt->input->cur;
4564
59.4k
                ctxt->input->cur = in;
4565
4566
59.4k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
2.45k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
2.45k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
2.45k
                                                       tmp, nbchar);
4570
57.0k
                } else {
4571
57.0k
                    if (ctxt->sax->characters != NULL)
4572
57.0k
                        ctxt->sax->characters(ctxt->userData,
4573
57.0k
                                              tmp, nbchar);
4574
57.0k
                    if (*ctxt->space == -1)
4575
32.2k
                        *ctxt->space = -2;
4576
57.0k
                }
4577
59.4k
                line = ctxt->input->line;
4578
59.4k
                col = ctxt->input->col;
4579
1.03M
            } else if (ctxt->sax != NULL) {
4580
1.03M
                if (ctxt->sax->characters != NULL)
4581
1.03M
                    ctxt->sax->characters(ctxt->userData,
4582
1.03M
                                          ctxt->input->cur, nbchar);
4583
1.03M
                line = ctxt->input->line;
4584
1.03M
                col = ctxt->input->col;
4585
1.03M
            }
4586
1.09M
        }
4587
2.19M
        ctxt->input->cur = in;
4588
2.19M
        if (*in == 0xD) {
4589
418k
            in++;
4590
418k
            if (*in == 0xA) {
4591
405k
                ctxt->input->cur = in;
4592
405k
                in++;
4593
405k
                ctxt->input->line++; ctxt->input->col = 1;
4594
405k
                continue; /* while */
4595
405k
            }
4596
12.4k
            in--;
4597
12.4k
        }
4598
1.78M
        if (*in == '<') {
4599
736k
            return;
4600
736k
        }
4601
1.04M
        if (*in == '&') {
4602
88.6k
            return;
4603
88.6k
        }
4604
960k
        SHRINK;
4605
960k
        GROW;
4606
960k
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
960k
        in = ctxt->input->cur;
4609
1.36M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
1.36M
             (*in == 0x09) || (*in == 0x0a));
4611
973k
    ctxt->input->line = line;
4612
973k
    ctxt->input->col = col;
4613
973k
    xmlParseCharDataComplex(ctxt);
4614
973k
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
973k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
973k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
973k
    int nbchar = 0;
4631
973k
    int cur, l;
4632
973k
    int count = 0;
4633
4634
973k
    SHRINK;
4635
973k
    GROW;
4636
973k
    cur = CUR_CHAR(l);
4637
9.59M
    while ((cur != '<') && /* checked */
4638
9.59M
           (cur != '&') &&
4639
9.59M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
8.61M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
2.72k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
2.72k
  }
4643
8.61M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
8.61M
  NEXTL(l);
4646
8.61M
  cur = CUR_CHAR(l);
4647
8.61M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
8.89k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
8.89k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
6.75k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
0
                                     buf, nbchar);
4658
6.75k
    } else {
4659
6.75k
        if (ctxt->sax->characters != NULL)
4660
6.75k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
6.75k
        if ((ctxt->sax->characters !=
4662
6.75k
             ctxt->sax->ignorableWhitespace) &&
4663
6.75k
      (*ctxt->space == -1))
4664
188
      *ctxt->space = -2;
4665
6.75k
    }
4666
6.75k
      }
4667
8.89k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
8.89k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
8.89k
  }
4672
8.61M
  count++;
4673
8.61M
  if (count > 50) {
4674
99.7k
      SHRINK;
4675
99.7k
      GROW;
4676
99.7k
      count = 0;
4677
99.7k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
99.7k
  }
4680
8.61M
    }
4681
973k
    if (nbchar != 0) {
4682
225k
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
225k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
200k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
3.81k
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
3.81k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
196k
      } else {
4691
196k
    if (ctxt->sax->characters != NULL)
4692
196k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
196k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
196k
        (*ctxt->space == -1))
4695
33.9k
        *ctxt->space = -2;
4696
196k
      }
4697
200k
  }
4698
225k
    }
4699
973k
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
762k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
762k
                          "PCDATA invalid Char value %d\n",
4703
762k
                    cur ? cur : CUR);
4704
762k
  NEXT;
4705
762k
    }
4706
973k
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
60.1k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
60.1k
    xmlChar *URI = NULL;
4735
4736
60.1k
    SHRINK;
4737
4738
60.1k
    *publicID = NULL;
4739
60.1k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
14.5k
        SKIP(6);
4741
14.5k
  if (SKIP_BLANKS == 0) {
4742
90
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
90
                     "Space required after 'SYSTEM'\n");
4744
90
  }
4745
14.5k
  URI = xmlParseSystemLiteral(ctxt);
4746
14.5k
  if (URI == NULL) {
4747
118
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
118
        }
4749
45.5k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
8.32k
        SKIP(6);
4751
8.32k
  if (SKIP_BLANKS == 0) {
4752
105
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
105
        "Space required after 'PUBLIC'\n");
4754
105
  }
4755
8.32k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
8.32k
  if (*publicID == NULL) {
4757
176
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
176
  }
4759
8.32k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
7.99k
      if (SKIP_BLANKS == 0) {
4764
1.32k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
1.32k
      "Space required after the Public Identifier\n");
4766
1.32k
      }
4767
7.99k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
334
      if (SKIP_BLANKS == 0) return(NULL);
4775
91
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
91
  }
4777
7.99k
  URI = xmlParseSystemLiteral(ctxt);
4778
7.99k
  if (URI == NULL) {
4779
1.32k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
1.32k
        }
4781
7.99k
    }
4782
59.8k
    return(URI);
4783
60.1k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
11.7k
                       size_t len, size_t size) {
4802
11.7k
    int q, ql;
4803
11.7k
    int r, rl;
4804
11.7k
    int cur, l;
4805
11.7k
    size_t count = 0;
4806
11.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
2.67k
                       XML_MAX_HUGE_LENGTH :
4808
11.7k
                       XML_MAX_TEXT_LENGTH;
4809
11.7k
    int inputid;
4810
4811
11.7k
    inputid = ctxt->input->id;
4812
4813
11.7k
    if (buf == NULL) {
4814
2.16k
        len = 0;
4815
2.16k
  size = XML_PARSER_BUFFER_SIZE;
4816
2.16k
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
2.16k
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
2.16k
    }
4822
11.7k
    GROW; /* Assure there's enough input data */
4823
11.7k
    q = CUR_CHAR(ql);
4824
11.7k
    if (q == 0)
4825
2.59k
        goto not_terminated;
4826
9.17k
    if (!IS_CHAR(q)) {
4827
1.37k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
1.37k
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
1.37k
                    q);
4830
1.37k
  xmlFree (buf);
4831
1.37k
  return;
4832
1.37k
    }
4833
7.80k
    NEXTL(ql);
4834
7.80k
    r = CUR_CHAR(rl);
4835
7.80k
    if (r == 0)
4836
117
        goto not_terminated;
4837
7.68k
    if (!IS_CHAR(r)) {
4838
136
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
136
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
136
                    r);
4841
136
  xmlFree (buf);
4842
136
  return;
4843
136
    }
4844
7.54k
    NEXTL(rl);
4845
7.54k
    cur = CUR_CHAR(l);
4846
7.54k
    if (cur == 0)
4847
73
        goto not_terminated;
4848
1.55M
    while (IS_CHAR(cur) && /* checked */
4849
1.55M
           ((cur != '>') ||
4850
1.55M
      (r != '-') || (q != '-'))) {
4851
1.54M
  if ((r == '-') && (q == '-')) {
4852
6.47k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
6.47k
  }
4854
1.54M
  if (len + 5 >= size) {
4855
4.26k
      xmlChar *new_buf;
4856
4.26k
            size_t new_size;
4857
4858
4.26k
      new_size = size * 2;
4859
4.26k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
4.26k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
4.26k
      buf = new_buf;
4866
4.26k
            size = new_size;
4867
4.26k
  }
4868
1.54M
  COPY_BUF(ql,buf,len,q);
4869
1.54M
  q = r;
4870
1.54M
  ql = rl;
4871
1.54M
  r = cur;
4872
1.54M
  rl = l;
4873
4874
1.54M
  count++;
4875
1.54M
  if (count > 50) {
4876
27.7k
      SHRINK;
4877
27.7k
      GROW;
4878
27.7k
      count = 0;
4879
27.7k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
27.7k
  }
4884
1.54M
  NEXTL(l);
4885
1.54M
  cur = CUR_CHAR(l);
4886
1.54M
  if (cur == 0) {
4887
1.25k
      SHRINK;
4888
1.25k
      GROW;
4889
1.25k
      cur = CUR_CHAR(l);
4890
1.25k
  }
4891
4892
1.54M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
1.54M
    }
4899
7.47k
    buf[len] = 0;
4900
7.47k
    if (cur == 0) {
4901
1.25k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
1.25k
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
6.22k
    } else if (!IS_CHAR(cur)) {
4904
1.38k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
1.38k
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
1.38k
                    cur);
4907
4.84k
    } else {
4908
4.84k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
4.84k
        NEXT;
4914
4.84k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
4.84k
      (!ctxt->disableSAX))
4916
3.70k
      ctxt->sax->comment(ctxt->userData, buf);
4917
4.84k
    }
4918
7.47k
    xmlFree(buf);
4919
7.47k
    return;
4920
2.78k
not_terminated:
4921
2.78k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
2.78k
       "Comment not terminated\n", NULL);
4923
2.78k
    xmlFree(buf);
4924
2.78k
    return;
4925
7.47k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
2.45M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
2.45M
    xmlChar *buf = NULL;
4943
2.45M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
2.45M
    size_t len = 0;
4945
2.45M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
11.6k
                       XML_MAX_HUGE_LENGTH :
4947
2.45M
                       XML_MAX_TEXT_LENGTH;
4948
2.45M
    xmlParserInputState state;
4949
2.45M
    const xmlChar *in;
4950
2.45M
    size_t nbchar = 0;
4951
2.45M
    int ccol;
4952
2.45M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
2.45M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
2.45M
    SKIP(2);
4960
2.45M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
41
        return;
4962
2.45M
    state = ctxt->instate;
4963
2.45M
    ctxt->instate = XML_PARSER_COMMENT;
4964
2.45M
    inputid = ctxt->input->id;
4965
2.45M
    SKIP(2);
4966
2.45M
    SHRINK;
4967
2.45M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
2.45M
    in = ctxt->input->cur;
4974
2.45M
    do {
4975
2.45M
  if (*in == 0xA) {
4976
10.2k
      do {
4977
10.2k
    ctxt->input->line++; ctxt->input->col = 1;
4978
10.2k
    in++;
4979
10.2k
      } while (*in == 0xA);
4980
6.14k
  }
4981
2.64M
get_more:
4982
2.64M
        ccol = ctxt->input->col;
4983
10.6M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
10.6M
         ((*in >= 0x20) && (*in < '-')) ||
4985
10.6M
         (*in == 0x09)) {
4986
7.99M
        in++;
4987
7.99M
        ccol++;
4988
7.99M
  }
4989
2.64M
  ctxt->input->col = ccol;
4990
2.64M
  if (*in == 0xA) {
4991
92.3k
      do {
4992
92.3k
    ctxt->input->line++; ctxt->input->col = 1;
4993
92.3k
    in++;
4994
92.3k
      } while (*in == 0xA);
4995
86.8k
      goto get_more;
4996
86.8k
  }
4997
2.56M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
2.56M
  if (nbchar > 0) {
5002
176k
      if ((ctxt->sax != NULL) &&
5003
176k
    (ctxt->sax->comment != NULL)) {
5004
176k
    if (buf == NULL) {
5005
71.0k
        if ((*in == '-') && (in[1] == '-'))
5006
42.1k
            size = nbchar + 1;
5007
28.9k
        else
5008
28.9k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
71.0k
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
71.0k
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
71.0k
        len = 0;
5016
105k
    } else if (len + nbchar + 1 >= size) {
5017
13.9k
        xmlChar *new_buf;
5018
13.9k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
13.9k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
13.9k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
13.9k
        buf = new_buf;
5027
13.9k
    }
5028
176k
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
176k
    len += nbchar;
5030
176k
    buf[len] = 0;
5031
176k
      }
5032
176k
  }
5033
2.56M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
2.56M
  ctxt->input->cur = in;
5040
2.56M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
2.56M
  if (*in == 0xD) {
5045
38.3k
      in++;
5046
38.3k
      if (*in == 0xA) {
5047
37.6k
    ctxt->input->cur = in;
5048
37.6k
    in++;
5049
37.6k
    ctxt->input->line++; ctxt->input->col = 1;
5050
37.6k
    goto get_more;
5051
37.6k
      }
5052
695
      in--;
5053
695
  }
5054
2.52M
  SHRINK;
5055
2.52M
  GROW;
5056
2.52M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
2.52M
  in = ctxt->input->cur;
5061
2.52M
  if (*in == '-') {
5062
2.51M
      if (in[1] == '-') {
5063
2.45M
          if (in[2] == '>') {
5064
2.44M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
2.44M
        SKIP(3);
5070
2.44M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
2.44M
            (!ctxt->disableSAX)) {
5072
2.43M
      if (buf != NULL)
5073
55.1k
          ctxt->sax->comment(ctxt->userData, buf);
5074
2.38M
      else
5075
2.38M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
2.43M
        }
5077
2.44M
        if (buf != NULL)
5078
61.4k
            xmlFree(buf);
5079
2.44M
        if (ctxt->instate != XML_PARSER_EOF)
5080
2.44M
      ctxt->instate = state;
5081
2.44M
        return;
5082
2.44M
    }
5083
7.22k
    if (buf != NULL) {
5084
7.13k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
7.13k
                          "Double hyphen within comment: "
5086
7.13k
                                      "<!--%.50s\n",
5087
7.13k
              buf);
5088
7.13k
    } else
5089
92
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
92
                          "Double hyphen within comment\n", NULL);
5091
7.22k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
7.22k
    in++;
5096
7.22k
    ctxt->input->col++;
5097
7.22k
      }
5098
68.9k
      in++;
5099
68.9k
      ctxt->input->col++;
5100
68.9k
      goto get_more;
5101
2.51M
  }
5102
2.52M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
11.7k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
11.7k
    ctxt->instate = state;
5105
11.7k
    return;
5106
2.45M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
34.8k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
34.8k
    const xmlChar *name;
5125
5126
34.8k
    name = xmlParseName(ctxt);
5127
34.8k
    if ((name != NULL) &&
5128
34.8k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
34.8k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
34.8k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
12.7k
  int i;
5132
12.7k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
12.7k
      (name[2] == 'l') && (name[3] == 0)) {
5134
9.79k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
9.79k
     "XML declaration allowed only at the start of the document\n");
5136
9.79k
      return(name);
5137
9.79k
  } else if (name[3] == 0) {
5138
343
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
343
      return(name);
5140
343
  }
5141
7.75k
  for (i = 0;;i++) {
5142
7.75k
      if (xmlW3CPIs[i] == NULL) break;
5143
5.19k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
38
          return(name);
5145
5.19k
  }
5146
2.56k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
2.56k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
2.56k
          NULL, NULL);
5149
2.56k
    }
5150
24.6k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
737
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
737
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
737
    }
5154
24.6k
    return(name);
5155
34.8k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information
5169
 * This will add the given catalog to the parsing context in order
5170
 * to be used if there is a resolution need further down in the document
5171
 */
5172
5173
static void
5174
6
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
6
    xmlChar *URL = NULL;
5176
6
    const xmlChar *tmp, *base;
5177
6
    xmlChar marker;
5178
5179
6
    tmp = catalog;
5180
6
    while (IS_BLANK_CH(*tmp)) tmp++;
5181
6
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
6
  goto error;
5183
0
    tmp += 7;
5184
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5185
0
    if (*tmp != '=') {
5186
0
  return;
5187
0
    }
5188
0
    tmp++;
5189
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
0
    marker = *tmp;
5191
0
    if ((marker != '\'') && (marker != '"'))
5192
0
  goto error;
5193
0
    tmp++;
5194
0
    base = tmp;
5195
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
0
    if (*tmp == 0)
5197
0
  goto error;
5198
0
    URL = xmlStrndup(base, tmp - base);
5199
0
    tmp++;
5200
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
0
    if (*tmp != 0)
5202
0
  goto error;
5203
5204
0
    if (URL != NULL) {
5205
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
0
  xmlFree(URL);
5207
0
    }
5208
0
    return;
5209
5210
6
error:
5211
6
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
6
            "Catalog PI syntax error: %s\n",
5213
6
      catalog, NULL);
5214
6
    if (URL != NULL)
5215
0
  xmlFree(URL);
5216
6
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
34.8k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
34.8k
    xmlChar *buf = NULL;
5235
34.8k
    size_t len = 0;
5236
34.8k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
34.8k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
9.34k
                       XML_MAX_HUGE_LENGTH :
5239
34.8k
                       XML_MAX_TEXT_LENGTH;
5240
34.8k
    int cur, l;
5241
34.8k
    const xmlChar *target;
5242
34.8k
    xmlParserInputState state;
5243
34.8k
    int count = 0;
5244
5245
34.8k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
34.8k
  int inputid = ctxt->input->id;
5247
34.8k
  state = ctxt->instate;
5248
34.8k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
34.8k
  SKIP(2);
5253
34.8k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
34.8k
        target = xmlParsePITarget(ctxt);
5260
34.8k
  if (target != NULL) {
5261
29.2k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
4.54k
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
4.54k
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
4.54k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
4.54k
        (ctxt->sax->processingInstruction != NULL))
5274
4.11k
        ctxt->sax->processingInstruction(ctxt->userData,
5275
4.11k
                                         target, NULL);
5276
4.54k
    if (ctxt->instate != XML_PARSER_EOF)
5277
4.54k
        ctxt->instate = state;
5278
4.54k
    return;
5279
4.54k
      }
5280
24.7k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
24.7k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
24.7k
      if (SKIP_BLANKS == 0) {
5287
8.71k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
8.71k
        "ParsePI: PI %s space expected\n", target);
5289
8.71k
      }
5290
24.7k
      cur = CUR_CHAR(l);
5291
2.11M
      while (IS_CHAR(cur) && /* checked */
5292
2.11M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
2.09M
    if (len + 5 >= size) {
5294
7.28k
        xmlChar *tmp;
5295
7.28k
                    size_t new_size = size * 2;
5296
7.28k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
7.28k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
7.28k
        buf = tmp;
5304
7.28k
                    size = new_size;
5305
7.28k
    }
5306
2.09M
    count++;
5307
2.09M
    if (count > 50) {
5308
32.2k
        SHRINK;
5309
32.2k
        GROW;
5310
32.2k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
32.2k
        count = 0;
5315
32.2k
    }
5316
2.09M
    COPY_BUF(l,buf,len,cur);
5317
2.09M
    NEXTL(l);
5318
2.09M
    cur = CUR_CHAR(l);
5319
2.09M
    if (cur == 0) {
5320
4.12k
        SHRINK;
5321
4.12k
        GROW;
5322
4.12k
        cur = CUR_CHAR(l);
5323
4.12k
    }
5324
2.09M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
2.09M
      }
5332
24.7k
      buf[len] = 0;
5333
24.7k
      if (cur != '?') {
5334
8.41k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
8.41k
          "ParsePI: PI %s never end ...\n", target);
5336
16.3k
      } else {
5337
16.3k
    if (inputid != ctxt->input->id) {
5338
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
0
                             "PI declaration doesn't start and stop in"
5340
0
                                   " the same entity\n");
5341
0
    }
5342
16.3k
    SKIP(2);
5343
5344
16.3k
#ifdef LIBXML_CATALOG_ENABLED
5345
16.3k
    if (((state == XML_PARSER_MISC) ||
5346
16.3k
               (state == XML_PARSER_START)) &&
5347
16.3k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
6
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
6
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
6
      (allow == XML_CATA_ALLOW_ALL))
5351
6
      xmlParseCatalogPI(ctxt, buf);
5352
6
    }
5353
16.3k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
16.3k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
16.3k
        (ctxt->sax->processingInstruction != NULL))
5361
13.3k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
13.3k
                                         target, buf);
5363
16.3k
      }
5364
24.7k
      xmlFree(buf);
5365
24.7k
  } else {
5366
5.58k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
5.58k
  }
5368
30.3k
  if (ctxt->instate != XML_PARSER_EOF)
5369
30.3k
      ctxt->instate = state;
5370
30.3k
    }
5371
34.8k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
1.00k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
1.00k
    const xmlChar *name;
5394
1.00k
    xmlChar *Pubid;
5395
1.00k
    xmlChar *Systemid;
5396
5397
1.00k
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
1.00k
    SKIP(2);
5400
5401
1.00k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
759
  int inputid = ctxt->input->id;
5403
759
  SHRINK;
5404
759
  SKIP(8);
5405
759
  if (SKIP_BLANKS == 0) {
5406
69
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
69
         "Space required after '<!NOTATION'\n");
5408
69
      return;
5409
69
  }
5410
5411
690
        name = xmlParseName(ctxt);
5412
690
  if (name == NULL) {
5413
45
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
45
      return;
5415
45
  }
5416
645
  if (xmlStrchr(name, ':') != NULL) {
5417
32
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
32
         "colons are forbidden from notation names '%s'\n",
5419
32
         name, NULL, NULL);
5420
32
  }
5421
645
  if (SKIP_BLANKS == 0) {
5422
62
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
62
         "Space required after the NOTATION name'\n");
5424
62
      return;
5425
62
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
583
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
583
  SKIP_BLANKS;
5432
5433
583
  if (RAW == '>') {
5434
414
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
414
      NEXT;
5440
414
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
414
    (ctxt->sax->notationDecl != NULL))
5442
328
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
414
  } else {
5444
169
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
169
  }
5446
583
  if (Systemid != NULL) xmlFree(Systemid);
5447
583
  if (Pubid != NULL) xmlFree(Pubid);
5448
583
    }
5449
1.00k
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
70.3k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
70.3k
    const xmlChar *name = NULL;
5478
70.3k
    xmlChar *value = NULL;
5479
70.3k
    xmlChar *URI = NULL, *literal = NULL;
5480
70.3k
    const xmlChar *ndata = NULL;
5481
70.3k
    int isParameter = 0;
5482
70.3k
    xmlChar *orig = NULL;
5483
5484
70.3k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
70.3k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
70.3k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
69.9k
  int inputid = ctxt->input->id;
5491
69.9k
  SHRINK;
5492
69.9k
  SKIP(6);
5493
69.9k
  if (SKIP_BLANKS == 0) {
5494
357
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
357
         "Space required after '<!ENTITY'\n");
5496
357
  }
5497
5498
69.9k
  if (RAW == '%') {
5499
20.3k
      NEXT;
5500
20.3k
      if (SKIP_BLANKS == 0) {
5501
242
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
242
             "Space required after '%%'\n");
5503
242
      }
5504
20.3k
      isParameter = 1;
5505
20.3k
  }
5506
5507
69.9k
        name = xmlParseName(ctxt);
5508
69.9k
  if (name == NULL) {
5509
548
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
548
                     "xmlParseEntityDecl: no name\n");
5511
548
            return;
5512
548
  }
5513
69.3k
  if (xmlStrchr(name, ':') != NULL) {
5514
78
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
78
         "colons are forbidden from entities names '%s'\n",
5516
78
         name, NULL, NULL);
5517
78
  }
5518
69.3k
  if (SKIP_BLANKS == 0) {
5519
781
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
781
         "Space required after the entity name\n");
5521
781
  }
5522
5523
69.3k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
69.3k
  if (isParameter) {
5528
20.1k
      if ((RAW == '"') || (RAW == '\'')) {
5529
18.8k
          value = xmlParseEntityValue(ctxt, &orig);
5530
18.8k
    if (value) {
5531
16.4k
        if ((ctxt->sax != NULL) &&
5532
16.4k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
15.0k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
15.0k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
15.0k
            NULL, NULL, value);
5536
16.4k
    }
5537
18.8k
      } else {
5538
1.30k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
1.30k
    if ((URI == NULL) && (literal == NULL)) {
5540
348
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
348
    }
5542
1.30k
    if (URI) {
5543
914
        xmlURIPtr uri;
5544
5545
914
        uri = xmlParseURI((const char *) URI);
5546
914
        if (uri == NULL) {
5547
78
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
78
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
836
        } else {
5555
836
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
13
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
823
      } else {
5562
823
          if ((ctxt->sax != NULL) &&
5563
823
        (!ctxt->disableSAX) &&
5564
823
        (ctxt->sax->entityDecl != NULL))
5565
749
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
749
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
749
              literal, URI, NULL);
5568
823
      }
5569
836
      xmlFreeURI(uri);
5570
836
        }
5571
914
    }
5572
1.30k
      }
5573
49.2k
  } else {
5574
49.2k
      if ((RAW == '"') || (RAW == '\'')) {
5575
44.3k
          value = xmlParseEntityValue(ctxt, &orig);
5576
44.3k
    if ((ctxt->sax != NULL) &&
5577
44.3k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
39.8k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
39.8k
        XML_INTERNAL_GENERAL_ENTITY,
5580
39.8k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
44.3k
    if ((ctxt->myDoc == NULL) ||
5585
44.3k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
776
        if (ctxt->myDoc == NULL) {
5587
233
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
233
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
233
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
233
        }
5594
776
        if (ctxt->myDoc->intSubset == NULL)
5595
233
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
233
              BAD_CAST "fake", NULL, NULL);
5597
5598
776
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
776
                    NULL, NULL, value);
5600
776
    }
5601
44.3k
      } else {
5602
4.87k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
4.87k
    if ((URI == NULL) && (literal == NULL)) {
5604
697
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
697
    }
5606
4.87k
    if (URI) {
5607
4.10k
        xmlURIPtr uri;
5608
5609
4.10k
        uri = xmlParseURI((const char *)URI);
5610
4.10k
        if (uri == NULL) {
5611
352
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
352
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
3.75k
        } else {
5619
3.75k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
172
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
172
      }
5626
3.75k
      xmlFreeURI(uri);
5627
3.75k
        }
5628
4.10k
    }
5629
4.87k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
967
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
967
           "Space required before 'NDATA'\n");
5632
967
    }
5633
4.87k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
593
        SKIP(5);
5635
593
        if (SKIP_BLANKS == 0) {
5636
77
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
77
               "Space required after 'NDATA'\n");
5638
77
        }
5639
593
        ndata = xmlParseName(ctxt);
5640
593
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
593
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
526
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
526
            literal, URI, ndata);
5644
4.28k
    } else {
5645
4.28k
        if ((ctxt->sax != NULL) &&
5646
4.28k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
3.58k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
3.58k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
3.58k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
4.28k
        if ((ctxt->replaceEntities != 0) &&
5655
4.28k
      ((ctxt->myDoc == NULL) ||
5656
2.81k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
125
      if (ctxt->myDoc == NULL) {
5658
100
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
100
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
100
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
100
      }
5665
5666
125
      if (ctxt->myDoc->intSubset == NULL)
5667
100
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
100
            BAD_CAST "fake", NULL, NULL);
5669
125
      xmlSAX2EntityDecl(ctxt, name,
5670
125
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
125
                  literal, URI, NULL);
5672
125
        }
5673
4.28k
    }
5674
4.87k
      }
5675
49.2k
  }
5676
69.3k
  if (ctxt->instate == XML_PARSER_EOF)
5677
51
      goto done;
5678
69.3k
  SKIP_BLANKS;
5679
69.3k
  if (RAW != '>') {
5680
3.82k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
3.82k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
3.82k
      xmlHaltParser(ctxt);
5683
65.5k
  } else {
5684
65.5k
      if (inputid != ctxt->input->id) {
5685
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
0
                         "Entity declaration doesn't start and stop in"
5687
0
                               " the same entity\n");
5688
0
      }
5689
65.5k
      NEXT;
5690
65.5k
  }
5691
69.3k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
60.5k
      xmlEntityPtr cur = NULL;
5696
5697
60.5k
      if (isParameter) {
5698
17.6k
          if ((ctxt->sax != NULL) &&
5699
17.6k
        (ctxt->sax->getParameterEntity != NULL))
5700
17.6k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
42.8k
      } else {
5702
42.8k
          if ((ctxt->sax != NULL) &&
5703
42.8k
        (ctxt->sax->getEntity != NULL))
5704
42.8k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
42.8k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
2.85k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
2.85k
    }
5708
42.8k
      }
5709
60.5k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
52.4k
    cur->orig = orig;
5711
52.4k
                orig = NULL;
5712
52.4k
      }
5713
60.5k
  }
5714
5715
69.3k
done:
5716
69.3k
  if (value != NULL) xmlFree(value);
5717
69.3k
  if (URI != NULL) xmlFree(URI);
5718
69.3k
  if (literal != NULL) xmlFree(literal);
5719
69.3k
        if (orig != NULL) xmlFree(orig);
5720
69.3k
    }
5721
70.3k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
67.1k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
67.1k
    int val;
5757
67.1k
    xmlChar *ret;
5758
5759
67.1k
    *value = NULL;
5760
67.1k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
5.32k
  SKIP(9);
5762
5.32k
  return(XML_ATTRIBUTE_REQUIRED);
5763
5.32k
    }
5764
61.8k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
49.9k
  SKIP(8);
5766
49.9k
  return(XML_ATTRIBUTE_IMPLIED);
5767
49.9k
    }
5768
11.8k
    val = XML_ATTRIBUTE_NONE;
5769
11.8k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
6.22k
  SKIP(6);
5771
6.22k
  val = XML_ATTRIBUTE_FIXED;
5772
6.22k
  if (SKIP_BLANKS == 0) {
5773
64
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
64
         "Space required after '#FIXED'\n");
5775
64
  }
5776
6.22k
    }
5777
11.8k
    ret = xmlParseAttValue(ctxt);
5778
11.8k
    ctxt->instate = XML_PARSER_DTD;
5779
11.8k
    if (ret == NULL) {
5780
937
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
937
           "Attribute default value declaration error\n");
5782
937
    } else
5783
10.9k
        *value = ret;
5784
11.8k
    return(val);
5785
61.8k
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
502
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
502
    const xmlChar *name;
5809
502
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
502
    if (RAW != '(') {
5812
55
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
55
  return(NULL);
5814
55
    }
5815
447
    SHRINK;
5816
495
    do {
5817
495
        NEXT;
5818
495
  SKIP_BLANKS;
5819
495
        name = xmlParseName(ctxt);
5820
495
  if (name == NULL) {
5821
51
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
51
         "Name expected in NOTATION declaration\n");
5823
51
            xmlFreeEnumeration(ret);
5824
51
      return(NULL);
5825
51
  }
5826
444
  tmp = ret;
5827
497
  while (tmp != NULL) {
5828
53
      if (xmlStrEqual(name, tmp->name)) {
5829
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
0
    "standalone: attribute notation value token %s duplicated\n",
5831
0
         name, NULL);
5832
0
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
0
    break;
5835
0
      }
5836
53
      tmp = tmp->next;
5837
53
  }
5838
444
  if (tmp == NULL) {
5839
444
      cur = xmlCreateEnumeration(name);
5840
444
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
444
      if (last == NULL) ret = last = cur;
5845
32
      else {
5846
32
    last->next = cur;
5847
32
    last = cur;
5848
32
      }
5849
444
  }
5850
444
  SKIP_BLANKS;
5851
444
    } while (RAW == '|');
5852
396
    if (RAW != ')') {
5853
117
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
117
        xmlFreeEnumeration(ret);
5855
117
  return(NULL);
5856
117
    }
5857
279
    NEXT;
5858
279
    return(ret);
5859
396
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
7.67k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
7.67k
    xmlChar *name;
5881
7.67k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
7.67k
    if (RAW != '(') {
5884
1.38k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
1.38k
  return(NULL);
5886
1.38k
    }
5887
6.28k
    SHRINK;
5888
14.8k
    do {
5889
14.8k
        NEXT;
5890
14.8k
  SKIP_BLANKS;
5891
14.8k
        name = xmlParseNmtoken(ctxt);
5892
14.8k
  if (name == NULL) {
5893
96
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
96
      return(ret);
5895
96
  }
5896
14.7k
  tmp = ret;
5897
28.9k
  while (tmp != NULL) {
5898
14.3k
      if (xmlStrEqual(name, tmp->name)) {
5899
163
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
163
    "standalone: attribute enumeration value token %s duplicated\n",
5901
163
         name, NULL);
5902
163
    if (!xmlDictOwns(ctxt->dict, name))
5903
163
        xmlFree(name);
5904
163
    break;
5905
163
      }
5906
14.1k
      tmp = tmp->next;
5907
14.1k
  }
5908
14.7k
  if (tmp == NULL) {
5909
14.5k
      cur = xmlCreateEnumeration(name);
5910
14.5k
      if (!xmlDictOwns(ctxt->dict, name))
5911
14.5k
    xmlFree(name);
5912
14.5k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
14.5k
      if (last == NULL) ret = last = cur;
5917
8.36k
      else {
5918
8.36k
    last->next = cur;
5919
8.36k
    last = cur;
5920
8.36k
      }
5921
14.5k
  }
5922
14.7k
  SKIP_BLANKS;
5923
14.7k
    } while (RAW == '|');
5924
6.19k
    if (RAW != ')') {
5925
292
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
292
  return(ret);
5927
292
    }
5928
5.90k
    NEXT;
5929
5.90k
    return(ret);
5930
6.19k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
8.23k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
8.23k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
562
  SKIP(8);
5953
562
  if (SKIP_BLANKS == 0) {
5954
60
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
60
         "Space required after 'NOTATION'\n");
5956
60
      return(0);
5957
60
  }
5958
502
  *tree = xmlParseNotationType(ctxt);
5959
502
  if (*tree == NULL) return(0);
5960
279
  return(XML_ATTRIBUTE_NOTATION);
5961
502
    }
5962
7.67k
    *tree = xmlParseEnumerationType(ctxt);
5963
7.67k
    if (*tree == NULL) return(0);
5964
6.22k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
7.67k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
69.6k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
69.6k
    SHRINK;
6017
69.6k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
21.6k
  SKIP(5);
6019
21.6k
  return(XML_ATTRIBUTE_CDATA);
6020
47.9k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
272
  SKIP(6);
6022
272
  return(XML_ATTRIBUTE_IDREFS);
6023
47.7k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
2.37k
  SKIP(5);
6025
2.37k
  return(XML_ATTRIBUTE_IDREF);
6026
45.3k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
17.4k
        SKIP(2);
6028
17.4k
  return(XML_ATTRIBUTE_ID);
6029
27.8k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
196
  SKIP(6);
6031
196
  return(XML_ATTRIBUTE_ENTITY);
6032
27.6k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
140
  SKIP(8);
6034
140
  return(XML_ATTRIBUTE_ENTITIES);
6035
27.5k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
5.26k
  SKIP(8);
6037
5.26k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
22.2k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
14.0k
  SKIP(7);
6040
14.0k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
14.0k
     }
6042
8.23k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
69.6k
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
35.7k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
35.7k
    const xmlChar *elemName;
6061
35.7k
    const xmlChar *attrName;
6062
35.7k
    xmlEnumerationPtr tree;
6063
6064
35.7k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
35.7k
    SKIP(2);
6067
6068
35.7k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
35.3k
  int inputid = ctxt->input->id;
6070
6071
35.3k
  SKIP(7);
6072
35.3k
  if (SKIP_BLANKS == 0) {
6073
436
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
436
                     "Space required after '<!ATTLIST'\n");
6075
436
  }
6076
35.3k
        elemName = xmlParseName(ctxt);
6077
35.3k
  if (elemName == NULL) {
6078
337
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
337
         "ATTLIST: no name for Element\n");
6080
337
      return;
6081
337
  }
6082
35.0k
  SKIP_BLANKS;
6083
35.0k
  GROW;
6084
99.6k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
72.1k
      int type;
6086
72.1k
      int def;
6087
72.1k
      xmlChar *defaultValue = NULL;
6088
6089
72.1k
      GROW;
6090
72.1k
            tree = NULL;
6091
72.1k
      attrName = xmlParseName(ctxt);
6092
72.1k
      if (attrName == NULL) {
6093
974
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
974
             "ATTLIST: no name for Attribute\n");
6095
974
    break;
6096
974
      }
6097
71.1k
      GROW;
6098
71.1k
      if (SKIP_BLANKS == 0) {
6099
1.49k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
1.49k
            "Space required after the attribute name\n");
6101
1.49k
    break;
6102
1.49k
      }
6103
6104
69.6k
      type = xmlParseAttributeType(ctxt, &tree);
6105
69.6k
      if (type <= 0) {
6106
1.73k
          break;
6107
1.73k
      }
6108
6109
67.9k
      GROW;
6110
67.9k
      if (SKIP_BLANKS == 0) {
6111
716
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
716
             "Space required after the attribute type\n");
6113
716
          if (tree != NULL)
6114
369
        xmlFreeEnumeration(tree);
6115
716
    break;
6116
716
      }
6117
6118
67.1k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
67.1k
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
67.1k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
4.15k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
67.1k
      GROW;
6130
67.1k
            if (RAW != '>') {
6131
55.2k
    if (SKIP_BLANKS == 0) {
6132
2.55k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
2.55k
      "Space required after the attribute default value\n");
6134
2.55k
        if (defaultValue != NULL)
6135
1.52k
      xmlFree(defaultValue);
6136
2.55k
        if (tree != NULL)
6137
318
      xmlFreeEnumeration(tree);
6138
2.55k
        break;
6139
2.55k
    }
6140
55.2k
      }
6141
64.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
64.6k
    (ctxt->sax->attributeDecl != NULL))
6143
62.3k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
62.3k
                          type, def, defaultValue, tree);
6145
2.26k
      else if (tree != NULL)
6146
769
    xmlFreeEnumeration(tree);
6147
6148
64.6k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
64.6k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
64.6k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
7.96k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
7.96k
      }
6153
64.6k
      if (ctxt->sax2) {
6154
59.5k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
59.5k
      }
6156
64.6k
      if (defaultValue != NULL)
6157
9.41k
          xmlFree(defaultValue);
6158
64.6k
      GROW;
6159
64.6k
  }
6160
35.0k
  if (RAW == '>') {
6161
28.2k
      if (inputid != ctxt->input->id) {
6162
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
0
                               "Attribute list declaration doesn't start and"
6164
0
                               " stop in the same entity\n");
6165
0
      }
6166
28.2k
      NEXT;
6167
28.2k
  }
6168
35.0k
    }
6169
35.7k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
22.6k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
22.6k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
22.6k
    const xmlChar *elem = NULL;
6196
6197
22.6k
    GROW;
6198
22.6k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
22.6k
  SKIP(7);
6200
22.6k
  SKIP_BLANKS;
6201
22.6k
  SHRINK;
6202
22.6k
  if (RAW == ')') {
6203
17.0k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
17.0k
      NEXT;
6209
17.0k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
17.0k
      if (ret == NULL)
6211
0
          return(NULL);
6212
17.0k
      if (RAW == '*') {
6213
15
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
15
    NEXT;
6215
15
      }
6216
17.0k
      return(ret);
6217
17.0k
  }
6218
5.60k
  if ((RAW == '(') || (RAW == '|')) {
6219
5.47k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
5.47k
      if (ret == NULL) return(NULL);
6221
5.47k
  }
6222
48.3k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
42.8k
      NEXT;
6224
42.8k
      if (elem == NULL) {
6225
5.45k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
5.45k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
5.45k
    ret->c1 = cur;
6231
5.45k
    if (cur != NULL)
6232
5.45k
        cur->parent = ret;
6233
5.45k
    cur = ret;
6234
37.4k
      } else {
6235
37.4k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
37.4k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
37.4k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
37.4k
    if (n->c1 != NULL)
6242
37.4k
        n->c1->parent = n;
6243
37.4k
          cur->c2 = n;
6244
37.4k
    if (n != NULL)
6245
37.4k
        n->parent = cur;
6246
37.4k
    cur = n;
6247
37.4k
      }
6248
42.8k
      SKIP_BLANKS;
6249
42.8k
      elem = xmlParseName(ctxt);
6250
42.8k
      if (elem == NULL) {
6251
78
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
78
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
78
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
78
    return(NULL);
6255
78
      }
6256
42.7k
      SKIP_BLANKS;
6257
42.7k
      GROW;
6258
42.7k
  }
6259
5.52k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
5.17k
      if (elem != NULL) {
6261
5.17k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
5.17k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
5.17k
    if (cur->c2 != NULL)
6264
5.17k
        cur->c2->parent = cur;
6265
5.17k
            }
6266
5.17k
            if (ret != NULL)
6267
5.17k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
5.17k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
5.17k
      SKIP(2);
6274
5.17k
  } else {
6275
356
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
356
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
356
      return(NULL);
6278
356
  }
6279
6280
5.52k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
5.17k
    return(ret);
6284
22.6k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
38.9k
                                       int depth) {
6321
38.9k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
38.9k
    const xmlChar *elem;
6323
38.9k
    xmlChar type = 0;
6324
6325
38.9k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
38.9k
        (depth >  2048)) {
6327
1
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
1
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
1
                          depth);
6330
1
  return(NULL);
6331
1
    }
6332
38.9k
    SKIP_BLANKS;
6333
38.9k
    GROW;
6334
38.9k
    if (RAW == '(') {
6335
9.52k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
9.52k
  NEXT;
6339
9.52k
  SKIP_BLANKS;
6340
9.52k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
9.52k
                                                           depth + 1);
6342
9.52k
        if (cur == NULL)
6343
8.37k
            return(NULL);
6344
1.15k
  SKIP_BLANKS;
6345
1.15k
  GROW;
6346
29.4k
    } else {
6347
29.4k
  elem = xmlParseName(ctxt);
6348
29.4k
  if (elem == NULL) {
6349
857
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
857
      return(NULL);
6351
857
  }
6352
28.5k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
28.5k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
28.5k
  GROW;
6358
28.5k
  if (RAW == '?') {
6359
1.54k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
1.54k
      NEXT;
6361
27.0k
  } else if (RAW == '*') {
6362
1.38k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
1.38k
      NEXT;
6364
25.6k
  } else if (RAW == '+') {
6365
4.85k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
4.85k
      NEXT;
6367
20.7k
  } else {
6368
20.7k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
20.7k
  }
6370
28.5k
  GROW;
6371
28.5k
    }
6372
29.7k
    SKIP_BLANKS;
6373
29.7k
    SHRINK;
6374
103k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
76.3k
        if (RAW == ',') {
6379
28.9k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
18.6k
      else if (type != CUR) {
6385
36
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
36
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
36
                      type);
6388
36
    if ((last != NULL) && (last != ret))
6389
36
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
36
    if (ret != NULL)
6391
36
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
36
    return(NULL);
6393
36
      }
6394
28.8k
      NEXT;
6395
6396
28.8k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
28.8k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
28.8k
      if (last == NULL) {
6404
10.2k
    op->c1 = ret;
6405
10.2k
    if (ret != NULL)
6406
10.2k
        ret->parent = op;
6407
10.2k
    ret = cur = op;
6408
18.6k
      } else {
6409
18.6k
          cur->c2 = op;
6410
18.6k
    if (op != NULL)
6411
18.6k
        op->parent = cur;
6412
18.6k
    op->c1 = last;
6413
18.6k
    if (last != NULL)
6414
18.6k
        last->parent = op;
6415
18.6k
    cur =op;
6416
18.6k
    last = NULL;
6417
18.6k
      }
6418
47.4k
  } else if (RAW == '|') {
6419
45.1k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
35.9k
      else if (type != CUR) {
6425
40
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
40
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
40
          type);
6428
40
    if ((last != NULL) && (last != ret))
6429
40
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
40
    if (ret != NULL)
6431
40
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
40
    return(NULL);
6433
40
      }
6434
45.1k
      NEXT;
6435
6436
45.1k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
45.1k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
45.1k
      if (last == NULL) {
6445
9.21k
    op->c1 = ret;
6446
9.21k
    if (ret != NULL)
6447
9.21k
        ret->parent = op;
6448
9.21k
    ret = cur = op;
6449
35.9k
      } else {
6450
35.9k
          cur->c2 = op;
6451
35.9k
    if (op != NULL)
6452
35.9k
        op->parent = cur;
6453
35.9k
    op->c1 = last;
6454
35.9k
    if (last != NULL)
6455
35.9k
        last->parent = op;
6456
35.9k
    cur =op;
6457
35.9k
    last = NULL;
6458
35.9k
      }
6459
45.1k
  } else {
6460
2.32k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
2.32k
      if ((last != NULL) && (last != ret))
6462
1.22k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
2.32k
      if (ret != NULL)
6464
2.32k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
2.32k
      return(NULL);
6466
2.32k
  }
6467
73.9k
  GROW;
6468
73.9k
  SKIP_BLANKS;
6469
73.9k
  GROW;
6470
73.9k
  if (RAW == '(') {
6471
3.28k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
3.28k
      NEXT;
6474
3.28k
      SKIP_BLANKS;
6475
3.28k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
3.28k
                                                          depth + 1);
6477
3.28k
            if (last == NULL) {
6478
263
    if (ret != NULL)
6479
263
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
263
    return(NULL);
6481
263
            }
6482
3.02k
      SKIP_BLANKS;
6483
70.7k
  } else {
6484
70.7k
      elem = xmlParseName(ctxt);
6485
70.7k
      if (elem == NULL) {
6486
345
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
345
    if (ret != NULL)
6488
345
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
345
    return(NULL);
6490
345
      }
6491
70.3k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
70.3k
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
70.3k
      if (RAW == '?') {
6498
9.06k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
9.06k
    NEXT;
6500
61.3k
      } else if (RAW == '*') {
6501
2.16k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
2.16k
    NEXT;
6503
59.1k
      } else if (RAW == '+') {
6504
1.36k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
1.36k
    NEXT;
6506
57.7k
      } else {
6507
57.7k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
57.7k
      }
6509
70.3k
  }
6510
73.3k
  SKIP_BLANKS;
6511
73.3k
  GROW;
6512
73.3k
    }
6513
26.7k
    if ((cur != NULL) && (last != NULL)) {
6514
17.5k
        cur->c2 = last;
6515
17.5k
  if (last != NULL)
6516
17.5k
      last->parent = cur;
6517
17.5k
    }
6518
26.7k
    if (ctxt->input->id != inputchk) {
6519
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
0
                       "Element content declaration doesn't start and stop in"
6521
0
                       " the same entity\n");
6522
0
    }
6523
26.7k
    NEXT;
6524
26.7k
    if (RAW == '?') {
6525
734
  if (ret != NULL) {
6526
734
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
734
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
15
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
719
      else
6530
719
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
734
  }
6532
734
  NEXT;
6533
25.9k
    } else if (RAW == '*') {
6534
7.14k
  if (ret != NULL) {
6535
7.14k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
7.14k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
34.7k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
27.6k
    if ((cur->c1 != NULL) &&
6543
27.6k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
27.6k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
3.51k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
27.6k
    if ((cur->c2 != NULL) &&
6547
27.6k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
27.6k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
635
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
27.6k
    cur = cur->c2;
6551
27.6k
      }
6552
7.14k
  }
6553
7.14k
  NEXT;
6554
18.8k
    } else if (RAW == '+') {
6555
3.30k
  if (ret != NULL) {
6556
3.30k
      int found = 0;
6557
6558
3.30k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
3.30k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
9
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
3.29k
      else
6562
3.29k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
5.37k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
2.07k
    if ((cur->c1 != NULL) &&
6570
2.07k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
2.07k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
20
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
20
        found = 1;
6574
20
    }
6575
2.07k
    if ((cur->c2 != NULL) &&
6576
2.07k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
2.07k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
29
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
29
        found = 1;
6580
29
    }
6581
2.07k
    cur = cur->c2;
6582
2.07k
      }
6583
3.30k
      if (found)
6584
48
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
3.30k
  }
6586
3.30k
  NEXT;
6587
3.30k
    }
6588
26.7k
    return(ret);
6589
29.7k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
48.8k
                           xmlElementContentPtr *result) {
6648
6649
48.8k
    xmlElementContentPtr tree = NULL;
6650
48.8k
    int inputid = ctxt->input->id;
6651
48.8k
    int res;
6652
6653
48.8k
    *result = NULL;
6654
6655
48.8k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
48.8k
    NEXT;
6661
48.8k
    GROW;
6662
48.8k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
48.8k
    SKIP_BLANKS;
6665
48.8k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
22.6k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
22.6k
  res = XML_ELEMENT_TYPE_MIXED;
6668
26.1k
    } else {
6669
26.1k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
26.1k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
26.1k
    }
6672
48.8k
    SKIP_BLANKS;
6673
48.8k
    *result = tree;
6674
48.8k
    return(res);
6675
48.8k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
67.2k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
67.2k
    const xmlChar *name;
6695
67.2k
    int ret = -1;
6696
67.2k
    xmlElementContentPtr content  = NULL;
6697
6698
67.2k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
67.2k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
67.2k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
66.4k
  int inputid = ctxt->input->id;
6705
6706
66.4k
  SKIP(7);
6707
66.4k
  if (SKIP_BLANKS == 0) {
6708
354
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
354
               "Space required after 'ELEMENT'\n");
6710
354
      return(-1);
6711
354
  }
6712
66.0k
        name = xmlParseName(ctxt);
6713
66.0k
  if (name == NULL) {
6714
558
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
558
         "xmlParseElementDecl: no name for Element\n");
6716
558
      return(-1);
6717
558
  }
6718
65.4k
  if (SKIP_BLANKS == 0) {
6719
3.06k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
3.06k
         "Space required after the element name\n");
6721
3.06k
  }
6722
65.4k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
13.4k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
13.4k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
52.0k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
52.0k
             (NXT(2) == 'Y')) {
6730
255
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
255
      ret = XML_ELEMENT_TYPE_ANY;
6735
51.7k
  } else if (RAW == '(') {
6736
48.8k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
48.8k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
2.96k
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
2.96k
          (ctxt->inputNr == 1)) {
6743
136
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
136
    "PEReference: forbidden within markup decl in internal subset\n");
6745
2.82k
      } else {
6746
2.82k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
2.82k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
2.82k
            }
6749
2.96k
      return(-1);
6750
2.96k
  }
6751
6752
62.5k
  SKIP_BLANKS;
6753
6754
62.5k
  if (RAW != '>') {
6755
4.02k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
4.02k
      if (content != NULL) {
6757
293
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
293
      }
6759
58.5k
  } else {
6760
58.5k
      if (inputid != ctxt->input->id) {
6761
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
0
                               "Element declaration doesn't start and stop in"
6763
0
                               " the same entity\n");
6764
0
      }
6765
6766
58.5k
      NEXT;
6767
58.5k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
58.5k
    (ctxt->sax->elementDecl != NULL)) {
6769
54.1k
    if (content != NULL)
6770
41.5k
        content->parent = NULL;
6771
54.1k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
54.1k
                           content);
6773
54.1k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
858
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
858
    }
6782
54.1k
      } else if (content != NULL) {
6783
2.90k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
2.90k
      }
6785
58.5k
  }
6786
62.5k
    }
6787
63.3k
    return(ret);
6788
67.2k
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
120
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
120
    int *inputIds = NULL;
6806
120
    size_t inputIdsSize = 0;
6807
120
    size_t depth = 0;
6808
6809
503
    while (ctxt->instate != XML_PARSER_EOF) {
6810
503
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
270
            int id = ctxt->input->id;
6812
6813
270
            SKIP(3);
6814
270
            SKIP_BLANKS;
6815
6816
270
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
203
                SKIP(7);
6818
203
                SKIP_BLANKS;
6819
203
                if (RAW != '[') {
6820
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
0
                    xmlHaltParser(ctxt);
6822
0
                    goto error;
6823
0
                }
6824
203
                if (ctxt->input->id != id) {
6825
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
0
                                   "All markup of the conditional section is"
6827
0
                                   " not in the same entity\n");
6828
0
                }
6829
203
                NEXT;
6830
6831
203
                if (inputIdsSize <= depth) {
6832
68
                    int *tmp;
6833
6834
68
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
68
                    tmp = (int *) xmlRealloc(inputIds,
6836
68
                            inputIdsSize * sizeof(int));
6837
68
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
68
                    inputIds = tmp;
6842
68
                }
6843
203
                inputIds[depth] = id;
6844
203
                depth++;
6845
203
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
32
                size_t ignoreDepth = 0;
6847
6848
32
                SKIP(6);
6849
32
                SKIP_BLANKS;
6850
32
                if (RAW != '[') {
6851
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
0
                    xmlHaltParser(ctxt);
6853
0
                    goto error;
6854
0
                }
6855
32
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
32
                NEXT;
6861
6862
3.43k
                while (RAW != 0) {
6863
3.43k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
45
                        SKIP(3);
6865
45
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
45
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
3.38k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
3.38k
                               (NXT(2) == '>')) {
6873
74
                        if (ignoreDepth == 0)
6874
29
                            break;
6875
45
                        SKIP(3);
6876
45
                        ignoreDepth--;
6877
3.31k
                    } else {
6878
3.31k
                        NEXT;
6879
3.31k
                    }
6880
3.43k
                }
6881
6882
32
    if (RAW == 0) {
6883
3
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
3
                    goto error;
6885
3
    }
6886
29
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
29
                SKIP(3);
6892
35
            } else {
6893
35
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
35
                xmlHaltParser(ctxt);
6895
35
                goto error;
6896
35
            }
6897
270
        } else if ((depth > 0) &&
6898
233
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
188
            depth--;
6900
188
            if (ctxt->input->id != inputIds[depth]) {
6901
15
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
15
                               "All markup of the conditional section is not"
6903
15
                               " in the same entity\n");
6904
15
            }
6905
188
            SKIP(3);
6906
188
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
30
            xmlParseMarkupDecl(ctxt);
6908
30
        } else {
6909
15
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
15
            xmlHaltParser(ctxt);
6911
15
            goto error;
6912
15
        }
6913
6914
450
        if (depth == 0)
6915
67
            break;
6916
6917
383
        SKIP_BLANKS;
6918
383
        GROW;
6919
383
    }
6920
6921
120
error:
6922
120
    xmlFree(inputIds);
6923
120
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
2.59M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
2.59M
    GROW;
6952
2.59M
    if (CUR == '<') {
6953
2.59M
        if (NXT(1) == '!') {
6954
2.59M
      switch (NXT(2)) {
6955
137k
          case 'E':
6956
137k
        if (NXT(3) == 'L')
6957
67.2k
      xmlParseElementDecl(ctxt);
6958
70.6k
        else if (NXT(3) == 'N')
6959
70.3k
      xmlParseEntityDecl(ctxt);
6960
281
                    else
6961
281
                        SKIP(2);
6962
137k
        break;
6963
35.7k
          case 'A':
6964
35.7k
        xmlParseAttributeListDecl(ctxt);
6965
35.7k
        break;
6966
1.00k
          case 'N':
6967
1.00k
        xmlParseNotationDecl(ctxt);
6968
1.00k
        break;
6969
2.41M
          case '-':
6970
2.41M
        xmlParseComment(ctxt);
6971
2.41M
        break;
6972
2.52k
    default:
6973
        /* there is an error but it will be detected later */
6974
2.52k
                    SKIP(2);
6975
2.52k
        break;
6976
2.59M
      }
6977
2.59M
  } else if (NXT(1) == '?') {
6978
859
      xmlParsePI(ctxt);
6979
859
  }
6980
2.59M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
2.59M
    if (ctxt->instate == XML_PARSER_EOF)
6987
3.87k
        return;
6988
6989
2.58M
    ctxt->instate = XML_PARSER_DTD;
6990
2.58M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
1.66k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
1.66k
    xmlChar *version;
7006
1.66k
    const xmlChar *encoding;
7007
1.66k
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
1.66k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
1.62k
  SKIP(5);
7014
1.62k
    } else {
7015
39
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
39
  return;
7017
39
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
1.62k
    oldstate = ctxt->instate;
7021
1.62k
    ctxt->instate = XML_PARSER_START;
7022
7023
1.62k
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
1.62k
    version = xmlParseVersionInfo(ctxt);
7032
1.62k
    if (version == NULL)
7033
223
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
1.39k
    else {
7035
1.39k
  if (SKIP_BLANKS == 0) {
7036
34
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
34
               "Space needed here\n");
7038
34
  }
7039
1.39k
    }
7040
1.62k
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
1.62k
    encoding = xmlParseEncodingDecl(ctxt);
7046
1.62k
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
1.62k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
24
        ctxt->instate = oldstate;
7053
24
        return;
7054
24
    }
7055
1.59k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
562
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
562
           "Missing encoding in text declaration\n");
7058
562
    }
7059
7060
1.59k
    SKIP_BLANKS;
7061
1.59k
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
1.08k
        SKIP(2);
7063
1.08k
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
11
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
11
  NEXT;
7067
498
    } else {
7068
498
        int c;
7069
7070
498
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
15.8k
        while ((c = CUR) != 0) {
7072
15.6k
            NEXT;
7073
15.6k
            if (c == '>')
7074
351
                break;
7075
15.6k
        }
7076
498
    }
7077
7078
1.59k
    ctxt->instate = oldstate;
7079
1.59k
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
3.80k
                       const xmlChar *SystemID) {
7096
3.80k
    xmlDetectSAX2(ctxt);
7097
3.80k
    GROW;
7098
7099
3.80k
    if ((ctxt->encoding == NULL) &&
7100
3.80k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
3.79k
        xmlChar start[4];
7102
3.79k
  xmlCharEncoding enc;
7103
7104
3.79k
  start[0] = RAW;
7105
3.79k
  start[1] = NXT(1);
7106
3.79k
  start[2] = NXT(2);
7107
3.79k
  start[3] = NXT(3);
7108
3.79k
  enc = xmlDetectCharEncoding(start, 4);
7109
3.79k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
1.77k
      xmlSwitchEncoding(ctxt, enc);
7111
3.79k
    }
7112
7113
3.80k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
1.64k
  xmlParseTextDecl(ctxt);
7115
1.64k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
24
      xmlHaltParser(ctxt);
7120
24
      return;
7121
24
  }
7122
1.64k
    }
7123
3.77k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
3.77k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
3.77k
    ctxt->instate = XML_PARSER_DTD;
7135
3.77k
    ctxt->external = 1;
7136
3.77k
    SKIP_BLANKS;
7137
77.7k
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
75.3k
  GROW;
7139
75.3k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
120
            xmlParseConditionalSections(ctxt);
7141
75.1k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
73.8k
            xmlParseMarkupDecl(ctxt);
7143
73.8k
        } else {
7144
1.37k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
1.37k
            xmlHaltParser(ctxt);
7146
1.37k
            return;
7147
1.37k
        }
7148
73.9k
        SKIP_BLANKS;
7149
73.9k
    }
7150
7151
2.40k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
2.40k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
590k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
590k
    xmlEntityPtr ent;
7175
590k
    xmlChar *val;
7176
590k
    int was_checked;
7177
590k
    xmlNodePtr list = NULL;
7178
590k
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
590k
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
590k
    if (NXT(1) == '#') {
7188
31.8k
  int i = 0;
7189
31.8k
  xmlChar out[16];
7190
31.8k
  int hex = NXT(2);
7191
31.8k
  int value = xmlParseCharRef(ctxt);
7192
7193
31.8k
  if (value == 0)
7194
8.82k
      return;
7195
23.0k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
17.4k
      if (value <= 0xFF) {
7202
16.4k
    out[0] = value;
7203
16.4k
    out[1] = 0;
7204
16.4k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
16.4k
        (!ctxt->disableSAX))
7206
13.6k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
16.4k
      } else {
7208
993
    if ((hex == 'x') || (hex == 'X'))
7209
161
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
832
    else
7211
832
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
993
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
993
        (!ctxt->disableSAX))
7214
828
        ctxt->sax->reference(ctxt->userData, out);
7215
993
      }
7216
17.4k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
5.61k
      COPY_BUF(0 ,out, i, value);
7221
5.61k
      out[i] = 0;
7222
5.61k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
5.61k
    (!ctxt->disableSAX))
7224
4.00k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
5.61k
  }
7226
23.0k
  return;
7227
31.8k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
558k
    ent = xmlParseEntityRef(ctxt);
7233
558k
    if (ent == NULL) return;
7234
459k
    if (!ctxt->wellFormed)
7235
131k
  return;
7236
328k
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
328k
    if ((ent->name == NULL) ||
7240
328k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
11.9k
  val = ent->content;
7242
11.9k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
11.9k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
11.9k
      (!ctxt->disableSAX))
7248
11.9k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
11.9k
  return;
7250
11.9k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
316k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
316k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
8.15k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
8.10k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
8.10k
  void *user_data;
7273
8.10k
  if (ctxt->userData == ctxt)
7274
8.10k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
8.10k
        ctxt->sizeentcopy = 0;
7280
7281
8.10k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
21
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
21
            xmlHaltParser(ctxt);
7284
21
            return;
7285
21
        }
7286
7287
8.08k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
8.08k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
6.99k
      ctxt->depth++;
7297
6.99k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
6.99k
                                                user_data, &list);
7299
6.99k
      ctxt->depth--;
7300
7301
6.99k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
1.09k
      ctxt->depth++;
7303
1.09k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
1.09k
                                     user_data, ctxt->depth, ent->URI,
7305
1.09k
             ent->ExternalID, &list);
7306
1.09k
      ctxt->depth--;
7307
1.09k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
8.08k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
8.08k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
8.08k
        ent->expandedSize = ctxt->sizeentcopy;
7316
8.08k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
690
            xmlHaltParser(ctxt);
7318
690
      xmlFreeNodeList(list);
7319
690
      return;
7320
690
  }
7321
7.39k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
7.39k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
6.19k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
6.19k
            if ((ctxt->replaceEntities == 0) ||
7333
6.19k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
6.19k
                ((list->type == XML_TEXT_NODE) &&
7335
5.88k
                 (list->next == NULL))) {
7336
5.88k
                ent->owner = 1;
7337
12.8k
                while (list != NULL) {
7338
6.94k
                    list->parent = (xmlNodePtr) ent;
7339
6.94k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
6.94k
                    if (list->next == NULL)
7342
5.88k
                        ent->last = list;
7343
6.94k
                    list = list->next;
7344
6.94k
                }
7345
5.88k
                list = NULL;
7346
5.88k
            } else {
7347
310
                ent->owner = 0;
7348
1.47k
                while (list != NULL) {
7349
1.16k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
1.16k
                    list->doc = ctxt->myDoc;
7351
1.16k
                    if (list->next == NULL)
7352
310
                        ent->last = list;
7353
1.16k
                    list = list->next;
7354
1.16k
                }
7355
310
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
310
            }
7361
6.19k
  } else if ((ret != XML_ERR_OK) &&
7362
1.20k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
617
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
617
         "Entity '%s' failed to parse\n", ent->name);
7365
617
            if (ent->content != NULL)
7366
305
                ent->content[0] = 0;
7367
617
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
7.39k
        was_checked = 0;
7374
7.39k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
316k
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
1.88k
  if (was_checked != 0) {
7389
628
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
628
      if (ctxt->userData == ctxt)
7396
628
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
628
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
0
    ctxt->depth++;
7402
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
0
           ent->content, user_data, NULL);
7404
0
    ctxt->depth--;
7405
628
      } else if (ent->etype ==
7406
628
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
628
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
628
    ctxt->depth++;
7410
628
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
628
         ctxt->sax, user_data, ctxt->depth,
7412
628
         ent->URI, ent->ExternalID, NULL);
7413
628
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
628
                ctxt->sizeentities = oldsizeentities;
7417
628
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
628
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
628
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
628
  }
7429
1.88k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
1.88k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
399
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
399
  }
7437
1.88k
  return;
7438
1.88k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
314k
    if ((was_checked != 0) &&
7445
314k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
36
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
314k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
314k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
60.0k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
60.0k
  return;
7458
60.0k
    }
7459
7460
254k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
254k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
254k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
254k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
82.9k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
82.9k
    cur = ent->children;
7492
84.3k
    while (cur != NULL) {
7493
84.3k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
84.3k
        if (nw != NULL) {
7495
84.3k
      if (nw->_private == NULL)
7496
84.3k
          nw->_private = cur->_private;
7497
84.3k
      if (firstChild == NULL){
7498
82.9k
          firstChild = nw;
7499
82.9k
      }
7500
84.3k
      nw = xmlAddChild(ctxt->node, nw);
7501
84.3k
        }
7502
84.3k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
82.9k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
82.9k
          (nw != NULL) &&
7509
82.9k
          (nw->type == XML_ELEMENT_NODE) &&
7510
82.9k
          (nw->children == NULL))
7511
48
          nw->extra = 1;
7512
7513
82.9k
      break;
7514
82.9k
        }
7515
1.37k
        cur = cur->next;
7516
1.37k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
171k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
171k
    xmlNodePtr nw = NULL, cur, next, last,
7523
171k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
171k
    cur = ent->children;
7532
171k
    ent->children = NULL;
7533
171k
    last = ent->last;
7534
171k
    ent->last = NULL;
7535
174k
    while (cur != NULL) {
7536
174k
        next = cur->next;
7537
174k
        cur->next = NULL;
7538
174k
        cur->parent = NULL;
7539
174k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
174k
        if (nw != NULL) {
7541
174k
      if (nw->_private == NULL)
7542
174k
          nw->_private = cur->_private;
7543
174k
      if (firstChild == NULL){
7544
171k
          firstChild = cur;
7545
171k
      }
7546
174k
      xmlAddChild((xmlNodePtr) ent, nw);
7547
174k
        }
7548
174k
        xmlAddChild(ctxt->node, cur);
7549
174k
        if (cur == last)
7550
171k
      break;
7551
3.04k
        cur = next;
7552
3.04k
    }
7553
171k
    if (ent->owner == 0)
7554
310
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
171k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
254k
      ctxt->nodemem = 0;
7582
254k
      ctxt->nodelen = 0;
7583
254k
      return;
7584
254k
  }
7585
254k
    }
7586
254k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
744k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
744k
    const xmlChar *name;
7621
744k
    xmlEntityPtr ent = NULL;
7622
7623
744k
    GROW;
7624
744k
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
744k
    if (RAW != '&')
7628
0
        return(NULL);
7629
744k
    NEXT;
7630
744k
    name = xmlParseName(ctxt);
7631
744k
    if (name == NULL) {
7632
24.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
24.7k
           "xmlParseEntityRef: no name\n");
7634
24.7k
        return(NULL);
7635
24.7k
    }
7636
719k
    if (RAW != ';') {
7637
11.7k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
11.7k
  return(NULL);
7639
11.7k
    }
7640
707k
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
707k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
604k
        ent = xmlGetPredefinedEntity(name);
7647
604k
        if (ent != NULL)
7648
45.5k
            return(ent);
7649
604k
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
662k
    if (ctxt->sax != NULL) {
7656
662k
  if (ctxt->sax->getEntity != NULL)
7657
662k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
662k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
662k
      (ctxt->options & XML_PARSE_OLDSAX))
7660
734
      ent = xmlGetPredefinedEntity(name);
7661
662k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
662k
      (ctxt->userData==ctxt)) {
7663
1.69k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
1.69k
  }
7665
662k
    }
7666
662k
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
662k
    if (ent == NULL) {
7690
76.3k
  if ((ctxt->standalone == 1) ||
7691
76.3k
      ((ctxt->hasExternalSubset == 0) &&
7692
56.3k
       (ctxt->hasPErefs == 0))) {
7693
44.1k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
44.1k
         "Entity '%s' not defined\n", name);
7695
44.1k
  } else {
7696
32.2k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
32.2k
         "Entity '%s' not defined\n", name);
7698
32.2k
      if ((ctxt->inSubset == 0) &&
7699
32.2k
    (ctxt->sax != NULL) &&
7700
32.2k
    (ctxt->sax->reference != NULL)) {
7701
32.2k
    ctxt->sax->reference(ctxt->userData, name);
7702
32.2k
      }
7703
32.2k
  }
7704
76.3k
  ctxt->valid = 0;
7705
76.3k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
585k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
25
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
25
     "Entity reference to unparsed entity %s\n", name);
7715
25
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
585k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
585k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
1
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
1
       "Attribute references external entity '%s'\n", name);
7726
1
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
585k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
585k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
135k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
1.47k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
111
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
1.47k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
1.47k
        }
7740
135k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
138
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
138
                    "'<' in entity '%s' is not allowed in attributes "
7743
138
                    "values\n", name);
7744
135k
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
450k
    else {
7750
450k
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
450k
      default:
7758
450k
      break;
7759
450k
  }
7760
450k
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
662k
    return(ent);
7769
662k
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
6.04M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
6.04M
    xmlChar *name;
7805
6.04M
    const xmlChar *ptr;
7806
6.04M
    xmlChar cur;
7807
6.04M
    xmlEntityPtr ent = NULL;
7808
7809
6.04M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
6.04M
    ptr = *str;
7812
6.04M
    cur = *ptr;
7813
6.04M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
6.04M
    ptr++;
7817
6.04M
    name = xmlParseStringName(ctxt, &ptr);
7818
6.04M
    if (name == NULL) {
7819
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
0
           "xmlParseStringEntityRef: no name\n");
7821
0
  *str = ptr;
7822
0
  return(NULL);
7823
0
    }
7824
6.04M
    if (*ptr != ';') {
7825
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
0
        xmlFree(name);
7827
0
  *str = ptr;
7828
0
  return(NULL);
7829
0
    }
7830
6.04M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
6.04M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
5.35M
        ent = xmlGetPredefinedEntity(name);
7838
5.35M
        if (ent != NULL) {
7839
58
            xmlFree(name);
7840
58
            *str = ptr;
7841
58
            return(ent);
7842
58
        }
7843
5.35M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
6.04M
    if (ctxt->sax != NULL) {
7850
6.04M
  if (ctxt->sax->getEntity != NULL)
7851
6.04M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
6.04M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
230k
      ent = xmlGetPredefinedEntity(name);
7854
6.04M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
461k
      ent = xmlSAX2GetEntity(ctxt, name);
7856
461k
  }
7857
6.04M
    }
7858
6.04M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
6.04M
    if (ent == NULL) {
7885
461k
  if ((ctxt->standalone == 1) ||
7886
461k
      ((ctxt->hasExternalSubset == 0) &&
7887
461k
       (ctxt->hasPErefs == 0))) {
7888
461k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
461k
         "Entity '%s' not defined\n", name);
7890
461k
  } else {
7891
15
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
15
        "Entity '%s' not defined\n",
7893
15
        name);
7894
15
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
461k
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
5.58M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
0
     "Entity reference to unparsed entity %s\n", name);
7906
0
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
5.58M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
5.58M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
0
   "Attribute references external entity '%s'\n", name);
7917
0
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
5.58M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
5.58M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
5.57M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
1.23k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
1
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
1.23k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
1.23k
        }
7931
5.57M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
42
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
42
                    "'<' in entity '%s' is not allowed in attributes "
7934
42
                    "values\n", name);
7935
5.57M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
2.64k
    else {
7941
2.64k
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
2.64k
      default:
7949
2.64k
      break;
7950
2.64k
  }
7951
2.64k
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
6.04M
    xmlFree(name);
7961
6.04M
    *str = ptr;
7962
6.04M
    return(ent);
7963
6.04M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
2.41M
{
8000
2.41M
    const xmlChar *name;
8001
2.41M
    xmlEntityPtr entity = NULL;
8002
2.41M
    xmlParserInputPtr input;
8003
8004
2.41M
    if (RAW != '%')
8005
0
        return;
8006
2.41M
    NEXT;
8007
2.41M
    name = xmlParseName(ctxt);
8008
2.41M
    if (name == NULL) {
8009
4.72k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
4.72k
  return;
8011
4.72k
    }
8012
2.41M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
2.41M
    if (RAW != ';') {
8016
689
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
689
        return;
8018
689
    }
8019
8020
2.41M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
2.41M
    if ((ctxt->sax != NULL) &&
8026
2.41M
  (ctxt->sax->getParameterEntity != NULL))
8027
2.41M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
2.41M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
2.41M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
1.94k
  if ((ctxt->standalone == 1) ||
8040
1.94k
      ((ctxt->hasExternalSubset == 0) &&
8041
1.94k
       (ctxt->hasPErefs == 0))) {
8042
1.09k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
1.09k
            "PEReference: %%%s; not found\n",
8044
1.09k
            name);
8045
1.09k
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
857
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
89
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
89
                                 "PEReference: %%%s; not found\n",
8056
89
                                 name, NULL);
8057
89
            } else
8058
768
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
768
                              "PEReference: %%%s; not found\n",
8060
768
                              name, NULL);
8061
857
            ctxt->valid = 0;
8062
857
  }
8063
2.40M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
2.40M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
2.40M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
2.40M
  } else {
8073
2.40M
            xmlChar start[4];
8074
2.40M
            xmlCharEncoding enc;
8075
2.40M
            unsigned long parentConsumed;
8076
2.40M
            xmlEntityPtr oldEnt;
8077
8078
2.40M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
2.40M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
2.40M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
2.40M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
2.40M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
2.40M
    (ctxt->replaceEntities == 0) &&
8084
2.40M
    (ctxt->validate == 0))
8085
0
    return;
8086
8087
2.40M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
38
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
38
                xmlHaltParser(ctxt);
8090
38
                return;
8091
38
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
2.40M
            parentConsumed = ctxt->input->parentConsumed;
8095
2.40M
            oldEnt = ctxt->input->entity;
8096
2.40M
            if ((oldEnt == NULL) ||
8097
2.40M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
2.38M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
28.0k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
28.0k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
28.0k
                                     ctxt->input->cur - ctxt->input->base);
8102
28.0k
            }
8103
8104
2.40M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
2.40M
      if (xmlPushInput(ctxt, input) < 0) {
8106
8
                xmlFreeInputStream(input);
8107
8
    return;
8108
8
            }
8109
8110
2.40M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
2.40M
            input->parentConsumed = parentConsumed;
8113
8114
2.40M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
15
                GROW
8125
15
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
15
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
15
                    start[0] = RAW;
8129
15
                    start[1] = NXT(1);
8130
15
                    start[2] = NXT(2);
8131
15
                    start[3] = NXT(3);
8132
15
                    enc = xmlDetectCharEncoding(start, 4);
8133
15
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
0
                        xmlSwitchEncoding(ctxt, enc);
8135
0
                    }
8136
15
                }
8137
8138
15
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
15
                    (IS_BLANK_CH(NXT(5)))) {
8140
0
                    xmlParseTextDecl(ctxt);
8141
0
                }
8142
15
            }
8143
2.40M
  }
8144
2.40M
    }
8145
2.41M
    ctxt->hasPErefs = 1;
8146
2.41M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
360
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
360
    xmlParserInputPtr input;
8162
360
    xmlBufferPtr buf;
8163
360
    int l, c;
8164
360
    int count = 0;
8165
8166
360
    if ((ctxt == NULL) || (entity == NULL) ||
8167
360
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
360
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
360
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
360
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
360
    buf = xmlBufferCreate();
8180
360
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
360
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
360
    input = xmlNewEntityInputStream(ctxt, entity);
8188
360
    if (input == NULL) {
8189
51
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
51
              "xmlLoadEntityContent input error");
8191
51
  xmlBufferFree(buf);
8192
51
        return(-1);
8193
51
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
309
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
309
    GROW;
8206
309
    c = CUR_CHAR(l);
8207
96.0k
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
96.0k
           (IS_CHAR(c))) {
8209
95.7k
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
95.7k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
779
      count = 0;
8212
779
      GROW;
8213
779
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
779
  }
8218
95.7k
  NEXTL(l);
8219
95.7k
  c = CUR_CHAR(l);
8220
95.7k
  if (c == 0) {
8221
256
      count = 0;
8222
256
      GROW;
8223
256
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
256
      c = CUR_CHAR(l);
8228
256
  }
8229
95.7k
    }
8230
8231
309
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
210
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
210
        xmlPopInput(ctxt);
8234
210
    } else if (!IS_CHAR(c)) {
8235
99
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
99
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
99
                    c);
8238
99
  xmlBufferFree(buf);
8239
99
  return(-1);
8240
99
    }
8241
210
    entity->content = buf->content;
8242
210
    entity->length = buf->use;
8243
210
    buf->content = NULL;
8244
210
    xmlBufferFree(buf);
8245
8246
210
    return(0);
8247
309
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the string of the entity content.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
22.8k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
22.8k
    const xmlChar *ptr;
8283
22.8k
    xmlChar cur;
8284
22.8k
    xmlChar *name;
8285
22.8k
    xmlEntityPtr entity = NULL;
8286
8287
22.8k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
22.8k
    ptr = *str;
8289
22.8k
    cur = *ptr;
8290
22.8k
    if (cur != '%')
8291
0
        return(NULL);
8292
22.8k
    ptr++;
8293
22.8k
    name = xmlParseStringName(ctxt, &ptr);
8294
22.8k
    if (name == NULL) {
8295
136
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
136
           "xmlParseStringPEReference: no name\n");
8297
136
  *str = ptr;
8298
136
  return(NULL);
8299
136
    }
8300
22.6k
    cur = *ptr;
8301
22.6k
    if (cur != ';') {
8302
29
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
29
  xmlFree(name);
8304
29
  *str = ptr;
8305
29
  return(NULL);
8306
29
    }
8307
22.6k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
22.6k
    if ((ctxt->sax != NULL) &&
8313
22.6k
  (ctxt->sax->getParameterEntity != NULL))
8314
22.6k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
22.6k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
22.6k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
3.73k
  if ((ctxt->standalone == 1) ||
8330
3.73k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
3.73k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
3.73k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
3.73k
        "PEReference: %%%s; not found\n",
8343
3.73k
        name, NULL);
8344
3.73k
      ctxt->valid = 0;
8345
3.73k
  }
8346
18.9k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
18.9k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
18.9k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
18.9k
    }
8357
22.6k
    ctxt->hasPErefs = 1;
8358
22.6k
    xmlFree(name);
8359
22.6k
    *str = ptr;
8360
22.6k
    return(entity);
8361
22.6k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
53.3k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
53.3k
    const xmlChar *name = NULL;
8382
53.3k
    xmlChar *ExternalID = NULL;
8383
53.3k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
53.3k
    SKIP(9);
8389
8390
53.3k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
53.3k
    name = xmlParseName(ctxt);
8396
53.3k
    if (name == NULL) {
8397
496
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
496
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
496
    }
8400
53.3k
    ctxt->intSubName = name;
8401
8402
53.3k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
53.3k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
53.3k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
16.9k
        ctxt->hasExternalSubset = 1;
8411
16.9k
    }
8412
53.3k
    ctxt->extSubURI = URI;
8413
53.3k
    ctxt->extSubSystem = ExternalID;
8414
8415
53.3k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
53.3k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
53.3k
  (!ctxt->disableSAX))
8422
48.0k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
53.3k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
53.3k
    if (RAW == '[')
8431
31.0k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
22.2k
    if (RAW != '>') {
8437
8.04k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
8.04k
    }
8439
22.2k
    NEXT;
8440
22.2k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
32.2k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
32.2k
    if (RAW == '[') {
8457
32.2k
        int baseInputNr = ctxt->inputNr;
8458
32.2k
        ctxt->instate = XML_PARSER_DTD;
8459
32.2k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
32.2k
  SKIP_BLANKS;
8466
2.55M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
2.55M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
2.54M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
2.54M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
2.54M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
2.51M
          xmlParseMarkupDecl(ctxt);
8478
2.51M
            } else if (RAW == '%') {
8479
7.75k
          xmlParsePEReference(ctxt);
8480
14.7k
            } else {
8481
14.7k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
14.7k
                        "xmlParseInternalSubset: error detected in"
8483
14.7k
                        " Markup declaration\n");
8484
14.7k
                xmlHaltParser(ctxt);
8485
14.7k
                return;
8486
14.7k
            }
8487
2.52M
      SKIP_BLANKS;
8488
2.52M
  }
8489
17.5k
  if (RAW == ']') {
8490
13.9k
      NEXT;
8491
13.9k
      SKIP_BLANKS;
8492
13.9k
  }
8493
17.5k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
17.5k
    if (RAW != '>') {
8499
3.96k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
3.96k
  return;
8501
3.96k
    }
8502
13.6k
    NEXT;
8503
13.6k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
280k
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
280k
    const xmlChar *name;
8544
280k
    xmlChar *val;
8545
8546
280k
    *value = NULL;
8547
280k
    GROW;
8548
280k
    name = xmlParseName(ctxt);
8549
280k
    if (name == NULL) {
8550
80.2k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
80.2k
                 "error parsing attribute name\n");
8552
80.2k
        return(NULL);
8553
80.2k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
199k
    SKIP_BLANKS;
8559
199k
    if (RAW == '=') {
8560
173k
        NEXT;
8561
173k
  SKIP_BLANKS;
8562
173k
  val = xmlParseAttValue(ctxt);
8563
173k
  ctxt->instate = XML_PARSER_CONTENT;
8564
173k
    } else {
8565
26.2k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
26.2k
         "Specification mandates value for attribute %s\n", name);
8567
26.2k
  return(name);
8568
26.2k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
173k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
252
  if (!xmlCheckLanguageID(val)) {
8577
214
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
214
              "Malformed value for xml:lang : %s\n",
8579
214
        val, NULL);
8580
214
  }
8581
252
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
173k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
292
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
0
      *(ctxt->space) = 0;
8589
292
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
1
      *(ctxt->space) = 1;
8591
291
  else {
8592
291
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
291
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
291
                                 val, NULL);
8595
291
  }
8596
292
    }
8597
8598
173k
    *value = val;
8599
173k
    return(name);
8600
199k
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
348k
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
348k
    const xmlChar *name;
8634
348k
    const xmlChar *attname;
8635
348k
    xmlChar *attvalue;
8636
348k
    const xmlChar **atts = ctxt->atts;
8637
348k
    int nbatts = 0;
8638
348k
    int maxatts = ctxt->maxatts;
8639
348k
    int i;
8640
8641
348k
    if (RAW != '<') return(NULL);
8642
348k
    NEXT1;
8643
8644
348k
    name = xmlParseName(ctxt);
8645
348k
    if (name == NULL) {
8646
17.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
17.7k
       "xmlParseStartTag: invalid element name\n");
8648
17.7k
        return(NULL);
8649
17.7k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
330k
    SKIP_BLANKS;
8657
330k
    GROW;
8658
8659
432k
    while (((RAW != '>') &&
8660
432k
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
432k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
280k
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
280k
        if (attname == NULL) {
8664
80.2k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
80.2k
         "xmlParseStartTag: problem parsing attributes\n");
8666
80.2k
      break;
8667
80.2k
  }
8668
199k
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
230k
      for (i = 0; i < nbatts;i += 2) {
8675
60.0k
          if (xmlStrEqual(atts[i], attname)) {
8676
982
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
982
        xmlFree(attvalue);
8678
982
        goto failed;
8679
982
    }
8680
60.0k
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
170k
      if (atts == NULL) {
8685
17.6k
          maxatts = 22; /* allow for 10 attrs by default */
8686
17.6k
          atts = (const xmlChar **)
8687
17.6k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
17.6k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
17.6k
    ctxt->atts = atts;
8695
17.6k
    ctxt->maxatts = maxatts;
8696
152k
      } else if (nbatts + 4 > maxatts) {
8697
0
          const xmlChar **n;
8698
8699
0
          maxatts *= 2;
8700
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
0
               maxatts * sizeof(const xmlChar *));
8702
0
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
0
    atts = n;
8709
0
    ctxt->atts = atts;
8710
0
    ctxt->maxatts = maxatts;
8711
0
      }
8712
170k
      atts[nbatts++] = attname;
8713
170k
      atts[nbatts++] = attvalue;
8714
170k
      atts[nbatts] = NULL;
8715
170k
      atts[nbatts + 1] = NULL;
8716
170k
  } else {
8717
28.2k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
28.2k
  }
8720
8721
199k
failed:
8722
8723
199k
  GROW
8724
199k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
97.5k
      break;
8726
102k
  if (SKIP_BLANKS == 0) {
8727
49.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
49.5k
         "attributes construct error\n");
8729
49.5k
  }
8730
102k
  SHRINK;
8731
102k
        GROW;
8732
102k
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
330k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
330k
  (!ctxt->disableSAX)) {
8739
290k
  if (nbatts > 0)
8740
109k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
181k
  else
8742
181k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
290k
    }
8744
8745
330k
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
426k
        for (i = 1;i < nbatts;i+=2)
8748
170k
      if (atts[i] != NULL)
8749
170k
         xmlFree((xmlChar *) atts[i]);
8750
255k
    }
8751
330k
    return(name);
8752
330k
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
135k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
135k
    const xmlChar *name;
8772
8773
135k
    GROW;
8774
135k
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
135k
    SKIP(2);
8780
8781
135k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
135k
    GROW;
8787
135k
    SKIP_BLANKS;
8788
135k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
14.4k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
14.4k
    } else
8791
121k
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
135k
    if (name != (xmlChar*)1) {
8800
31.2k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
31.2k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
31.2k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
31.2k
                    ctxt->name, line, name);
8804
31.2k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
135k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
135k
  (!ctxt->disableSAX))
8811
112k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
135k
    namePop(ctxt);
8814
135k
    spacePop(ctxt);
8815
135k
    return;
8816
135k
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which can be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
856k
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
856k
    int i;
8858
8859
856k
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
1.02M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
310k
        if (ctxt->nsTab[i] == prefix) {
8862
130k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
1.28k
          return(NULL);
8864
128k
      return(ctxt->nsTab[i + 1]);
8865
130k
  }
8866
714k
    return(NULL);
8867
844k
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
1.65M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
1.65M
    const xmlChar *l, *p;
8886
8887
1.65M
    GROW;
8888
8889
1.65M
    l = xmlParseNCName(ctxt);
8890
1.65M
    if (l == NULL) {
8891
115k
        if (CUR == ':') {
8892
4.74k
      l = xmlParseName(ctxt);
8893
4.74k
      if (l != NULL) {
8894
4.74k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
4.74k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
4.74k
    *prefix = NULL;
8897
4.74k
    return(l);
8898
4.74k
      }
8899
4.74k
  }
8900
110k
        return(NULL);
8901
115k
    }
8902
1.54M
    if (CUR == ':') {
8903
199k
        NEXT;
8904
199k
  p = l;
8905
199k
  l = xmlParseNCName(ctxt);
8906
199k
  if (l == NULL) {
8907
10.9k
      xmlChar *tmp;
8908
8909
10.9k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
10.9k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
10.9k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
10.9k
      l = xmlParseNmtoken(ctxt);
8914
10.9k
      if (l == NULL) {
8915
7.42k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
7.42k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
7.42k
            } else {
8919
3.56k
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
3.56k
    xmlFree((char *)l);
8921
3.56k
      }
8922
10.9k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
10.9k
      if (tmp != NULL) xmlFree(tmp);
8924
10.9k
      *prefix = NULL;
8925
10.9k
      return(p);
8926
10.9k
  }
8927
188k
  if (CUR == ':') {
8928
6.76k
      xmlChar *tmp;
8929
8930
6.76k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
6.76k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
6.76k
      NEXT;
8933
6.76k
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
6.76k
      if (tmp != NULL) {
8935
3.62k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
3.62k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
3.62k
    if (tmp != NULL) xmlFree(tmp);
8938
3.62k
    *prefix = p;
8939
3.62k
    return(l);
8940
3.62k
      }
8941
3.14k
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
3.14k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
3.14k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
3.14k
      if (tmp != NULL) xmlFree(tmp);
8946
3.14k
      *prefix = p;
8947
3.14k
      return(l);
8948
3.14k
  }
8949
182k
  *prefix = p;
8950
182k
    } else
8951
1.34M
        *prefix = NULL;
8952
1.52M
    return(l);
8953
1.54M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
52.0k
                        xmlChar const *prefix) {
8971
52.0k
    const xmlChar *cmp;
8972
52.0k
    const xmlChar *in;
8973
52.0k
    const xmlChar *ret;
8974
52.0k
    const xmlChar *prefix2;
8975
8976
52.0k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
52.0k
    GROW;
8979
52.0k
    in = ctxt->input->cur;
8980
8981
52.0k
    cmp = prefix;
8982
114k
    while (*in != 0 && *in == *cmp) {
8983
62.6k
  ++in;
8984
62.6k
  ++cmp;
8985
62.6k
    }
8986
52.0k
    if ((*cmp == 0) && (*in == ':')) {
8987
43.7k
        in++;
8988
43.7k
  cmp = name;
8989
239k
  while (*in != 0 && *in == *cmp) {
8990
196k
      ++in;
8991
196k
      ++cmp;
8992
196k
  }
8993
43.7k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
31.9k
            ctxt->input->col += in - ctxt->input->cur;
8996
31.9k
      ctxt->input->cur = in;
8997
31.9k
      return((const xmlChar*) 1);
8998
31.9k
  }
8999
43.7k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
20.1k
    ret = xmlParseQName (ctxt, &prefix2);
9004
20.1k
    if ((ret == name) && (prefix == prefix2))
9005
697
  return((const xmlChar*) 1);
9006
19.4k
    return ret;
9007
20.1k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
356
    const xmlChar *oldbase = ctxt->input->base;\
9045
356
    GROW;\
9046
356
    if (ctxt->instate == XML_PARSER_EOF)\
9047
356
        return(NULL);\
9048
356
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
356
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
851k
{
9059
851k
    xmlChar limit = 0;
9060
851k
    const xmlChar *in = NULL, *start, *end, *last;
9061
851k
    xmlChar *ret = NULL;
9062
851k
    int line, col;
9063
851k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
151k
                    XML_MAX_HUGE_LENGTH :
9065
851k
                    XML_MAX_TEXT_LENGTH;
9066
9067
851k
    GROW;
9068
851k
    in = (xmlChar *) CUR_PTR;
9069
851k
    line = ctxt->input->line;
9070
851k
    col = ctxt->input->col;
9071
851k
    if (*in != '"' && *in != '\'') {
9072
5.98k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
5.98k
        return (NULL);
9074
5.98k
    }
9075
845k
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
845k
    limit = *in++;
9083
845k
    col++;
9084
845k
    end = ctxt->input->end;
9085
845k
    start = in;
9086
845k
    if (in >= end) {
9087
46
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
46
    }
9089
845k
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
81.3k
  while ((in < end) && (*in != limit) &&
9094
81.3k
         ((*in == 0x20) || (*in == 0x9) ||
9095
80.6k
          (*in == 0xA) || (*in == 0xD))) {
9096
17.7k
      if (*in == 0xA) {
9097
6.98k
          line++; col = 1;
9098
10.7k
      } else {
9099
10.7k
          col++;
9100
10.7k
      }
9101
17.7k
      in++;
9102
17.7k
      start = in;
9103
17.7k
      if (in >= end) {
9104
11
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
11
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
11
      }
9111
17.7k
  }
9112
575k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
575k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
512k
      col++;
9115
512k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
511k
      if (in >= end) {
9117
25
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
25
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
25
      }
9124
511k
  }
9125
63.5k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
64.8k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
75.0k
  while ((in < end) && (*in != limit) &&
9131
75.0k
         ((*in == 0x20) || (*in == 0x9) ||
9132
22.0k
          (*in == 0xA) || (*in == 0xD))) {
9133
11.4k
      if (*in == 0xA) {
9134
7.67k
          line++, col = 1;
9135
7.67k
      } else {
9136
3.80k
          col++;
9137
3.80k
      }
9138
11.4k
      in++;
9139
11.4k
      if (in >= end) {
9140
23
    const xmlChar *oldbase = ctxt->input->base;
9141
23
    GROW;
9142
23
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
23
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
23
    end = ctxt->input->end;
9151
23
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
23
      }
9157
11.4k
  }
9158
63.5k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
63.5k
  if (*in != limit) goto need_complex;
9164
781k
    } else {
9165
9.24M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
9.24M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
8.46M
      in++;
9168
8.46M
      col++;
9169
8.46M
      if (in >= end) {
9170
274
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
274
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
274
      }
9177
8.46M
  }
9178
781k
  last = in;
9179
781k
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
781k
  if (*in != limit) goto need_complex;
9185
781k
    }
9186
726k
    in++;
9187
726k
    col++;
9188
726k
    if (len != NULL) {
9189
588k
        if (alloc) *alloc = 0;
9190
588k
        *len = last - start;
9191
588k
        ret = (xmlChar *) start;
9192
588k
    } else {
9193
138k
        if (alloc) *alloc = 1;
9194
138k
        ret = xmlStrndup(start, last - start);
9195
138k
    }
9196
726k
    CUR_PTR = in;
9197
726k
    ctxt->input->line = line;
9198
726k
    ctxt->input->col = col;
9199
726k
    return ret;
9200
119k
need_complex:
9201
119k
    if (alloc) *alloc = 1;
9202
119k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
845k
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value.
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
768k
{
9226
768k
    const xmlChar *name;
9227
768k
    xmlChar *val, *internal_val = NULL;
9228
768k
    int normalize = 0;
9229
9230
768k
    *value = NULL;
9231
768k
    GROW;
9232
768k
    name = xmlParseQName(ctxt, prefix);
9233
768k
    if (name == NULL) {
9234
76.9k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
76.9k
                       "error parsing attribute name\n");
9236
76.9k
        return (NULL);
9237
76.9k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
691k
    if (ctxt->attsSpecial != NULL) {
9243
104k
        int type;
9244
9245
104k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
104k
                                                 pref, elem, *prefix, name);
9247
104k
        if (type != 0)
9248
63.7k
            normalize = 1;
9249
104k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
691k
    SKIP_BLANKS;
9255
691k
    if (RAW == '=') {
9256
666k
        NEXT;
9257
666k
        SKIP_BLANKS;
9258
666k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
666k
        if (val == NULL)
9260
3.14k
            return (NULL);
9261
662k
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
63.5k
      if (*alloc) {
9269
10.5k
          const xmlChar *val2;
9270
9271
10.5k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
10.5k
    if ((val2 != NULL) && (val2 != val)) {
9273
437
        xmlFree(val);
9274
437
        val = (xmlChar *) val2;
9275
437
    }
9276
10.5k
      }
9277
63.5k
  }
9278
662k
        ctxt->instate = XML_PARSER_CONTENT;
9279
662k
    } else {
9280
25.5k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
25.5k
                          "Specification mandates value for attribute %s\n",
9282
25.5k
                          name);
9283
25.5k
        return (name);
9284
25.5k
    }
9285
9286
662k
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
6.29k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
136
            internal_val = xmlStrndup(val, *len);
9294
136
            if (!xmlCheckLanguageID(internal_val)) {
9295
108
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
108
                              "Malformed value for xml:lang : %s\n",
9297
108
                              internal_val, NULL);
9298
108
            }
9299
136
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
6.29k
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
476
            internal_val = xmlStrndup(val, *len);
9306
476
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
0
                *(ctxt->space) = 0;
9308
476
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
28
                *(ctxt->space) = 1;
9310
448
            else {
9311
448
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
448
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
448
                              internal_val, NULL);
9314
448
            }
9315
476
        }
9316
6.29k
        if (internal_val) {
9317
612
            xmlFree(internal_val);
9318
612
        }
9319
6.29k
    }
9320
9321
662k
    *value = val;
9322
662k
    return (name);
9323
691k
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
868k
                  const xmlChar **URI, int *tlen) {
9356
868k
    const xmlChar *localname;
9357
868k
    const xmlChar *prefix;
9358
868k
    const xmlChar *attname;
9359
868k
    const xmlChar *aprefix;
9360
868k
    const xmlChar *nsname;
9361
868k
    xmlChar *attvalue;
9362
868k
    const xmlChar **atts = ctxt->atts;
9363
868k
    int maxatts = ctxt->maxatts;
9364
868k
    int nratts, nbatts, nbdef, inputid;
9365
868k
    int i, j, nbNs, attval;
9366
868k
    unsigned long cur;
9367
868k
    int nsNr = ctxt->nsNr;
9368
9369
868k
    if (RAW != '<') return(NULL);
9370
868k
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
868k
    SHRINK;
9380
868k
    cur = ctxt->input->cur - ctxt->input->base;
9381
868k
    inputid = ctxt->input->id;
9382
868k
    nbatts = 0;
9383
868k
    nratts = 0;
9384
868k
    nbdef = 0;
9385
868k
    nbNs = 0;
9386
868k
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
868k
    ctxt->nsNr = nsNr;
9389
9390
868k
    localname = xmlParseQName(ctxt, &prefix);
9391
868k
    if (localname == NULL) {
9392
32.0k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
32.0k
           "StartTag: invalid element name\n");
9394
32.0k
        return(NULL);
9395
32.0k
    }
9396
836k
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
836k
    SKIP_BLANKS;
9404
836k
    GROW;
9405
9406
1.08M
    while (((RAW != '>') &&
9407
1.08M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
1.08M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
768k
  int len = -1, alloc = 0;
9410
9411
768k
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
768k
                               &aprefix, &attvalue, &len, &alloc);
9413
768k
        if (attname == NULL) {
9414
80.0k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
80.0k
           "xmlParseStartTag: problem parsing attributes\n");
9416
80.0k
      break;
9417
80.0k
  }
9418
688k
        if (attvalue == NULL)
9419
25.5k
            goto next_attr;
9420
662k
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
662k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
12.6k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
12.6k
            xmlURIPtr uri;
9425
9426
12.6k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
12.6k
            if (*URL != 0) {
9434
12.3k
                uri = xmlParseURI((const char *) URL);
9435
12.3k
                if (uri == NULL) {
9436
4.88k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
4.88k
                             "xmlns: '%s' is not a valid URI\n",
9438
4.88k
                                       URL, NULL, NULL);
9439
7.45k
                } else {
9440
7.45k
                    if (uri->scheme == NULL) {
9441
1.43k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
1.43k
                                  "xmlns: URI %s is not absolute\n",
9443
1.43k
                                  URL, NULL, NULL);
9444
1.43k
                    }
9445
7.45k
                    xmlFreeURI(uri);
9446
7.45k
                }
9447
12.3k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
12.3k
                if ((len == 29) &&
9456
12.3k
                    (xmlStrEqual(URL,
9457
130
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
6
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
6
                         "reuse of the xmlns namespace name is forbidden\n",
9460
6
                             NULL, NULL, NULL);
9461
6
                    goto next_attr;
9462
6
                }
9463
12.3k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
12.8k
            for (j = 1;j <= nbNs;j++)
9468
531
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
276
                    break;
9470
12.6k
            if (j <= nbNs)
9471
276
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
12.3k
            else
9473
12.3k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
650k
        } else if (aprefix == ctxt->str_xmlns) {
9476
26.6k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
26.6k
            xmlURIPtr uri;
9478
9479
26.6k
            if (attname == ctxt->str_xml) {
9480
44
                if (URL != ctxt->str_xml_ns) {
9481
44
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
44
                             "xml namespace prefix mapped to wrong URI\n",
9483
44
                             NULL, NULL, NULL);
9484
44
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
44
                goto next_attr;
9489
44
            }
9490
26.6k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
26.6k
            if (attname == ctxt->str_xmlns) {
9499
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
0
                         "redefinition of the xmlns prefix is forbidden\n",
9501
0
                         NULL, NULL, NULL);
9502
0
                goto next_attr;
9503
0
            }
9504
26.6k
            if ((len == 29) &&
9505
26.6k
                (xmlStrEqual(URL,
9506
1.54k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
10
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
10
                         "reuse of the xmlns namespace name is forbidden\n",
9509
10
                         NULL, NULL, NULL);
9510
10
                goto next_attr;
9511
10
            }
9512
26.6k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
596
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
596
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
596
                              attname, NULL, NULL);
9516
596
                goto next_attr;
9517
26.0k
            } else {
9518
26.0k
                uri = xmlParseURI((const char *) URL);
9519
26.0k
                if (uri == NULL) {
9520
6.77k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
6.77k
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
6.77k
                                       attname, URL, NULL);
9523
19.2k
                } else {
9524
19.2k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
1.47k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
1.47k
                                  "xmlns:%s: URI %s is not absolute\n",
9527
1.47k
                                  attname, URL, NULL);
9528
1.47k
                    }
9529
19.2k
                    xmlFreeURI(uri);
9530
19.2k
                }
9531
26.0k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
33.0k
            for (j = 1;j <= nbNs;j++)
9537
7.73k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
686
                    break;
9539
26.0k
            if (j <= nbNs)
9540
686
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
25.3k
            else
9542
25.3k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
623k
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
623k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
23.1k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
23.1k
                maxatts = ctxt->maxatts;
9553
23.1k
                atts = ctxt->atts;
9554
23.1k
            }
9555
623k
            ctxt->attallocs[nratts++] = alloc;
9556
623k
            atts[nbatts++] = attname;
9557
623k
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
623k
            if (alloc)
9565
62.6k
                atts[nbatts++] = NULL;
9566
560k
            else
9567
560k
                atts[nbatts++] = ctxt->input->base;
9568
623k
            atts[nbatts++] = attvalue;
9569
623k
            attvalue += len;
9570
623k
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
623k
            if (alloc != 0) attval = 1;
9575
623k
            attvalue = NULL; /* moved into atts */
9576
623k
        }
9577
9578
688k
next_attr:
9579
688k
        if ((attvalue != NULL) && (alloc != 0)) {
9580
12.1k
            xmlFree(attvalue);
9581
12.1k
            attvalue = NULL;
9582
12.1k
        }
9583
9584
688k
  GROW
9585
688k
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
688k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
385k
      break;
9589
302k
  if (SKIP_BLANKS == 0) {
9590
56.9k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
56.9k
         "attributes construct error\n");
9592
56.9k
      break;
9593
56.9k
  }
9594
245k
        GROW;
9595
245k
    }
9596
9597
836k
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
1.46M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
623k
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
560k
            const xmlChar *old = atts[i+2];
9612
560k
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
560k
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
560k
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
560k
        }
9616
623k
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
836k
    if (ctxt->attsDefault != NULL) {
9622
126k
        xmlDefAttrsPtr defaults;
9623
9624
126k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
126k
  if (defaults != NULL) {
9626
27.6k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
19.0k
          attname = defaults->values[5 * i];
9628
19.0k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
19.0k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
182
        for (j = 1;j <= nbNs;j++)
9638
146
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
83
          break;
9640
119
              if (j <= nbNs) continue;
9641
9642
36
        nsname = xmlGetNamespace(ctxt, NULL);
9643
36
        if (nsname != defaults->values[5 * i + 2]) {
9644
35
      if (nsPush(ctxt, NULL,
9645
35
                 defaults->values[5 * i + 2]) > 0)
9646
35
          nbNs++;
9647
35
        }
9648
18.9k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
172
        for (j = 1;j <= nbNs;j++)
9653
48
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
33
          break;
9655
157
              if (j <= nbNs) continue;
9656
9657
124
        nsname = xmlGetNamespace(ctxt, attname);
9658
124
        if (nsname != defaults->values[5 * i + 2]) {
9659
109
      if (nsPush(ctxt, attname,
9660
109
                 defaults->values[5 * i + 2]) > 0)
9661
109
          nbNs++;
9662
109
        }
9663
18.7k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
51.3k
        for (j = 0;j < nbatts;j+=5) {
9668
32.9k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
373
          break;
9670
32.9k
        }
9671
18.7k
        if (j < nbatts) continue;
9672
9673
18.4k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
291
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
291
      maxatts = ctxt->maxatts;
9679
291
      atts = ctxt->atts;
9680
291
        }
9681
18.4k
        atts[nbatts++] = attname;
9682
18.4k
        atts[nbatts++] = aprefix;
9683
18.4k
        if (aprefix == NULL)
9684
15.8k
      atts[nbatts++] = NULL;
9685
2.52k
        else
9686
2.52k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
18.4k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
18.4k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
18.4k
        if ((ctxt->standalone == 1) &&
9690
18.4k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
18.4k
        nbdef++;
9696
18.4k
    }
9697
19.0k
      }
9698
8.55k
  }
9699
126k
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
1.47M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
642k
  if (atts[i + 1] != NULL) {
9709
17.1k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
17.1k
      if (nsname == NULL) {
9711
6.77k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
6.77k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
6.77k
        atts[i + 1], atts[i], localname);
9714
6.77k
      }
9715
17.1k
      atts[i + 2] = nsname;
9716
17.1k
  } else
9717
624k
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
914k
        for (j = 0; j < i;j += 5) {
9725
273k
      if (atts[i] == atts[j]) {
9726
1.35k
          if (atts[i+1] == atts[j+1]) {
9727
959
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
959
        break;
9729
959
    }
9730
395
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
15
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
15
           "Namespaced Attribute %s in '%s' redefined\n",
9733
15
           atts[i], nsname, NULL);
9734
15
        break;
9735
15
    }
9736
395
      }
9737
273k
  }
9738
642k
    }
9739
9740
836k
    nsname = xmlGetNamespace(ctxt, prefix);
9741
836k
    if ((prefix != NULL) && (nsname == NULL)) {
9742
68.4k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
68.4k
           "Namespace prefix %s on %s is not defined\n",
9744
68.4k
     prefix, localname, NULL);
9745
68.4k
    }
9746
836k
    *pref = prefix;
9747
836k
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
836k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
836k
  (!ctxt->disableSAX)) {
9754
731k
  if (nbNs > 0)
9755
23.3k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
23.3k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
23.3k
        nbatts / 5, nbdef, atts);
9758
708k
  else
9759
708k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
708k
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
731k
    }
9762
9763
836k
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
836k
    if (attval != 0) {
9768
135k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
73.6k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
62.6k
          xmlFree((xmlChar *) atts[i]);
9771
61.3k
    }
9772
9773
836k
    return(localname);
9774
836k
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
357k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
357k
    const xmlChar *name;
9794
9795
357k
    GROW;
9796
357k
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
357k
    SKIP(2);
9801
9802
357k
    if (tag->prefix == NULL)
9803
305k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
52.0k
    else
9805
52.0k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
357k
    GROW;
9811
357k
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
357k
    SKIP_BLANKS;
9814
357k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
23.2k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
23.2k
    } else
9817
334k
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
357k
    if (name != (xmlChar*)1) {
9826
50.0k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
50.0k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
50.0k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
50.0k
                    ctxt->name, tag->line, name);
9830
50.0k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
357k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
357k
  (!ctxt->disableSAX))
9837
303k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
303k
                                tag->URI);
9839
9840
357k
    spacePop(ctxt);
9841
357k
    if (tag->nsNr != 0)
9842
5.96k
  nsPop(ctxt, tag->nsNr);
9843
357k
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
4.80k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
4.80k
    xmlChar *buf = NULL;
9864
4.80k
    int len = 0;
9865
4.80k
    int size = XML_PARSER_BUFFER_SIZE;
9866
4.80k
    int r, rl;
9867
4.80k
    int s, sl;
9868
4.80k
    int cur, l;
9869
4.80k
    int count = 0;
9870
4.80k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
1.01k
                    XML_MAX_HUGE_LENGTH :
9872
4.80k
                    XML_MAX_TEXT_LENGTH;
9873
9874
4.80k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
4.80k
    SKIP(3);
9877
9878
4.80k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
4.80k
    SKIP(6);
9881
9882
4.80k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
4.80k
    r = CUR_CHAR(rl);
9884
4.80k
    if (!IS_CHAR(r)) {
9885
76
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
76
        goto out;
9887
76
    }
9888
4.73k
    NEXTL(rl);
9889
4.73k
    s = CUR_CHAR(sl);
9890
4.73k
    if (!IS_CHAR(s)) {
9891
39
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
39
        goto out;
9893
39
    }
9894
4.69k
    NEXTL(sl);
9895
4.69k
    cur = CUR_CHAR(l);
9896
4.69k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
4.69k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
1.01M
    while (IS_CHAR(cur) &&
9902
1.01M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
1.01M
  if (len + 5 >= size) {
9904
4.37k
      xmlChar *tmp;
9905
9906
4.37k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
4.37k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
4.37k
      buf = tmp;
9912
4.37k
      size *= 2;
9913
4.37k
  }
9914
1.01M
  COPY_BUF(rl,buf,len,r);
9915
1.01M
  r = s;
9916
1.01M
  rl = sl;
9917
1.01M
  s = cur;
9918
1.01M
  sl = l;
9919
1.01M
  count++;
9920
1.01M
  if (count > 50) {
9921
18.2k
      SHRINK;
9922
18.2k
      GROW;
9923
18.2k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
18.2k
      count = 0;
9927
18.2k
  }
9928
1.01M
  NEXTL(l);
9929
1.01M
  cur = CUR_CHAR(l);
9930
1.01M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
1.01M
    }
9936
4.69k
    buf[len] = 0;
9937
4.69k
    if (cur != '>') {
9938
741
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
741
                       "CData section not finished\n%.50s\n", buf);
9940
741
        goto out;
9941
741
    }
9942
3.95k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
3.95k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
2.92k
  if (ctxt->sax->cdataBlock != NULL)
9949
2.07k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
855
  else if (ctxt->sax->characters != NULL)
9951
855
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
2.92k
    }
9953
9954
4.80k
out:
9955
4.80k
    if (ctxt->instate != XML_PARSER_EOF)
9956
4.80k
        ctxt->instate = XML_PARSER_CONTENT;
9957
4.80k
    xmlFree(buf);
9958
4.80k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
26.0k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
26.0k
    int nameNr = ctxt->nameNr;
9971
9972
26.0k
    GROW;
9973
1.75M
    while ((RAW != 0) &&
9974
1.75M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
1.73M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
1.73M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
5.45k
      xmlParsePI(ctxt);
9982
5.45k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
1.72M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
4.80k
      xmlParseCDSect(ctxt);
9990
4.80k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
1.72M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
1.72M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
14.5k
      xmlParseComment(ctxt);
9998
14.5k
      ctxt->instate = XML_PARSER_CONTENT;
9999
14.5k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
1.70M
  else if (*cur == '<') {
10005
674k
            if (NXT(1) == '/') {
10006
211k
                if (ctxt->nameNr <= nameNr)
10007
5.89k
                    break;
10008
206k
          xmlParseElementEnd(ctxt);
10009
462k
            } else {
10010
462k
          xmlParseElementStart(ctxt);
10011
462k
            }
10012
674k
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
1.03M
  else if (*cur == '&') {
10020
277k
      xmlParseReference(ctxt);
10021
277k
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
755k
  else {
10027
755k
      xmlParseCharData(ctxt, 0);
10028
755k
  }
10029
10030
1.72M
  GROW;
10031
1.72M
  SHRINK;
10032
1.72M
    }
10033
26.0k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
7.44k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
7.44k
    int nameNr = ctxt->nameNr;
10047
10048
7.44k
    xmlParseContentInternal(ctxt);
10049
10050
7.44k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
38
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
38
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
38
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
38
                "Premature end of data in tag %s line %d\n",
10055
38
    name, line, NULL);
10056
38
    }
10057
7.44k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
33.3k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
33.3k
    if (xmlParseElementStart(ctxt) != 0)
10078
14.7k
        return;
10079
10080
18.6k
    xmlParseContentInternal(ctxt);
10081
18.6k
    if (ctxt->instate == XML_PARSER_EOF)
10082
24
  return;
10083
10084
18.6k
    if (CUR == 0) {
10085
12.7k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
12.7k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
12.7k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
12.7k
                "Premature end of data in tag %s line %d\n",
10089
12.7k
    name, line, NULL);
10090
12.7k
        return;
10091
12.7k
    }
10092
10093
5.84k
    xmlParseElementEnd(ctxt);
10094
5.84k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
495k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
495k
    const xmlChar *name;
10108
495k
    const xmlChar *prefix = NULL;
10109
495k
    const xmlChar *URI = NULL;
10110
495k
    xmlParserNodeInfo node_info;
10111
495k
    int line, tlen = 0;
10112
495k
    xmlNodePtr ret;
10113
495k
    int nsNr = ctxt->nsNr;
10114
10115
495k
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
495k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
495k
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
495k
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
495k
    else if (*ctxt->space == -2)
10134
75.9k
  spacePush(ctxt, -1);
10135
419k
    else
10136
419k
  spacePush(ctxt, *ctxt->space);
10137
10138
495k
    line = ctxt->input->line;
10139
495k
#ifdef LIBXML_SAX1_ENABLED
10140
495k
    if (ctxt->sax2)
10141
352k
#endif /* LIBXML_SAX1_ENABLED */
10142
352k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
143k
#ifdef LIBXML_SAX1_ENABLED
10144
143k
    else
10145
143k
  name = xmlParseStartTag(ctxt);
10146
495k
#endif /* LIBXML_SAX1_ENABLED */
10147
495k
    if (ctxt->instate == XML_PARSER_EOF)
10148
25
  return(-1);
10149
495k
    if (name == NULL) {
10150
43.8k
  spacePop(ctxt);
10151
43.8k
        return(-1);
10152
43.8k
    }
10153
451k
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
451k
    ret = ctxt->node;
10155
10156
451k
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
451k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
451k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
451k
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
451k
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
110k
        SKIP(2);
10172
110k
  if (ctxt->sax2) {
10173
93.0k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
93.0k
    (!ctxt->disableSAX))
10175
75.6k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
93.0k
#ifdef LIBXML_SAX1_ENABLED
10177
93.0k
  } else {
10178
17.8k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
17.8k
    (!ctxt->disableSAX))
10180
13.0k
    ctxt->sax->endElement(ctxt->userData, name);
10181
17.8k
#endif /* LIBXML_SAX1_ENABLED */
10182
17.8k
  }
10183
110k
  namePop(ctxt);
10184
110k
  spacePop(ctxt);
10185
110k
  if (nsNr != ctxt->nsNr)
10186
459
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
110k
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
110k
  return(1);
10195
110k
    }
10196
340k
    if (RAW == '>') {
10197
268k
        NEXT1;
10198
268k
    } else {
10199
72.2k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
72.2k
         "Couldn't find end of Start Tag %s line %d\n",
10201
72.2k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
72.2k
  nodePop(ctxt);
10207
72.2k
  namePop(ctxt);
10208
72.2k
  spacePop(ctxt);
10209
72.2k
  if (nsNr != ctxt->nsNr)
10210
3.26k
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
72.2k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
72.2k
  return(-1);
10223
72.2k
    }
10224
10225
268k
    return(0);
10226
340k
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
211k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
211k
    xmlParserNodeInfo node_info;
10237
211k
    xmlNodePtr ret = ctxt->node;
10238
10239
211k
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
211k
    if (ctxt->sax2) {
10249
149k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
149k
  namePop(ctxt);
10251
149k
    }
10252
62.0k
#ifdef LIBXML_SAX1_ENABLED
10253
62.0k
    else
10254
62.0k
  xmlParseEndTag1(ctxt, 0);
10255
211k
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
211k
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
211k
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
59.2k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
59.2k
    xmlChar *buf = NULL;
10286
59.2k
    int len = 0;
10287
59.2k
    int size = 10;
10288
59.2k
    xmlChar cur;
10289
10290
59.2k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
59.2k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
59.2k
    cur = CUR;
10296
59.2k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
3.44k
  xmlFree(buf);
10298
3.44k
  return(NULL);
10299
3.44k
    }
10300
55.7k
    buf[len++] = cur;
10301
55.7k
    NEXT;
10302
55.7k
    cur=CUR;
10303
55.7k
    if (cur != '.') {
10304
1.19k
  xmlFree(buf);
10305
1.19k
  return(NULL);
10306
1.19k
    }
10307
54.5k
    buf[len++] = cur;
10308
54.5k
    NEXT;
10309
54.5k
    cur=CUR;
10310
115k
    while ((cur >= '0') && (cur <= '9')) {
10311
61.1k
  if (len + 1 >= size) {
10312
318
      xmlChar *tmp;
10313
10314
318
      size *= 2;
10315
318
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
318
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
318
      buf = tmp;
10322
318
  }
10323
61.1k
  buf[len++] = cur;
10324
61.1k
  NEXT;
10325
61.1k
  cur=CUR;
10326
61.1k
    }
10327
54.5k
    buf[len] = 0;
10328
54.5k
    return(buf);
10329
54.5k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
75.5k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
75.5k
    xmlChar *version = NULL;
10349
10350
75.5k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
62.3k
  SKIP(7);
10352
62.3k
  SKIP_BLANKS;
10353
62.3k
  if (RAW != '=') {
10354
2.31k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
2.31k
      return(NULL);
10356
2.31k
        }
10357
60.0k
  NEXT;
10358
60.0k
  SKIP_BLANKS;
10359
60.0k
  if (RAW == '"') {
10360
54.7k
      NEXT;
10361
54.7k
      version = xmlParseVersionNum(ctxt);
10362
54.7k
      if (RAW != '"') {
10363
5.69k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
5.69k
      } else
10365
49.1k
          NEXT;
10366
54.7k
  } else if (RAW == '\''){
10367
4.43k
      NEXT;
10368
4.43k
      version = xmlParseVersionNum(ctxt);
10369
4.43k
      if (RAW != '\'') {
10370
699
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
699
      } else
10372
3.74k
          NEXT;
10373
4.43k
  } else {
10374
822
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
822
  }
10376
60.0k
    }
10377
73.2k
    return(version);
10378
75.5k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
12.7k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
12.7k
    xmlChar *buf = NULL;
10395
12.7k
    int len = 0;
10396
12.7k
    int size = 10;
10397
12.7k
    xmlChar cur;
10398
10399
12.7k
    cur = CUR;
10400
12.7k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
12.7k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
12.6k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
12.6k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
12.6k
  buf[len++] = cur;
10409
12.6k
  NEXT;
10410
12.6k
  cur = CUR;
10411
109k
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
109k
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
109k
         ((cur >= '0') && (cur <= '9')) ||
10414
109k
         (cur == '.') || (cur == '_') ||
10415
109k
         (cur == '-')) {
10416
96.8k
      if (len + 1 >= size) {
10417
4.83k
          xmlChar *tmp;
10418
10419
4.83k
    size *= 2;
10420
4.83k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
4.83k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
4.83k
    buf = tmp;
10427
4.83k
      }
10428
96.8k
      buf[len++] = cur;
10429
96.8k
      NEXT;
10430
96.8k
      cur = CUR;
10431
96.8k
      if (cur == 0) {
10432
91
          SHRINK;
10433
91
    GROW;
10434
91
    cur = CUR;
10435
91
      }
10436
96.8k
        }
10437
12.6k
  buf[len] = 0;
10438
12.6k
    } else {
10439
125
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
125
    }
10441
12.7k
    return(buf);
10442
12.7k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
53.6k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
53.6k
    xmlChar *encoding = NULL;
10462
10463
53.6k
    SKIP_BLANKS;
10464
53.6k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
12.9k
  SKIP(8);
10466
12.9k
  SKIP_BLANKS;
10467
12.9k
  if (RAW != '=') {
10468
58
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
58
      return(NULL);
10470
58
        }
10471
12.9k
  NEXT;
10472
12.9k
  SKIP_BLANKS;
10473
12.9k
  if (RAW == '"') {
10474
10.2k
      NEXT;
10475
10.2k
      encoding = xmlParseEncName(ctxt);
10476
10.2k
      if (RAW != '"') {
10477
620
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
620
    xmlFree((xmlChar *) encoding);
10479
620
    return(NULL);
10480
620
      } else
10481
9.65k
          NEXT;
10482
10.2k
  } else if (RAW == '\''){
10483
2.46k
      NEXT;
10484
2.46k
      encoding = xmlParseEncName(ctxt);
10485
2.46k
      if (RAW != '\'') {
10486
279
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
279
    xmlFree((xmlChar *) encoding);
10488
279
    return(NULL);
10489
279
      } else
10490
2.18k
          NEXT;
10491
2.46k
  } else {
10492
177
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
177
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
12.0k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
3.70k
      xmlFree((xmlChar *) encoding);
10500
3.70k
            return(NULL);
10501
3.70k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
8.31k
        if ((encoding != NULL) &&
10508
8.31k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
8.20k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
21
      if ((ctxt->encoding == NULL) &&
10517
21
          (ctxt->input->buf != NULL) &&
10518
21
          (ctxt->input->buf->encoder == NULL)) {
10519
21
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
21
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
21
      }
10522
21
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
21
      ctxt->encoding = encoding;
10525
21
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
8.29k
        else if ((encoding != NULL) &&
10530
8.29k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
8.18k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
4.57k
      if (ctxt->encoding != NULL)
10533
0
    xmlFree((xmlChar *) ctxt->encoding);
10534
4.57k
      ctxt->encoding = encoding;
10535
4.57k
  }
10536
3.71k
  else if (encoding != NULL) {
10537
3.61k
      xmlCharEncodingHandlerPtr handler;
10538
10539
3.61k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
3.61k
      ctxt->input->encoding = encoding;
10542
10543
3.61k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
3.61k
      if (handler != NULL) {
10545
3.38k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
18
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
18
        return(NULL);
10549
18
    }
10550
3.38k
      } else {
10551
228
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
228
      "Unsupported encoding %s\n", encoding);
10553
228
    return(NULL);
10554
228
      }
10555
3.61k
  }
10556
8.31k
    }
10557
48.7k
    return(encoding);
10558
53.6k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
49.7k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
49.7k
    int standalone = -2;
10596
10597
49.7k
    SKIP_BLANKS;
10598
49.7k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
12.2k
  SKIP(10);
10600
12.2k
        SKIP_BLANKS;
10601
12.2k
  if (RAW != '=') {
10602
378
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
378
      return(standalone);
10604
378
        }
10605
11.8k
  NEXT;
10606
11.8k
  SKIP_BLANKS;
10607
11.8k
        if (RAW == '\''){
10608
1.06k
      NEXT;
10609
1.06k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
1.00k
          standalone = 0;
10611
1.00k
                SKIP(2);
10612
1.00k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
63
                 (NXT(2) == 's')) {
10614
24
          standalone = 1;
10615
24
    SKIP(3);
10616
39
            } else {
10617
39
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
39
      }
10619
1.06k
      if (RAW != '\'') {
10620
60
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
60
      } else
10622
1.00k
          NEXT;
10623
10.8k
  } else if (RAW == '"'){
10624
10.7k
      NEXT;
10625
10.7k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
4.37k
          standalone = 0;
10627
4.37k
    SKIP(2);
10628
6.38k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
6.38k
                 (NXT(2) == 's')) {
10630
5.97k
          standalone = 1;
10631
5.97k
                SKIP(3);
10632
5.97k
            } else {
10633
408
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
408
      }
10635
10.7k
      if (RAW != '"') {
10636
546
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
546
      } else
10638
10.2k
          NEXT;
10639
10.7k
  } else {
10640
45
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
45
        }
10642
11.8k
    }
10643
49.3k
    return(standalone);
10644
49.7k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
73.9k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
73.9k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
73.9k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
73.9k
    SKIP(5);
10672
10673
73.9k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
73.9k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
73.9k
    version = xmlParseVersionInfo(ctxt);
10683
73.9k
    if (version == NULL) {
10684
20.7k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
53.1k
    } else {
10686
53.1k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
5.66k
      if (ctxt->options & XML_PARSE_OLD10) {
10691
1.80k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
1.80k
                "Unsupported version '%s'\n",
10693
1.80k
                version);
10694
3.86k
      } else {
10695
3.86k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
1.27k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
1.27k
                      "Unsupported version '%s'\n",
10698
1.27k
          version, NULL);
10699
2.58k
    } else {
10700
2.58k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
2.58k
              "Unsupported version '%s'\n",
10702
2.58k
              version);
10703
2.58k
    }
10704
3.86k
      }
10705
5.66k
  }
10706
53.1k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
53.1k
  ctxt->version = version;
10709
53.1k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
73.9k
    if (!IS_BLANK_CH(RAW)) {
10715
45.9k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
21.9k
      SKIP(2);
10717
21.9k
      return;
10718
21.9k
  }
10719
24.0k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
24.0k
    }
10721
51.9k
    xmlParseEncodingDecl(ctxt);
10722
51.9k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
51.9k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
222
        return;
10728
222
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
51.7k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
2.57k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
2.06k
      SKIP(2);
10736
2.06k
      return;
10737
2.06k
  }
10738
513
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
513
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
49.7k
    GROW;
10745
10746
49.7k
    SKIP_BLANKS;
10747
49.7k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
49.7k
    SKIP_BLANKS;
10750
49.7k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
19.9k
        SKIP(2);
10752
29.7k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
114
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
114
  NEXT;
10756
29.6k
    } else {
10757
29.6k
        int c;
10758
10759
29.6k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
1.12M
        while ((c = CUR) != 0) {
10761
1.12M
            NEXT;
10762
1.12M
            if (c == '>')
10763
28.0k
                break;
10764
1.12M
        }
10765
29.6k
    }
10766
49.7k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
94.0k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
101k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
101k
        SKIP_BLANKS;
10783
101k
        GROW;
10784
101k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
5.73k
      xmlParsePI(ctxt);
10786
96.1k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
2.08k
      xmlParseComment(ctxt);
10788
94.0k
        } else {
10789
94.0k
            break;
10790
94.0k
        }
10791
101k
    }
10792
94.0k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
50.7k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
50.7k
    xmlChar start[4];
10812
50.7k
    xmlCharEncoding enc;
10813
10814
50.7k
    xmlInitParser();
10815
10816
50.7k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
50.7k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
50.7k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
50.7k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
50.7k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
50.7k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
50.7k
    if ((ctxt->encoding == NULL) &&
10835
50.7k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
49.0k
  start[0] = RAW;
10842
49.0k
  start[1] = NXT(1);
10843
49.0k
  start[2] = NXT(2);
10844
49.0k
  start[3] = NXT(3);
10845
49.0k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
49.0k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
28.5k
      xmlSwitchEncoding(ctxt, enc);
10848
28.5k
  }
10849
49.0k
    }
10850
10851
10852
50.7k
    if (CUR == 0) {
10853
401
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
401
  return(-1);
10855
401
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
50.3k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
4.87k
       GROW;
10865
4.87k
    }
10866
50.3k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
24.6k
  xmlParseXMLDecl(ctxt);
10872
24.6k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
24.6k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
74
      return(-1);
10878
74
  }
10879
24.5k
  ctxt->standalone = ctxt->input->standalone;
10880
24.5k
  SKIP_BLANKS;
10881
25.6k
    } else {
10882
25.6k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
25.6k
    }
10884
50.2k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
45.1k
        ctxt->sax->startDocument(ctxt->userData);
10886
50.2k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
50.2k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
50.2k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
50.2k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
50.2k
    GROW;
10903
50.2k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
19.1k
  ctxt->inSubset = 1;
10906
19.1k
  xmlParseDocTypeDecl(ctxt);
10907
19.1k
  if (RAW == '[') {
10908
12.8k
      ctxt->instate = XML_PARSER_DTD;
10909
12.8k
      xmlParseInternalSubset(ctxt);
10910
12.8k
      if (ctxt->instate == XML_PARSER_EOF)
10911
7.99k
    return(-1);
10912
12.8k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
11.1k
  ctxt->inSubset = 2;
10918
11.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
11.1k
      (!ctxt->disableSAX))
10920
9.06k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
9.06k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
11.1k
  if (ctxt->instate == XML_PARSER_EOF)
10923
621
      return(-1);
10924
10.5k
  ctxt->inSubset = 0;
10925
10926
10.5k
        xmlCleanSpecialAttr(ctxt);
10927
10928
10.5k
  ctxt->instate = XML_PARSER_PROLOG;
10929
10.5k
  xmlParseMisc(ctxt);
10930
10.5k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
41.6k
    GROW;
10936
41.6k
    if (RAW != '<') {
10937
8.25k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
8.25k
           "Start tag expected, '<' not found\n");
10939
33.3k
    } else {
10940
33.3k
  ctxt->instate = XML_PARSER_CONTENT;
10941
33.3k
  xmlParseElement(ctxt);
10942
33.3k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
33.3k
  xmlParseMisc(ctxt);
10949
10950
33.3k
  if (RAW != 0) {
10951
14.9k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
14.9k
  }
10953
33.3k
  ctxt->instate = XML_PARSER_EOF;
10954
33.3k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
41.6k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
41.6k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
41.6k
    if ((ctxt->myDoc != NULL) &&
10966
41.6k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
91
  xmlFreeDoc(ctxt->myDoc);
10968
91
  ctxt->myDoc = NULL;
10969
91
    }
10970
10971
41.6k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
1.91k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
1.91k
  if (ctxt->valid)
10974
1.70k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
1.91k
  if (ctxt->nsWellFormed)
10976
1.83k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
1.91k
  if (ctxt->options & XML_PARSE_OLD10)
10978
125
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
1.91k
    }
10980
41.6k
    if (! ctxt->wellFormed) {
10981
39.7k
  ctxt->valid = 0;
10982
39.7k
  return(-1);
10983
39.7k
    }
10984
1.91k
    return(0);
10985
41.6k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
548k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
548k
    const xmlChar *cur;
11110
11111
548k
    if (ctxt->checkIndex == 0) {
11112
532k
        cur = ctxt->input->cur + 1;
11113
532k
    } else {
11114
15.9k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
15.9k
    }
11116
11117
548k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
18.4k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
18.4k
        return(0);
11120
529k
    } else {
11121
529k
        ctxt->checkIndex = 0;
11122
529k
        return(1);
11123
529k
    }
11124
548k
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
198k
                     const char *str, size_t strLen) {
11138
198k
    const xmlChar *cur, *term;
11139
11140
198k
    if (ctxt->checkIndex == 0) {
11141
106k
        cur = ctxt->input->cur + startDelta;
11142
106k
    } else {
11143
92.3k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
92.3k
    }
11145
11146
198k
    term = BAD_CAST strstr((const char *) cur, str);
11147
198k
    if (term == NULL) {
11148
113k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
113k
        if ((size_t) (end - cur) < strLen)
11152
6.04k
            end = cur;
11153
107k
        else
11154
107k
            end -= strLen - 1;
11155
113k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
113k
    } else {
11157
85.5k
        ctxt->checkIndex = 0;
11158
85.5k
    }
11159
11160
198k
    return(term);
11161
198k
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
940k
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
940k
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
940k
    const xmlChar *end = ctxt->input->end;
11173
11174
25.0M
    while (cur < end) {
11175
25.0M
        if ((*cur == '<') || (*cur == '&')) {
11176
860k
            ctxt->checkIndex = 0;
11177
860k
            return(1);
11178
860k
        }
11179
24.1M
        cur++;
11180
24.1M
    }
11181
11182
79.4k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
79.4k
    return(0);
11184
940k
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
783k
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
783k
    const xmlChar *cur;
11196
783k
    const xmlChar *end = ctxt->input->end;
11197
783k
    int state = ctxt->endCheckState;
11198
11199
783k
    if (ctxt->checkIndex == 0)
11200
637k
        cur = ctxt->input->cur + 1;
11201
145k
    else
11202
145k
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
37.0M
    while (cur < end) {
11205
36.9M
        if (state) {
11206
17.5M
            if (*cur == state)
11207
650k
                state = 0;
11208
19.3M
        } else if (*cur == '\'' || *cur == '"') {
11209
664k
            state = *cur;
11210
18.6M
        } else if (*cur == '>') {
11211
614k
            ctxt->checkIndex = 0;
11212
614k
            ctxt->endCheckState = 0;
11213
614k
            return(1);
11214
614k
        }
11215
36.3M
        cur++;
11216
36.3M
    }
11217
11218
168k
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
168k
    ctxt->endCheckState = state;
11220
168k
    return(0);
11221
783k
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
55.8k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
55.8k
    const xmlChar *cur, *start;
11240
55.8k
    const xmlChar *end = ctxt->input->end;
11241
55.8k
    int state = ctxt->endCheckState;
11242
11243
55.8k
    if (ctxt->checkIndex == 0) {
11244
19.7k
        cur = ctxt->input->cur + 1;
11245
36.0k
    } else {
11246
36.0k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
36.0k
    }
11248
55.8k
    start = cur;
11249
11250
8.53M
    while (cur < end) {
11251
8.49M
        if (state == '-') {
11252
1.17M
            if ((*cur == '-') &&
11253
1.17M
                (cur[1] == '-') &&
11254
1.17M
                (cur[2] == '>')) {
11255
15.3k
                state = 0;
11256
15.3k
                cur += 3;
11257
15.3k
                start = cur;
11258
15.3k
                continue;
11259
15.3k
            }
11260
1.17M
        }
11261
7.31M
        else if (state == ']') {
11262
27.1k
            if (*cur == '>') {
11263
11.0k
                ctxt->checkIndex = 0;
11264
11.0k
                ctxt->endCheckState = 0;
11265
11.0k
                return(1);
11266
11.0k
            }
11267
16.0k
            if (IS_BLANK_CH(*cur)) {
11268
2.22k
                state = ' ';
11269
13.8k
            } else if (*cur != ']') {
11270
2.01k
                state = 0;
11271
2.01k
                start = cur;
11272
2.01k
                continue;
11273
2.01k
            }
11274
16.0k
        }
11275
7.28M
        else if (state == ' ') {
11276
5.44k
            if (*cur == '>') {
11277
54
                ctxt->checkIndex = 0;
11278
54
                ctxt->endCheckState = 0;
11279
54
                return(1);
11280
54
            }
11281
5.38k
            if (!IS_BLANK_CH(*cur)) {
11282
2.12k
                state = 0;
11283
2.12k
                start = cur;
11284
2.12k
                continue;
11285
2.12k
            }
11286
5.38k
        }
11287
7.28M
        else if (state != 0) {
11288
3.17M
            if (*cur == state) {
11289
68.2k
                state = 0;
11290
68.2k
                start = cur + 1;
11291
68.2k
            }
11292
3.17M
        }
11293
4.10M
        else if (*cur == '<') {
11294
133k
            if ((cur[1] == '!') &&
11295
133k
                (cur[2] == '-') &&
11296
133k
                (cur[3] == '-')) {
11297
15.7k
                state = '-';
11298
15.7k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
15.7k
                start = cur;
11301
15.7k
                continue;
11302
15.7k
            }
11303
133k
        }
11304
3.97M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
86.4k
            state = *cur;
11306
86.4k
        }
11307
11308
8.44M
        cur++;
11309
8.44M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
44.7k
    if ((state == 0) || (state == '-')) {
11316
26.3k
        if (cur - start < 3)
11317
1.99k
            cur = start;
11318
24.3k
        else
11319
24.3k
            cur -= 3;
11320
26.3k
    }
11321
44.7k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
44.7k
    ctxt->endCheckState = state;
11323
44.7k
    return(0);
11324
55.8k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
20.7k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
20.7k
    int ix;
11340
20.7k
    unsigned char c;
11341
20.7k
    int codepoint;
11342
11343
20.7k
    if ((utf == NULL) || (len <= 0))
11344
112
        return(0);
11345
11346
766k
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
758k
        c = utf[ix];
11348
758k
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
573k
      if (c >= 0x20)
11350
539k
    ix++;
11351
34.2k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
32.8k
          ix++;
11353
1.38k
      else
11354
1.38k
          return(-ix);
11355
573k
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
20.3k
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
20.1k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
3.66k
          return(-ix);
11359
16.4k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
16.4k
      codepoint |= utf[ix+1] & 0x3f;
11361
16.4k
      if (!xmlIsCharQ(codepoint))
11362
1.04k
          return(-ix);
11363
15.4k
      ix += 2;
11364
163k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
148k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
147k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
147k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
1.82k
        return(-ix);
11369
146k
      codepoint = (utf[ix] & 0xf) << 12;
11370
146k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
146k
      codepoint |= utf[ix+2] & 0x3f;
11372
146k
      if (!xmlIsCharQ(codepoint))
11373
43
          return(-ix);
11374
146k
      ix += 3;
11375
146k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
13.0k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
12.8k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
12.8k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
12.8k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
763
        return(-ix);
11381
12.1k
      codepoint = (utf[ix] & 0x7) << 18;
11382
12.1k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
12.1k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
12.1k
      codepoint |= utf[ix+3] & 0x3f;
11385
12.1k
      if (!xmlIsCharQ(codepoint))
11386
90
          return(-ix);
11387
12.0k
      ix += 4;
11388
12.0k
  } else       /* unknown encoding */
11389
2.16k
      return(-ix);
11390
758k
      }
11391
8.46k
      return(ix);
11392
20.5k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
553k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
553k
    int ret = 0;
11406
553k
    int avail, tlen;
11407
553k
    xmlChar cur, next;
11408
11409
553k
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
553k
    if ((ctxt->input != NULL) &&
11466
553k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
5.80k
        xmlParserInputShrink(ctxt->input);
11468
5.80k
    }
11469
11470
4.90M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
4.90M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
35.5k
      return(0);
11473
11474
4.86M
  if (ctxt->input == NULL) break;
11475
4.86M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
4.86M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
4.86M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
4.86M
          (ctxt->input->buf->raw != NULL) &&
11488
4.86M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
28.2k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
28.2k
                                                 ctxt->input);
11491
28.2k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
28.2k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
28.2k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
28.2k
                                      base, current);
11496
28.2k
      }
11497
4.86M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
4.86M
        (ctxt->input->cur - ctxt->input->base);
11499
4.86M
  }
11500
4.86M
        if (avail < 1)
11501
39.3k
      goto done;
11502
4.82M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
247k
            case XML_PARSER_START:
11509
247k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
77.8k
        xmlChar start[4];
11511
77.8k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
77.8k
        if (avail < 4)
11517
8.22k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
69.5k
        start[0] = RAW;
11527
69.5k
        start[1] = NXT(1);
11528
69.5k
        start[2] = NXT(2);
11529
69.5k
        start[3] = NXT(3);
11530
69.5k
        enc = xmlDetectCharEncoding(start, 4);
11531
69.5k
        xmlSwitchEncoding(ctxt, enc);
11532
69.5k
        break;
11533
77.8k
    }
11534
11535
169k
    if (avail < 2)
11536
67
        goto done;
11537
169k
    cur = ctxt->input->cur[0];
11538
169k
    next = ctxt->input->cur[1];
11539
169k
    if (cur == 0) {
11540
612
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
612
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
612
                  &xmlDefaultSAXLocator);
11543
612
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
612
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
612
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
612
      ctxt->sax->endDocument(ctxt->userData);
11551
612
        goto done;
11552
612
    }
11553
169k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
129k
        if (avail < 5) goto done;
11556
129k
        if ((!terminate) &&
11557
129k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
71.8k
      goto done;
11559
57.2k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
57.2k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
57.2k
                  &xmlDefaultSAXLocator);
11562
57.2k
        if ((ctxt->input->cur[2] == 'x') &&
11563
57.2k
      (ctxt->input->cur[3] == 'm') &&
11564
57.2k
      (ctxt->input->cur[4] == 'l') &&
11565
57.2k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
49.2k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
49.2k
      xmlParseXMLDecl(ctxt);
11572
49.2k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
148
          xmlHaltParser(ctxt);
11578
148
          return(0);
11579
148
      }
11580
49.1k
      ctxt->standalone = ctxt->input->standalone;
11581
49.1k
      if ((ctxt->encoding == NULL) &&
11582
49.1k
          (ctxt->input->encoding != NULL))
11583
2.24k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
49.1k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
49.1k
          (!ctxt->disableSAX))
11586
38.9k
          ctxt->sax->startDocument(ctxt->userData);
11587
49.1k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
49.1k
        } else {
11593
7.96k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
7.96k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
7.96k
          (!ctxt->disableSAX))
11596
7.96k
          ctxt->sax->startDocument(ctxt->userData);
11597
7.96k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
7.96k
        }
11603
57.2k
    } else {
11604
39.6k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
39.6k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
39.6k
                  &xmlDefaultSAXLocator);
11607
39.6k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
39.6k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
39.6k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
39.6k
            (!ctxt->disableSAX))
11614
39.6k
      ctxt->sax->startDocument(ctxt->userData);
11615
39.6k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
39.6k
    }
11621
96.7k
    break;
11622
881k
            case XML_PARSER_START_TAG: {
11623
881k
          const xmlChar *name;
11624
881k
    const xmlChar *prefix = NULL;
11625
881k
    const xmlChar *URI = NULL;
11626
881k
                int line = ctxt->input->line;
11627
881k
    int nsNr = ctxt->nsNr;
11628
11629
881k
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
881k
    cur = ctxt->input->cur[0];
11632
881k
          if (cur != '<') {
11633
5.42k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
5.42k
        xmlHaltParser(ctxt);
11635
5.42k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
5.42k
      ctxt->sax->endDocument(ctxt->userData);
11637
5.42k
        goto done;
11638
5.42k
    }
11639
875k
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
154k
                    goto done;
11641
721k
    if (ctxt->spaceNr == 0)
11642
29.2k
        spacePush(ctxt, -1);
11643
692k
    else if (*ctxt->space == -2)
11644
83.6k
        spacePush(ctxt, -1);
11645
608k
    else
11646
608k
        spacePush(ctxt, *ctxt->space);
11647
721k
#ifdef LIBXML_SAX1_ENABLED
11648
721k
    if (ctxt->sax2)
11649
515k
#endif /* LIBXML_SAX1_ENABLED */
11650
515k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
205k
#ifdef LIBXML_SAX1_ENABLED
11652
205k
    else
11653
205k
        name = xmlParseStartTag(ctxt);
11654
721k
#endif /* LIBXML_SAX1_ENABLED */
11655
721k
    if (ctxt->instate == XML_PARSER_EOF)
11656
50
        goto done;
11657
721k
    if (name == NULL) {
11658
5.90k
        spacePop(ctxt);
11659
5.90k
        xmlHaltParser(ctxt);
11660
5.90k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
5.90k
      ctxt->sax->endDocument(ctxt->userData);
11662
5.90k
        goto done;
11663
5.90k
    }
11664
715k
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
715k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
715k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
715k
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
715k
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
190k
        SKIP(2);
11680
11681
190k
        if (ctxt->sax2) {
11682
158k
      if ((ctxt->sax != NULL) &&
11683
158k
          (ctxt->sax->endElementNs != NULL) &&
11684
158k
          (!ctxt->disableSAX))
11685
158k
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
158k
                                  prefix, URI);
11687
158k
      if (ctxt->nsNr - nsNr > 0)
11688
683
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
158k
#ifdef LIBXML_SAX1_ENABLED
11690
158k
        } else {
11691
31.2k
      if ((ctxt->sax != NULL) &&
11692
31.2k
          (ctxt->sax->endElement != NULL) &&
11693
31.2k
          (!ctxt->disableSAX))
11694
31.1k
          ctxt->sax->endElement(ctxt->userData, name);
11695
31.2k
#endif /* LIBXML_SAX1_ENABLED */
11696
31.2k
        }
11697
190k
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
190k
        spacePop(ctxt);
11700
190k
        if (ctxt->nameNr == 0) {
11701
885
      ctxt->instate = XML_PARSER_EPILOG;
11702
189k
        } else {
11703
189k
      ctxt->instate = XML_PARSER_CONTENT;
11704
189k
        }
11705
190k
        break;
11706
190k
    }
11707
525k
    if (RAW == '>') {
11708
356k
        NEXT;
11709
356k
    } else {
11710
169k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
169k
           "Couldn't find end of Start Tag %s\n",
11712
169k
           name);
11713
169k
        nodePop(ctxt);
11714
169k
        spacePop(ctxt);
11715
169k
    }
11716
525k
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
525k
    ctxt->instate = XML_PARSER_CONTENT;
11719
525k
                break;
11720
715k
      }
11721
3.17M
            case XML_PARSER_CONTENT: {
11722
3.17M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
9.39k
        goto done;
11724
3.16M
    cur = ctxt->input->cur[0];
11725
3.16M
    next = ctxt->input->cur[1];
11726
11727
3.16M
    if ((cur == '<') && (next == '/')) {
11728
281k
        ctxt->instate = XML_PARSER_END_TAG;
11729
281k
        break;
11730
2.88M
          } else if ((cur == '<') && (next == '?')) {
11731
19.7k
        if ((!terminate) &&
11732
19.7k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
6.47k
      goto done;
11734
13.3k
        xmlParsePI(ctxt);
11735
13.3k
        ctxt->instate = XML_PARSER_CONTENT;
11736
2.86M
    } else if ((cur == '<') && (next != '!')) {
11737
670k
        ctxt->instate = XML_PARSER_START_TAG;
11738
670k
        break;
11739
2.19M
    } else if ((cur == '<') && (next == '!') &&
11740
2.19M
               (ctxt->input->cur[2] == '-') &&
11741
2.19M
         (ctxt->input->cur[3] == '-')) {
11742
32.4k
        if ((!terminate) &&
11743
32.4k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
12.0k
      goto done;
11745
20.3k
        xmlParseComment(ctxt);
11746
20.3k
        ctxt->instate = XML_PARSER_CONTENT;
11747
2.16M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
2.16M
        (ctxt->input->cur[2] == '[') &&
11749
2.16M
        (ctxt->input->cur[3] == 'C') &&
11750
2.16M
        (ctxt->input->cur[4] == 'D') &&
11751
2.16M
        (ctxt->input->cur[5] == 'A') &&
11752
2.16M
        (ctxt->input->cur[6] == 'T') &&
11753
2.16M
        (ctxt->input->cur[7] == 'A') &&
11754
2.16M
        (ctxt->input->cur[8] == '[')) {
11755
8.64k
        SKIP(9);
11756
8.64k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
8.64k
        break;
11758
2.15M
    } else if ((cur == '<') && (next == '!') &&
11759
2.15M
               (avail < 9)) {
11760
1.27k
        goto done;
11761
2.15M
    } else if (cur == '<') {
11762
52.5k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
52.5k
                    "detected an error in element content\n");
11764
52.5k
                    SKIP(1);
11765
2.09M
    } else if (cur == '&') {
11766
322k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
9.50k
      goto done;
11768
313k
        xmlParseReference(ctxt);
11769
1.77M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
1.77M
        if ((ctxt->inputNr == 1) &&
11783
1.77M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
1.15M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
79.4k
          goto done;
11786
1.15M
                    }
11787
1.69M
                    ctxt->checkIndex = 0;
11788
1.69M
        xmlParseCharData(ctxt, 0);
11789
1.69M
    }
11790
2.09M
    break;
11791
3.16M
      }
11792
2.09M
            case XML_PARSER_END_TAG:
11793
290k
    if (avail < 2)
11794
0
        goto done;
11795
290k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
8.92k
        goto done;
11797
281k
    if (ctxt->sax2) {
11798
207k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
207k
        nameNsPop(ctxt);
11800
207k
    }
11801
73.6k
#ifdef LIBXML_SAX1_ENABLED
11802
73.6k
      else
11803
73.6k
        xmlParseEndTag1(ctxt, 0);
11804
281k
#endif /* LIBXML_SAX1_ENABLED */
11805
281k
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
281k
    } else if (ctxt->nameNr == 0) {
11808
5.92k
        ctxt->instate = XML_PARSER_EPILOG;
11809
275k
    } else {
11810
275k
        ctxt->instate = XML_PARSER_CONTENT;
11811
275k
    }
11812
281k
    break;
11813
27.8k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
27.8k
    const xmlChar *term;
11819
11820
27.8k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
4.16k
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
4.16k
                                           "]]>");
11827
23.6k
                } else {
11828
23.6k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
23.6k
                }
11830
11831
27.8k
    if (term == NULL) {
11832
14.8k
        int tmp, size;
11833
11834
14.8k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
875
                        size = ctxt->input->end - ctxt->input->cur;
11837
13.9k
                    } else {
11838
13.9k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
7.13k
                            goto done;
11840
6.82k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
6.82k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
6.82k
                    }
11844
7.70k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
7.70k
                    if (tmp <= 0) {
11846
5.31k
                        tmp = -tmp;
11847
5.31k
                        ctxt->input->cur += tmp;
11848
5.31k
                        goto encoding_error;
11849
5.31k
                    }
11850
2.39k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
2.39k
                        if (ctxt->sax->cdataBlock != NULL)
11852
1.77k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
1.77k
                                                  ctxt->input->cur, tmp);
11854
619
                        else if (ctxt->sax->characters != NULL)
11855
619
                            ctxt->sax->characters(ctxt->userData,
11856
619
                                                  ctxt->input->cur, tmp);
11857
2.39k
                    }
11858
2.39k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
2.39k
                    SKIPL(tmp);
11861
13.0k
    } else {
11862
13.0k
                    int base = term - CUR_PTR;
11863
13.0k
        int tmp;
11864
11865
13.0k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
13.0k
        if ((tmp < 0) || (tmp != base)) {
11867
6.54k
      tmp = -tmp;
11868
6.54k
      ctxt->input->cur += tmp;
11869
6.54k
      goto encoding_error;
11870
6.54k
        }
11871
6.46k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
6.46k
            (ctxt->sax->cdataBlock != NULL) &&
11873
6.46k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
81
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
81
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
81
                     "<![CDATA[", 9)))
11882
81
           ctxt->sax->cdataBlock(ctxt->userData,
11883
81
                                 BAD_CAST "", 0);
11884
6.38k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
6.38k
      (!ctxt->disableSAX)) {
11886
6.35k
      if (ctxt->sax->cdataBlock != NULL)
11887
4.54k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
4.54k
              ctxt->input->cur, base);
11889
1.81k
      else if (ctxt->sax->characters != NULL)
11890
1.81k
          ctxt->sax->characters(ctxt->userData,
11891
1.81k
              ctxt->input->cur, base);
11892
6.35k
        }
11893
6.46k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
6.46k
        SKIPL(base + 3);
11896
6.46k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
6.46k
    }
11902
8.85k
    break;
11903
27.8k
      }
11904
114k
            case XML_PARSER_MISC:
11905
133k
            case XML_PARSER_PROLOG:
11906
139k
            case XML_PARSER_EPILOG:
11907
139k
    SKIP_BLANKS;
11908
139k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
139k
    else
11912
139k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
139k
                (ctxt->input->cur - ctxt->input->base);
11914
139k
    if (avail < 2)
11915
4.57k
        goto done;
11916
135k
    cur = ctxt->input->cur[0];
11917
135k
    next = ctxt->input->cur[1];
11918
135k
          if ((cur == '<') && (next == '?')) {
11919
11.1k
        if ((!terminate) &&
11920
11.1k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
1.66k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
9.48k
        xmlParsePI(ctxt);
11927
9.48k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
124k
    } else if ((cur == '<') && (next == '!') &&
11930
124k
        (ctxt->input->cur[2] == '-') &&
11931
124k
        (ctxt->input->cur[3] == '-')) {
11932
10.3k
        if ((!terminate) &&
11933
10.3k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
7.22k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
3.07k
        xmlParseComment(ctxt);
11940
3.07k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
113k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
113k
                    (cur == '<') && (next == '!') &&
11944
113k
        (ctxt->input->cur[2] == 'D') &&
11945
113k
        (ctxt->input->cur[3] == 'O') &&
11946
113k
        (ctxt->input->cur[4] == 'C') &&
11947
113k
        (ctxt->input->cur[5] == 'T') &&
11948
113k
        (ctxt->input->cur[6] == 'Y') &&
11949
113k
        (ctxt->input->cur[7] == 'P') &&
11950
113k
        (ctxt->input->cur[8] == 'E')) {
11951
48.4k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
14.2k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
34.2k
        ctxt->inSubset = 1;
11958
34.2k
        xmlParseDocTypeDecl(ctxt);
11959
34.2k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
34.2k
        if (RAW == '[') {
11962
23.5k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
23.5k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
10.6k
      ctxt->inSubset = 2;
11972
10.6k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
10.6k
          (ctxt->sax->externalSubset != NULL))
11974
9.36k
          ctxt->sax->externalSubset(ctxt->userData,
11975
9.36k
            ctxt->intSubName, ctxt->extSubSystem,
11976
9.36k
            ctxt->extSubURI);
11977
10.6k
      ctxt->inSubset = 0;
11978
10.6k
      xmlCleanSpecialAttr(ctxt);
11979
10.6k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
10.6k
        }
11985
65.4k
    } else if ((cur == '<') && (next == '!') &&
11986
65.4k
               (avail <
11987
1.25k
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
479
        goto done;
11989
64.9k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
2.03k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
2.03k
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
2.03k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
2.03k
      ctxt->sax->endDocument(ctxt->userData);
11998
2.03k
        goto done;
11999
62.8k
                } else {
12000
62.8k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
62.8k
    }
12006
109k
    break;
12007
109k
            case XML_PARSER_DTD: {
12008
64.1k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
44.7k
                    goto done;
12010
19.4k
    xmlParseInternalSubset(ctxt);
12011
19.4k
    if (ctxt->instate == XML_PARSER_EOF)
12012
10.2k
        goto done;
12013
9.11k
    ctxt->inSubset = 2;
12014
9.11k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
9.11k
        (ctxt->sax->externalSubset != NULL))
12016
8.39k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
8.39k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
9.11k
    ctxt->inSubset = 0;
12019
9.11k
    xmlCleanSpecialAttr(ctxt);
12020
9.11k
    if (ctxt->instate == XML_PARSER_EOF)
12021
44
        goto done;
12022
9.06k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
9.06k
                break;
12028
9.11k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
4.82M
  }
12102
4.82M
    }
12103
505k
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
505k
    return(ret);
12108
11.8k
encoding_error:
12109
11.8k
    {
12110
11.8k
        char buffer[150];
12111
12112
11.8k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
11.8k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
11.8k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
11.8k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
11.8k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
11.8k
         BAD_CAST buffer, NULL);
12118
11.8k
    }
12119
11.8k
    return(0);
12120
553k
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
682k
              int terminate) {
12136
682k
    int end_in_lf = 0;
12137
682k
    int remain = 0;
12138
12139
682k
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
682k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
133k
        return(ctxt->errNo);
12143
548k
    if (ctxt->instate == XML_PARSER_EOF)
12144
329
        return(-1);
12145
547k
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
547k
    ctxt->progressive = 1;
12149
547k
    if (ctxt->instate == XML_PARSER_START)
12150
173k
        xmlDetectSAX2(ctxt);
12151
547k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
547k
        (chunk[size - 1] == '\r')) {
12153
3.04k
  end_in_lf = 1;
12154
3.04k
  size--;
12155
3.04k
    }
12156
12157
554k
xmldecl_done:
12158
12159
554k
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
554k
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
484k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
484k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
484k
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
484k
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
484k
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
11.5k
            unsigned int len = 45;
12173
12174
11.5k
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
11.5k
                               BAD_CAST "UTF-16")) ||
12176
11.5k
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
318
                               BAD_CAST "UTF16")))
12178
11.2k
                len = 90;
12179
318
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
318
                                    BAD_CAST "UCS-4")) ||
12181
318
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
5
                                    BAD_CAST "UCS4")))
12183
313
                len = 180;
12184
12185
11.5k
            if (ctxt->input->buf->rawconsumed < len)
12186
2.39k
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
11.5k
            if ((unsigned int) size > len) {
12194
6.90k
                remain = size - len;
12195
6.90k
                size = len;
12196
6.90k
            } else {
12197
4.67k
                remain = 0;
12198
4.67k
            }
12199
11.5k
        }
12200
484k
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
484k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
484k
  if (res < 0) {
12203
303
      ctxt->errNo = XML_PARSER_EOF;
12204
303
      xmlHaltParser(ctxt);
12205
303
      return (XML_PARSER_EOF);
12206
303
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
484k
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
69.3k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
69.3k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
69.3k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
69.3k
        (in->raw != NULL)) {
12216
5.96k
    int nbchars;
12217
5.96k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
5.96k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
5.96k
    nbchars = xmlCharEncInput(in, terminate);
12221
5.96k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
5.96k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
583
        xmlGenericError(xmlGenericErrorContext,
12225
583
            "xmlParseChunk: encoder error\n");
12226
583
                    xmlHaltParser(ctxt);
12227
583
        return(XML_ERR_INVALID_ENCODING);
12228
583
    }
12229
5.96k
      }
12230
69.3k
  }
12231
69.3k
    }
12232
12233
553k
    if (remain != 0) {
12234
6.84k
        xmlParseTryOrFinish(ctxt, 0);
12235
546k
    } else {
12236
546k
        xmlParseTryOrFinish(ctxt, terminate);
12237
546k
    }
12238
553k
    if (ctxt->instate == XML_PARSER_EOF)
12239
24.5k
        return(ctxt->errNo);
12240
12241
528k
    if ((ctxt->input != NULL) &&
12242
528k
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
528k
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
528k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
528k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
35.7k
        return(ctxt->errNo);
12250
12251
493k
    if (remain != 0) {
12252
6.51k
        chunk += size;
12253
6.51k
        size = remain;
12254
6.51k
        remain = 0;
12255
6.51k
        goto xmldecl_done;
12256
6.51k
    }
12257
486k
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
486k
        (ctxt->input->buf != NULL)) {
12259
2.83k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
2.83k
           ctxt->input);
12261
2.83k
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
2.83k
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
2.83k
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
2.83k
            base, current);
12267
2.83k
    }
12268
486k
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
26.6k
  int cur_avail = 0;
12273
12274
26.6k
  if (ctxt->input != NULL) {
12275
26.6k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
26.6k
      else
12279
26.6k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
26.6k
                    (ctxt->input->cur - ctxt->input->base);
12281
26.6k
  }
12282
12283
26.6k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
26.6k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
22.4k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
22.4k
  }
12287
26.6k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
36
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
36
  }
12290
26.6k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
26.6k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
26.6k
    ctxt->sax->endDocument(ctxt->userData);
12293
26.6k
  }
12294
26.6k
  ctxt->instate = XML_PARSER_EOF;
12295
26.6k
    }
12296
486k
    if (ctxt->wellFormed == 0)
12297
175k
  return((xmlParserErrors) ctxt->errNo);
12298
310k
    else
12299
310k
        return(0);
12300
486k
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
101k
                        const char *chunk, int size, const char *filename) {
12330
101k
    xmlParserCtxtPtr ctxt;
12331
101k
    xmlParserInputPtr inputStream;
12332
101k
    xmlParserInputBufferPtr buf;
12333
101k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
101k
    if ((chunk != NULL) && (size >= 4))
12339
49.0k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
101k
    buf = xmlAllocParserInputBuffer(enc);
12342
101k
    if (buf == NULL) return(NULL);
12343
12344
101k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
101k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
101k
    ctxt->dictNames = 1;
12351
101k
    if (filename == NULL) {
12352
50.9k
  ctxt->directory = NULL;
12353
50.9k
    } else {
12354
50.9k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
50.9k
    }
12356
12357
101k
    inputStream = xmlNewInputStream(ctxt);
12358
101k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
101k
    if (filename == NULL)
12365
50.9k
  inputStream->filename = NULL;
12366
50.9k
    else {
12367
50.9k
  inputStream->filename = (char *)
12368
50.9k
      xmlCanonicPath((const xmlChar *) filename);
12369
50.9k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
50.9k
    }
12376
101k
    inputStream->buf = buf;
12377
101k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
101k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
101k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
101k
    if ((size != 0) && (chunk != NULL) &&
12388
101k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
49.0k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
49.0k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
49.0k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
49.0k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
49.0k
    }
12399
12400
101k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
28.5k
        xmlSwitchEncoding(ctxt, enc);
12402
28.5k
    }
12403
12404
101k
    return(ctxt);
12405
101k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
86.9k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
86.9k
    if (ctxt == NULL)
12418
0
        return;
12419
86.9k
    ctxt->instate = XML_PARSER_EOF;
12420
86.9k
    ctxt->disableSAX = 1;
12421
87.7k
    while (ctxt->inputNr > 1)
12422
754
        xmlFreeInputStream(inputPop(ctxt));
12423
86.9k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
86.9k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
86.9k
        if (ctxt->input->buf != NULL) {
12433
76.1k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
76.1k
            ctxt->input->buf = NULL;
12435
76.1k
        }
12436
86.9k
  ctxt->input->cur = BAD_CAST"";
12437
86.9k
        ctxt->input->length = 0;
12438
86.9k
  ctxt->input->base = ctxt->input->cur;
12439
86.9k
        ctxt->input->end = ctxt->input->cur;
12440
86.9k
    }
12441
86.9k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
51.0k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
51.0k
    if (ctxt == NULL)
12452
0
        return;
12453
51.0k
    xmlHaltParser(ctxt);
12454
51.0k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
51.0k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
1.72k
          const xmlChar *ID, xmlNodePtr *list) {
12832
1.72k
    xmlParserCtxtPtr ctxt;
12833
1.72k
    xmlDocPtr newDoc;
12834
1.72k
    xmlNodePtr newRoot;
12835
1.72k
    xmlParserErrors ret = XML_ERR_OK;
12836
1.72k
    xmlChar start[4];
12837
1.72k
    xmlCharEncoding enc;
12838
12839
1.72k
    if (((depth > 40) &&
12840
1.72k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
1.72k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
1.72k
    if (list != NULL)
12848
1.09k
        *list = NULL;
12849
1.72k
    if ((URL == NULL) && (ID == NULL))
12850
27
  return(XML_ERR_INTERNAL_ERROR);
12851
1.69k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
1.69k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
1.69k
                                             oldctxt);
12856
1.69k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
485
    if (oldctxt != NULL) {
12858
485
        ctxt->nbErrors = oldctxt->nbErrors;
12859
485
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
485
    }
12861
485
    xmlDetectSAX2(ctxt);
12862
12863
485
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
485
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
485
    newDoc->properties = XML_DOC_INTERNAL;
12869
485
    if (doc) {
12870
485
        newDoc->intSubset = doc->intSubset;
12871
485
        newDoc->extSubset = doc->extSubset;
12872
485
        if (doc->dict) {
12873
413
            newDoc->dict = doc->dict;
12874
413
            xmlDictReference(newDoc->dict);
12875
413
        }
12876
485
        if (doc->URL != NULL) {
12877
301
            newDoc->URL = xmlStrdup(doc->URL);
12878
301
        }
12879
485
    }
12880
485
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
485
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
485
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
485
    nodePush(ctxt, newDoc->children);
12891
485
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
485
    } else {
12894
485
        ctxt->myDoc = doc;
12895
485
        newRoot->doc = doc;
12896
485
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
485
    GROW;
12904
485
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
417
  start[0] = RAW;
12906
417
  start[1] = NXT(1);
12907
417
  start[2] = NXT(2);
12908
417
  start[3] = NXT(3);
12909
417
  enc = xmlDetectCharEncoding(start, 4);
12910
417
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
83
      xmlSwitchEncoding(ctxt, enc);
12912
83
  }
12913
417
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
485
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
15
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
15
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
15
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
0
                           "Version mismatch between document and entity\n");
12927
0
        }
12928
15
    }
12929
12930
485
    ctxt->instate = XML_PARSER_CONTENT;
12931
485
    ctxt->depth = depth;
12932
485
    if (oldctxt != NULL) {
12933
485
  ctxt->_private = oldctxt->_private;
12934
485
  ctxt->loadsubset = oldctxt->loadsubset;
12935
485
  ctxt->validate = oldctxt->validate;
12936
485
  ctxt->valid = oldctxt->valid;
12937
485
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
485
        if (oldctxt->validate) {
12939
116
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
116
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
116
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
116
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
116
        }
12944
485
  ctxt->external = oldctxt->external;
12945
485
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
485
        ctxt->dict = oldctxt->dict;
12947
485
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
485
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
485
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
485
        ctxt->dictNames = oldctxt->dictNames;
12951
485
        ctxt->attsDefault = oldctxt->attsDefault;
12952
485
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
485
        ctxt->linenumbers = oldctxt->linenumbers;
12954
485
  ctxt->record_info = oldctxt->record_info;
12955
485
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
485
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
485
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
485
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
485
    xmlParseContent(ctxt);
12970
12971
485
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
18
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
467
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
485
    if (ctxt->node != newDoc->children) {
12977
32
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
32
    }
12979
12980
485
    if (!ctxt->wellFormed) {
12981
307
  ret = (xmlParserErrors)ctxt->errNo;
12982
307
        if (oldctxt != NULL) {
12983
307
            oldctxt->errNo = ctxt->errNo;
12984
307
            oldctxt->wellFormed = 0;
12985
307
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
307
        }
12987
307
    } else {
12988
178
  if (list != NULL) {
12989
178
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
178
      cur = newDoc->children->children;
12996
178
      *list = cur;
12997
405
      while (cur != NULL) {
12998
227
    cur->parent = NULL;
12999
227
    cur = cur->next;
13000
227
      }
13001
178
            newDoc->children->children = NULL;
13002
178
  }
13003
178
  ret = XML_ERR_OK;
13004
178
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
485
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
485
        unsigned long consumed = ctxt->input->consumed;
13011
13012
485
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
485
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
485
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
485
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
485
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
485
    }
13020
13021
485
    if (oldctxt != NULL) {
13022
485
        ctxt->dict = NULL;
13023
485
        ctxt->attsDefault = NULL;
13024
485
        ctxt->attsSpecial = NULL;
13025
485
        oldctxt->nbErrors = ctxt->nbErrors;
13026
485
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
485
        oldctxt->validate = ctxt->validate;
13028
485
        oldctxt->valid = ctxt->valid;
13029
485
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
485
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
485
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
485
    }
13033
485
    ctxt->node_seq.maximum = 0;
13034
485
    ctxt->node_seq.length = 0;
13035
485
    ctxt->node_seq.buffer = NULL;
13036
485
    xmlFreeParserCtxt(ctxt);
13037
485
    newDoc->intSubset = NULL;
13038
485
    newDoc->extSubset = NULL;
13039
485
    xmlFreeDoc(newDoc);
13040
13041
485
    return(ret);
13042
485
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
6.99k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
6.99k
    xmlParserCtxtPtr ctxt;
13125
6.99k
    xmlDocPtr newDoc = NULL;
13126
6.99k
    xmlNodePtr newRoot;
13127
6.99k
    xmlSAXHandlerPtr oldsax = NULL;
13128
6.99k
    xmlNodePtr content = NULL;
13129
6.99k
    xmlNodePtr last = NULL;
13130
6.99k
    int size;
13131
6.99k
    xmlParserErrors ret = XML_ERR_OK;
13132
6.99k
#ifdef SAX2
13133
6.99k
    int i;
13134
6.99k
#endif
13135
13136
6.99k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
6.99k
        (oldctxt->depth >  100)) {
13138
15
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
15
                       "Maximum entity nesting depth exceeded");
13140
15
  return(XML_ERR_ENTITY_LOOP);
13141
15
    }
13142
13143
13144
6.97k
    if (lst != NULL)
13145
6.97k
        *lst = NULL;
13146
6.97k
    if (string == NULL)
13147
11
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
6.96k
    size = xmlStrlen(string);
13150
13151
6.96k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
6.96k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
6.96k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
6.96k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
6.96k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
6.96k
    else
13158
6.96k
  ctxt->userData = ctxt;
13159
6.96k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
6.96k
    ctxt->dict = oldctxt->dict;
13161
6.96k
    ctxt->input_id = oldctxt->input_id;
13162
6.96k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
6.96k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
6.96k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
6.96k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
6.96k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
0
    }
13171
6.96k
#endif
13172
13173
6.96k
    oldsax = ctxt->sax;
13174
6.96k
    ctxt->sax = oldctxt->sax;
13175
6.96k
    xmlDetectSAX2(ctxt);
13176
6.96k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
6.96k
    ctxt->options = oldctxt->options;
13178
13179
6.96k
    ctxt->_private = oldctxt->_private;
13180
6.96k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
6.96k
    } else {
13193
6.96k
  ctxt->myDoc = oldctxt->myDoc;
13194
6.96k
        content = ctxt->myDoc->children;
13195
6.96k
  last = ctxt->myDoc->last;
13196
6.96k
    }
13197
6.96k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
6.96k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
6.96k
    ctxt->myDoc->children = NULL;
13208
6.96k
    ctxt->myDoc->last = NULL;
13209
6.96k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
6.96k
    nodePush(ctxt, ctxt->myDoc->children);
13211
6.96k
    ctxt->instate = XML_PARSER_CONTENT;
13212
6.96k
    ctxt->depth = oldctxt->depth;
13213
13214
6.96k
    ctxt->validate = 0;
13215
6.96k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
6.96k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
5.46k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
5.46k
    }
13222
6.96k
    ctxt->dictNames = oldctxt->dictNames;
13223
6.96k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
6.96k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
6.96k
    xmlParseContent(ctxt);
13227
6.96k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
29
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
6.93k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
6.96k
    if (ctxt->node != ctxt->myDoc->children) {
13233
76
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
76
    }
13235
13236
6.96k
    if (!ctxt->wellFormed) {
13237
947
  ret = (xmlParserErrors)ctxt->errNo;
13238
947
        oldctxt->errNo = ctxt->errNo;
13239
947
        oldctxt->wellFormed = 0;
13240
947
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
6.01k
    } else {
13242
6.01k
        ret = XML_ERR_OK;
13243
6.01k
    }
13244
13245
6.96k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
6.01k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
6.01k
  cur = ctxt->myDoc->children->children;
13253
6.01k
  *lst = cur;
13254
13.9k
  while (cur != NULL) {
13255
7.88k
#ifdef LIBXML_VALID_ENABLED
13256
7.88k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
7.88k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
7.88k
    (cur->type == XML_ELEMENT_NODE)) {
13259
144
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
144
      oldctxt->myDoc, cur);
13261
144
      }
13262
7.88k
#endif /* LIBXML_VALID_ENABLED */
13263
7.88k
      cur->parent = NULL;
13264
7.88k
      cur = cur->next;
13265
7.88k
  }
13266
6.01k
  ctxt->myDoc->children->children = NULL;
13267
6.01k
    }
13268
6.96k
    if (ctxt->myDoc != NULL) {
13269
6.96k
  xmlFreeNode(ctxt->myDoc->children);
13270
6.96k
        ctxt->myDoc->children = content;
13271
6.96k
        ctxt->myDoc->last = last;
13272
6.96k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
6.96k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
6.96k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
6.96k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
6.96k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
6.96k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
6.96k
    }
13285
13286
6.96k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
6.96k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
6.96k
    ctxt->sax = oldsax;
13289
6.96k
    ctxt->dict = NULL;
13290
6.96k
    ctxt->attsDefault = NULL;
13291
6.96k
    ctxt->attsSpecial = NULL;
13292
6.96k
    xmlFreeParserCtxt(ctxt);
13293
6.96k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
6.96k
    return(ret);
13298
6.96k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
1.69k
        xmlParserCtxtPtr pctx) {
13783
1.69k
    xmlParserCtxtPtr ctxt;
13784
1.69k
    xmlParserInputPtr inputStream;
13785
1.69k
    char *directory = NULL;
13786
1.69k
    xmlChar *uri;
13787
13788
1.69k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
1.69k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
1.69k
    if (pctx != NULL) {
13794
1.69k
        ctxt->options = pctx->options;
13795
1.69k
        ctxt->_private = pctx->_private;
13796
1.69k
  ctxt->input_id = pctx->input_id;
13797
1.69k
    }
13798
13799
    /* Don't read from stdin. */
13800
1.69k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
1.69k
    uri = xmlBuildURI(URL, base);
13804
13805
1.69k
    if (uri == NULL) {
13806
42
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
42
  if (inputStream == NULL) {
13808
42
      xmlFreeParserCtxt(ctxt);
13809
42
      return(NULL);
13810
42
  }
13811
13812
0
  inputPush(ctxt, inputStream);
13813
13814
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
0
      directory = xmlParserGetDirectory((char *)URL);
13816
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
0
      ctxt->directory = directory;
13818
1.65k
    } else {
13819
1.65k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
1.65k
  if (inputStream == NULL) {
13821
1.16k
      xmlFree(uri);
13822
1.16k
      xmlFreeParserCtxt(ctxt);
13823
1.16k
      return(NULL);
13824
1.16k
  }
13825
13826
485
  inputPush(ctxt, inputStream);
13827
13828
485
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
485
      directory = xmlParserGetDirectory((char *)uri);
13830
485
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
485
      ctxt->directory = directory;
13832
485
  xmlFree(uri);
13833
485
    }
13834
485
    return(ctxt);
13835
1.69k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
57.9k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
57.9k
    xmlParserCtxtPtr ctxt;
14178
57.9k
    xmlParserInputPtr input;
14179
57.9k
    xmlParserInputBufferPtr buf;
14180
14181
57.9k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
57.9k
    if (size <= 0)
14184
260
  return(NULL);
14185
14186
57.6k
    ctxt = xmlNewParserCtxt();
14187
57.6k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
57.6k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
57.6k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
57.6k
    input = xmlNewInputStream(ctxt);
14197
57.6k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
57.6k
    input->filename = NULL;
14204
57.6k
    input->buf = buf;
14205
57.6k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
57.6k
    inputPush(ctxt, input);
14208
57.6k
    return(ctxt);
14209
57.6k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
static int xmlParserInitialized = 0;
14510
14511
/**
14512
 * xmlInitParser:
14513
 *
14514
 * Initialization function for the XML parser.
14515
 * This is not reentrant. Call once before processing in case of
14516
 * use in multithreaded programs.
14517
 */
14518
14519
void
14520
34.7M
xmlInitParser(void) {
14521
    /*
14522
     * Note that the initialization code must not make memory allocations.
14523
     */
14524
34.7M
    if (xmlParserInitialized != 0)
14525
34.7M
  return;
14526
14527
50
#ifdef LIBXML_THREAD_ENABLED
14528
50
    __xmlGlobalInitMutexLock();
14529
50
    if (xmlParserInitialized == 0) {
14530
50
#endif
14531
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14532
        if (xmlFree == free)
14533
            atexit(xmlCleanupParser);
14534
#endif
14535
14536
50
  xmlInitThreadsInternal();
14537
50
  xmlInitGlobalsInternal();
14538
50
  xmlInitMemoryInternal();
14539
50
        __xmlInitializeDict();
14540
50
  xmlInitEncodingInternal();
14541
50
  xmlRegisterDefaultInputCallbacks();
14542
50
#ifdef LIBXML_OUTPUT_ENABLED
14543
50
  xmlRegisterDefaultOutputCallbacks();
14544
50
#endif /* LIBXML_OUTPUT_ENABLED */
14545
50
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14546
50
  xmlInitXPathInternal();
14547
50
#endif
14548
50
  xmlParserInitialized = 1;
14549
50
#ifdef LIBXML_THREAD_ENABLED
14550
50
    }
14551
50
    __xmlGlobalInitMutexUnlock();
14552
50
#endif
14553
50
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14602
    !defined(_WIN32)
14603
static void
14604
ATTRIBUTE_DESTRUCTOR
14605
0
xmlDestructor(void) {
14606
    /*
14607
     * Calling custom deallocation functions in a destructor can cause
14608
     * problems, for example with Nokogiri.
14609
     */
14610
0
    if (xmlFree == free)
14611
0
        xmlCleanupParser();
14612
0
}
14613
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored. When "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so the macro behaves as
 * a single statement (safe inside unbraced if/else); callers must
 * terminate it with a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
152k
{
14843
152k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
152k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
152k
    if (options & XML_PARSE_RECOVER) {
14851
69.2k
        ctxt->recovery = 1;
14852
69.2k
        options -= XML_PARSE_RECOVER;
14853
69.2k
  ctxt->options |= XML_PARSE_RECOVER;
14854
69.2k
    } else
14855
83.3k
        ctxt->recovery = 0;
14856
152k
    if (options & XML_PARSE_DTDLOAD) {
14857
111k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
111k
        options -= XML_PARSE_DTDLOAD;
14859
111k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
111k
    } else
14861
41.3k
        ctxt->loadsubset = 0;
14862
152k
    if (options & XML_PARSE_DTDATTR) {
14863
37.4k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
37.4k
        options -= XML_PARSE_DTDATTR;
14865
37.4k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
37.4k
    }
14867
152k
    if (options & XML_PARSE_NOENT) {
14868
107k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
107k
        options -= XML_PARSE_NOENT;
14871
107k
  ctxt->options |= XML_PARSE_NOENT;
14872
107k
    } else
14873
44.8k
        ctxt->replaceEntities = 0;
14874
152k
    if (options & XML_PARSE_PEDANTIC) {
14875
21.0k
        ctxt->pedantic = 1;
14876
21.0k
        options -= XML_PARSE_PEDANTIC;
14877
21.0k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
21.0k
    } else
14879
131k
        ctxt->pedantic = 0;
14880
152k
    if (options & XML_PARSE_NOBLANKS) {
14881
51.2k
        ctxt->keepBlanks = 0;
14882
51.2k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
51.2k
        options -= XML_PARSE_NOBLANKS;
14884
51.2k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
51.2k
    } else
14886
101k
        ctxt->keepBlanks = 1;
14887
152k
    if (options & XML_PARSE_DTDVALID) {
14888
35.9k
        ctxt->validate = 1;
14889
35.9k
        if (options & XML_PARSE_NOWARNING)
14890
25.1k
            ctxt->vctxt.warning = NULL;
14891
35.9k
        if (options & XML_PARSE_NOERROR)
14892
27.9k
            ctxt->vctxt.error = NULL;
14893
35.9k
        options -= XML_PARSE_DTDVALID;
14894
35.9k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
35.9k
    } else
14896
116k
        ctxt->validate = 0;
14897
152k
    if (options & XML_PARSE_NOWARNING) {
14898
43.3k
        ctxt->sax->warning = NULL;
14899
43.3k
        options -= XML_PARSE_NOWARNING;
14900
43.3k
    }
14901
152k
    if (options & XML_PARSE_NOERROR) {
14902
62.2k
        ctxt->sax->error = NULL;
14903
62.2k
        ctxt->sax->fatalError = NULL;
14904
62.2k
        options -= XML_PARSE_NOERROR;
14905
62.2k
    }
14906
152k
#ifdef LIBXML_SAX1_ENABLED
14907
152k
    if (options & XML_PARSE_SAX1) {
14908
47.7k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
47.7k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
47.7k
        ctxt->sax->startElementNs = NULL;
14911
47.7k
        ctxt->sax->endElementNs = NULL;
14912
47.7k
        ctxt->sax->initialized = 1;
14913
47.7k
        options -= XML_PARSE_SAX1;
14914
47.7k
  ctxt->options |= XML_PARSE_SAX1;
14915
47.7k
    }
14916
152k
#endif /* LIBXML_SAX1_ENABLED */
14917
152k
    if (options & XML_PARSE_NODICT) {
14918
47.3k
        ctxt->dictNames = 0;
14919
47.3k
        options -= XML_PARSE_NODICT;
14920
47.3k
  ctxt->options |= XML_PARSE_NODICT;
14921
105k
    } else {
14922
105k
        ctxt->dictNames = 1;
14923
105k
    }
14924
152k
    if (options & XML_PARSE_NOCDATA) {
14925
46.1k
        ctxt->sax->cdataBlock = NULL;
14926
46.1k
        options -= XML_PARSE_NOCDATA;
14927
46.1k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
46.1k
    }
14929
152k
    if (options & XML_PARSE_NSCLEAN) {
14930
68.8k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
68.8k
        options -= XML_PARSE_NSCLEAN;
14932
68.8k
    }
14933
152k
    if (options & XML_PARSE_NONET) {
14934
55.7k
  ctxt->options |= XML_PARSE_NONET;
14935
55.7k
        options -= XML_PARSE_NONET;
14936
55.7k
    }
14937
152k
    if (options & XML_PARSE_COMPACT) {
14938
87.6k
  ctxt->options |= XML_PARSE_COMPACT;
14939
87.6k
        options -= XML_PARSE_COMPACT;
14940
87.6k
    }
14941
152k
    if (options & XML_PARSE_OLD10) {
14942
43.4k
  ctxt->options |= XML_PARSE_OLD10;
14943
43.4k
        options -= XML_PARSE_OLD10;
14944
43.4k
    }
14945
152k
    if (options & XML_PARSE_NOBASEFIX) {
14946
47.4k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
47.4k
        options -= XML_PARSE_NOBASEFIX;
14948
47.4k
    }
14949
152k
    if (options & XML_PARSE_HUGE) {
14950
35.9k
  ctxt->options |= XML_PARSE_HUGE;
14951
35.9k
        options -= XML_PARSE_HUGE;
14952
35.9k
        if (ctxt->dict != NULL)
14953
35.9k
            xmlDictSetLimit(ctxt->dict, 0);
14954
35.9k
    }
14955
152k
    if (options & XML_PARSE_OLDSAX) {
14956
44.6k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
44.6k
        options -= XML_PARSE_OLDSAX;
14958
44.6k
    }
14959
152k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
61.6k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
61.6k
        options -= XML_PARSE_IGNORE_ENC;
14962
61.6k
    }
14963
152k
    if (options & XML_PARSE_BIG_LINES) {
14964
51.7k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
51.7k
        options -= XML_PARSE_BIG_LINES;
14966
51.7k
    }
14967
152k
    ctxt->linenumbers = 1;
14968
152k
    return (options);
14969
152k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
101k
{
14984
101k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
101k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
50.7k
{
15003
50.7k
    xmlDocPtr ret;
15004
15005
50.7k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
50.7k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
50.7k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
50.7k
        (ctxt->input->filename == NULL))
15015
50.7k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
50.7k
    xmlParseDocument(ctxt);
15017
50.7k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
24.8k
        ret = ctxt->myDoc;
15019
25.8k
    else {
15020
25.8k
        ret = NULL;
15021
25.8k
  if (ctxt->myDoc != NULL) {
15022
20.6k
      xmlFreeDoc(ctxt->myDoc);
15023
20.6k
  }
15024
25.8k
    }
15025
50.7k
    ctxt->myDoc = NULL;
15026
50.7k
    if (!reuse) {
15027
50.7k
  xmlFreeParserCtxt(ctxt);
15028
50.7k
    }
15029
15030
50.7k
    return (ret);
15031
50.7k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
50.9k
{
15096
50.9k
    xmlParserCtxtPtr ctxt;
15097
15098
50.9k
    xmlInitParser();
15099
50.9k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
50.9k
    if (ctxt == NULL)
15101
260
        return (NULL);
15102
50.7k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
50.9k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387