Coverage Report

Created: 2024-08-17 10:59

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record made of a (prefix, URI) pair, a line value and an nsNr counter.
 * NOTE(review): presumably one record is kept per open start tag so the
 * matching end tag can be checked -- the code that fills and reads it is
 * outside this excerpt, confirm there.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix; /* presumably the element's namespace prefix -- TODO confirm */
93
    const xmlChar *URI;    /* presumably the element's namespace URI -- TODO confirm */
94
    int line;              /* presumably the input line of the start tag -- TODO confirm */
95
    int nsNr;              /* presumably a namespace count for this tag -- TODO confirm */
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
14.2k
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in byte of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacement per byte of input.
127
 */
128
126
#define XML_PARSER_NON_LINEAR 10
129
130
17.0M
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * arbitrary depth limit for the XML documents that we allow to
136
 * process. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
8.59M
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
1.19G
#define XML_PARSER_BUFFER_SIZE 100
147
83.3k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expected to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names
155
 * It is not strictly needed as long as inputs available characters
156
 * are followed by 0, which should be provided by the I/O level
157
 */
158
1.40M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI allowed by W3C specs
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
166
{
215
166
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
166
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
166
    if (ctxt != NULL)
219
166
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
166
    if (prefix == NULL)
222
121
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
121
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
121
                        (const char *) localname, NULL, NULL, 0, 0,
225
121
                        "Attribute %s redefined\n", localname);
226
45
    else
227
45
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
45
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
45
                        (const char *) prefix, (const char *) localname,
230
45
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
45
                        localname);
232
166
    if (ctxt != NULL) {
233
166
  ctxt->wellFormed = 0;
234
166
  if (ctxt->recovery == 0)
235
133
      ctxt->disableSAX = 1;
236
166
    }
237
166
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
198k
{
250
198k
    const char *errmsg;
251
252
198k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
198k
        (ctxt->instate == XML_PARSER_EOF))
254
839
  return;
255
197k
    switch (error) {
256
251
        case XML_ERR_INVALID_HEX_CHARREF:
257
251
            errmsg = "CharRef: invalid hexadecimal value";
258
251
            break;
259
1.79k
        case XML_ERR_INVALID_DEC_CHARREF:
260
1.79k
            errmsg = "CharRef: invalid decimal value";
261
1.79k
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
11.5k
        case XML_ERR_INTERNAL_ERROR:
266
11.5k
            errmsg = "internal error";
267
11.5k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
155k
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
155k
            errmsg = "PEReference: expecting ';'";
282
155k
            break;
283
82
        case XML_ERR_ENTITY_LOOP:
284
82
            errmsg = "Detected an entity reference loop";
285
82
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
7
        case XML_ERR_ENTITY_PE_INTERNAL:
290
7
            errmsg = "PEReferences forbidden in internal subset";
291
7
            break;
292
302
        case XML_ERR_ENTITY_NOT_FINISHED:
293
302
            errmsg = "EntityValue: \" or ' expected";
294
302
            break;
295
625
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
625
            errmsg = "AttValue: \" or ' expected";
297
625
            break;
298
1.61k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
1.61k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
1.61k
            break;
301
177
        case XML_ERR_LITERAL_NOT_STARTED:
302
177
            errmsg = "SystemLiteral \" or ' expected";
303
177
            break;
304
253
        case XML_ERR_LITERAL_NOT_FINISHED:
305
253
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
253
            break;
307
565
        case XML_ERR_MISPLACED_CDATA_END:
308
565
            errmsg = "Sequence ']]>' not allowed in content";
309
565
            break;
310
143
        case XML_ERR_URI_REQUIRED:
311
143
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
143
            break;
313
34
        case XML_ERR_PUBID_REQUIRED:
314
34
            errmsg = "PUBLIC, the Public Identifier is missing";
315
34
            break;
316
421
        case XML_ERR_HYPHEN_IN_COMMENT:
317
421
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
421
            break;
319
155
        case XML_ERR_PI_NOT_STARTED:
320
155
            errmsg = "xmlParsePI : no target name";
321
155
            break;
322
30
        case XML_ERR_RESERVED_XML_NAME:
323
30
            errmsg = "Invalid PI name";
324
30
            break;
325
4
        case XML_ERR_NOTATION_NOT_STARTED:
326
4
            errmsg = "NOTATION: Name expected here";
327
4
            break;
328
18
        case XML_ERR_NOTATION_NOT_FINISHED:
329
18
            errmsg = "'>' required to close NOTATION declaration";
330
18
            break;
331
377
        case XML_ERR_VALUE_REQUIRED:
332
377
            errmsg = "Entity value required";
333
377
            break;
334
9
        case XML_ERR_URI_FRAGMENT:
335
9
            errmsg = "Fragment not allowed";
336
9
            break;
337
287
        case XML_ERR_ATTLIST_NOT_STARTED:
338
287
            errmsg = "'(' required to start ATTLIST enumeration";
339
287
            break;
340
24
        case XML_ERR_NMTOKEN_REQUIRED:
341
24
            errmsg = "NmToken expected in ATTLIST enumeration";
342
24
            break;
343
54
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
54
            errmsg = "')' required to finish ATTLIST enumeration";
345
54
            break;
346
76
        case XML_ERR_MIXED_NOT_STARTED:
347
76
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
76
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
323
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
323
            errmsg = "ContentDecl : Name or '(' expected";
354
323
            break;
355
532
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
532
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
532
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
2.88k
        case XML_ERR_GT_REQUIRED:
363
2.88k
            errmsg = "expected '>'";
364
2.88k
            break;
365
9
        case XML_ERR_CONDSEC_INVALID:
366
9
            errmsg = "XML conditional section '[' expected";
367
9
            break;
368
1.34k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
1.34k
            errmsg = "Content error in the external subset";
370
1.34k
            break;
371
57
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
57
            errmsg =
373
57
                "conditional section INCLUDE or IGNORE keyword expected";
374
57
            break;
375
33
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
33
            errmsg = "XML conditional section not closed";
377
33
            break;
378
9
        case XML_ERR_XMLDECL_NOT_STARTED:
379
9
            errmsg = "Text declaration '<?xml' required";
380
9
            break;
381
4.02k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
4.02k
            errmsg = "parsing XML declaration: '?>' expected";
383
4.02k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
4.57k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
4.57k
            errmsg = "EntityRef: expecting ';'";
389
4.57k
            break;
390
987
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
987
            errmsg = "DOCTYPE improperly terminated";
392
987
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
265
        case XML_ERR_EQUAL_REQUIRED:
397
265
            errmsg = "expected '='";
398
265
            break;
399
961
        case XML_ERR_STRING_NOT_CLOSED:
400
961
            errmsg = "String not closed expecting \" or '";
401
961
            break;
402
251
        case XML_ERR_STRING_NOT_STARTED:
403
251
            errmsg = "String not started expecting ' or \"";
404
251
            break;
405
29
        case XML_ERR_ENCODING_NAME:
406
29
            errmsg = "Invalid XML encoding name";
407
29
            break;
408
39
        case XML_ERR_STANDALONE_VALUE:
409
39
            errmsg = "standalone accepts only 'yes' or 'no'";
410
39
            break;
411
965
        case XML_ERR_DOCUMENT_EMPTY:
412
965
            errmsg = "Document is empty";
413
965
            break;
414
3.51k
        case XML_ERR_DOCUMENT_END:
415
3.51k
            errmsg = "Extra content at the end of the document";
416
3.51k
            break;
417
160
        case XML_ERR_NOT_WELL_BALANCED:
418
160
            errmsg = "chunk is not well balanced";
419
160
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
2.31k
        case XML_ERR_VERSION_MISSING:
424
2.31k
            errmsg = "Malformed declaration expecting version";
425
2.31k
            break;
426
0
        case XML_ERR_NAME_TOO_LONG:
427
0
            errmsg = "Name too long";
428
0
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
14
        default:
435
14
            errmsg = "Unregistered error message";
436
197k
    }
437
197k
    if (ctxt != NULL)
438
197k
  ctxt->errNo = error;
439
197k
    if (info == NULL) {
440
186k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
186k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
186k
                        errmsg);
443
186k
    } else {
444
11.5k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
11.5k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
11.5k
                        errmsg, info);
447
11.5k
    }
448
197k
    if (ctxt != NULL) {
449
197k
  ctxt->wellFormed = 0;
450
197k
  if (ctxt->recovery == 0)
451
181k
      ctxt->disableSAX = 1;
452
197k
    }
453
197k
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
44.0k
{
467
44.0k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
44.0k
        (ctxt->instate == XML_PARSER_EOF))
469
0
  return;
470
44.0k
    if (ctxt != NULL)
471
44.0k
  ctxt->errNo = error;
472
44.0k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
44.0k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
44.0k
    if (ctxt != NULL) {
475
44.0k
  ctxt->wellFormed = 0;
476
44.0k
  if (ctxt->recovery == 0)
477
26.3k
      ctxt->disableSAX = 1;
478
44.0k
    }
479
44.0k
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
322k
{
495
322k
    xmlStructuredErrorFunc schannel = NULL;
496
497
322k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
322k
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
322k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
322k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
322k
        schannel = ctxt->sax->serror;
503
322k
    if (ctxt != NULL) {
504
322k
        __xmlRaiseError(schannel,
505
322k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
322k
                    ctxt->userData,
507
322k
                    ctxt, NULL, XML_FROM_PARSER, error,
508
322k
                    XML_ERR_WARNING, NULL, 0,
509
322k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
322k
        msg, (const char *) str1, (const char *) str2);
511
322k
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
322k
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
0
{
533
0
    xmlStructuredErrorFunc schannel = NULL;
534
535
0
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
0
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
0
    if (ctxt != NULL) {
539
0
  ctxt->errNo = error;
540
0
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
0
      schannel = ctxt->sax->serror;
542
0
    }
543
0
    if (ctxt != NULL) {
544
0
        __xmlRaiseError(schannel,
545
0
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
0
                    ctxt, NULL, XML_FROM_DTD, error,
547
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
0
        (const char *) str2, NULL, 0, 0,
549
0
        msg, (const char *) str1, (const char *) str2);
550
0
  ctxt->valid = 0;
551
0
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
0
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
22.5k
{
573
22.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
22.5k
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
22.5k
    if (ctxt != NULL)
577
22.5k
  ctxt->errNo = error;
578
22.5k
    __xmlRaiseError(NULL, NULL, NULL,
579
22.5k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
22.5k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
22.5k
    if (ctxt != NULL) {
582
22.5k
  ctxt->wellFormed = 0;
583
22.5k
  if (ctxt->recovery == 0)
584
6.24k
      ctxt->disableSAX = 1;
585
22.5k
    }
586
22.5k
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
14.8k
{
604
14.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
14.8k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
14.8k
    if (ctxt != NULL)
608
14.8k
  ctxt->errNo = error;
609
14.8k
    __xmlRaiseError(NULL, NULL, NULL,
610
14.8k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
14.8k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
14.8k
        NULL, val, 0, msg, str1, val, str2);
613
14.8k
    if (ctxt != NULL) {
614
14.8k
  ctxt->wellFormed = 0;
615
14.8k
  if (ctxt->recovery == 0)
616
8.69k
      ctxt->disableSAX = 1;
617
14.8k
    }
618
14.8k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
700k
{
633
700k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
700k
        (ctxt->instate == XML_PARSER_EOF))
635
0
  return;
636
700k
    if (ctxt != NULL)
637
700k
  ctxt->errNo = error;
638
700k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
700k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
700k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
700k
                    val);
642
700k
    if (ctxt != NULL) {
643
700k
  ctxt->wellFormed = 0;
644
700k
  if (ctxt->recovery == 0)
645
638k
      ctxt->disableSAX = 1;
646
700k
    }
647
700k
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
232k
{
662
232k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
232k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
232k
    if (ctxt != NULL)
666
232k
  ctxt->errNo = error;
667
232k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
232k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
232k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
232k
                    val);
671
232k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
3.84k
{
689
3.84k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
3.84k
        (ctxt->instate == XML_PARSER_EOF))
691
0
  return;
692
3.84k
    if (ctxt != NULL)
693
3.84k
  ctxt->errNo = error;
694
3.84k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
3.84k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
3.84k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
3.84k
                    info1, info2, info3);
698
3.84k
    if (ctxt != NULL)
699
3.84k
  ctxt->nsWellFormed = 0;
700
3.84k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
113
{
718
113
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
113
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
113
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
113
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
113
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
113
                    info1, info2, info3);
725
113
}
726
727
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
734
735
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around.
 *
 * @val is taken as size_t so that a value wider than unsigned long
 * (possible where size_t is 64 bits and unsigned long is 32 bits) is
 * compared at full width and saturates, rather than being truncated at
 * the call boundary before the overflow check can see it.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* The check above guarantees val fits in unsigned long here. */
        *dst += (unsigned long) val;
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
17.0M
{
770
17.0M
    unsigned long consumed;
771
17.0M
    xmlParserInputPtr input = ctxt->input;
772
17.0M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
17.0M
    consumed = input->parentConsumed;
779
17.0M
    if ((entity == NULL) ||
780
17.0M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
10.4M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
6.60M
        xmlSaturatedAdd(&consumed, input->consumed);
783
6.60M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
6.60M
    }
785
17.0M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
17.0M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
17.0M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
17.0M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
17.0M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
126
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
126
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
126
                       "Maximum entity amplification factor exceeded");
803
126
        xmlHaltParser(ctxt);
804
126
        return(1);
805
126
    }
806
807
17.0M
    return(0);
808
17.0M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exist, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
52.5k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
52.5k
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
52.5k
    (void) sax;
1048
1049
52.5k
    if (ctxt == NULL) return;
1050
52.5k
    sax = ctxt->sax;
1051
52.5k
#ifdef LIBXML_SAX1_ENABLED
1052
52.5k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
52.5k
        ((sax->startElementNs != NULL) ||
1054
46.5k
         (sax->endElementNs != NULL) ||
1055
46.5k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
46.5k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
52.5k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
52.5k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
52.5k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
52.5k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
52.5k
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
52.5k
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
13.3k
{
1103
13.3k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
13.5k
    while (*src == 0x20) src++;
1107
91.7k
    while (*src != 0) {
1108
78.3k
  if (*src == 0x20) {
1109
7.86k
      while (*src == 0x20) src++;
1110
3.00k
      if (*src != 0)
1111
2.55k
    *dst++ = 0x20;
1112
75.3k
  } else {
1113
75.3k
      *dst++ = *src++;
1114
75.3k
  }
1115
78.3k
    }
1116
13.3k
    *dst = 0;
1117
13.3k
    if (dst == src)
1118
12.7k
       return(NULL);
1119
582
    return(dst);
1120
13.3k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
2.02k
{
1136
2.02k
    int i;
1137
2.02k
    int remove_head = 0;
1138
2.02k
    int need_realloc = 0;
1139
2.02k
    const xmlChar *cur;
1140
1141
2.02k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
2.02k
    i = *len;
1144
2.02k
    if (i <= 0)
1145
34
        return(NULL);
1146
1147
1.99k
    cur = src;
1148
2.51k
    while (*cur == 0x20) {
1149
518
        cur++;
1150
518
  remove_head++;
1151
518
    }
1152
19.1k
    while (*cur != 0) {
1153
17.5k
  if (*cur == 0x20) {
1154
1.68k
      cur++;
1155
1.68k
      if ((*cur == 0x20) || (*cur == 0)) {
1156
385
          need_realloc = 1;
1157
385
    break;
1158
385
      }
1159
1.68k
  } else
1160
15.8k
      cur++;
1161
17.5k
    }
1162
1.99k
    if (need_realloc) {
1163
385
        xmlChar *ret;
1164
1165
385
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
385
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
385
  xmlAttrNormalizeSpace(ret, ret);
1171
385
  *len = strlen((const char *)ret);
1172
385
        return(ret);
1173
1.61k
    } else if (remove_head) {
1174
16
        *len -= remove_head;
1175
16
        memmove(src, src + remove_head, 1 + *len);
1176
16
  return(src);
1177
16
    }
1178
1.59k
    return(NULL);
1179
1.99k
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
47.4k
               const xmlChar *value) {
1195
47.4k
    xmlDefAttrsPtr defaults;
1196
47.4k
    int len;
1197
47.4k
    const xmlChar *name;
1198
47.4k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
47.4k
    if (ctxt->attsSpecial != NULL) {
1204
46.5k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
46
      return;
1206
46.5k
    }
1207
1208
47.3k
    if (ctxt->attsDefault == NULL) {
1209
2.85k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
2.85k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
2.85k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
47.3k
    name = xmlSplitQName3(fullname, &len);
1219
47.3k
    if (name == NULL) {
1220
47.0k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
47.0k
  prefix = NULL;
1222
47.0k
    } else {
1223
314
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
314
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
314
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
47.3k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
47.3k
    if (defaults == NULL) {
1232
20.7k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
20.7k
                     (4 * 5) * sizeof(const xmlChar *));
1234
20.7k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
20.7k
  defaults->nbAttrs = 0;
1237
20.7k
  defaults->maxAttrs = 4;
1238
20.7k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
20.7k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
26.5k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
110
        xmlDefAttrsPtr temp;
1245
1246
110
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
110
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
110
  if (temp == NULL)
1249
0
      goto mem_error;
1250
110
  defaults = temp;
1251
110
  defaults->maxAttrs *= 2;
1252
110
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
110
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
110
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
47.3k
    name = xmlSplitQName3(fullattr, &len);
1264
47.3k
    if (name == NULL) {
1265
39.7k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
39.7k
  prefix = NULL;
1267
39.7k
    } else {
1268
7.66k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
7.66k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
7.66k
    }
1271
1272
47.3k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
47.3k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
47.3k
    len = xmlStrlen(value);
1276
47.3k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
47.3k
    if (value == NULL)
1278
0
        goto mem_error;
1279
47.3k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
47.3k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
47.3k
    if (ctxt->external)
1282
39.5k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
7.86k
    else
1284
7.86k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
47.3k
    defaults->nbAttrs++;
1286
1287
47.3k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
47.3k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
552k
{
1309
552k
    if (ctxt->attsSpecial == NULL) {
1310
4.81k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
4.81k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
4.81k
    }
1314
1315
552k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
58
        return;
1317
1318
552k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
552k
                     (void *) (ptrdiff_t) type);
1320
552k
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
552k
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
462k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
462k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
462k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
135k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
135k
    }
1341
462k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
9.64k
{
1354
9.64k
    if (ctxt->attsSpecial == NULL)
1355
6.03k
        return;
1356
1357
3.60k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
3.60k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
406
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
406
        ctxt->attsSpecial = NULL;
1362
406
    }
1363
3.60k
    return;
1364
9.64k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
3
{
1427
3
    const xmlChar *cur = lang, *nxt;
1428
1429
3
    if (cur == NULL)
1430
0
        return (0);
1431
3
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
3
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
3
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
3
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
0
        cur += 2;
1441
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
0
            cur++;
1444
0
        return(cur[0] == 0);
1445
0
    }
1446
3
    nxt = cur;
1447
9
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
9
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
6
           nxt++;
1450
3
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
0
            return(0);
1456
0
        return(1);
1457
0
    }
1458
3
    if (nxt - cur < 2)
1459
0
        return(0);
1460
    /* we got an ISO 639 code */
1461
3
    if (nxt[0] == 0)
1462
3
        return(1);
1463
0
    if (nxt[0] != '-')
1464
0
        return(0);
1465
1466
0
    nxt++;
1467
0
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
0
        goto region_m49;
1471
1472
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
0
           nxt++;
1475
0
    if (nxt - cur == 4)
1476
0
        goto script;
1477
0
    if (nxt - cur == 2)
1478
0
        goto region;
1479
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
0
        goto variant;
1481
0
    if (nxt - cur != 3)
1482
0
        return(0);
1483
    /* we parsed an extlang */
1484
0
    if (nxt[0] == 0)
1485
0
        return(1);
1486
0
    if (nxt[0] != '-')
1487
0
        return(0);
1488
1489
0
    nxt++;
1490
0
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
0
        goto region_m49;
1494
1495
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
0
           nxt++;
1498
0
    if (nxt - cur == 2)
1499
0
        goto region;
1500
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
0
        goto variant;
1502
0
    if (nxt - cur != 4)
1503
0
        return(0);
1504
    /* we parsed a script */
1505
0
script:
1506
0
    if (nxt[0] == 0)
1507
0
        return(1);
1508
0
    if (nxt[0] != '-')
1509
0
        return(0);
1510
1511
0
    nxt++;
1512
0
    cur = nxt;
1513
    /* now we can have region or variant */
1514
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
0
        goto region_m49;
1516
1517
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
0
           nxt++;
1520
1521
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
0
        goto variant;
1523
0
    if (nxt - cur != 2)
1524
0
        return(0);
1525
    /* we parsed a region */
1526
0
region:
1527
0
    if (nxt[0] == 0)
1528
0
        return(1);
1529
0
    if (nxt[0] != '-')
1530
0
        return(0);
1531
1532
0
    nxt++;
1533
0
    cur = nxt;
1534
    /* now we can just have a variant */
1535
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
0
           nxt++;
1538
1539
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
0
        return(0);
1541
1542
    /* we parsed a variant */
1543
0
variant:
1544
0
    if (nxt[0] == 0)
1545
0
        return(1);
1546
0
    if (nxt[0] != '-')
1547
0
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
0
    return (1);
1550
1551
0
region_m49:
1552
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
0
        nxt += 3;
1555
0
        goto region;
1556
0
    }
1557
0
    return(0);
1558
0
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
4.21k
{
1584
4.21k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
151
        int i;
1586
241
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
92
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
2
          if (ctxt->nsTab[i + 1] == URL)
1590
0
        return(-2);
1591
    /* out of scope keep it */
1592
2
    break;
1593
2
      }
1594
92
  }
1595
151
    }
1596
4.21k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
2.73k
  ctxt->nsMax = 10;
1598
2.73k
  ctxt->nsNr = 0;
1599
2.73k
  ctxt->nsTab = (const xmlChar **)
1600
2.73k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
2.73k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
2.73k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
3
        const xmlChar ** tmp;
1608
3
        ctxt->nsMax *= 2;
1609
3
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
3
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
3
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
3
  ctxt->nsTab = tmp;
1617
3
    }
1618
4.21k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
4.21k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
4.21k
    return (ctxt->nsNr);
1621
4.21k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
1.82k
{
1634
1.82k
    int i;
1635
1636
1.82k
    if (ctxt->nsTab == NULL) return(0);
1637
1.82k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
1.82k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
6.64k
    for (i = 0;i < nr;i++) {
1645
4.82k
         ctxt->nsNr--;
1646
4.82k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
4.82k
    }
1648
1.82k
    return(nr);
1649
1.82k
}
1650
#endif
1651
1652
static int
1653
5.60k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
5.60k
    const xmlChar **atts;
1655
5.60k
    int *attallocs;
1656
5.60k
    int maxatts;
1657
1658
5.60k
    if (nr + 5 > ctxt->maxatts) {
1659
5.60k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
5.60k
  atts = (const xmlChar **) xmlMalloc(
1661
5.60k
             maxatts * sizeof(const xmlChar *));
1662
5.60k
  if (atts == NULL) goto mem_error;
1663
5.60k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
5.60k
                               (maxatts / 5) * sizeof(int));
1665
5.60k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
5.60k
        if (ctxt->maxatts > 0)
1670
8
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
5.60k
        xmlFree(ctxt->atts);
1672
5.60k
  ctxt->atts = atts;
1673
5.60k
  ctxt->attallocs = attallocs;
1674
5.60k
  ctxt->maxatts = maxatts;
1675
5.60k
    }
1676
5.60k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
5.60k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
10.4M
{
1694
10.4M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
10.4M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
270
        size_t newSize = ctxt->inputMax * 2;
1698
270
        xmlParserInputPtr *tmp;
1699
1700
270
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
270
                                               newSize * sizeof(*tmp));
1702
270
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
270
        ctxt->inputTab = tmp;
1707
270
        ctxt->inputMax = newSize;
1708
270
    }
1709
10.4M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
10.4M
    ctxt->input = value;
1711
10.4M
    return (ctxt->inputNr++);
1712
10.4M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
10.5M
{
1724
10.5M
    xmlParserInputPtr ret;
1725
1726
10.5M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
10.5M
    if (ctxt->inputNr <= 0)
1729
99.6k
        return (NULL);
1730
10.4M
    ctxt->inputNr--;
1731
10.4M
    if (ctxt->inputNr > 0)
1732
10.4M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
41.2k
    else
1734
41.2k
        ctxt->input = NULL;
1735
10.4M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
10.4M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
10.4M
    return (ret);
1738
10.5M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
1.10M
{
1751
1.10M
    if (ctxt == NULL) return(0);
1752
1.10M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
756
        xmlNodePtr *tmp;
1754
1755
756
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
756
                                      ctxt->nodeMax * 2 *
1757
756
                                      sizeof(ctxt->nodeTab[0]));
1758
756
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
756
        ctxt->nodeTab = tmp;
1763
756
  ctxt->nodeMax *= 2;
1764
756
    }
1765
1.10M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
1.10M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
1.10M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
1.10M
    ctxt->node = value;
1775
1.10M
    return (ctxt->nodeNr++);
1776
1.10M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
1.08M
{
1789
1.08M
    xmlNodePtr ret;
1790
1791
1.08M
    if (ctxt == NULL) return(NULL);
1792
1.08M
    if (ctxt->nodeNr <= 0)
1793
3.64k
        return (NULL);
1794
1.07M
    ctxt->nodeNr--;
1795
1.07M
    if (ctxt->nodeNr > 0)
1796
1.06M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
8.79k
    else
1798
8.79k
        ctxt->node = NULL;
1799
1.07M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
1.07M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
1.07M
    return (ret);
1802
1.08M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
1.34M
{
1821
1.34M
    xmlStartTag *tag;
1822
1823
1.34M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
973
        const xmlChar * *tmp;
1825
973
        xmlStartTag *tmp2;
1826
973
        ctxt->nameMax *= 2;
1827
973
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
973
                                    ctxt->nameMax *
1829
973
                                    sizeof(ctxt->nameTab[0]));
1830
973
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
973
  ctxt->nameTab = tmp;
1835
973
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
973
                                    ctxt->nameMax *
1837
973
                                    sizeof(ctxt->pushTab[0]));
1838
973
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
973
  ctxt->pushTab = tmp2;
1843
1.34M
    } else if (ctxt->pushTab == NULL) {
1844
16.4k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
16.4k
                                            sizeof(ctxt->pushTab[0]));
1846
16.4k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
16.4k
    }
1849
1.34M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
1.34M
    ctxt->name = value;
1851
1.34M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
1.34M
    tag->prefix = prefix;
1853
1.34M
    tag->URI = URI;
1854
1.34M
    tag->line = line;
1855
1.34M
    tag->nsNr = nsNr;
1856
1.34M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
1.34M
}
1861
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost entry of the element name stack and makes the
 * entry below it, if any, the current name (ctxt->name). The matching
 * ctxt->pushTab slot is left untouched.
 *
 * Returns the name that was removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return(NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
786k
{
1931
786k
    const xmlChar *ret;
1932
1933
786k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
786k
    ctxt->nameNr--;
1936
786k
    if (ctxt->nameNr > 0)
1937
778k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
7.70k
    else
1939
7.70k
        ctxt->name = NULL;
1940
786k
    ret = ctxt->nameTab[ctxt->nameNr];
1941
786k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
786k
    return (ret);
1943
786k
}
1944
1945
1.42M
/*
 * spacePush:
 * Appends @val to the ctxt->spaceTab stack, doubling the table first
 * when it is full, and points ctxt->space at the new entry.
 *
 * Returns the index of the new entry, or -1 if the table cannot be
 * grown (ctxt->spaceMax is then restored to its previous value).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
1963
1964
1.41M
/*
 * spacePop:
 * Removes the topmost entry of the ctxt->spaceTab stack, overwrites its
 * slot with -1 and points ctxt->space at the entry below it (or at
 * slot 0 when the stack becomes empty).
 *
 * Returns the value that was removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int top;
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often needs to make assumptions about the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
94.7M
/* Byte at the current position of the active input (same expansion as CUR). */
#define RAW (*ctxt->input->cur)
2013
92.2M
/* Byte at the current position of the active input. */
#define CUR (*ctxt->input->cur)
2014
76.2M
/* Byte at offset val from the current position; no bounds check is done. */
#define NXT(val) ctxt->input->cur[(val)]
2015
1.63M
/* The active input's current-position pointer itself (an lvalue). */
#define CUR_PTR ctxt->input->cur
2016
17.5k
/* The active input's base pointer. */
#define BASE_PTR ctxt->input->base
2017
2018
/*
 * CMPn(s, c1, ..., cn): non-zero when the first n bytes at s are exactly
 * c1..cn. Bytes are compared as unsigned char and evaluation stops at the
 * first mismatch, so a NUL-terminated buffer is never read past its
 * terminator as long as none of c1..c(n-1) is 0.
 *
 * Every argument is parenthesized so that expressions such as `p + 1`
 * or a conditional can be passed safely. `s` is still expanded once per
 * compared byte, so it must be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
35.1M
/*
 * SKIP(val): advance both the input cursor and the column counter by val,
 * then call xmlParserInputGrow() if the cursor now sits on a 0 byte.
 * The line counter is not touched.
 */
#define SKIP(val) do {             \
2037
35.1M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2038
35.1M
    if (*ctxt->input->cur == 0)           \
2039
35.1M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2040
35.1M
  } while (0)
2041
2042
8.17k
/*
 * SKIPL(val): advance the cursor val bytes one at a time, bumping the line
 * counter and resetting the column to 1 on each '\n' (otherwise bumping the
 * column), then call xmlParserInputGrow() if the cursor sits on a 0 byte.
 * NOTE(review): val is expanded unparenthesized inside `skipl<val`.
 */
#define SKIPL(val) do {             \
2043
8.17k
    int skipl;                \
2044
1.84M
    for(skipl=0; skipl<val; skipl++) {         \
2045
1.83M
  if (*(ctxt->input->cur) == '\n') {       \
2046
23.5k
  ctxt->input->line++; ctxt->input->col = 1;      \
2047
1.80M
  } else ctxt->input->col++;         \
2048
1.83M
  ctxt->input->cur++;           \
2049
1.83M
    }                  \
2050
8.17k
    if (*ctxt->input->cur == 0)           \
2051
8.17k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);     \
2052
8.17k
  } while (0)
2053
2054
29.5M
/*
 * SHRINK: call xmlSHRINK() when the context is not in progressive mode, the
 * cursor is more than 2 * INPUT_CHUNK past the input base and fewer than
 * 2 * INPUT_CHUNK bytes remain before the input end.
 * The expansion is a bare `if` statement that already ends in ';', so it
 * must not be used as the body of an if/else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
29.5M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
29.5M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
29.5M
  xmlSHRINK (ctxt);
2058
2059
54.7k
/*
 * xmlSHRINK:
 * Out-of-line part of the SHRINK macro. Shrinks the current input when
 * it is backed by a buffer that has an encoder or a read callback, then
 * asks for more data if the cursor sits on a 0 byte.
 */
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;
    int shrinkable;

    /* Don't shrink memory buffers. */
    shrinkable = (in->buf) &&
                 ((in->buf->encoder) || (in->buf->readcallback));
    if (shrinkable)
        xmlParserInputShrink(in);

    if (*in->cur == 0)
        xmlParserInputGrow(in, INPUT_CHUNK);
}
2067
2068
95.9M
/*
 * GROW: call xmlGROW() when the context is not in progressive mode and fewer
 * than INPUT_CHUNK bytes remain between the cursor and the input end.
 * The expansion is a bare `if` statement that already ends in ';', so it
 * must not be used as the body of an if/else.
 */
#define GROW if ((ctxt->progressive == 0) &&       \
2069
95.9M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
95.9M
  xmlGROW (ctxt);
2071
2072
22.8M
/*
 * xmlGROW:
 * Out-of-line part of the GROW macro. Refuses to grow (and halts the
 * parser) when a callback-fed input already spans more than
 * XML_MAX_LOOKUP_LIMIT bytes on either side of the cursor and
 * XML_PARSE_HUGE is not set; otherwise reads more data, validates the
 * cursor against the buffer bounds, and reads once more if the cursor
 * sits on a 0 byte.
 */
static void xmlGROW (xmlParserCtxtPtr ctxt) {
    xmlParserInputPtr in = ctxt->input;
    ptrdiff_t ahead = in->end - in->cur;
    ptrdiff_t behind = in->cur - in->base;
    int overLimit;
    int callbackFed;

    overLimit = (ahead > XML_MAX_LOOKUP_LIMIT) ||
                (behind > XML_MAX_LOOKUP_LIMIT);
    callbackFed = (in->buf) && (in->buf->readcallback != NULL);

    if (overLimit && callbackFed &&
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
        xmlHaltParser(ctxt);
        return;
    }

    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);

    if ((ctxt->input->cur > ctxt->input->end) ||
        (ctxt->input->cur < ctxt->input->base)) {
        /*
         * NOTE(review): unlike the path above, the parser is halted
         * before the error is raised - presumably so that error
         * reporting never looks at the out-of-range cursor; confirm
         * before reordering.
         */
        xmlHaltParser(ctxt);
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
        return;
    }

    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
}
2095
2096
22.6M
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars(ctxt); yields its return value. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2097
2098
64.6M
/* NEXT: shorthand for xmlNextChar(ctxt). */
#define NEXT xmlNextChar(ctxt)
2099
2100
3.32M
/*
 * NEXT1: bump the column and the cursor by one byte, then call
 * xmlParserInputGrow() if the cursor sits on a 0 byte. The line counter is
 * not touched. Expands to a brace block, not a do/while(0) statement.
 */
#define NEXT1 {               \
2101
3.32M
  ctxt->input->col++;           \
2102
3.32M
  ctxt->input->cur++;           \
2103
3.32M
  if (*ctxt->input->cur == 0)         \
2104
3.32M
      xmlParserInputGrow(ctxt->input, INPUT_CHUNK);   \
2105
3.32M
    }
2106
2107
16.0M
/*
 * NEXTL(l): account for the character under the cursor (new line and
 * column 1 on '\n', otherwise one more column) and advance the cursor by
 * l bytes. Unlike SKIP/NEXT1 it never asks for more input.
 */
#define NEXTL(l) do {             \
2108
16.0M
    if (*(ctxt->input->cur) == '\n') {         \
2109
227k
  ctxt->input->line++; ctxt->input->col = 1;      \
2110
15.8M
    } else ctxt->input->col++;           \
2111
16.0M
    ctxt->input->cur += l;        \
2112
16.0M
  } while (0)
2113
2114
17.9M
/* CUR_CHAR(l): xmlCurrentChar() on the context; l (an lvalue) is passed by address. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2115
414M
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on string s; l is passed by address. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2116
2117
/*
 * COPY_BUF(l,b,i,v): append v to buffer b at index i. When l == 1 the value
 * is stored as a single element and i is incremented; otherwise
 * xmlCopyCharMultiByte() writes it and i advances by its return value.
 * Expands to a bare if/else with no trailing ';' and no braces.
 */
#define COPY_BUF(l,b,i,v)           \
2118
418M
    if (l == 1) b[i++] = v;           \
2119
418M
    else i += xmlCopyCharMultiByte(&b[i],v)
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
22.6M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
22.6M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
22.6M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
22.6M
        (ctxt->instate == XML_PARSER_START)) {
2141
4.84M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
4.84M
  cur = ctxt->input->cur;
2146
4.84M
  while (IS_BLANK_CH(*cur)) {
2147
1.20M
      if (*cur == '\n') {
2148
58.9k
    ctxt->input->line++; ctxt->input->col = 1;
2149
1.14M
      } else {
2150
1.14M
    ctxt->input->col++;
2151
1.14M
      }
2152
1.20M
      cur++;
2153
1.20M
      if (res < INT_MAX)
2154
1.20M
    res++;
2155
1.20M
      if (*cur == 0) {
2156
4.15k
    ctxt->input->cur = cur;
2157
4.15k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
4.15k
    cur = ctxt->input->cur;
2159
4.15k
      }
2160
1.20M
  }
2161
4.84M
  ctxt->input->cur = cur;
2162
17.7M
    } else {
2163
17.7M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
72.6M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
72.6M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
33.5M
    NEXT;
2168
39.1M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
11.0M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
140k
                    break;
2174
10.9M
          xmlParsePEReference(ctxt);
2175
28.0M
            } else if (CUR == 0) {
2176
10.4M
                unsigned long consumed;
2177
10.4M
                xmlEntityPtr ent;
2178
2179
10.4M
                if (ctxt->inputNr <= 1)
2180
3.00k
                    break;
2181
2182
10.4M
                consumed = ctxt->input->consumed;
2183
10.4M
                xmlSaturatedAddSizeT(&consumed,
2184
10.4M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
10.4M
                ent = ctxt->input->entity;
2191
10.4M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
10.4M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
258
                    ent->flags |= XML_ENT_PARSED;
2194
2195
258
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
258
                }
2197
2198
10.4M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
10.4M
                xmlPopInput(ctxt);
2201
17.6M
            } else {
2202
17.6M
                break;
2203
17.6M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
54.9M
      if (res < INT_MAX)
2213
54.9M
    res++;
2214
54.9M
        }
2215
17.7M
    }
2216
22.6M
    return(res);
2217
22.6M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
10.4M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
10.4M
    xmlParserInputPtr input;
2237
2238
10.4M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
10.4M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
10.4M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
10.4M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
10.4M
    input = inputPop(ctxt);
2247
10.4M
    if (input->entity != NULL)
2248
10.4M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
10.4M
    xmlFreeInputStream(input);
2250
10.4M
    if (*ctxt->input->cur == 0)
2251
4.97M
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
10.4M
    return(CUR);
2253
10.4M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
10.4M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
10.4M
    int ret;
2267
10.4M
    if (input == NULL) return(-1);
2268
2269
10.4M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
10.4M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
10.4M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
10.4M
    ret = inputPush(ctxt, input);
2285
10.4M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
10.4M
    GROW;
2288
10.4M
    return(ret);
2289
10.4M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
28.5k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
28.5k
    int val = 0;
2311
28.5k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
28.5k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
28.5k
        (NXT(2) == 'x')) {
2318
3.94k
  SKIP(3);
2319
3.94k
  GROW;
2320
8.91k
  while (RAW != ';') { /* loop blocked by count */
2321
5.17k
      if (count++ > 20) {
2322
0
    count = 0;
2323
0
    GROW;
2324
0
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
0
      }
2327
5.17k
      if ((RAW >= '0') && (RAW <= '9'))
2328
1.98k
          val = val * 16 + (CUR - '0');
2329
3.18k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
2.83k
          val = val * 16 + (CUR - 'a') + 10;
2331
356
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
158
          val = val * 16 + (CUR - 'A') + 10;
2333
198
      else {
2334
198
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
198
    val = 0;
2336
198
    break;
2337
198
      }
2338
4.97k
      if (val > 0x110000)
2339
11
          val = 0x110000;
2340
2341
4.97k
      NEXT;
2342
4.97k
      count++;
2343
4.97k
  }
2344
3.94k
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
3.74k
      ctxt->input->col++;
2347
3.74k
      ctxt->input->cur++;
2348
3.74k
  }
2349
24.6k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
24.6k
  SKIP(2);
2351
24.6k
  GROW;
2352
154k
  while (RAW != ';') { /* loop blocked by count */
2353
131k
      if (count++ > 20) {
2354
6.60k
    count = 0;
2355
6.60k
    GROW;
2356
6.60k
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
6.60k
      }
2359
131k
      if ((RAW >= '0') && (RAW <= '9'))
2360
129k
          val = val * 10 + (CUR - '0');
2361
1.58k
      else {
2362
1.58k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
1.58k
    val = 0;
2364
1.58k
    break;
2365
1.58k
      }
2366
129k
      if (val > 0x110000)
2367
72.3k
          val = 0x110000;
2368
2369
129k
      NEXT;
2370
129k
      count++;
2371
129k
  }
2372
24.6k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
23.0k
      ctxt->input->col++;
2375
23.0k
      ctxt->input->cur++;
2376
23.0k
  }
2377
24.6k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
28.5k
    if (val >= 0x110000) {
2389
39
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
39
                "xmlParseCharRef: character reference out of bounds\n",
2391
39
          val);
2392
28.5k
    } else if (IS_CHAR(val)) {
2393
26.6k
        return(val);
2394
26.6k
    } else {
2395
1.85k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
1.85k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
1.85k
                    val);
2398
1.85k
    }
2399
1.89k
    return(0);
2400
28.5k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
53.4k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
53.4k
    const xmlChar *ptr;
2423
53.4k
    xmlChar cur;
2424
53.4k
    int val = 0;
2425
2426
53.4k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
53.4k
    ptr = *str;
2428
53.4k
    cur = *ptr;
2429
53.4k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
1.59k
  ptr += 3;
2431
1.59k
  cur = *ptr;
2432
3.55k
  while (cur != ';') { /* Non input consuming loop */
2433
2.01k
      if ((cur >= '0') && (cur <= '9'))
2434
659
          val = val * 16 + (cur - '0');
2435
1.35k
      else if ((cur >= 'a') && (cur <= 'f'))
2436
38
          val = val * 16 + (cur - 'a') + 10;
2437
1.31k
      else if ((cur >= 'A') && (cur <= 'F'))
2438
1.26k
          val = val * 16 + (cur - 'A') + 10;
2439
53
      else {
2440
53
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
53
    val = 0;
2442
53
    break;
2443
53
      }
2444
1.96k
      if (val > 0x110000)
2445
39
          val = 0x110000;
2446
2447
1.96k
      ptr++;
2448
1.96k
      cur = *ptr;
2449
1.96k
  }
2450
1.59k
  if (cur == ';')
2451
1.53k
      ptr++;
2452
51.8k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
51.8k
  ptr += 2;
2454
51.8k
  cur = *ptr;
2455
172k
  while (cur != ';') { /* Non input consuming loops */
2456
121k
      if ((cur >= '0') && (cur <= '9'))
2457
121k
          val = val * 10 + (cur - '0');
2458
208
      else {
2459
208
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
208
    val = 0;
2461
208
    break;
2462
208
      }
2463
121k
      if (val > 0x110000)
2464
54
          val = 0x110000;
2465
2466
121k
      ptr++;
2467
121k
      cur = *ptr;
2468
121k
  }
2469
51.8k
  if (cur == ';')
2470
51.6k
      ptr++;
2471
51.8k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
53.4k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
53.4k
    if (val >= 0x110000) {
2483
6
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
6
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
6
                val);
2486
53.4k
    } else if (IS_CHAR(val)) {
2487
53.1k
        return(val);
2488
53.1k
    } else {
2489
278
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
278
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
278
        val);
2492
278
    }
2493
284
    return(0);
2494
53.4k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * growBuffer(buffer, n): macro used to grow the current buffer.
 *
 * Reallocates `buffer` with xmlRealloc() to (2 * buffer##_size + n) bytes
 * and updates buffer##_size on success.
 *
 * Requirements on the expansion site:
 *  - buffer##_size is expected to be a size_t
 *  - a `mem_error:` label must exist; it is jumped to when the new size
 *    wraps around below the old one or when the reallocation fails. In
 *    both cases `buffer` and buffer##_size are left unchanged.
 *
 * Expands to a brace block that declares its own `tmp` and `new_size`.
 */
2592
726k
#define growBuffer(buffer, n) {           \
2593
726k
    xmlChar *tmp;             \
2594
726k
    size_t new_size = buffer##_size * 2 + n;                            \
2595
726k
    if (new_size < buffer##_size) goto mem_error;                       \
2596
726k
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
2597
726k
    if (tmp == NULL) goto mem_error;         \
2598
726k
    buffer = tmp;             \
2599
726k
    buffer##_size = new_size;                                           \
2600
726k
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
6.37M
                           int check) {
2617
6.37M
    xmlChar *buffer = NULL;
2618
6.37M
    size_t buffer_size = 0;
2619
6.37M
    size_t nbchars = 0;
2620
2621
6.37M
    xmlChar *current = NULL;
2622
6.37M
    xmlChar *rep = NULL;
2623
6.37M
    const xmlChar *last;
2624
6.37M
    xmlEntityPtr ent;
2625
6.37M
    int c,l;
2626
2627
6.37M
    if (str == NULL)
2628
85
        return(NULL);
2629
6.37M
    last = str + len;
2630
2631
6.37M
    if (((ctxt->depth > 40) &&
2632
6.37M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
6.37M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
6.37M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
6.37M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
6.37M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
6.37M
    if (str < last)
2651
6.32M
  c = CUR_SCHAR(str, l);
2652
55.6k
    else
2653
55.6k
        c = 0;
2654
348M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
348M
           (c != end2) && (c != end3) &&
2656
348M
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
342M
  if (c == 0) break;
2659
342M
        if ((c == '&') && (str[1] == '#')) {
2660
53.4k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
53.4k
      if (val == 0)
2662
284
                goto int_error;
2663
53.1k
      COPY_BUF(0,buffer,nbchars,val);
2664
53.1k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
0
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
0
      }
2667
341M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
5.57M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
5.57M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
5.57M
      if ((ent != NULL) &&
2674
5.57M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
3.87k
    if (ent->content != NULL) {
2676
3.87k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
3.87k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
0
        }
2680
3.87k
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
5.57M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
5.26M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
18
                    goto int_error;
2688
2689
5.26M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
34
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
34
                    xmlHaltParser(ctxt);
2692
34
                    ent->content[0] = 0;
2693
34
                    goto int_error;
2694
34
                }
2695
2696
5.26M
                ent->flags |= XML_ENT_EXPANDING;
2697
5.26M
    ctxt->depth++;
2698
5.26M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
5.26M
                        ent->length, what, 0, 0, 0, check);
2700
5.26M
    ctxt->depth--;
2701
5.26M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
5.26M
    if (rep == NULL) {
2704
376
                    ent->content[0] = 0;
2705
376
                    goto int_error;
2706
376
                }
2707
2708
5.26M
                current = rep;
2709
734M
                while (*current != 0) { /* non input consuming loop */
2710
729M
                    buffer[nbchars++] = *current++;
2711
729M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
1.35M
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
1.35M
                    }
2714
729M
                }
2715
5.26M
                xmlFree(rep);
2716
5.26M
                rep = NULL;
2717
5.26M
      } else if (ent != NULL) {
2718
12
    int i = xmlStrlen(ent->name);
2719
12
    const xmlChar *cur = ent->name;
2720
2721
12
    buffer[nbchars++] = '&';
2722
12
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
0
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
0
    }
2725
24
    for (;i > 0;i--)
2726
12
        buffer[nbchars++] = *cur++;
2727
12
    buffer[nbchars++] = ';';
2728
12
      }
2729
336M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
154k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
154k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
154k
      if (ent != NULL) {
2735
151k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
102
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
102
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
102
      (ctxt->validate != 0)) {
2745
102
      xmlLoadEntityContent(ctxt, ent);
2746
102
        } else {
2747
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
0
      "not validating will not read content for PE entity %s\n",
2749
0
                          ent->name, NULL);
2750
0
        }
2751
102
    }
2752
2753
151k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
12
                    goto int_error;
2755
2756
151k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
3
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
3
                    xmlHaltParser(ctxt);
2759
3
                    if (ent->content != NULL)
2760
3
                        ent->content[0] = 0;
2761
3
                    goto int_error;
2762
3
                }
2763
2764
151k
                ent->flags |= XML_ENT_EXPANDING;
2765
151k
    ctxt->depth++;
2766
151k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
151k
                        ent->length, what, 0, 0, 0, check);
2768
151k
    ctxt->depth--;
2769
151k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
151k
    if (rep == NULL) {
2772
72
                    if (ent->content != NULL)
2773
3
                        ent->content[0] = 0;
2774
72
                    goto int_error;
2775
72
                }
2776
151k
                current = rep;
2777
123M
                while (*current != 0) { /* non input consuming loop */
2778
123M
                    buffer[nbchars++] = *current++;
2779
123M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
28.0k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
28.0k
                    }
2782
123M
                }
2783
151k
                xmlFree(rep);
2784
151k
                rep = NULL;
2785
151k
      }
2786
336M
  } else {
2787
336M
      COPY_BUF(l,buffer,nbchars,c);
2788
336M
      str += l;
2789
336M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
58.2k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
58.2k
      }
2792
336M
  }
2793
342M
  if (str < last)
2794
335M
      c = CUR_SCHAR(str, l);
2795
6.32M
  else
2796
6.32M
      c = 0;
2797
342M
    }
2798
6.37M
    buffer[nbchars] = 0;
2799
6.37M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
799
int_error:
2804
799
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
799
    if (buffer != NULL)
2807
799
        xmlFree(buffer);
2808
799
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
57
                           xmlChar end3) {
2836
57
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
57
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
57
                                      end, end2, end3, 0));
2840
57
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
850
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
850
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
850
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
850
                                      end, end2, end3, 0));
2868
850
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
195k
                     int blank_chars) {
2890
195k
    int i, ret;
2891
195k
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
195k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
4.73k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
190k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
190k
        (*(ctxt->space) == -2))
2905
55.2k
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
135k
    if (blank_chars == 0) {
2911
193k
  for (i = 0;i < len;i++)
2912
160k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
56.2k
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
112k
    if (ctxt->node == NULL) return(0);
2919
110k
    if (ctxt->myDoc != NULL) {
2920
110k
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
110k
        if (ret == 0) return(1);
2922
45.3k
        if (ret == 1) return(0);
2923
45.3k
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
42.4k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
42.0k
    if ((ctxt->node->children == NULL) &&
2930
42.0k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
41.9k
    lastChild = xmlGetLastChild(ctxt->node);
2933
41.9k
    if (lastChild == NULL) {
2934
9.23k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
9.23k
            (ctxt->node->content != NULL)) return(0);
2936
32.7k
    } else if (xmlNodeIsText(lastChild))
2937
304
        return(0);
2938
32.4k
    else if ((ctxt->node->children != NULL) &&
2939
32.4k
             (xmlNodeIsText(ctxt->node->children)))
2940
830
        return(0);
2941
40.8k
    return(1);
2942
41.9k
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
816k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
816k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
816k
    xmlChar *buffer = NULL;
2973
816k
    int len = 0;
2974
816k
    int max = XML_MAX_NAMELEN;
2975
816k
    xmlChar *ret = NULL;
2976
816k
    const xmlChar *cur = name;
2977
816k
    int c;
2978
2979
816k
    if (prefix == NULL) return(NULL);
2980
816k
    *prefix = NULL;
2981
2982
816k
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
816k
    if (cur[0] == ':')
2993
30
  return(xmlStrdup(name));
2994
2995
816k
    c = *cur++;
2996
4.12M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
3.31M
  buf[len++] = c;
2998
3.31M
  c = *cur++;
2999
3.31M
    }
3000
816k
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
10
  max = len * 2;
3006
3007
10
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
10
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
10
  memcpy(buffer, buf, len);
3013
1.10k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
1.09k
      if (len + 10 > max) {
3015
8
          xmlChar *tmp;
3016
3017
8
    max *= 2;
3018
8
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
8
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
8
    buffer = tmp;
3025
8
      }
3026
1.09k
      buffer[len++] = c;
3027
1.09k
      c = *cur++;
3028
1.09k
  }
3029
10
  buffer[len] = 0;
3030
10
    }
3031
3032
816k
    if ((c == ':') && (*cur == 0)) {
3033
45
        if (buffer != NULL)
3034
0
      xmlFree(buffer);
3035
45
  *prefix = NULL;
3036
45
  return(xmlStrdup(name));
3037
45
    }
3038
3039
816k
    if (buffer == NULL)
3040
816k
  ret = xmlStrndup(buf, len);
3041
10
    else {
3042
10
  ret = buffer;
3043
10
  buffer = NULL;
3044
10
  max = XML_MAX_NAMELEN;
3045
10
    }
3046
3047
3048
816k
    if (c == ':') {
3049
14.8k
  c = *cur;
3050
14.8k
        *prefix = ret;
3051
14.8k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
14.8k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
14.8k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
14.8k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
14.8k
        (c == '_') || (c == ':'))) {
3063
18
      int l;
3064
18
      int first = CUR_SCHAR(cur, l);
3065
3066
18
      if (!IS_LETTER(first) && (first != '_')) {
3067
9
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
9
          "Name %s is not XML Namespace compliant\n",
3069
9
          name);
3070
9
      }
3071
18
  }
3072
14.8k
  cur++;
3073
3074
89.1k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
74.2k
      buf[len++] = c;
3076
74.2k
      c = *cur++;
3077
74.2k
  }
3078
14.8k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
0
      max = len * 2;
3084
3085
0
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
0
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
0
      memcpy(buffer, buf, len);
3091
0
      while (c != 0) { /* tested bigname2.xml */
3092
0
    if (len + 10 > max) {
3093
0
        xmlChar *tmp;
3094
3095
0
        max *= 2;
3096
0
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
0
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
0
        buffer = tmp;
3103
0
    }
3104
0
    buffer[len++] = c;
3105
0
    c = *cur++;
3106
0
      }
3107
0
      buffer[len] = 0;
3108
0
  }
3109
3110
14.8k
  if (buffer == NULL)
3111
14.8k
      ret = xmlStrndup(buf, len);
3112
0
  else {
3113
0
      ret = buffer;
3114
0
  }
3115
14.8k
    }
3116
3117
816k
    return(ret);
3118
816k
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
5.91M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
5.91M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
5.89M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
5.89M
      (((c >= 'a') && (c <= 'z')) ||
3160
5.89M
       ((c >= 'A') && (c <= 'Z')) ||
3161
5.89M
       (c == '_') || (c == ':') ||
3162
5.89M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
5.89M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
5.89M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
5.89M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
5.89M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
5.89M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
5.89M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
5.89M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
5.89M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
5.89M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
5.89M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
5.89M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
5.87M
      return(1);
3175
5.89M
    } else {
3176
26.1k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
25.5k
      return(1);
3178
26.1k
    }
3179
12.0k
    return(0);
3180
5.91M
}
3181
3182
static int
3183
67.3M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
67.3M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
67.3M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
67.3M
      (((c >= 'a') && (c <= 'z')) ||
3191
67.2M
       ((c >= 'A') && (c <= 'Z')) ||
3192
67.2M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
67.2M
       (c == '_') || (c == ':') ||
3194
67.2M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
67.2M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
67.2M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
67.2M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
67.2M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
67.2M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
67.2M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
67.2M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
67.2M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
67.2M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
67.2M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
67.2M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
67.2M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
67.2M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
67.2M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
61.2M
       return(1);
3210
67.3M
    } else {
3211
69.5k
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
69.5k
            (c == '.') || (c == '-') ||
3213
69.5k
      (c == '_') || (c == ':') ||
3214
69.5k
      (IS_COMBINING(c)) ||
3215
69.5k
      (IS_EXTENDER(c)))
3216
43.9k
      return(1);
3217
69.5k
    }
3218
6.04M
    return(0);
3219
67.3M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
25.8k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
25.8k
    int len = 0, l;
3227
25.8k
    int c;
3228
25.8k
    int count = 0;
3229
25.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
3.84k
                    XML_MAX_TEXT_LENGTH :
3231
25.8k
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
25.8k
    GROW;
3241
25.8k
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
25.8k
    c = CUR_CHAR(l);
3244
25.8k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
19.4k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
19.4k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
19.1k
         ((c >= 'A') && (c <= 'Z')) ||
3252
19.1k
         (c == '_') || (c == ':') ||
3253
19.1k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
19.1k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
19.1k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
19.1k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
19.1k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
19.1k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
19.1k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
19.1k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
19.1k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
19.1k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
19.1k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
19.1k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
7.24k
      return(NULL);
3266
7.24k
  }
3267
12.2k
  len += l;
3268
12.2k
  NEXTL(l);
3269
12.2k
  c = CUR_CHAR(l);
3270
368k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
368k
         (((c >= 'a') && (c <= 'z')) ||
3272
367k
          ((c >= 'A') && (c <= 'Z')) ||
3273
367k
          ((c >= '0') && (c <= '9')) || /* !start */
3274
367k
          (c == '_') || (c == ':') ||
3275
367k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
367k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
367k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
367k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
367k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
367k
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
367k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
367k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
367k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
367k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
367k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
367k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
367k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
367k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
367k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
367k
    )) {
3291
356k
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
2.76k
    count = 0;
3293
2.76k
    GROW;
3294
2.76k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
2.76k
      }
3297
356k
            if (len <= INT_MAX - l)
3298
356k
          len += l;
3299
356k
      NEXTL(l);
3300
356k
      c = CUR_CHAR(l);
3301
356k
  }
3302
12.2k
    } else {
3303
6.40k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
6.40k
      (!IS_LETTER(c) && (c != '_') &&
3305
6.16k
       (c != ':'))) {
3306
5.24k
      return(NULL);
3307
5.24k
  }
3308
1.16k
  len += l;
3309
1.16k
  NEXTL(l);
3310
1.16k
  c = CUR_CHAR(l);
3311
3312
28.9k
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
28.9k
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
28.7k
    (c == '.') || (c == '-') ||
3315
28.7k
    (c == '_') || (c == ':') ||
3316
28.7k
    (IS_COMBINING(c)) ||
3317
28.7k
    (IS_EXTENDER(c)))) {
3318
27.8k
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
203
    count = 0;
3320
203
    GROW;
3321
203
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
203
      }
3324
27.8k
            if (len <= INT_MAX - l)
3325
27.8k
          len += l;
3326
27.8k
      NEXTL(l);
3327
27.8k
      c = CUR_CHAR(l);
3328
27.8k
  }
3329
1.16k
    }
3330
13.3k
    if (len > maxLength) {
3331
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
        return(NULL);
3333
0
    }
3334
13.3k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
13.3k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
43
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
13.3k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
13.3k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
15.7M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
15.7M
    const xmlChar *in;
3370
15.7M
    const xmlChar *ret;
3371
15.7M
    size_t count = 0;
3372
15.7M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
82.6k
                       XML_MAX_TEXT_LENGTH :
3374
15.7M
                       XML_MAX_NAME_LENGTH;
3375
3376
15.7M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
15.7M
    in = ctxt->input->cur;
3386
15.7M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
15.7M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
15.7M
  (*in == '_') || (*in == ':')) {
3389
15.7M
  in++;
3390
65.1M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
65.1M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
65.1M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
65.1M
         (*in == '_') || (*in == '-') ||
3394
65.1M
         (*in == ':') || (*in == '.'))
3395
49.4M
      in++;
3396
15.7M
  if ((*in > 0) && (*in < 0x80)) {
3397
15.7M
      count = in - ctxt->input->cur;
3398
15.7M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
15.7M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
15.7M
      ctxt->input->cur = in;
3404
15.7M
      ctxt->input->col += count;
3405
15.7M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
15.7M
      return(ret);
3408
15.7M
  }
3409
15.7M
    }
3410
    /* accelerator for special cases */
3411
25.8k
    return(xmlParseNameComplex(ctxt));
3412
15.7M
}
3413
3414
static const xmlChar *
3415
15.0k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
15.0k
    int len = 0, l;
3417
15.0k
    int c;
3418
15.0k
    int count = 0;
3419
15.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
784
                    XML_MAX_TEXT_LENGTH :
3421
15.0k
                    XML_MAX_NAME_LENGTH;
3422
15.0k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
15.0k
    GROW;
3432
15.0k
    startPosition = CUR_PTR - BASE_PTR;
3433
15.0k
    c = CUR_CHAR(l);
3434
15.0k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
15.0k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
12.4k
  return(NULL);
3437
12.4k
    }
3438
3439
169k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
169k
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
166k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
1.46k
      count = 0;
3443
1.46k
      GROW;
3444
1.46k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
1.46k
  }
3447
166k
        if (len <= INT_MAX - l)
3448
166k
      len += l;
3449
166k
  NEXTL(l);
3450
166k
  c = CUR_CHAR(l);
3451
166k
  if (c == 0) {
3452
408
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
408
      ctxt->input->cur -= l;
3459
408
      GROW;
3460
408
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
408
      ctxt->input->cur += l;
3463
408
      c = CUR_CHAR(l);
3464
408
  }
3465
166k
    }
3466
2.52k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
2.52k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
2.52k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
1.94M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
1.94M
    const xmlChar *in, *e;
3491
1.94M
    const xmlChar *ret;
3492
1.94M
    size_t count = 0;
3493
1.94M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
6.30k
                       XML_MAX_TEXT_LENGTH :
3495
1.94M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
1.94M
    in = ctxt->input->cur;
3505
1.94M
    e = ctxt->input->end;
3506
1.94M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
1.94M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
1.94M
   (*in == '_')) && (in < e)) {
3509
1.93M
  in++;
3510
7.48M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
7.48M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
7.48M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
7.48M
          (*in == '_') || (*in == '-') ||
3514
7.48M
          (*in == '.')) && (in < e))
3515
5.54M
      in++;
3516
1.93M
  if (in >= e)
3517
64
      goto complex;
3518
1.93M
  if ((*in > 0) && (*in < 0x80)) {
3519
1.93M
      count = in - ctxt->input->cur;
3520
1.93M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
1.93M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
1.93M
      ctxt->input->cur = in;
3526
1.93M
      ctxt->input->col += count;
3527
1.93M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
1.93M
      return(ret);
3531
1.93M
  }
3532
1.93M
    }
3533
15.0k
complex:
3534
15.0k
    return(xmlParseNCNameComplex(ctxt));
3535
1.94M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
1.27M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
1.27M
    register const xmlChar *cmp = other;
3551
1.27M
    register const xmlChar *in;
3552
1.27M
    const xmlChar *ret;
3553
3554
1.27M
    GROW;
3555
1.27M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
1.27M
    in = ctxt->input->cur;
3559
6.18M
    while (*in != 0 && *in == *cmp) {
3560
4.91M
  ++in;
3561
4.91M
  ++cmp;
3562
4.91M
    }
3563
1.27M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
1.26M
  ctxt->input->col += in - ctxt->input->cur;
3566
1.26M
  ctxt->input->cur = in;
3567
1.26M
  return (const xmlChar*) 1;
3568
1.26M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
7.51k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
7.51k
    if (ret == other) {
3573
144
  return (const xmlChar*) 1;
3574
144
    }
3575
7.36k
    return ret;
3576
7.51k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
5.90M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
5.90M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
5.90M
    const xmlChar *cur = *str;
3600
5.90M
    int len = 0, l;
3601
5.90M
    int c;
3602
5.90M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
64.5k
                    XML_MAX_TEXT_LENGTH :
3604
5.90M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
5.90M
    c = CUR_SCHAR(cur, l);
3611
5.90M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
163
  return(NULL);
3613
163
    }
3614
3615
5.90M
    COPY_BUF(l,buf,len,c);
3616
5.90M
    cur += l;
3617
5.90M
    c = CUR_SCHAR(cur, l);
3618
30.0M
    while (xmlIsNameChar(ctxt, c)) {
3619
24.3M
  COPY_BUF(l,buf,len,c);
3620
24.3M
  cur += l;
3621
24.3M
  c = CUR_SCHAR(cur, l);
3622
24.3M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
158k
      xmlChar *buffer;
3628
158k
      int max = len * 2;
3629
3630
158k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
158k
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
158k
      memcpy(buffer, buf, len);
3636
36.1M
      while (xmlIsNameChar(ctxt, c)) {
3637
36.0M
    if (len + 10 > max) {
3638
158k
        xmlChar *tmp;
3639
3640
158k
        max *= 2;
3641
158k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
158k
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
158k
        buffer = tmp;
3648
158k
    }
3649
36.0M
    COPY_BUF(l,buffer,len,c);
3650
36.0M
    cur += l;
3651
36.0M
    c = CUR_SCHAR(cur, l);
3652
36.0M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
36.0M
      }
3658
158k
      buffer[len] = 0;
3659
158k
      *str = cur;
3660
158k
      return(buffer);
3661
158k
  }
3662
24.3M
    }
3663
5.74M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
5.74M
    *str = cur;
3668
5.74M
    return(xmlStrndup(buf, len));
3669
5.74M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
139k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
139k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
139k
    int len = 0, l;
3690
139k
    int c;
3691
139k
    int count = 0;
3692
139k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
58
                    XML_MAX_TEXT_LENGTH :
3694
139k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
139k
    GROW;
3701
139k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
139k
    c = CUR_CHAR(l);
3704
3705
859k
    while (xmlIsNameChar(ctxt, c)) {
3706
720k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
720k
  COPY_BUF(l,buf,len,c);
3711
720k
  NEXTL(l);
3712
720k
  c = CUR_CHAR(l);
3713
720k
  if (c == 0) {
3714
19
      count = 0;
3715
19
      GROW;
3716
19
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
19
            c = CUR_CHAR(l);
3719
19
  }
3720
720k
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
54
      xmlChar *buffer;
3726
54
      int max = len * 2;
3727
3728
54
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
54
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
54
      memcpy(buffer, buf, len);
3734
106k
      while (xmlIsNameChar(ctxt, c)) {
3735
106k
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
1.07k
        count = 0;
3737
1.07k
        GROW;
3738
1.07k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
1.07k
    }
3743
106k
    if (len + 10 > max) {
3744
147
        xmlChar *tmp;
3745
3746
147
        max *= 2;
3747
147
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
147
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
147
        buffer = tmp;
3754
147
    }
3755
106k
    COPY_BUF(l,buffer,len,c);
3756
106k
    NEXTL(l);
3757
106k
    c = CUR_CHAR(l);
3758
106k
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
106k
      }
3764
54
      buffer[len] = 0;
3765
54
      return(buffer);
3766
54
  }
3767
720k
    }
3768
139k
    if (len == 0)
3769
222
        return(NULL);
3770
139k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
139k
    return(xmlStrndup(buf, len));
3775
139k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
212k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
212k
    xmlChar *buf = NULL;
3795
212k
    int len = 0;
3796
212k
    int size = XML_PARSER_BUFFER_SIZE;
3797
212k
    int c, l;
3798
212k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
1.98k
                    XML_MAX_HUGE_LENGTH :
3800
212k
                    XML_MAX_TEXT_LENGTH;
3801
212k
    xmlChar stop;
3802
212k
    xmlChar *ret = NULL;
3803
212k
    const xmlChar *cur = NULL;
3804
212k
    xmlParserInputPtr input;
3805
3806
212k
    if (RAW == '"') stop = '"';
3807
37.7k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
212k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
212k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
212k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
212k
    input = ctxt->input;
3824
212k
    GROW;
3825
212k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
212k
    NEXT;
3828
212k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
10.5M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
10.5M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
10.3M
  if (len + 5 >= size) {
3841
35.9k
      xmlChar *tmp;
3842
3843
35.9k
      size *= 2;
3844
35.9k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
35.9k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
35.9k
      buf = tmp;
3850
35.9k
  }
3851
10.3M
  COPY_BUF(l,buf,len,c);
3852
10.3M
  NEXTL(l);
3853
3854
10.3M
  GROW;
3855
10.3M
  c = CUR_CHAR(l);
3856
10.3M
  if (c == 0) {
3857
170
      GROW;
3858
170
      c = CUR_CHAR(l);
3859
170
  }
3860
3861
10.3M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
10.3M
    }
3867
212k
    buf[len] = 0;
3868
212k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
212k
    if (c != stop) {
3871
302
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
302
        goto error;
3873
302
    }
3874
212k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
212k
    cur = buf;
3882
7.79M
    while (*cur != 0) { /* non input consuming */
3883
7.58M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
171k
      xmlChar *name;
3885
171k
      xmlChar tmp = *cur;
3886
171k
            int nameOk = 0;
3887
3888
171k
      cur++;
3889
171k
      name = xmlParseStringName(ctxt, &cur);
3890
171k
            if (name != NULL) {
3891
171k
                nameOk = 1;
3892
171k
                xmlFree(name);
3893
171k
            }
3894
171k
            if ((nameOk == 0) || (*cur != ';')) {
3895
381
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
381
      "EntityValue: '%c' forbidden except for entities references\n",
3897
381
                            tmp);
3898
381
                goto error;
3899
381
      }
3900
171k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
171k
    (ctxt->inputNr == 1)) {
3902
7
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
7
                goto error;
3904
7
      }
3905
171k
      if (*cur == 0)
3906
0
          break;
3907
171k
  }
3908
7.58M
  cur++;
3909
7.58M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
211k
    ++ctxt->depth;
3920
211k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
211k
                                     0, 0, 0, /* check */ 1);
3922
211k
    --ctxt->depth;
3923
3924
211k
    if (orig != NULL) {
3925
211k
        *orig = buf;
3926
211k
        buf = NULL;
3927
211k
    }
3928
3929
212k
error:
3930
212k
    if (buf != NULL)
3931
690
        xmlFree(buf);
3932
212k
    return(ret);
3933
211k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
31.5k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
31.5k
    xmlChar limit = 0;
3950
31.5k
    xmlChar *buf = NULL;
3951
31.5k
    xmlChar *rep = NULL;
3952
31.5k
    size_t len = 0;
3953
31.5k
    size_t buf_size = 0;
3954
31.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
1.26k
                       XML_MAX_HUGE_LENGTH :
3956
31.5k
                       XML_MAX_TEXT_LENGTH;
3957
31.5k
    int c, l, in_space = 0;
3958
31.5k
    xmlChar *current = NULL;
3959
31.5k
    xmlEntityPtr ent;
3960
3961
31.5k
    if (NXT(0) == '"') {
3962
9.96k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
9.96k
  limit = '"';
3964
9.96k
        NEXT;
3965
21.6k
    } else if (NXT(0) == '\'') {
3966
21.6k
  limit = '\'';
3967
21.6k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
21.6k
        NEXT;
3969
21.6k
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
31.5k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
31.5k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
31.5k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
31.5k
    c = CUR_CHAR(l);
3985
1.90M
    while (((NXT(0) != limit) && /* checked */
3986
1.90M
            (IS_CHAR(c)) && (c != '<')) &&
3987
1.90M
            (ctxt->instate != XML_PARSER_EOF)) {
3988
1.87M
  if (c == '&') {
3989
1.14M
      in_space = 0;
3990
1.14M
      if (NXT(1) == '#') {
3991
8.98k
    int val = xmlParseCharRef(ctxt);
3992
3993
8.98k
    if (val == '&') {
3994
45
        if (ctxt->replaceEntities) {
3995
36
      if (len + 10 > buf_size) {
3996
0
          growBuffer(buf, 10);
3997
0
      }
3998
36
      buf[len++] = '&';
3999
36
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
9
      if (len + 10 > buf_size) {
4005
0
          growBuffer(buf, 10);
4006
0
      }
4007
9
      buf[len++] = '&';
4008
9
      buf[len++] = '#';
4009
9
      buf[len++] = '3';
4010
9
      buf[len++] = '8';
4011
9
      buf[len++] = ';';
4012
9
        }
4013
8.93k
    } else if (val != 0) {
4014
8.36k
        if (len + 10 > buf_size) {
4015
156
      growBuffer(buf, 10);
4016
156
        }
4017
8.36k
        len += xmlCopyChar(0, &buf[len], val);
4018
8.36k
    }
4019
1.13M
      } else {
4020
1.13M
    ent = xmlParseEntityRef(ctxt);
4021
1.13M
    if ((ent != NULL) &&
4022
1.13M
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
4.92k
        if (len + 10 > buf_size) {
4024
12
      growBuffer(buf, 10);
4025
12
        }
4026
4.92k
        if ((ctxt->replaceEntities == 0) &&
4027
4.92k
            (ent->content[0] == '&')) {
4028
587
      buf[len++] = '&';
4029
587
      buf[len++] = '#';
4030
587
      buf[len++] = '3';
4031
587
      buf[len++] = '8';
4032
587
      buf[len++] = ';';
4033
4.33k
        } else {
4034
4.33k
      buf[len++] = ent->content[0];
4035
4.33k
        }
4036
1.13M
    } else if ((ent != NULL) &&
4037
1.13M
               (ctxt->replaceEntities != 0)) {
4038
746k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
746k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
746k
      ++ctxt->depth;
4043
746k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
746k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
746k
                                /* check */ 1);
4046
746k
      --ctxt->depth;
4047
746k
      if (rep != NULL) {
4048
746k
          current = rep;
4049
145M
          while (*current != 0) { /* non input consuming */
4050
144M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
144M
                                    (*current == 0x9)) {
4052
2.26k
                                    buf[len++] = 0x20;
4053
2.26k
                                    current++;
4054
2.26k
                                } else
4055
144M
                                    buf[len++] = *current++;
4056
144M
        if (len + 10 > buf_size) {
4057
6.07k
            growBuffer(buf, 10);
4058
6.07k
        }
4059
144M
          }
4060
746k
          xmlFree(rep);
4061
746k
          rep = NULL;
4062
746k
      }
4063
746k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
746k
    } else if (ent != NULL) {
4071
328k
        int i = xmlStrlen(ent->name);
4072
328k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
328k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
328k
      (ent->content != NULL)) {
4081
316k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
271
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
271
                            ctxt->sizeentcopy = ent->length;
4085
4086
271
                            ++ctxt->depth;
4087
271
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
271
                                    ent->content, ent->length,
4089
271
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
271
                                    /* check */ 1);
4091
271
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
271
                            if (ctxt->inSubset == 0) {
4100
262
                                ent->flags |= XML_ENT_CHECKED;
4101
262
                                ent->expandedSize = ctxt->sizeentcopy;
4102
262
                            }
4103
4104
271
                            if (rep != NULL) {
4105
268
                                xmlFree(rep);
4106
268
                                rep = NULL;
4107
268
                            } else {
4108
3
                                ent->content[0] = 0;
4109
3
                            }
4110
4111
271
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
0
                                goto error;
4113
315k
                        } else {
4114
315k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
315k
                        }
4117
316k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
328k
        buf[len++] = '&';
4123
329k
        while (len + i + 10 > buf_size) {
4124
1.47k
      growBuffer(buf, i + 10);
4125
1.47k
        }
4126
672k
        for (;i > 0;i--)
4127
343k
      buf[len++] = *cur++;
4128
328k
        buf[len++] = ';';
4129
328k
    }
4130
1.13M
      }
4131
1.14M
  } else {
4132
730k
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
97.5k
          if ((len != 0) || (!normalize)) {
4134
95.7k
        if ((!normalize) || (!in_space)) {
4135
94.4k
      COPY_BUF(l,buf,len,0x20);
4136
94.4k
      while (len + 10 > buf_size) {
4137
126
          growBuffer(buf, 10);
4138
126
      }
4139
94.4k
        }
4140
95.7k
        in_space = 1;
4141
95.7k
    }
4142
632k
      } else {
4143
632k
          in_space = 0;
4144
632k
    COPY_BUF(l,buf,len,c);
4145
632k
    if (len + 10 > buf_size) {
4146
1.55k
        growBuffer(buf, 10);
4147
1.55k
    }
4148
632k
      }
4149
730k
      NEXTL(l);
4150
730k
  }
4151
1.87M
  GROW;
4152
1.87M
  c = CUR_CHAR(l);
4153
1.87M
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
1.87M
    }
4159
31.5k
    if (ctxt->instate == XML_PARSER_EOF)
4160
52
        goto error;
4161
4162
31.5k
    if ((in_space) && (normalize)) {
4163
1.93k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
952
    }
4165
31.5k
    buf[len] = 0;
4166
31.5k
    if (RAW == '<') {
4167
1.61k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
29.9k
    } else if (RAW != limit) {
4169
1.66k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
781
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
781
         "invalid character in attribute value\n");
4172
882
  } else {
4173
882
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
882
         "AttValue: ' expected\n");
4175
882
        }
4176
1.66k
    } else
4177
28.2k
  NEXT;
4178
4179
31.5k
    if (attlen != NULL) *attlen = len;
4180
31.5k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
52
error:
4185
52
    if (buf != NULL)
4186
52
        xmlFree(buf);
4187
52
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
52
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
135k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
135k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
135k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
135k
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
10.0k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
10.0k
    xmlChar *buf = NULL;
4250
10.0k
    int len = 0;
4251
10.0k
    int size = XML_PARSER_BUFFER_SIZE;
4252
10.0k
    int cur, l;
4253
10.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
381
                    XML_MAX_TEXT_LENGTH :
4255
10.0k
                    XML_MAX_NAME_LENGTH;
4256
10.0k
    xmlChar stop;
4257
10.0k
    int state = ctxt->instate;
4258
10.0k
    int count = 0;
4259
4260
10.0k
    SHRINK;
4261
10.0k
    if (RAW == '"') {
4262
9.68k
        NEXT;
4263
9.68k
  stop = '"';
4264
9.68k
    } else if (RAW == '\'') {
4265
246
        NEXT;
4266
246
  stop = '\'';
4267
246
    } else {
4268
143
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
143
  return(NULL);
4270
143
    }
4271
4272
9.93k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
9.93k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
9.93k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
9.93k
    cur = CUR_CHAR(l);
4279
258k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
249k
  if (len + 5 >= size) {
4281
330
      xmlChar *tmp;
4282
4283
330
      size *= 2;
4284
330
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
330
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
330
      buf = tmp;
4292
330
  }
4293
249k
  count++;
4294
249k
  if (count > 50) {
4295
2.01k
      SHRINK;
4296
2.01k
      GROW;
4297
2.01k
      count = 0;
4298
2.01k
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
2.01k
  }
4303
249k
  COPY_BUF(l,buf,len,cur);
4304
249k
  NEXTL(l);
4305
249k
  cur = CUR_CHAR(l);
4306
249k
  if (cur == 0) {
4307
152
      GROW;
4308
152
      SHRINK;
4309
152
      cur = CUR_CHAR(l);
4310
152
  }
4311
249k
        if (len > maxLength) {
4312
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
0
            xmlFree(buf);
4314
0
            ctxt->instate = (xmlParserInputState) state;
4315
0
            return(NULL);
4316
0
        }
4317
249k
    }
4318
9.93k
    buf[len] = 0;
4319
9.93k
    ctxt->instate = (xmlParserInputState) state;
4320
9.93k
    if (!IS_CHAR(cur)) {
4321
206
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
9.72k
    } else {
4323
9.72k
  NEXT;
4324
9.72k
    }
4325
9.93k
    return(buf);
4326
9.93k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
2.24k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
2.24k
    xmlChar *buf = NULL;
4344
2.24k
    int len = 0;
4345
2.24k
    int size = XML_PARSER_BUFFER_SIZE;
4346
2.24k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
56
                    XML_MAX_TEXT_LENGTH :
4348
2.24k
                    XML_MAX_NAME_LENGTH;
4349
2.24k
    xmlChar cur;
4350
2.24k
    xmlChar stop;
4351
2.24k
    int count = 0;
4352
2.24k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
2.24k
    SHRINK;
4355
2.24k
    if (RAW == '"') {
4356
2.07k
        NEXT;
4357
2.07k
  stop = '"';
4358
2.07k
    } else if (RAW == '\'') {
4359
136
        NEXT;
4360
136
  stop = '\'';
4361
136
    } else {
4362
34
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
34
  return(NULL);
4364
34
    }
4365
2.20k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
2.20k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
2.20k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
2.20k
    cur = CUR;
4372
74.5k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
72.3k
  if (len + 1 >= size) {
4374
18
      xmlChar *tmp;
4375
4376
18
      size *= 2;
4377
18
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
18
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
18
      buf = tmp;
4384
18
  }
4385
72.3k
  buf[len++] = cur;
4386
72.3k
  count++;
4387
72.3k
  if (count > 50) {
4388
62
      SHRINK;
4389
62
      GROW;
4390
62
      count = 0;
4391
62
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
62
  }
4396
72.3k
  NEXT;
4397
72.3k
  cur = CUR;
4398
72.3k
  if (cur == 0) {
4399
18
      GROW;
4400
18
      SHRINK;
4401
18
      cur = CUR;
4402
18
  }
4403
72.3k
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
72.3k
    }
4409
2.20k
    buf[len] = 0;
4410
2.20k
    if (cur != stop) {
4411
47
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
2.16k
    } else {
4413
2.16k
  NEXT;
4414
2.16k
    }
4415
2.20k
    ctxt->instate = oldstate;
4416
2.20k
    return(buf);
4417
2.20k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
 * Lookup table driving the inner loop of the fast character data scanner.
 * Entry [c] is non-zero (equal to c) when byte c can be skipped without
 * further inspection and 0 when the scanner has to stop and look at it.
 * Accepted bytes are TAB and 0x20-0x7F except '&', '<' and ']'; LF and CR
 * are rejected here because the scanner treats them separately, and every
 * byte >= 0x80 (non-ASCII) is rejected as well.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB (0x09) */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0x09, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F: everything but '&' (0x26) */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: everything but '<' (0x3C) */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: everything but ']' (0x5D) */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, always rejected */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
void
4480
2.61M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
2.61M
    const xmlChar *in;
4482
2.61M
    int nbchar = 0;
4483
2.61M
    int line = ctxt->input->line;
4484
2.61M
    int col = ctxt->input->col;
4485
2.61M
    int ccol;
4486
4487
2.61M
    SHRINK;
4488
2.61M
    GROW;
4489
    /*
4490
     * Accelerated common case where input don't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
2.61M
    in = ctxt->input->cur;
4494
2.67M
    do {
4495
3.74M
get_more_space:
4496
4.22M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
3.74M
        if (*in == 0xA) {
4498
1.11M
            do {
4499
1.11M
                ctxt->input->line++; ctxt->input->col = 1;
4500
1.11M
                in++;
4501
1.11M
            } while (*in == 0xA);
4502
1.06M
            goto get_more_space;
4503
1.06M
        }
4504
2.67M
        if (*in == '<') {
4505
789k
            nbchar = in - ctxt->input->cur;
4506
789k
            if (nbchar > 0) {
4507
789k
                const xmlChar *tmp = ctxt->input->cur;
4508
789k
                ctxt->input->cur = in;
4509
4510
789k
                if ((ctxt->sax != NULL) &&
4511
789k
                    (ctxt->sax->ignorableWhitespace !=
4512
789k
                     ctxt->sax->characters)) {
4513
88.3k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
74.2k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
74.2k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
74.2k
                                                   tmp, nbchar);
4517
74.2k
                    } else {
4518
14.0k
                        if (ctxt->sax->characters != NULL)
4519
14.0k
                            ctxt->sax->characters(ctxt->userData,
4520
14.0k
                                                  tmp, nbchar);
4521
14.0k
                        if (*ctxt->space == -1)
4522
4.45k
                            *ctxt->space = -2;
4523
14.0k
                    }
4524
701k
                } else if ((ctxt->sax != NULL) &&
4525
701k
                           (ctxt->sax->characters != NULL)) {
4526
701k
                    ctxt->sax->characters(ctxt->userData,
4527
701k
                                          tmp, nbchar);
4528
701k
                }
4529
789k
            }
4530
789k
            return;
4531
789k
        }
4532
4533
3.04M
get_more:
4534
3.04M
        ccol = ctxt->input->col;
4535
62.4M
        while (test_char_data[*in]) {
4536
59.3M
            in++;
4537
59.3M
            ccol++;
4538
59.3M
        }
4539
3.04M
        ctxt->input->col = ccol;
4540
3.04M
        if (*in == 0xA) {
4541
1.07M
            do {
4542
1.07M
                ctxt->input->line++; ctxt->input->col = 1;
4543
1.07M
                in++;
4544
1.07M
            } while (*in == 0xA);
4545
1.06M
            goto get_more;
4546
1.06M
        }
4547
1.98M
        if (*in == ']') {
4548
94.8k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
385
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
385
                ctxt->input->cur = in + 1;
4551
385
                return;
4552
385
            }
4553
94.4k
            in++;
4554
94.4k
            ctxt->input->col++;
4555
94.4k
            goto get_more;
4556
94.8k
        }
4557
1.88M
        nbchar = in - ctxt->input->cur;
4558
1.88M
        if (nbchar > 0) {
4559
1.81M
            if ((ctxt->sax != NULL) &&
4560
1.81M
                (ctxt->sax->ignorableWhitespace !=
4561
1.81M
                 ctxt->sax->characters) &&
4562
1.81M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
98.1k
                const xmlChar *tmp = ctxt->input->cur;
4564
98.1k
                ctxt->input->cur = in;
4565
4566
98.1k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
31.4k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
31.4k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
31.4k
                                                       tmp, nbchar);
4570
66.6k
                } else {
4571
66.6k
                    if (ctxt->sax->characters != NULL)
4572
66.6k
                        ctxt->sax->characters(ctxt->userData,
4573
66.6k
                                              tmp, nbchar);
4574
66.6k
                    if (*ctxt->space == -1)
4575
23.6k
                        *ctxt->space = -2;
4576
66.6k
                }
4577
98.1k
                line = ctxt->input->line;
4578
98.1k
                col = ctxt->input->col;
4579
1.71M
            } else if (ctxt->sax != NULL) {
4580
1.71M
                if (ctxt->sax->characters != NULL)
4581
1.71M
                    ctxt->sax->characters(ctxt->userData,
4582
1.71M
                                          ctxt->input->cur, nbchar);
4583
1.71M
                line = ctxt->input->line;
4584
1.71M
                col = ctxt->input->col;
4585
1.71M
            }
4586
1.81M
        }
4587
1.88M
        ctxt->input->cur = in;
4588
1.88M
        if (*in == 0xD) {
4589
64.1k
            in++;
4590
64.1k
            if (*in == 0xA) {
4591
63.9k
                ctxt->input->cur = in;
4592
63.9k
                in++;
4593
63.9k
                ctxt->input->line++; ctxt->input->col = 1;
4594
63.9k
                continue; /* while */
4595
63.9k
            }
4596
208
            in--;
4597
208
        }
4598
1.82M
        if (*in == '<') {
4599
1.57M
            return;
4600
1.57M
        }
4601
250k
        if (*in == '&') {
4602
202k
            return;
4603
202k
        }
4604
47.9k
        SHRINK;
4605
47.9k
        GROW;
4606
47.9k
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
47.9k
        in = ctxt->input->cur;
4609
111k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
111k
             (*in == 0x09) || (*in == 0x0a));
4611
48.3k
    ctxt->input->line = line;
4612
48.3k
    ctxt->input->col = col;
4613
48.3k
    xmlParseCharDataComplex(ctxt);
4614
48.3k
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * parse a CharData section.this is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
static void
4628
48.3k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
48.3k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
48.3k
    int nbchar = 0;
4631
48.3k
    int cur, l;
4632
48.3k
    int count = 0;
4633
4634
48.3k
    SHRINK;
4635
48.3k
    GROW;
4636
48.3k
    cur = CUR_CHAR(l);
4637
728k
    while ((cur != '<') && /* checked */
4638
728k
           (cur != '&') &&
4639
728k
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4640
679k
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
180
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
180
  }
4643
679k
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
679k
  NEXTL(l);
4646
679k
  cur = CUR_CHAR(l);
4647
679k
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
1.26k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
1.26k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
399
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
0
                                     buf, nbchar);
4658
399
    } else {
4659
399
        if (ctxt->sax->characters != NULL)
4660
399
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
399
        if ((ctxt->sax->characters !=
4662
399
             ctxt->sax->ignorableWhitespace) &&
4663
399
      (*ctxt->space == -1))
4664
22
      *ctxt->space = -2;
4665
399
    }
4666
399
      }
4667
1.26k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
1.26k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
1.26k
  }
4672
679k
  count++;
4673
679k
  if (count > 50) {
4674
10.2k
      SHRINK;
4675
10.2k
      GROW;
4676
10.2k
      count = 0;
4677
10.2k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
10.2k
  }
4680
679k
    }
4681
48.3k
    if (nbchar != 0) {
4682
11.7k
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
11.7k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
8.14k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
17
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
17
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
8.13k
      } else {
4691
8.13k
    if (ctxt->sax->characters != NULL)
4692
8.13k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
8.13k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
8.13k
        (*ctxt->space == -1))
4695
1.17k
        *ctxt->space = -2;
4696
8.13k
      }
4697
8.14k
  }
4698
11.7k
    }
4699
48.3k
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
19.5k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
19.5k
                          "PCDATA invalid Char value %d\n",
4703
19.5k
                    cur ? cur : CUR);
4704
19.5k
  NEXT;
4705
19.5k
    }
4706
48.3k
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the function returns SystemLiteral and in the second
4728
 *                case publicID receives PubidLiteral, is strict is off
4729
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
xmlChar *
4733
18.2k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
18.2k
    xmlChar *URI = NULL;
4735
4736
18.2k
    SHRINK;
4737
4738
18.2k
    *publicID = NULL;
4739
18.2k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
7.88k
        SKIP(6);
4741
7.88k
  if (SKIP_BLANKS == 0) {
4742
34
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
34
                     "Space required after 'SYSTEM'\n");
4744
34
  }
4745
7.88k
  URI = xmlParseSystemLiteral(ctxt);
4746
7.88k
  if (URI == NULL) {
4747
44
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
44
        }
4749
10.3k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
2.24k
        SKIP(6);
4751
2.24k
  if (SKIP_BLANKS == 0) {
4752
22
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
22
        "Space required after 'PUBLIC'\n");
4754
22
  }
4755
2.24k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
2.24k
  if (*publicID == NULL) {
4757
34
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
34
  }
4759
2.24k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
2.19k
      if (SKIP_BLANKS == 0) {
4764
96
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
96
      "Space required after the Public Identifier\n");
4766
96
      }
4767
2.19k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
51
      if (SKIP_BLANKS == 0) return(NULL);
4775
0
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
0
  }
4777
2.19k
  URI = xmlParseSystemLiteral(ctxt);
4778
2.19k
  if (URI == NULL) {
4779
99
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
99
        }
4781
2.19k
    }
4782
18.2k
    return(URI);
4783
18.2k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
259k
                       size_t len, size_t size) {
4802
259k
    int q, ql;
4803
259k
    int r, rl;
4804
259k
    int cur, l;
4805
259k
    size_t count = 0;
4806
259k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
249
                       XML_MAX_HUGE_LENGTH :
4808
259k
                       XML_MAX_TEXT_LENGTH;
4809
259k
    int inputid;
4810
4811
259k
    inputid = ctxt->input->id;
4812
4813
259k
    if (buf == NULL) {
4814
61
        len = 0;
4815
61
  size = XML_PARSER_BUFFER_SIZE;
4816
61
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
61
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
61
    }
4822
259k
    GROW; /* Assure there's enough input data */
4823
259k
    q = CUR_CHAR(ql);
4824
259k
    if (q == 0)
4825
255k
        goto not_terminated;
4826
3.66k
    if (!IS_CHAR(q)) {
4827
278
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
278
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
278
                    q);
4830
278
  xmlFree (buf);
4831
278
  return;
4832
278
    }
4833
3.39k
    NEXTL(ql);
4834
3.39k
    r = CUR_CHAR(rl);
4835
3.39k
    if (r == 0)
4836
72
        goto not_terminated;
4837
3.31k
    if (!IS_CHAR(r)) {
4838
30
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
30
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
30
                    r);
4841
30
  xmlFree (buf);
4842
30
  return;
4843
30
    }
4844
3.28k
    NEXTL(rl);
4845
3.28k
    cur = CUR_CHAR(l);
4846
3.28k
    if (cur == 0)
4847
33
        goto not_terminated;
4848
751k
    while (IS_CHAR(cur) && /* checked */
4849
751k
           ((cur != '>') ||
4850
750k
      (r != '-') || (q != '-'))) {
4851
747k
  if ((r == '-') && (q == '-')) {
4852
421
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
421
  }
4854
747k
  if (len + 5 >= size) {
4855
1.60k
      xmlChar *new_buf;
4856
1.60k
            size_t new_size;
4857
4858
1.60k
      new_size = size * 2;
4859
1.60k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
1.60k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
1.60k
      buf = new_buf;
4866
1.60k
            size = new_size;
4867
1.60k
  }
4868
747k
  COPY_BUF(ql,buf,len,q);
4869
747k
  q = r;
4870
747k
  ql = rl;
4871
747k
  r = cur;
4872
747k
  rl = l;
4873
4874
747k
  count++;
4875
747k
  if (count > 50) {
4876
13.7k
      SHRINK;
4877
13.7k
      GROW;
4878
13.7k
      count = 0;
4879
13.7k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
13.7k
  }
4884
747k
  NEXTL(l);
4885
747k
  cur = CUR_CHAR(l);
4886
747k
  if (cur == 0) {
4887
240
      SHRINK;
4888
240
      GROW;
4889
240
      cur = CUR_CHAR(l);
4890
240
  }
4891
4892
747k
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
747k
    }
4899
3.25k
    buf[len] = 0;
4900
3.25k
    if (cur == 0) {
4901
240
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
240
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
3.01k
    } else if (!IS_CHAR(cur)) {
4904
140
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
140
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
140
                    cur);
4907
2.87k
    } else {
4908
2.87k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
2.87k
        NEXT;
4914
2.87k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
2.87k
      (!ctxt->disableSAX))
4916
1.80k
      ctxt->sax->comment(ctxt->userData, buf);
4917
2.87k
    }
4918
3.25k
    xmlFree(buf);
4919
3.25k
    return;
4920
255k
not_terminated:
4921
255k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
255k
       "Comment not terminated\n", NULL);
4923
255k
    xmlFree(buf);
4924
255k
    return;
4925
3.25k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
void
4941
10.3M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
10.3M
    xmlChar *buf = NULL;
4943
10.3M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
10.3M
    size_t len = 0;
4945
10.3M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
4.34k
                       XML_MAX_HUGE_LENGTH :
4947
10.3M
                       XML_MAX_TEXT_LENGTH;
4948
10.3M
    xmlParserInputState state;
4949
10.3M
    const xmlChar *in;
4950
10.3M
    size_t nbchar = 0;
4951
10.3M
    int ccol;
4952
10.3M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
10.3M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
4959
10.3M
    SKIP(2);
4960
10.3M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
30
        return;
4962
10.3M
    state = ctxt->instate;
4963
10.3M
    ctxt->instate = XML_PARSER_COMMENT;
4964
10.3M
    inputid = ctxt->input->id;
4965
10.3M
    SKIP(2);
4966
10.3M
    SHRINK;
4967
10.3M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input don't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
10.3M
    in = ctxt->input->cur;
4974
10.3M
    do {
4975
10.3M
  if (*in == 0xA) {
4976
54.7k
      do {
4977
54.7k
    ctxt->input->line++; ctxt->input->col = 1;
4978
54.7k
    in++;
4979
54.7k
      } while (*in == 0xA);
4980
54.3k
  }
4981
11.7M
get_more:
4982
11.7M
        ccol = ctxt->input->col;
4983
56.7M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
56.7M
         ((*in >= 0x20) && (*in < '-')) ||
4985
56.7M
         (*in == 0x09)) {
4986
45.0M
        in++;
4987
45.0M
        ccol++;
4988
45.0M
  }
4989
11.7M
  ctxt->input->col = ccol;
4990
11.7M
  if (*in == 0xA) {
4991
543k
      do {
4992
543k
    ctxt->input->line++; ctxt->input->col = 1;
4993
543k
    in++;
4994
543k
      } while (*in == 0xA);
4995
520k
      goto get_more;
4996
520k
  }
4997
11.2M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
11.2M
  if (nbchar > 0) {
5002
1.29M
      if ((ctxt->sax != NULL) &&
5003
1.29M
    (ctxt->sax->comment != NULL)) {
5004
1.29M
    if (buf == NULL) {
5005
535k
        if ((*in == '-') && (in[1] == '-'))
5006
166k
            size = nbchar + 1;
5007
369k
        else
5008
369k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
535k
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
535k
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
535k
        len = 0;
5016
758k
    } else if (len + nbchar + 1 >= size) {
5017
72.2k
        xmlChar *new_buf;
5018
72.2k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
72.2k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
72.2k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
72.2k
        buf = new_buf;
5027
72.2k
    }
5028
1.29M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
1.29M
    len += nbchar;
5030
1.29M
    buf[len] = 0;
5031
1.29M
      }
5032
1.29M
  }
5033
11.2M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
11.2M
  ctxt->input->cur = in;
5040
11.2M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
5044
11.2M
  if (*in == 0xD) {
5045
168k
      in++;
5046
168k
      if (*in == 0xA) {
5047
168k
    ctxt->input->cur = in;
5048
168k
    in++;
5049
168k
    ctxt->input->line++; ctxt->input->col = 1;
5050
168k
    goto get_more;
5051
168k
      }
5052
60
      in--;
5053
60
  }
5054
11.0M
  SHRINK;
5055
11.0M
  GROW;
5056
11.0M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
11.0M
  in = ctxt->input->cur;
5061
11.0M
  if (*in == '-') {
5062
10.8M
      if (in[1] == '-') {
5063
10.2M
          if (in[2] == '>') {
5064
10.1M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
10.1M
        SKIP(3);
5070
10.1M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
10.1M
            (!ctxt->disableSAX)) {
5072
5.31M
      if (buf != NULL)
5073
247k
          ctxt->sax->comment(ctxt->userData, buf);
5074
5.06M
      else
5075
5.06M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
5.31M
        }
5077
10.1M
        if (buf != NULL)
5078
276k
            xmlFree(buf);
5079
10.1M
        if (ctxt->instate != XML_PARSER_EOF)
5080
10.1M
      ctxt->instate = state;
5081
10.1M
        return;
5082
10.1M
    }
5083
94.7k
    if (buf != NULL) {
5084
794
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
794
                          "Double hyphen within comment: "
5086
794
                                      "<!--%.50s\n",
5087
794
              buf);
5088
794
    } else
5089
93.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
93.9k
                          "Double hyphen within comment\n", NULL);
5091
94.7k
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
94.7k
    in++;
5096
94.7k
    ctxt->input->col++;
5097
94.7k
      }
5098
692k
      in++;
5099
692k
      ctxt->input->col++;
5100
692k
      goto get_more;
5101
10.8M
  }
5102
11.0M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5103
259k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
259k
    ctxt->instate = state;
5105
259k
    return;
5106
10.3M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
const xmlChar *
5123
7.75k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
7.75k
    const xmlChar *name;
5125
5126
7.75k
    name = xmlParseName(ctxt);
5127
7.75k
    if ((name != NULL) &&
5128
7.75k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
7.75k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
7.75k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
1.38k
  int i;
5132
1.38k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
1.38k
      (name[2] == 'l') && (name[3] == 0)) {
5134
589
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
589
     "XML declaration allowed only at the start of the document\n");
5136
589
      return(name);
5137
796
  } else if (name[3] == 0) {
5138
30
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
30
      return(name);
5140
30
  }
5141
1.03k
  for (i = 0;;i++) {
5142
1.03k
      if (xmlW3CPIs[i] == NULL) break;
5143
902
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
630
          return(name);
5145
902
  }
5146
136
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
136
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
136
          NULL, NULL);
5149
136
    }
5150
6.50k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
38
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
38
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
38
    }
5154
6.50k
    return(name);
5155
7.75k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must be the pseudo-attribute catalog=<quoted URL> with
 * optional blanks around it. A syntax error raises the
 * XML_WAR_CATALOG_PI warning, except when "catalog" is not followed by
 * '=', in which case the function returns without a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the URL runs up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
7.75k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
7.75k
    xmlChar *buf = NULL;
5235
7.75k
    size_t len = 0;
5236
7.75k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
7.75k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
358
                       XML_MAX_HUGE_LENGTH :
5239
7.75k
                       XML_MAX_TEXT_LENGTH;
5240
7.75k
    int cur, l;
5241
7.75k
    const xmlChar *target;
5242
7.75k
    xmlParserInputState state;
5243
7.75k
    int count = 0;
5244
5245
7.75k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
7.75k
  int inputid = ctxt->input->id;
5247
7.75k
  state = ctxt->instate;
5248
7.75k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
7.75k
  SKIP(2);
5253
7.75k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
7.75k
        target = xmlParsePITarget(ctxt);
5260
7.75k
  if (target != NULL) {
5261
7.59k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
745
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
745
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
745
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
745
        (ctxt->sax->processingInstruction != NULL))
5274
685
        ctxt->sax->processingInstruction(ctxt->userData,
5275
685
                                         target, NULL);
5276
745
    if (ctxt->instate != XML_PARSER_EOF)
5277
745
        ctxt->instate = state;
5278
745
    return;
5279
745
      }
5280
6.85k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
6.85k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
6.85k
      if (SKIP_BLANKS == 0) {
5287
942
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
942
        "ParsePI: PI %s space expected\n", target);
5289
942
      }
5290
6.85k
      cur = CUR_CHAR(l);
5291
546k
      while (IS_CHAR(cur) && /* checked */
5292
546k
       ((cur != '?') || (NXT(1) != '>'))) {
5293
539k
    if (len + 5 >= size) {
5294
594
        xmlChar *tmp;
5295
594
                    size_t new_size = size * 2;
5296
594
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
594
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
594
        buf = tmp;
5304
594
                    size = new_size;
5305
594
    }
5306
539k
    count++;
5307
539k
    if (count > 50) {
5308
8.80k
        SHRINK;
5309
8.80k
        GROW;
5310
8.80k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
8.80k
        count = 0;
5315
8.80k
    }
5316
539k
    COPY_BUF(l,buf,len,cur);
5317
539k
    NEXTL(l);
5318
539k
    cur = CUR_CHAR(l);
5319
539k
    if (cur == 0) {
5320
336
        SHRINK;
5321
336
        GROW;
5322
336
        cur = CUR_CHAR(l);
5323
336
    }
5324
539k
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
539k
      }
5332
6.85k
      buf[len] = 0;
5333
6.85k
      if (cur != '?') {
5334
707
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
707
          "ParsePI: PI %s never end ...\n", target);
5336
6.14k
      } else {
5337
6.14k
    if (inputid != ctxt->input->id) {
5338
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
0
                             "PI declaration doesn't start and stop in"
5340
0
                                   " the same entity\n");
5341
0
    }
5342
6.14k
    SKIP(2);
5343
5344
6.14k
#ifdef LIBXML_CATALOG_ENABLED
5345
6.14k
    if (((state == XML_PARSER_MISC) ||
5346
6.14k
               (state == XML_PARSER_START)) &&
5347
6.14k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
0
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
0
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
0
      (allow == XML_CATA_ALLOW_ALL))
5351
0
      xmlParseCatalogPI(ctxt, buf);
5352
0
    }
5353
6.14k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
6.14k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
6.14k
        (ctxt->sax->processingInstruction != NULL))
5361
4.77k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
4.77k
                                         target, buf);
5363
6.14k
      }
5364
6.85k
      xmlFree(buf);
5365
6.85k
  } else {
5366
155
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
155
  }
5368
7.00k
  if (ctxt->instate != XML_PARSER_EOF)
5369
7.00k
      ctxt->instate = state;
5370
7.00k
    }
5371
7.75k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
164
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
164
    const xmlChar *name;
5394
164
    xmlChar *Pubid;
5395
164
    xmlChar *Systemid;
5396
5397
164
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
164
    SKIP(2);
5400
5401
164
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
110
  int inputid = ctxt->input->id;
5403
110
  SHRINK;
5404
110
  SKIP(8);
5405
110
  if (SKIP_BLANKS == 0) {
5406
6
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
6
         "Space required after '<!NOTATION'\n");
5408
6
      return;
5409
6
  }
5410
5411
104
        name = xmlParseName(ctxt);
5412
104
  if (name == NULL) {
5413
3
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
3
      return;
5415
3
  }
5416
101
  if (xmlStrchr(name, ':') != NULL) {
5417
0
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
0
         "colons are forbidden from notation names '%s'\n",
5419
0
         name, NULL, NULL);
5420
0
  }
5421
101
  if (SKIP_BLANKS == 0) {
5422
7
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
7
         "Space required after the NOTATION name'\n");
5424
7
      return;
5425
7
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
94
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
94
  SKIP_BLANKS;
5432
5433
94
  if (RAW == '>') {
5434
76
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
76
      NEXT;
5440
76
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
76
    (ctxt->sax->notationDecl != NULL))
5442
26
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
76
  } else {
5444
18
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
18
  }
5446
94
  if (Systemid != NULL) xmlFree(Systemid);
5447
94
  if (Pubid != NULL) xmlFree(Pubid);
5448
94
    }
5449
164
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
216k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
216k
    const xmlChar *name = NULL;
5478
216k
    xmlChar *value = NULL;
5479
216k
    xmlChar *URI = NULL, *literal = NULL;
5480
216k
    const xmlChar *ndata = NULL;
5481
216k
    int isParameter = 0;
5482
216k
    xmlChar *orig = NULL;
5483
5484
216k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
216k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
216k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
216k
  int inputid = ctxt->input->id;
5491
216k
  SHRINK;
5492
216k
  SKIP(6);
5493
216k
  if (SKIP_BLANKS == 0) {
5494
197
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
197
         "Space required after '<!ENTITY'\n");
5496
197
  }
5497
5498
216k
  if (RAW == '%') {
5499
139k
      NEXT;
5500
139k
      if (SKIP_BLANKS == 0) {
5501
26
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
26
             "Space required after '%%'\n");
5503
26
      }
5504
139k
      isParameter = 1;
5505
139k
  }
5506
5507
216k
        name = xmlParseName(ctxt);
5508
216k
  if (name == NULL) {
5509
134
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
134
                     "xmlParseEntityDecl: no name\n");
5511
134
            return;
5512
134
  }
5513
216k
  if (xmlStrchr(name, ':') != NULL) {
5514
19
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
19
         "colons are forbidden from entities names '%s'\n",
5516
19
         name, NULL, NULL);
5517
19
  }
5518
216k
  if (SKIP_BLANKS == 0) {
5519
278
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
278
         "Space required after the entity name\n");
5521
278
  }
5522
5523
216k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
216k
  if (isParameter) {
5528
139k
      if ((RAW == '"') || (RAW == '\'')) {
5529
138k
          value = xmlParseEntityValue(ctxt, &orig);
5530
138k
    if (value) {
5531
138k
        if ((ctxt->sax != NULL) &&
5532
138k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
131k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
131k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
131k
            NULL, NULL, value);
5536
138k
    }
5537
138k
      } else {
5538
838
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
838
    if ((URI == NULL) && (literal == NULL)) {
5540
127
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
127
    }
5542
838
    if (URI) {
5543
711
        xmlURIPtr uri;
5544
5545
711
        uri = xmlParseURI((const char *) URI);
5546
711
        if (uri == NULL) {
5547
11
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
11
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
700
        } else {
5555
700
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
3
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
697
      } else {
5562
697
          if ((ctxt->sax != NULL) &&
5563
697
        (!ctxt->disableSAX) &&
5564
697
        (ctxt->sax->entityDecl != NULL))
5565
666
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
666
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
666
              literal, URI, NULL);
5568
697
      }
5569
700
      xmlFreeURI(uri);
5570
700
        }
5571
711
    }
5572
838
      }
5573
139k
  } else {
5574
76.6k
      if ((RAW == '"') || (RAW == '\'')) {
5575
73.5k
          value = xmlParseEntityValue(ctxt, &orig);
5576
73.5k
    if ((ctxt->sax != NULL) &&
5577
73.5k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
66.8k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
66.8k
        XML_INTERNAL_GENERAL_ENTITY,
5580
66.8k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
73.5k
    if ((ctxt->myDoc == NULL) ||
5585
73.5k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
882
        if (ctxt->myDoc == NULL) {
5587
64
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
64
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
64
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
64
        }
5594
882
        if (ctxt->myDoc->intSubset == NULL)
5595
64
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
64
              BAD_CAST "fake", NULL, NULL);
5597
5598
882
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
882
                    NULL, NULL, value);
5600
882
    }
5601
73.5k
      } else {
5602
3.12k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
3.12k
    if ((URI == NULL) && (literal == NULL)) {
5604
250
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
250
    }
5606
3.12k
    if (URI) {
5607
2.86k
        xmlURIPtr uri;
5608
5609
2.86k
        uri = xmlParseURI((const char *)URI);
5610
2.86k
        if (uri == NULL) {
5611
83
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
83
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
2.77k
        } else {
5619
2.77k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
6
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
6
      }
5626
2.77k
      xmlFreeURI(uri);
5627
2.77k
        }
5628
2.86k
    }
5629
3.12k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
226
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
226
           "Space required before 'NDATA'\n");
5632
226
    }
5633
3.12k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
144
        SKIP(5);
5635
144
        if (SKIP_BLANKS == 0) {
5636
12
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
12
               "Space required after 'NDATA'\n");
5638
12
        }
5639
144
        ndata = xmlParseName(ctxt);
5640
144
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
144
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
114
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
114
            literal, URI, ndata);
5644
2.97k
    } else {
5645
2.97k
        if ((ctxt->sax != NULL) &&
5646
2.97k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
2.53k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
2.53k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
2.53k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
2.97k
        if ((ctxt->replaceEntities != 0) &&
5655
2.97k
      ((ctxt->myDoc == NULL) ||
5656
2.83k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
95
      if (ctxt->myDoc == NULL) {
5658
42
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
42
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
42
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
42
      }
5665
5666
95
      if (ctxt->myDoc->intSubset == NULL)
5667
42
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
42
            BAD_CAST "fake", NULL, NULL);
5669
95
      xmlSAX2EntityDecl(ctxt, name,
5670
95
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
95
                  literal, URI, NULL);
5672
95
        }
5673
2.97k
    }
5674
3.12k
      }
5675
76.6k
  }
5676
216k
  if (ctxt->instate == XML_PARSER_EOF)
5677
15
      goto done;
5678
216k
  SKIP_BLANKS;
5679
216k
  if (RAW != '>') {
5680
997
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
997
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
997
      xmlHaltParser(ctxt);
5683
215k
  } else {
5684
215k
      if (inputid != ctxt->input->id) {
5685
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
0
                         "Entity declaration doesn't start and stop in"
5687
0
                               " the same entity\n");
5688
0
      }
5689
215k
      NEXT;
5690
215k
  }
5691
216k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
211k
      xmlEntityPtr cur = NULL;
5696
5697
211k
      if (isParameter) {
5698
138k
          if ((ctxt->sax != NULL) &&
5699
138k
        (ctxt->sax->getParameterEntity != NULL))
5700
138k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
138k
      } else {
5702
73.2k
          if ((ctxt->sax != NULL) &&
5703
73.2k
        (ctxt->sax->getEntity != NULL))
5704
73.2k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
73.2k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
5.35k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
5.35k
    }
5708
73.2k
      }
5709
211k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
193k
    cur->orig = orig;
5711
193k
                orig = NULL;
5712
193k
      }
5713
211k
  }
5714
5715
216k
done:
5716
216k
  if (value != NULL) xmlFree(value);
5717
216k
  if (URI != NULL) xmlFree(URI);
5718
216k
  if (literal != NULL) xmlFree(literal);
5719
216k
        if (orig != NULL) xmlFree(orig);
5720
216k
    }
5721
216k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
604k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
604k
    int val;
5757
604k
    xmlChar *ret;
5758
5759
604k
    *value = NULL;
5760
604k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
37.6k
  SKIP(9);
5762
37.6k
  return(XML_ATTRIBUTE_REQUIRED);
5763
37.6k
    }
5764
567k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
513k
  SKIP(8);
5766
513k
  return(XML_ATTRIBUTE_IMPLIED);
5767
513k
    }
5768
53.6k
    val = XML_ATTRIBUTE_NONE;
5769
53.6k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
39.7k
  SKIP(6);
5771
39.7k
  val = XML_ATTRIBUTE_FIXED;
5772
39.7k
  if (SKIP_BLANKS == 0) {
5773
14
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
14
         "Space required after '#FIXED'\n");
5775
14
  }
5776
39.7k
    }
5777
53.6k
    ret = xmlParseAttValue(ctxt);
5778
53.6k
    ctxt->instate = XML_PARSER_DTD;
5779
53.6k
    if (ret == NULL) {
5780
252
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
252
           "Attribute default value declaration error\n");
5782
252
    } else
5783
53.4k
        *value = ret;
5784
53.6k
    return(val);
5785
567k
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
34
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
34
    const xmlChar *name;
5809
34
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
34
    if (RAW != '(') {
5812
1
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
1
  return(NULL);
5814
1
    }
5815
33
    SHRINK;
5816
33
    do {
5817
33
        NEXT;
5818
33
  SKIP_BLANKS;
5819
33
        name = xmlParseName(ctxt);
5820
33
  if (name == NULL) {
5821
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
0
         "Name expected in NOTATION declaration\n");
5823
0
            xmlFreeEnumeration(ret);
5824
0
      return(NULL);
5825
0
  }
5826
33
  tmp = ret;
5827
33
  while (tmp != NULL) {
5828
0
      if (xmlStrEqual(name, tmp->name)) {
5829
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
0
    "standalone: attribute notation value token %s duplicated\n",
5831
0
         name, NULL);
5832
0
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
0
    break;
5835
0
      }
5836
0
      tmp = tmp->next;
5837
0
  }
5838
33
  if (tmp == NULL) {
5839
33
      cur = xmlCreateEnumeration(name);
5840
33
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
33
      if (last == NULL) ret = last = cur;
5845
0
      else {
5846
0
    last->next = cur;
5847
0
    last = cur;
5848
0
      }
5849
33
  }
5850
33
  SKIP_BLANKS;
5851
33
    } while (RAW == '|');
5852
33
    if (RAW != ')') {
5853
0
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
0
        xmlFreeEnumeration(ret);
5855
0
  return(NULL);
5856
0
    }
5857
33
    NEXT;
5858
33
    return(ret);
5859
33
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
43.1k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
43.1k
    xmlChar *name;
5881
43.1k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
43.1k
    if (RAW != '(') {
5884
287
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
287
  return(NULL);
5886
287
    }
5887
42.8k
    SHRINK;
5888
139k
    do {
5889
139k
        NEXT;
5890
139k
  SKIP_BLANKS;
5891
139k
        name = xmlParseNmtoken(ctxt);
5892
139k
  if (name == NULL) {
5893
24
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
24
      return(ret);
5895
24
  }
5896
139k
  tmp = ret;
5897
363k
  while (tmp != NULL) {
5898
224k
      if (xmlStrEqual(name, tmp->name)) {
5899
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
0
    "standalone: attribute enumeration value token %s duplicated\n",
5901
0
         name, NULL);
5902
0
    if (!xmlDictOwns(ctxt->dict, name))
5903
0
        xmlFree(name);
5904
0
    break;
5905
0
      }
5906
224k
      tmp = tmp->next;
5907
224k
  }
5908
139k
  if (tmp == NULL) {
5909
139k
      cur = xmlCreateEnumeration(name);
5910
139k
      if (!xmlDictOwns(ctxt->dict, name))
5911
139k
    xmlFree(name);
5912
139k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
139k
      if (last == NULL) ret = last = cur;
5917
96.2k
      else {
5918
96.2k
    last->next = cur;
5919
96.2k
    last = cur;
5920
96.2k
      }
5921
139k
  }
5922
139k
  SKIP_BLANKS;
5923
139k
    } while (RAW == '|');
5924
42.8k
    if (RAW != ')') {
5925
54
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
54
  return(ret);
5927
54
    }
5928
42.7k
    NEXT;
5929
42.7k
    return(ret);
5930
42.8k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
43.1k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
43.1k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
34
  SKIP(8);
5953
34
  if (SKIP_BLANKS == 0) {
5954
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
0
         "Space required after 'NOTATION'\n");
5956
0
      return(0);
5957
0
  }
5958
34
  *tree = xmlParseNotationType(ctxt);
5959
34
  if (*tree == NULL) return(0);
5960
33
  return(XML_ATTRIBUTE_NOTATION);
5961
34
    }
5962
43.1k
    *tree = xmlParseEnumerationType(ctxt);
5963
43.1k
    if (*tree == NULL) return(0);
5964
42.8k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
43.1k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
605k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
605k
    SHRINK;
6017
605k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
180k
  SKIP(5);
6019
180k
  return(XML_ATTRIBUTE_CDATA);
6020
424k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
1.10k
  SKIP(6);
6022
1.10k
  return(XML_ATTRIBUTE_IDREFS);
6023
423k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
11.0k
  SKIP(5);
6025
11.0k
  return(XML_ATTRIBUTE_IDREF);
6026
412k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
190k
        SKIP(2);
6028
190k
  return(XML_ATTRIBUTE_ID);
6029
222k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
908
  SKIP(6);
6031
908
  return(XML_ATTRIBUTE_ENTITY);
6032
221k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
14
  SKIP(8);
6034
14
  return(XML_ATTRIBUTE_ENTITIES);
6035
221k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
14.0k
  SKIP(8);
6037
14.0k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
207k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
163k
  SKIP(7);
6040
163k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
163k
     }
6042
43.1k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
605k
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
204k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
204k
    const xmlChar *elemName;
6061
204k
    const xmlChar *attrName;
6062
204k
    xmlEnumerationPtr tree;
6063
6064
204k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
204k
    SKIP(2);
6067
6068
204k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
204k
  int inputid = ctxt->input->id;
6070
6071
204k
  SKIP(7);
6072
204k
  if (SKIP_BLANKS == 0) {
6073
62
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
62
                     "Space required after '<!ATTLIST'\n");
6075
62
  }
6076
204k
        elemName = xmlParseName(ctxt);
6077
204k
  if (elemName == NULL) {
6078
51
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
51
         "ATTLIST: no name for Element\n");
6080
51
      return;
6081
51
  }
6082
204k
  SKIP_BLANKS;
6083
204k
  GROW;
6084
809k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
605k
      int type;
6086
605k
      int def;
6087
605k
      xmlChar *defaultValue = NULL;
6088
6089
605k
      GROW;
6090
605k
            tree = NULL;
6091
605k
      attrName = xmlParseName(ctxt);
6092
605k
      if (attrName == NULL) {
6093
305
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
305
             "ATTLIST: no name for Attribute\n");
6095
305
    break;
6096
305
      }
6097
605k
      GROW;
6098
605k
      if (SKIP_BLANKS == 0) {
6099
84
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
84
            "Space required after the attribute name\n");
6101
84
    break;
6102
84
      }
6103
6104
605k
      type = xmlParseAttributeType(ctxt, &tree);
6105
605k
      if (type <= 0) {
6106
306
          break;
6107
306
      }
6108
6109
605k
      GROW;
6110
605k
      if (SKIP_BLANKS == 0) {
6111
145
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
145
             "Space required after the attribute type\n");
6113
145
          if (tree != NULL)
6114
65
        xmlFreeEnumeration(tree);
6115
145
    break;
6116
145
      }
6117
6118
604k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
604k
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
604k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
12.9k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
604k
      GROW;
6130
604k
            if (RAW != '>') {
6131
589k
    if (SKIP_BLANKS == 0) {
6132
437
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
437
      "Space required after the attribute default value\n");
6134
437
        if (defaultValue != NULL)
6135
173
      xmlFree(defaultValue);
6136
437
        if (tree != NULL)
6137
49
      xmlFreeEnumeration(tree);
6138
437
        break;
6139
437
    }
6140
589k
      }
6141
604k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
604k
    (ctxt->sax->attributeDecl != NULL))
6143
565k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
565k
                          type, def, defaultValue, tree);
6145
38.6k
      else if (tree != NULL)
6146
2.39k
    xmlFreeEnumeration(tree);
6147
6148
604k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
604k
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
604k
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
47.4k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
47.4k
      }
6153
604k
      if (ctxt->sax2) {
6154
552k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
552k
      }
6156
604k
      if (defaultValue != NULL)
6157
53.2k
          xmlFree(defaultValue);
6158
604k
      GROW;
6159
604k
  }
6160
204k
  if (RAW == '>') {
6161
203k
      if (inputid != ctxt->input->id) {
6162
3
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
3
                               "Attribute list declaration doesn't start and"
6164
3
                               " stop in the same entity\n");
6165
3
      }
6166
203k
      NEXT;
6167
203k
  }
6168
204k
    }
6169
204k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
105k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
105k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
105k
    const xmlChar *elem = NULL;
6196
6197
105k
    GROW;
6198
105k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
105k
  SKIP(7);
6200
105k
  SKIP_BLANKS;
6201
105k
  SHRINK;
6202
105k
  if (RAW == ')') {
6203
59.4k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
59.4k
      NEXT;
6209
59.4k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
59.4k
      if (ret == NULL)
6211
0
          return(NULL);
6212
59.4k
      if (RAW == '*') {
6213
7
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
7
    NEXT;
6215
7
      }
6216
59.4k
      return(ret);
6217
59.4k
  }
6218
46.4k
  if ((RAW == '(') || (RAW == '|')) {
6219
46.4k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
46.4k
      if (ret == NULL) return(NULL);
6221
46.4k
  }
6222
553k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
507k
      NEXT;
6224
507k
      if (elem == NULL) {
6225
46.4k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
46.4k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
46.4k
    ret->c1 = cur;
6231
46.4k
    if (cur != NULL)
6232
46.4k
        cur->parent = ret;
6233
46.4k
    cur = ret;
6234
460k
      } else {
6235
460k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
460k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
460k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
460k
    if (n->c1 != NULL)
6242
460k
        n->c1->parent = n;
6243
460k
          cur->c2 = n;
6244
460k
    if (n != NULL)
6245
460k
        n->parent = cur;
6246
460k
    cur = n;
6247
460k
      }
6248
507k
      SKIP_BLANKS;
6249
507k
      elem = xmlParseName(ctxt);
6250
507k
      if (elem == NULL) {
6251
47
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
47
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
47
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
47
    return(NULL);
6255
47
      }
6256
507k
      SKIP_BLANKS;
6257
507k
      GROW;
6258
507k
  }
6259
46.4k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
46.3k
      if (elem != NULL) {
6261
46.3k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
46.3k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
46.3k
    if (cur->c2 != NULL)
6264
46.3k
        cur->c2->parent = cur;
6265
46.3k
            }
6266
46.3k
            if (ret != NULL)
6267
46.3k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
46.3k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
46.3k
      SKIP(2);
6274
46.3k
  } else {
6275
76
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
76
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
76
      return(NULL);
6278
76
  }
6279
6280
46.4k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
46.3k
    return(ret);
6284
105k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
129k
                                       int depth) {
6321
129k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
129k
    const xmlChar *elem;
6323
129k
    xmlChar type = 0;
6324
6325
129k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
129k
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
129k
    SKIP_BLANKS;
6333
129k
    GROW;
6334
129k
    if (RAW == '(') {
6335
3.48k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
3.48k
  NEXT;
6339
3.48k
  SKIP_BLANKS;
6340
3.48k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
3.48k
                                                           depth + 1);
6342
3.48k
        if (cur == NULL)
6343
49
            return(NULL);
6344
3.44k
  SKIP_BLANKS;
6345
3.44k
  GROW;
6346
125k
    } else {
6347
125k
  elem = xmlParseName(ctxt);
6348
125k
  if (elem == NULL) {
6349
250
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
250
      return(NULL);
6351
250
  }
6352
125k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
125k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
125k
  GROW;
6358
125k
  if (RAW == '?') {
6359
9.05k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
9.05k
      NEXT;
6361
116k
  } else if (RAW == '*') {
6362
5.29k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
5.29k
      NEXT;
6364
110k
  } else if (RAW == '+') {
6365
31.8k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
31.8k
      NEXT;
6367
79.1k
  } else {
6368
79.1k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
79.1k
  }
6370
125k
  GROW;
6371
125k
    }
6372
128k
    SKIP_BLANKS;
6373
128k
    SHRINK;
6374
668k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
540k
        if (RAW == ',') {
6379
109k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
64.6k
      else if (type != CUR) {
6385
6
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
6
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
6
                      type);
6388
6
    if ((last != NULL) && (last != ret))
6389
6
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
6
    if (ret != NULL)
6391
6
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
6
    return(NULL);
6393
6
      }
6394
109k
      NEXT;
6395
6396
109k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
109k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
109k
      if (last == NULL) {
6404
44.6k
    op->c1 = ret;
6405
44.6k
    if (ret != NULL)
6406
44.6k
        ret->parent = op;
6407
44.6k
    ret = cur = op;
6408
64.6k
      } else {
6409
64.6k
          cur->c2 = op;
6410
64.6k
    if (op != NULL)
6411
64.6k
        op->parent = cur;
6412
64.6k
    op->c1 = last;
6413
64.6k
    if (last != NULL)
6414
64.6k
        last->parent = op;
6415
64.6k
    cur =op;
6416
64.6k
    last = NULL;
6417
64.6k
      }
6418
431k
  } else if (RAW == '|') {
6419
430k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
384k
      else if (type != CUR) {
6425
10
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
10
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
10
          type);
6428
10
    if ((last != NULL) && (last != ret))
6429
10
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
10
    if (ret != NULL)
6431
10
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
10
    return(NULL);
6433
10
      }
6434
430k
      NEXT;
6435
6436
430k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
430k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
430k
      if (last == NULL) {
6445
46.4k
    op->c1 = ret;
6446
46.4k
    if (ret != NULL)
6447
46.4k
        ret->parent = op;
6448
46.4k
    ret = cur = op;
6449
384k
      } else {
6450
384k
          cur->c2 = op;
6451
384k
    if (op != NULL)
6452
384k
        op->parent = cur;
6453
384k
    op->c1 = last;
6454
384k
    if (last != NULL)
6455
384k
        last->parent = op;
6456
384k
    cur =op;
6457
384k
    last = NULL;
6458
384k
      }
6459
430k
  } else {
6460
532
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
532
      if ((last != NULL) && (last != ret))
6462
166
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
532
      if (ret != NULL)
6464
532
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
532
      return(NULL);
6466
532
  }
6467
540k
  GROW;
6468
540k
  SKIP_BLANKS;
6469
540k
  GROW;
6470
540k
  if (RAW == '(') {
6471
23.5k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
23.5k
      NEXT;
6474
23.5k
      SKIP_BLANKS;
6475
23.5k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
23.5k
                                                          depth + 1);
6477
23.5k
            if (last == NULL) {
6478
72
    if (ret != NULL)
6479
72
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
72
    return(NULL);
6481
72
            }
6482
23.4k
      SKIP_BLANKS;
6483
516k
  } else {
6484
516k
      elem = xmlParseName(ctxt);
6485
516k
      if (elem == NULL) {
6486
73
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
73
    if (ret != NULL)
6488
73
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
73
    return(NULL);
6490
73
      }
6491
516k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
516k
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
516k
      if (RAW == '?') {
6498
37.6k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
37.6k
    NEXT;
6500
478k
      } else if (RAW == '*') {
6501
21.1k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
21.1k
    NEXT;
6503
457k
      } else if (RAW == '+') {
6504
6.41k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
6.41k
    NEXT;
6506
451k
      } else {
6507
451k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
451k
      }
6509
516k
  }
6510
539k
  SKIP_BLANKS;
6511
539k
  GROW;
6512
539k
    }
6513
128k
    if ((cur != NULL) && (last != NULL)) {
6514
90.8k
        cur->c2 = last;
6515
90.8k
  if (last != NULL)
6516
90.8k
      last->parent = cur;
6517
90.8k
    }
6518
128k
    if (ctxt->input->id != inputchk) {
6519
3
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
3
                       "Element content declaration doesn't start and stop in"
6521
3
                       " the same entity\n");
6522
3
    }
6523
128k
    NEXT;
6524
128k
    if (RAW == '?') {
6525
2.03k
  if (ret != NULL) {
6526
2.03k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
2.03k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
0
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
2.03k
      else
6530
2.03k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
2.03k
  }
6532
2.03k
  NEXT;
6533
126k
    } else if (RAW == '*') {
6534
23.6k
  if (ret != NULL) {
6535
23.6k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
23.6k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
255k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
231k
    if ((cur->c1 != NULL) &&
6543
231k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
231k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
3.43k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
231k
    if ((cur->c2 != NULL) &&
6547
231k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
231k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
559
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
231k
    cur = cur->c2;
6551
231k
      }
6552
23.6k
  }
6553
23.6k
  NEXT;
6554
102k
    } else if (RAW == '+') {
6555
24.3k
  if (ret != NULL) {
6556
24.3k
      int found = 0;
6557
6558
24.3k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
24.3k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
0
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
24.3k
      else
6562
24.3k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
43.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
19.1k
    if ((cur->c1 != NULL) &&
6570
19.1k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
19.1k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
0
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
0
        found = 1;
6574
0
    }
6575
19.1k
    if ((cur->c2 != NULL) &&
6576
19.1k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
19.1k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
0
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
0
        found = 1;
6580
0
    }
6581
19.1k
    cur = cur->c2;
6582
19.1k
      }
6583
24.3k
      if (found)
6584
0
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
24.3k
  }
6586
24.3k
  NEXT;
6587
24.3k
    }
6588
128k
    return(ret);
6589
128k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
208k
                           xmlElementContentPtr *result) {
6648
6649
208k
    xmlElementContentPtr tree = NULL;
6650
208k
    int inputid = ctxt->input->id;
6651
208k
    int res;
6652
6653
208k
    *result = NULL;
6654
6655
208k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
208k
    NEXT;
6661
208k
    GROW;
6662
208k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
208k
    SKIP_BLANKS;
6665
208k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
105k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
105k
  res = XML_ELEMENT_TYPE_MIXED;
6668
105k
    } else {
6669
102k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
102k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
102k
    }
6672
208k
    SKIP_BLANKS;
6673
208k
    *result = tree;
6674
208k
    return(res);
6675
208k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
233k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
233k
    const xmlChar *name;
6695
233k
    int ret = -1;
6696
233k
    xmlElementContentPtr content  = NULL;
6697
6698
233k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
233k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
233k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
233k
  int inputid = ctxt->input->id;
6705
6706
233k
  SKIP(7);
6707
233k
  if (SKIP_BLANKS == 0) {
6708
60
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
60
               "Space required after 'ELEMENT'\n");
6710
60
      return(-1);
6711
60
  }
6712
233k
        name = xmlParseName(ctxt);
6713
233k
  if (name == NULL) {
6714
34
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
34
         "xmlParseElementDecl: no name for Element\n");
6716
34
      return(-1);
6717
34
  }
6718
233k
  if (SKIP_BLANKS == 0) {
6719
232
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
232
         "Space required after the element name\n");
6721
232
  }
6722
233k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
24.0k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
24.0k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
209k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
209k
             (NXT(2) == 'Y')) {
6730
674
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
674
      ret = XML_ELEMENT_TYPE_ANY;
6735
208k
  } else if (RAW == '(') {
6736
208k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
208k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
332
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
332
          (ctxt->inputNr == 1)) {
6743
8
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
8
    "PEReference: forbidden within markup decl in internal subset\n");
6745
324
      } else {
6746
324
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
324
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
324
            }
6749
332
      return(-1);
6750
332
  }
6751
6752
232k
  SKIP_BLANKS;
6753
6754
232k
  if (RAW != '>') {
6755
1.02k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
1.02k
      if (content != NULL) {
6757
61
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
61
      }
6759
231k
  } else {
6760
231k
      if (inputid != ctxt->input->id) {
6761
3
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
3
                               "Element declaration doesn't start and stop in"
6763
3
                               " the same entity\n");
6764
3
      }
6765
6766
231k
      NEXT;
6767
231k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
231k
    (ctxt->sax->elementDecl != NULL)) {
6769
212k
    if (content != NULL)
6770
189k
        content->parent = NULL;
6771
212k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
212k
                           content);
6773
212k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
12
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
12
    }
6782
212k
      } else if (content != NULL) {
6783
17.0k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
17.0k
      }
6785
231k
  }
6786
232k
    }
6787
232k
    return(ret);
6788
233k
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
331
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
331
    int *inputIds = NULL;
6806
331
    size_t inputIdsSize = 0;
6807
331
    size_t depth = 0;
6808
6809
1.95k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
1.95k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
1.10k
            int id = ctxt->input->id;
6812
6813
1.10k
            SKIP(3);
6814
1.10k
            SKIP_BLANKS;
6815
6816
1.10k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
977
                SKIP(7);
6818
977
                SKIP_BLANKS;
6819
977
                if (RAW != '[') {
6820
9
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
9
                    xmlHaltParser(ctxt);
6822
9
                    goto error;
6823
9
                }
6824
968
                if (ctxt->input->id != id) {
6825
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
0
                                   "All markup of the conditional section is"
6827
0
                                   " not in the same entity\n");
6828
0
                }
6829
968
                NEXT;
6830
6831
968
                if (inputIdsSize <= depth) {
6832
293
                    int *tmp;
6833
6834
293
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
293
                    tmp = (int *) xmlRealloc(inputIds,
6836
293
                            inputIdsSize * sizeof(int));
6837
293
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
293
                    inputIds = tmp;
6842
293
                }
6843
968
                inputIds[depth] = id;
6844
968
                depth++;
6845
968
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
69
                size_t ignoreDepth = 0;
6847
6848
69
                SKIP(6);
6849
69
                SKIP_BLANKS;
6850
69
                if (RAW != '[') {
6851
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
0
                    xmlHaltParser(ctxt);
6853
0
                    goto error;
6854
0
                }
6855
69
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
69
                NEXT;
6861
6862
12.9k
                while (RAW != 0) {
6863
12.9k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
138
                        SKIP(3);
6865
138
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
138
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
12.7k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
12.7k
                               (NXT(2) == '>')) {
6873
144
                        if (ignoreDepth == 0)
6874
36
                            break;
6875
108
                        SKIP(3);
6876
108
                        ignoreDepth--;
6877
12.6k
                    } else {
6878
12.6k
                        NEXT;
6879
12.6k
                    }
6880
12.9k
                }
6881
6882
69
    if (RAW == 0) {
6883
33
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
33
                    goto error;
6885
33
    }
6886
36
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
36
                SKIP(3);
6892
57
            } else {
6893
57
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
57
                xmlHaltParser(ctxt);
6895
57
                goto error;
6896
57
            }
6897
1.10k
        } else if ((depth > 0) &&
6898
850
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
492
            depth--;
6900
492
            if (ctxt->input->id != inputIds[depth]) {
6901
18
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
18
                               "All markup of the conditional section is not"
6903
18
                               " in the same entity\n");
6904
18
            }
6905
492
            SKIP(3);
6906
492
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
246
            xmlParseMarkupDecl(ctxt);
6908
246
        } else {
6909
112
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
112
            xmlHaltParser(ctxt);
6911
112
            goto error;
6912
112
        }
6913
6914
1.74k
        if (depth == 0)
6915
114
            break;
6916
6917
1.62k
        SKIP_BLANKS;
6918
1.62k
        GROW;
6919
1.62k
    }
6920
6921
331
error:
6922
331
    xmlFree(inputIds);
6923
331
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
11.0M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
11.0M
    GROW;
6952
11.0M
    if (CUR == '<') {
6953
11.0M
        if (NXT(1) == '!') {
6954
11.0M
      switch (NXT(2)) {
6955
450k
          case 'E':
6956
450k
        if (NXT(3) == 'L')
6957
233k
      xmlParseElementDecl(ctxt);
6958
216k
        else if (NXT(3) == 'N')
6959
216k
      xmlParseEntityDecl(ctxt);
6960
74
                    else
6961
74
                        SKIP(2);
6962
450k
        break;
6963
204k
          case 'A':
6964
204k
        xmlParseAttributeListDecl(ctxt);
6965
204k
        break;
6966
164
          case 'N':
6967
164
        xmlParseNotationDecl(ctxt);
6968
164
        break;
6969
10.3M
          case '-':
6970
10.3M
        xmlParseComment(ctxt);
6971
10.3M
        break;
6972
211
    default:
6973
        /* there is an error but it will be detected later */
6974
211
                    SKIP(2);
6975
211
        break;
6976
11.0M
      }
6977
11.0M
  } else if (NXT(1) == '?') {
6978
127
      xmlParsePI(ctxt);
6979
127
  }
6980
11.0M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
11.0M
    if (ctxt->instate == XML_PARSER_EOF)
6987
1.01k
        return;
6988
6989
11.0M
    ctxt->instate = XML_PARSER_DTD;
6990
11.0M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
720
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
720
    xmlChar *version;
7006
720
    const xmlChar *encoding;
7007
720
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
720
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
711
  SKIP(5);
7014
711
    } else {
7015
9
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
9
  return;
7017
9
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
711
    oldstate = ctxt->instate;
7021
711
    ctxt->instate = XML_PARSER_START;
7022
7023
711
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
711
    version = xmlParseVersionInfo(ctxt);
7032
711
    if (version == NULL)
7033
78
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
633
    else {
7035
633
  if (SKIP_BLANKS == 0) {
7036
12
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
12
               "Space needed here\n");
7038
12
  }
7039
633
    }
7040
711
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
711
    encoding = xmlParseEncodingDecl(ctxt);
7046
711
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
711
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
9
        ctxt->instate = oldstate;
7053
9
        return;
7054
9
    }
7055
702
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
111
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
111
           "Missing encoding in text declaration\n");
7058
111
    }
7059
7060
702
    SKIP_BLANKS;
7061
702
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
522
        SKIP(2);
7063
522
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
6
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
6
  NEXT;
7067
174
    } else {
7068
174
        int c;
7069
7070
174
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
17.2k
        while ((c = CUR) != 0) {
7072
17.2k
            NEXT;
7073
17.2k
            if (c == '>')
7074
129
                break;
7075
17.2k
        }
7076
174
    }
7077
7078
702
    ctxt->instate = oldstate;
7079
702
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
3.86k
                       const xmlChar *SystemID) {
7096
3.86k
    xmlDetectSAX2(ctxt);
7097
3.86k
    GROW;
7098
7099
3.86k
    if ((ctxt->encoding == NULL) &&
7100
3.86k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
3.86k
        xmlChar start[4];
7102
3.86k
  xmlCharEncoding enc;
7103
7104
3.86k
  start[0] = RAW;
7105
3.86k
  start[1] = NXT(1);
7106
3.86k
  start[2] = NXT(2);
7107
3.86k
  start[3] = NXT(3);
7108
3.86k
  enc = xmlDetectCharEncoding(start, 4);
7109
3.86k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
720
      xmlSwitchEncoding(ctxt, enc);
7111
3.86k
    }
7112
7113
3.86k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
669
  xmlParseTextDecl(ctxt);
7115
669
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
9
      xmlHaltParser(ctxt);
7120
9
      return;
7121
9
  }
7122
669
    }
7123
3.86k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
3.86k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
3.86k
    ctxt->instate = XML_PARSER_DTD;
7135
3.86k
    ctxt->external = 1;
7136
3.86k
    SKIP_BLANKS;
7137
709k
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
706k
  GROW;
7139
706k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
331
            xmlParseConditionalSections(ctxt);
7141
706k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
704k
            xmlParseMarkupDecl(ctxt);
7143
704k
        } else {
7144
1.22k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
1.22k
            xmlHaltParser(ctxt);
7146
1.22k
            return;
7147
1.22k
        }
7148
705k
        SKIP_BLANKS;
7149
705k
    }
7150
7151
2.63k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
2.63k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
1.03M
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
1.03M
    xmlEntityPtr ent;
7175
1.03M
    xmlChar *val;
7176
1.03M
    int was_checked;
7177
1.03M
    xmlNodePtr list = NULL;
7178
1.03M
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
1.03M
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
1.03M
    if (NXT(1) == '#') {
7188
19.5k
  int i = 0;
7189
19.5k
  xmlChar out[16];
7190
19.5k
  int hex = NXT(2);
7191
19.5k
  int value = xmlParseCharRef(ctxt);
7192
7193
19.5k
  if (value == 0)
7194
1.32k
      return;
7195
18.2k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
13.7k
      if (value <= 0xFF) {
7202
13.7k
    out[0] = value;
7203
13.7k
    out[1] = 0;
7204
13.7k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
13.7k
        (!ctxt->disableSAX))
7206
4.64k
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
13.7k
      } else {
7208
26
    if ((hex == 'x') || (hex == 'X'))
7209
1
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
25
    else
7211
25
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
26
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
26
        (!ctxt->disableSAX))
7214
17
        ctxt->sax->reference(ctxt->userData, out);
7215
26
      }
7216
13.7k
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
4.51k
      COPY_BUF(0 ,out, i, value);
7221
4.51k
      out[i] = 0;
7222
4.51k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
4.51k
    (!ctxt->disableSAX))
7224
1.78k
    ctxt->sax->characters(ctxt->userData, out, i);
7225
4.51k
  }
7226
18.2k
  return;
7227
19.5k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
1.01M
    ent = xmlParseEntityRef(ctxt);
7233
1.01M
    if (ent == NULL) return;
7234
850k
    if (!ctxt->wellFormed)
7235
666k
  return;
7236
184k
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
184k
    if ((ent->name == NULL) ||
7240
184k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
53.7k
  val = ent->content;
7242
53.7k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
53.7k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
53.7k
      (!ctxt->disableSAX))
7248
53.7k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
53.7k
  return;
7250
53.7k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
130k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
130k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
11.2k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
11.2k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
11.2k
  void *user_data;
7273
11.2k
  if (ctxt->userData == ctxt)
7274
11.2k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
11.2k
        ctxt->sizeentcopy = 0;
7280
7281
11.2k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
36
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
36
            xmlHaltParser(ctxt);
7284
36
            return;
7285
36
        }
7286
7287
11.1k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
11.1k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
9.41k
      ctxt->depth++;
7297
9.41k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
9.41k
                                                user_data, &list);
7299
9.41k
      ctxt->depth--;
7300
7301
9.41k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
1.76k
      ctxt->depth++;
7303
1.76k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
1.76k
                                     user_data, ctxt->depth, ent->URI,
7305
1.76k
             ent->ExternalID, &list);
7306
1.76k
      ctxt->depth--;
7307
1.76k
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
11.1k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
11.1k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
11.1k
        ent->expandedSize = ctxt->sizeentcopy;
7316
11.1k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
360
            xmlHaltParser(ctxt);
7318
360
      xmlFreeNodeList(list);
7319
360
      return;
7320
360
  }
7321
10.8k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
10.8k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
9.19k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
9.19k
            if ((ctxt->replaceEntities == 0) ||
7333
9.19k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
9.19k
                ((list->type == XML_TEXT_NODE) &&
7335
8.79k
                 (list->next == NULL))) {
7336
8.79k
                ent->owner = 1;
7337
18.8k
                while (list != NULL) {
7338
10.0k
                    list->parent = (xmlNodePtr) ent;
7339
10.0k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
10.0k
                    if (list->next == NULL)
7342
8.79k
                        ent->last = list;
7343
10.0k
                    list = list->next;
7344
10.0k
                }
7345
8.79k
                list = NULL;
7346
8.79k
            } else {
7347
395
                ent->owner = 0;
7348
98.0k
                while (list != NULL) {
7349
97.6k
                    list->parent = (xmlNodePtr) ctxt->node;
7350
97.6k
                    list->doc = ctxt->myDoc;
7351
97.6k
                    if (list->next == NULL)
7352
395
                        ent->last = list;
7353
97.6k
                    list = list->next;
7354
97.6k
                }
7355
395
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
395
            }
7361
9.19k
  } else if ((ret != XML_ERR_OK) &&
7362
1.63k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
1.31k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
1.31k
         "Entity '%s' failed to parse\n", ent->name);
7365
1.31k
            if (ent->content != NULL)
7366
258
                ent->content[0] = 0;
7367
1.31k
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
10.8k
        was_checked = 0;
7374
10.8k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
130k
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
10.0k
  if (was_checked != 0) {
7389
8.32k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
8.32k
      if (ctxt->userData == ctxt)
7396
8.32k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
8.32k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
0
    ctxt->depth++;
7402
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
0
           ent->content, user_data, NULL);
7404
0
    ctxt->depth--;
7405
8.32k
      } else if (ent->etype ==
7406
8.32k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
8.32k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
8.32k
    ctxt->depth++;
7410
8.32k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
8.32k
         ctxt->sax, user_data, ctxt->depth,
7412
8.32k
         ent->URI, ent->ExternalID, NULL);
7413
8.32k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
8.32k
                ctxt->sizeentities = oldsizeentities;
7417
8.32k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
8.32k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
8.32k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
8.32k
  }
7429
10.0k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
10.0k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
130
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
130
  }
7437
10.0k
  return;
7438
10.0k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
120k
    if ((was_checked != 0) &&
7445
120k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
33
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
120k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
120k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
1.32k
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
1.32k
  return;
7458
1.32k
    }
7459
7460
118k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
118k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
118k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
118k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
36.0k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
36.0k
    cur = ent->children;
7492
39.3k
    while (cur != NULL) {
7493
39.3k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
39.3k
        if (nw != NULL) {
7495
39.3k
      if (nw->_private == NULL)
7496
39.3k
          nw->_private = cur->_private;
7497
39.3k
      if (firstChild == NULL){
7498
36.0k
          firstChild = nw;
7499
36.0k
      }
7500
39.3k
      nw = xmlAddChild(ctxt->node, nw);
7501
39.3k
        }
7502
39.3k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
36.0k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
36.0k
          (nw != NULL) &&
7509
36.0k
          (nw->type == XML_ELEMENT_NODE) &&
7510
36.0k
          (nw->children == NULL))
7511
14
          nw->extra = 1;
7512
7513
36.0k
      break;
7514
36.0k
        }
7515
3.30k
        cur = cur->next;
7516
3.30k
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
82.8k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
82.8k
    xmlNodePtr nw = NULL, cur, next, last,
7523
82.8k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
82.8k
    cur = ent->children;
7532
82.8k
    ent->children = NULL;
7533
82.8k
    last = ent->last;
7534
82.8k
    ent->last = NULL;
7535
674k
    while (cur != NULL) {
7536
674k
        next = cur->next;
7537
674k
        cur->next = NULL;
7538
674k
        cur->parent = NULL;
7539
674k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
674k
        if (nw != NULL) {
7541
674k
      if (nw->_private == NULL)
7542
674k
          nw->_private = cur->_private;
7543
674k
      if (firstChild == NULL){
7544
82.8k
          firstChild = cur;
7545
82.8k
      }
7546
674k
      xmlAddChild((xmlNodePtr) ent, nw);
7547
674k
        }
7548
674k
        xmlAddChild(ctxt->node, cur);
7549
674k
        if (cur == last)
7550
82.8k
      break;
7551
592k
        cur = next;
7552
592k
    }
7553
82.8k
    if (ent->owner == 0)
7554
395
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
82.8k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
118k
      ctxt->nodemem = 0;
7582
118k
      ctxt->nodelen = 0;
7583
118k
      return;
7584
118k
  }
7585
118k
    }
7586
118k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entitiy reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
2.15M
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
2.15M
    const xmlChar *name;
7621
2.15M
    xmlEntityPtr ent = NULL;
7622
7623
2.15M
    GROW;
7624
2.15M
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
2.15M
    if (RAW != '&')
7628
0
        return(NULL);
7629
2.15M
    NEXT;
7630
2.15M
    name = xmlParseName(ctxt);
7631
2.15M
    if (name == NULL) {
7632
3.91k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
3.91k
           "xmlParseEntityRef: no name\n");
7634
3.91k
        return(NULL);
7635
3.91k
    }
7636
2.14M
    if (RAW != ';') {
7637
4.57k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
4.57k
  return(NULL);
7639
4.57k
    }
7640
2.14M
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
2.14M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
2.08M
        ent = xmlGetPredefinedEntity(name);
7647
2.08M
        if (ent != NULL)
7648
84.9k
            return(ent);
7649
2.08M
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have stored in the parser context.
7654
     */
7655
2.05M
    if (ctxt->sax != NULL) {
7656
2.05M
  if (ctxt->sax->getEntity != NULL)
7657
2.05M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
2.05M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
2.05M
      (ctxt->options & XML_PARSE_OLDSAX))
7660
38
      ent = xmlGetPredefinedEntity(name);
7661
2.05M
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
2.05M
      (ctxt->userData==ctxt)) {
7663
12.1k
      ent = xmlSAX2GetEntity(ctxt, name);
7664
12.1k
  }
7665
2.05M
    }
7666
2.05M
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
2.05M
    if (ent == NULL) {
7690
213k
  if ((ctxt->standalone == 1) ||
7691
213k
      ((ctxt->hasExternalSubset == 0) &&
7692
212k
       (ctxt->hasPErefs == 0))) {
7693
96.3k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
96.3k
         "Entity '%s' not defined\n", name);
7695
117k
  } else {
7696
117k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
117k
         "Entity '%s' not defined\n", name);
7698
117k
      if ((ctxt->inSubset == 0) &&
7699
117k
    (ctxt->sax != NULL) &&
7700
117k
    (ctxt->sax->reference != NULL)) {
7701
117k
    ctxt->sax->reference(ctxt->userData, name);
7702
117k
      }
7703
117k
  }
7704
213k
  ctxt->valid = 0;
7705
213k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
1.84M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
15
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
15
     "Entity reference to unparsed entity %s\n", name);
7715
15
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
1.84M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
1.84M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
12
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
12
       "Attribute references external entity '%s'\n", name);
7726
12
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
1.84M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
1.84M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
1.07M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
2.54k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
12
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
2.54k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
2.54k
        }
7740
1.07M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
23.3k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
23.3k
                    "'<' in entity '%s' is not allowed in attributes "
7743
23.3k
                    "values\n", name);
7744
1.07M
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
770k
    else {
7750
770k
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
770k
      default:
7758
770k
      break;
7759
770k
  }
7760
770k
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
2.05M
    return(ent);
7769
2.05M
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY references declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
5.57M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
5.57M
    xmlChar *name;
7805
5.57M
    const xmlChar *ptr;
7806
5.57M
    xmlChar cur;
7807
5.57M
    xmlEntityPtr ent = NULL;
7808
7809
5.57M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
5.57M
    ptr = *str;
7812
5.57M
    cur = *ptr;
7813
5.57M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
5.57M
    ptr++;
7817
5.57M
    name = xmlParseStringName(ctxt, &ptr);
7818
5.57M
    if (name == NULL) {
7819
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
0
           "xmlParseStringEntityRef: no name\n");
7821
0
  *str = ptr;
7822
0
  return(NULL);
7823
0
    }
7824
5.57M
    if (*ptr != ';') {
7825
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
0
        xmlFree(name);
7827
0
  *str = ptr;
7828
0
  return(NULL);
7829
0
    }
7830
5.57M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
5.57M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
5.50M
        ent = xmlGetPredefinedEntity(name);
7838
5.50M
        if (ent != NULL) {
7839
3.87k
            xmlFree(name);
7840
3.87k
            *str = ptr;
7841
3.87k
            return(ent);
7842
3.87k
        }
7843
5.50M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have stored in the parser context.
7848
     */
7849
5.57M
    if (ctxt->sax != NULL) {
7850
5.57M
  if (ctxt->sax->getEntity != NULL)
7851
5.57M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
5.57M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
15
      ent = xmlGetPredefinedEntity(name);
7854
5.57M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
306k
      ent = xmlSAX2GetEntity(ctxt, name);
7856
306k
  }
7857
5.57M
    }
7858
5.57M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
5.57M
    if (ent == NULL) {
7885
306k
  if ((ctxt->standalone == 1) ||
7886
306k
      ((ctxt->hasExternalSubset == 0) &&
7887
306k
       (ctxt->hasPErefs == 0))) {
7888
191k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
191k
         "Entity '%s' not defined\n", name);
7890
191k
  } else {
7891
115k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
115k
        "Entity '%s' not defined\n",
7893
115k
        name);
7894
115k
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
306k
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
5.26M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
0
     "Entity reference to unparsed entity %s\n", name);
7906
0
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
5.26M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
5.26M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
0
   "Attribute references external entity '%s'\n", name);
7917
0
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
5.26M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
5.26M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
5.26M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
1.31k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
21
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
1.31k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
1.31k
        }
7931
5.26M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
24.3k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
24.3k
                    "'<' in entity '%s' is not allowed in attributes "
7934
24.3k
                    "values\n", name);
7935
5.26M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
3
    else {
7941
3
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
3
      default:
7949
3
      break;
7950
3
  }
7951
3
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
5.57M
    xmlFree(name);
7961
5.57M
    *str = ptr;
7962
5.57M
    return(ent);
7963
5.57M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing it's content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
10.9M
{
8000
10.9M
    const xmlChar *name;
8001
10.9M
    xmlEntityPtr entity = NULL;
8002
10.9M
    xmlParserInputPtr input;
8003
8004
10.9M
    if (RAW != '%')
8005
0
        return;
8006
10.9M
    NEXT;
8007
10.9M
    name = xmlParseName(ctxt);
8008
10.9M
    if (name == NULL) {
8009
629
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
629
  return;
8011
629
    }
8012
10.9M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
10.9M
    if (RAW != ';') {
8016
155k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
155k
        return;
8018
155k
    }
8019
8020
10.7M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
10.7M
    if ((ctxt->sax != NULL) &&
8026
10.7M
  (ctxt->sax->getParameterEntity != NULL))
8027
10.7M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
10.7M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
10.7M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
318k
  if ((ctxt->standalone == 1) ||
8040
318k
      ((ctxt->hasExternalSubset == 0) &&
8041
318k
       (ctxt->hasPErefs == 0))) {
8042
80
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
80
            "PEReference: %%%s; not found\n",
8044
80
            name);
8045
318k
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
318k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
0
                                 "PEReference: %%%s; not found\n",
8056
0
                                 name, NULL);
8057
0
            } else
8058
318k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
318k
                              "PEReference: %%%s; not found\n",
8060
318k
                              name, NULL);
8061
318k
            ctxt->valid = 0;
8062
318k
  }
8063
10.4M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
10.4M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
10.4M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
10.4M
  } else {
8073
10.4M
            xmlChar start[4];
8074
10.4M
            xmlCharEncoding enc;
8075
10.4M
            unsigned long parentConsumed;
8076
10.4M
            xmlEntityPtr oldEnt;
8077
8078
10.4M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
10.4M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
10.4M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
10.4M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
10.4M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
10.4M
    (ctxt->replaceEntities == 0) &&
8084
10.4M
    (ctxt->validate == 0))
8085
3
    return;
8086
8087
10.4M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
9
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
9
                xmlHaltParser(ctxt);
8090
9
                return;
8091
9
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
10.4M
            parentConsumed = ctxt->input->parentConsumed;
8095
10.4M
            oldEnt = ctxt->input->entity;
8096
10.4M
            if ((oldEnt == NULL) ||
8097
10.4M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
10.1M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
322k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
322k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
322k
                                     ctxt->input->cur - ctxt->input->base);
8102
322k
            }
8103
8104
10.4M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
10.4M
      if (xmlPushInput(ctxt, input) < 0) {
8106
39
                xmlFreeInputStream(input);
8107
39
    return;
8108
39
            }
8109
8110
10.4M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
10.4M
            input->parentConsumed = parentConsumed;
8113
8114
10.4M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
489
                GROW
8125
489
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
489
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
489
                    start[0] = RAW;
8129
489
                    start[1] = NXT(1);
8130
489
                    start[2] = NXT(2);
8131
489
                    start[3] = NXT(3);
8132
489
                    enc = xmlDetectCharEncoding(start, 4);
8133
489
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
0
                        xmlSwitchEncoding(ctxt, enc);
8135
0
                    }
8136
489
                }
8137
8138
489
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
489
                    (IS_BLANK_CH(NXT(5)))) {
8140
0
                    xmlParseTextDecl(ctxt);
8141
0
                }
8142
489
            }
8143
10.4M
  }
8144
10.4M
    }
8145
10.7M
    ctxt->hasPErefs = 1;
8146
10.7M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
102
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
102
    xmlParserInputPtr input;
8162
102
    xmlBufferPtr buf;
8163
102
    int l, c;
8164
102
    int count = 0;
8165
8166
102
    if ((ctxt == NULL) || (entity == NULL) ||
8167
102
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
102
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
102
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
102
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
102
    buf = xmlBufferCreate();
8180
102
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
102
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
102
    input = xmlNewEntityInputStream(ctxt, entity);
8188
102
    if (input == NULL) {
8189
12
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
12
              "xmlLoadEntityContent input error");
8191
12
  xmlBufferFree(buf);
8192
12
        return(-1);
8193
12
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
90
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
90
    GROW;
8206
90
    c = CUR_CHAR(l);
8207
26.7k
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
26.7k
           (IS_CHAR(c))) {
8209
26.6k
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
26.6k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
228
      count = 0;
8212
228
      GROW;
8213
228
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
228
  }
8218
26.6k
  NEXTL(l);
8219
26.6k
  c = CUR_CHAR(l);
8220
26.6k
  if (c == 0) {
8221
63
      count = 0;
8222
63
      GROW;
8223
63
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
63
      c = CUR_CHAR(l);
8228
63
  }
8229
26.6k
    }
8230
8231
90
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
33
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
33
        xmlPopInput(ctxt);
8234
57
    } else if (!IS_CHAR(c)) {
8235
57
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
57
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
57
                    c);
8238
57
  xmlBufferFree(buf);
8239
57
  return(-1);
8240
57
    }
8241
33
    entity->content = buf->content;
8242
33
    entity->length = buf->use;
8243
33
    buf->content = NULL;
8244
33
    xmlBufferFree(buf);
8245
8246
33
    return(0);
8247
90
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the string of the entity content.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
154k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
154k
    const xmlChar *ptr;
8283
154k
    xmlChar cur;
8284
154k
    xmlChar *name;
8285
154k
    xmlEntityPtr entity = NULL;
8286
8287
154k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
154k
    ptr = *str;
8289
154k
    cur = *ptr;
8290
154k
    if (cur != '%')
8291
0
        return(NULL);
8292
154k
    ptr++;
8293
154k
    name = xmlParseStringName(ctxt, &ptr);
8294
154k
    if (name == NULL) {
8295
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
0
           "xmlParseStringPEReference: no name\n");
8297
0
  *str = ptr;
8298
0
  return(NULL);
8299
0
    }
8300
154k
    cur = *ptr;
8301
154k
    if (cur != ';') {
8302
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
0
  xmlFree(name);
8304
0
  *str = ptr;
8305
0
  return(NULL);
8306
0
    }
8307
154k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
154k
    if ((ctxt->sax != NULL) &&
8313
154k
  (ctxt->sax->getParameterEntity != NULL))
8314
154k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
154k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
154k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
3.65k
  if ((ctxt->standalone == 1) ||
8330
3.65k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
3.65k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
3.65k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
3.65k
        "PEReference: %%%s; not found\n",
8343
3.65k
        name, NULL);
8344
3.65k
      ctxt->valid = 0;
8345
3.65k
  }
8346
151k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
151k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
151k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
151k
    }
8357
154k
    ctxt->hasPErefs = 1;
8358
154k
    xmlFree(name);
8359
154k
    *str = ptr;
8360
154k
    return(entity);
8361
154k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
14.2k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
14.2k
    const xmlChar *name = NULL;
8382
14.2k
    xmlChar *ExternalID = NULL;
8383
14.2k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
14.2k
    SKIP(9);
8389
8390
14.2k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
14.2k
    name = xmlParseName(ctxt);
8396
14.2k
    if (name == NULL) {
8397
78
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
78
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
78
    }
8400
14.2k
    ctxt->intSubName = name;
8401
8402
14.2k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
14.2k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
14.2k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
6.38k
        ctxt->hasExternalSubset = 1;
8411
6.38k
    }
8412
14.2k
    ctxt->extSubURI = URI;
8413
14.2k
    ctxt->extSubSystem = ExternalID;
8414
8415
14.2k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
14.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
14.2k
  (!ctxt->disableSAX))
8422
13.0k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
14.2k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Is there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
14.2k
    if (RAW == '[')
8431
10.8k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
3.31k
    if (RAW != '>') {
8437
943
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
943
    }
8439
3.31k
    NEXT;
8440
3.31k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
10.8k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
10.8k
    if (RAW == '[') {
8457
10.8k
        int baseInputNr = ctxt->inputNr;
8458
10.8k
        ctxt->instate = XML_PARSER_DTD;
8459
10.8k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
10.8k
  SKIP_BLANKS;
8466
10.3M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
10.3M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
10.3M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
10.3M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
10.3M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
10.3M
          xmlParseMarkupDecl(ctxt);
8478
10.3M
            } else if (RAW == '%') {
8479
970
          xmlParsePEReference(ctxt);
8480
2.85k
            } else {
8481
2.85k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
2.85k
                        "xmlParseInternalSubset: error detected in"
8483
2.85k
                        " Markup declaration\n");
8484
2.85k
                xmlHaltParser(ctxt);
8485
2.85k
                return;
8486
2.85k
            }
8487
10.3M
      SKIP_BLANKS;
8488
10.3M
  }
8489
8.04k
  if (RAW == ']') {
8490
7.26k
      NEXT;
8491
7.26k
      SKIP_BLANKS;
8492
7.26k
  }
8493
8.04k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
8.04k
    if (RAW != '>') {
8499
819
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
819
  return;
8501
819
    }
8502
7.22k
    NEXT;
8503
7.22k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
86.7k
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
86.7k
    const xmlChar *name;
8544
86.7k
    xmlChar *val;
8545
8546
86.7k
    *value = NULL;
8547
86.7k
    GROW;
8548
86.7k
    name = xmlParseName(ctxt);
8549
86.7k
    if (name == NULL) {
8550
3.55k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
3.55k
                 "error parsing attribute name\n");
8552
3.55k
        return(NULL);
8553
3.55k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
83.2k
    SKIP_BLANKS;
8559
83.2k
    if (RAW == '=') {
8560
81.3k
        NEXT;
8561
81.3k
  SKIP_BLANKS;
8562
81.3k
  val = xmlParseAttValue(ctxt);
8563
81.3k
  ctxt->instate = XML_PARSER_CONTENT;
8564
81.3k
    } else {
8565
1.87k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
1.87k
         "Specification mandates value for attribute %s\n", name);
8567
1.87k
  return(name);
8568
1.87k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
81.3k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
0
  if (!xmlCheckLanguageID(val)) {
8577
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
0
              "Malformed value for xml:lang : %s\n",
8579
0
        val, NULL);
8580
0
  }
8581
0
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
81.3k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
15
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
0
      *(ctxt->space) = 0;
8589
15
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
4
      *(ctxt->space) = 1;
8591
11
  else {
8592
11
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
11
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
11
                                 val, NULL);
8595
11
  }
8596
15
    }
8597
8598
81.3k
    *value = val;
8599
81.3k
    return(name);
8600
83.2k
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
188k
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
188k
    const xmlChar *name;
8634
188k
    const xmlChar *attname;
8635
188k
    xmlChar *attvalue;
8636
188k
    const xmlChar **atts = ctxt->atts;
8637
188k
    int nbatts = 0;
8638
188k
    int maxatts = ctxt->maxatts;
8639
188k
    int i;
8640
8641
188k
    if (RAW != '<') return(NULL);
8642
188k
    NEXT1;
8643
8644
188k
    name = xmlParseName(ctxt);
8645
188k
    if (name == NULL) {
8646
3.02k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
3.02k
       "xmlParseStartTag: invalid element name\n");
8648
3.02k
        return(NULL);
8649
3.02k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
185k
    SKIP_BLANKS;
8657
185k
    GROW;
8658
8659
200k
    while (((RAW != '>') &&
8660
200k
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
200k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
86.7k
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
86.7k
        if (attname == NULL) {
8664
3.55k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
3.55k
         "xmlParseStartTag: problem parsing attributes\n");
8666
3.55k
      break;
8667
3.55k
  }
8668
83.2k
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
93.5k
      for (i = 0; i < nbatts;i += 2) {
8675
12.3k
          if (xmlStrEqual(atts[i], attname)) {
8676
28
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
28
        xmlFree(attvalue);
8678
28
        goto failed;
8679
28
    }
8680
12.3k
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
81.1k
      if (atts == NULL) {
8685
1.69k
          maxatts = 22; /* allow for 10 attrs by default */
8686
1.69k
          atts = (const xmlChar **)
8687
1.69k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
1.69k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
1.69k
    ctxt->atts = atts;
8695
1.69k
    ctxt->maxatts = maxatts;
8696
79.4k
      } else if (nbatts + 4 > maxatts) {
8697
1
          const xmlChar **n;
8698
8699
1
          maxatts *= 2;
8700
1
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
1
               maxatts * sizeof(const xmlChar *));
8702
1
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
1
    atts = n;
8709
1
    ctxt->atts = atts;
8710
1
    ctxt->maxatts = maxatts;
8711
1
      }
8712
81.1k
      atts[nbatts++] = attname;
8713
81.1k
      atts[nbatts++] = attvalue;
8714
81.1k
      atts[nbatts] = NULL;
8715
81.1k
      atts[nbatts + 1] = NULL;
8716
81.1k
  } else {
8717
2.05k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
2.05k
  }
8720
8721
83.2k
failed:
8722
8723
83.2k
  GROW
8724
83.2k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
68.4k
      break;
8726
14.8k
  if (SKIP_BLANKS == 0) {
8727
3.52k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
3.52k
         "attributes construct error\n");
8729
3.52k
  }
8730
14.8k
  SHRINK;
8731
14.8k
        GROW;
8732
14.8k
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
185k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
185k
  (!ctxt->disableSAX)) {
8739
174k
  if (nbatts > 0)
8740
66.2k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
108k
  else
8742
108k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
174k
    }
8744
8745
185k
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
260k
        for (i = 1;i < nbatts;i+=2)
8748
81.1k
      if (atts[i] != NULL)
8749
81.1k
         xmlFree((xmlChar *) atts[i]);
8750
179k
    }
8751
185k
    return(name);
8752
185k
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
171k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
171k
    const xmlChar *name;
8772
8773
171k
    GROW;
8774
171k
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
171k
    SKIP(2);
8780
8781
171k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
171k
    GROW;
8787
171k
    SKIP_BLANKS;
8788
171k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
735
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
735
    } else
8791
170k
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
171k
    if (name != (xmlChar*)1) {
8800
3.64k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
3.64k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
3.64k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
3.64k
                    ctxt->name, line, name);
8804
3.64k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
171k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
171k
  (!ctxt->disableSAX))
8811
161k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
171k
    namePop(ctxt);
8814
171k
    spacePop(ctxt);
8815
171k
    return;
8816
171k
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
1.28M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
1.28M
    int i;
8858
8859
1.28M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
1.24M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
24.6k
        if (ctxt->nsTab[i] == prefix) {
8862
16.1k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
25
          return(NULL);
8864
16.1k
      return(ctxt->nsTab[i + 1]);
8865
16.1k
  }
8866
1.21M
    return(NULL);
8867
1.23M
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
1.93M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
1.93M
    const xmlChar *l, *p;
8886
8887
1.93M
    GROW;
8888
8889
1.93M
    l = xmlParseNCName(ctxt);
8890
1.93M
    if (l == NULL) {
8891
12.1k
        if (CUR == ':') {
8892
100
      l = xmlParseName(ctxt);
8893
100
      if (l != NULL) {
8894
100
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
100
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
100
    *prefix = NULL;
8897
100
    return(l);
8898
100
      }
8899
100
  }
8900
12.0k
        return(NULL);
8901
12.1k
    }
8902
1.91M
    if (CUR == ':') {
8903
17.7k
        NEXT;
8904
17.7k
  p = l;
8905
17.7k
  l = xmlParseNCName(ctxt);
8906
17.7k
  if (l == NULL) {
8907
327
      xmlChar *tmp;
8908
8909
327
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
327
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
327
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
327
      l = xmlParseNmtoken(ctxt);
8914
327
      if (l == NULL) {
8915
198
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
198
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
198
            } else {
8919
129
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
129
    xmlFree((char *)l);
8921
129
      }
8922
327
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
327
      if (tmp != NULL) xmlFree(tmp);
8924
327
      *prefix = NULL;
8925
327
      return(p);
8926
327
  }
8927
17.3k
  if (CUR == ':') {
8928
103
      xmlChar *tmp;
8929
8930
103
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
103
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
103
      NEXT;
8933
103
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
103
      if (tmp != NULL) {
8935
77
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
77
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
77
    if (tmp != NULL) xmlFree(tmp);
8938
77
    *prefix = p;
8939
77
    return(l);
8940
77
      }
8941
26
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
26
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
26
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
26
      if (tmp != NULL) xmlFree(tmp);
8946
26
      *prefix = p;
8947
26
      return(l);
8948
26
  }
8949
17.2k
  *prefix = p;
8950
17.2k
    } else
8951
1.90M
        *prefix = NULL;
8952
1.91M
    return(l);
8953
1.91M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
5.85k
                        xmlChar const *prefix) {
8971
5.85k
    const xmlChar *cmp;
8972
5.85k
    const xmlChar *in;
8973
5.85k
    const xmlChar *ret;
8974
5.85k
    const xmlChar *prefix2;
8975
8976
5.85k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
5.85k
    GROW;
8979
5.85k
    in = ctxt->input->cur;
8980
8981
5.85k
    cmp = prefix;
8982
18.2k
    while (*in != 0 && *in == *cmp) {
8983
12.4k
  ++in;
8984
12.4k
  ++cmp;
8985
12.4k
    }
8986
5.85k
    if ((*cmp == 0) && (*in == ':')) {
8987
5.56k
        in++;
8988
5.56k
  cmp = name;
8989
43.8k
  while (*in != 0 && *in == *cmp) {
8990
38.2k
      ++in;
8991
38.2k
      ++cmp;
8992
38.2k
  }
8993
5.56k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
4.94k
            ctxt->input->col += in - ctxt->input->cur;
8996
4.94k
      ctxt->input->cur = in;
8997
4.94k
      return((const xmlChar*) 1);
8998
4.94k
  }
8999
5.56k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
907
    ret = xmlParseQName (ctxt, &prefix2);
9004
907
    if ((ret == name) && (prefix == prefix2))
9005
13
  return((const xmlChar*) 1);
9006
894
    return ret;
9007
907
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
61
    const xmlChar *oldbase = ctxt->input->base;\
9045
61
    GROW;\
9046
61
    if (ctxt->instate == XML_PARSER_EOF)\
9047
61
        return(NULL);\
9048
61
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
61
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
822k
{
9059
822k
    xmlChar limit = 0;
9060
822k
    const xmlChar *in = NULL, *start, *end, *last;
9061
822k
    xmlChar *ret = NULL;
9062
822k
    int line, col;
9063
822k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
5.97k
                    XML_MAX_HUGE_LENGTH :
9065
822k
                    XML_MAX_TEXT_LENGTH;
9066
9067
822k
    GROW;
9068
822k
    in = (xmlChar *) CUR_PTR;
9069
822k
    line = ctxt->input->line;
9070
822k
    col = ctxt->input->col;
9071
822k
    if (*in != '"' && *in != '\'') {
9072
625
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
625
        return (NULL);
9074
625
    }
9075
821k
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
821k
    limit = *in++;
9083
821k
    col++;
9084
821k
    end = ctxt->input->end;
9085
821k
    start = in;
9086
821k
    if (in >= end) {
9087
8
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
8
    }
9089
821k
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
405k
  while ((in < end) && (*in != limit) &&
9094
405k
         ((*in == 0x20) || (*in == 0x9) ||
9095
405k
          (*in == 0xA) || (*in == 0xD))) {
9096
3.67k
      if (*in == 0xA) {
9097
1.57k
          line++; col = 1;
9098
2.10k
      } else {
9099
2.10k
          col++;
9100
2.10k
      }
9101
3.67k
      in++;
9102
3.67k
      start = in;
9103
3.67k
      if (in >= end) {
9104
0
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
0
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
0
      }
9111
3.67k
  }
9112
4.18M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
4.18M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
3.78M
      col++;
9115
3.78M
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
3.78M
      if (in >= end) {
9117
3
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
3
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
3
      }
9124
3.78M
  }
9125
401k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
401k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
403k
  while ((in < end) && (*in != limit) &&
9131
403k
         ((*in == 0x20) || (*in == 0x9) ||
9132
3.84k
          (*in == 0xA) || (*in == 0xD))) {
9133
1.82k
      if (*in == 0xA) {
9134
1.06k
          line++, col = 1;
9135
1.06k
      } else {
9136
756
          col++;
9137
756
      }
9138
1.82k
      in++;
9139
1.82k
      if (in >= end) {
9140
2
    const xmlChar *oldbase = ctxt->input->base;
9141
2
    GROW;
9142
2
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
2
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
2
    end = ctxt->input->end;
9151
2
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
2
      }
9157
1.82k
  }
9158
401k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
401k
  if (*in != limit) goto need_complex;
9164
419k
    } else {
9165
5.36M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
5.36M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
4.94M
      in++;
9168
4.94M
      col++;
9169
4.94M
      if (in >= end) {
9170
50
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
50
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
50
      }
9177
4.94M
  }
9178
419k
  last = in;
9179
419k
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
419k
  if (*in != limit) goto need_complex;
9185
419k
    }
9186
789k
    in++;
9187
789k
    col++;
9188
789k
    if (len != NULL) {
9189
660k
        if (alloc) *alloc = 0;
9190
660k
        *len = last - start;
9191
660k
        ret = (xmlChar *) start;
9192
660k
    } else {
9193
129k
        if (alloc) *alloc = 1;
9194
129k
        ret = xmlStrndup(start, last - start);
9195
129k
    }
9196
789k
    CUR_PTR = in;
9197
789k
    ctxt->input->line = line;
9198
789k
    ctxt->input->col = col;
9199
789k
    return ret;
9200
31.5k
need_complex:
9201
31.5k
    if (alloc) *alloc = 1;
9202
31.5k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
821k
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
690k
{
9226
690k
    const xmlChar *name;
9227
690k
    xmlChar *val, *internal_val = NULL;
9228
690k
    int normalize = 0;
9229
9230
690k
    *value = NULL;
9231
690k
    GROW;
9232
690k
    name = xmlParseQName(ctxt, prefix);
9233
690k
    if (name == NULL) {
9234
2.34k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
2.34k
                       "error parsing attribute name\n");
9236
2.34k
        return (NULL);
9237
2.34k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
688k
    if (ctxt->attsSpecial != NULL) {
9243
524k
        int type;
9244
9245
524k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
524k
                                                 pref, elem, *prefix, name);
9247
524k
        if (type != 0)
9248
401k
            normalize = 1;
9249
524k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
688k
    SKIP_BLANKS;
9255
688k
    if (RAW == '=') {
9256
687k
        NEXT;
9257
687k
        SKIP_BLANKS;
9258
687k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
687k
        if (val == NULL)
9260
239
            return (NULL);
9261
686k
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
401k
      if (*alloc) {
9269
2.02k
          const xmlChar *val2;
9270
9271
2.02k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
2.02k
    if ((val2 != NULL) && (val2 != val)) {
9273
385
        xmlFree(val);
9274
385
        val = (xmlChar *) val2;
9275
385
    }
9276
2.02k
      }
9277
401k
  }
9278
686k
        ctxt->instate = XML_PARSER_CONTENT;
9279
686k
    } else {
9280
1.30k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
1.30k
                          "Specification mandates value for attribute %s\n",
9282
1.30k
                          name);
9283
1.30k
        return (name);
9284
1.30k
    }
9285
9286
686k
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
482
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
3
            internal_val = xmlStrndup(val, *len);
9294
3
            if (!xmlCheckLanguageID(internal_val)) {
9295
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
0
                              "Malformed value for xml:lang : %s\n",
9297
0
                              internal_val, NULL);
9298
0
            }
9299
3
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
482
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
30
            internal_val = xmlStrndup(val, *len);
9306
30
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
0
                *(ctxt->space) = 0;
9308
30
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
13
                *(ctxt->space) = 1;
9310
17
            else {
9311
17
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
17
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
17
                              internal_val, NULL);
9314
17
            }
9315
30
        }
9316
482
        if (internal_val) {
9317
33
            xmlFree(internal_val);
9318
33
        }
9319
482
    }
9320
9321
686k
    *value = val;
9322
686k
    return (name);
9323
688k
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
1.23M
                  const xmlChar **URI, int *tlen) {
9356
1.23M
    const xmlChar *localname;
9357
1.23M
    const xmlChar *prefix;
9358
1.23M
    const xmlChar *attname;
9359
1.23M
    const xmlChar *aprefix;
9360
1.23M
    const xmlChar *nsname;
9361
1.23M
    xmlChar *attvalue;
9362
1.23M
    const xmlChar **atts = ctxt->atts;
9363
1.23M
    int maxatts = ctxt->maxatts;
9364
1.23M
    int nratts, nbatts, nbdef, inputid;
9365
1.23M
    int i, j, nbNs, attval;
9366
1.23M
    unsigned long cur;
9367
1.23M
    int nsNr = ctxt->nsNr;
9368
9369
1.23M
    if (RAW != '<') return(NULL);
9370
1.23M
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
1.23M
    SHRINK;
9380
1.23M
    cur = ctxt->input->cur - ctxt->input->base;
9381
1.23M
    inputid = ctxt->input->id;
9382
1.23M
    nbatts = 0;
9383
1.23M
    nratts = 0;
9384
1.23M
    nbdef = 0;
9385
1.23M
    nbNs = 0;
9386
1.23M
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
1.23M
    ctxt->nsNr = nsNr;
9389
9390
1.23M
    localname = xmlParseQName(ctxt, &prefix);
9391
1.23M
    if (localname == NULL) {
9392
9.66k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
9.66k
           "StartTag: invalid element name\n");
9394
9.66k
        return(NULL);
9395
9.66k
    }
9396
1.22M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
1.22M
    SKIP_BLANKS;
9404
1.22M
    GROW;
9405
9406
1.36M
    while (((RAW != '>') &&
9407
1.36M
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
1.36M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
690k
  int len = -1, alloc = 0;
9410
9411
690k
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
690k
                               &aprefix, &attvalue, &len, &alloc);
9413
690k
        if (attname == NULL) {
9414
2.58k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
2.58k
           "xmlParseStartTag: problem parsing attributes\n");
9416
2.58k
      break;
9417
2.58k
  }
9418
688k
        if (attvalue == NULL)
9419
1.30k
            goto next_attr;
9420
686k
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
686k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
1.50k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
1.50k
            xmlURIPtr uri;
9425
9426
1.50k
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
1.50k
            if (*URL != 0) {
9434
1.48k
                uri = xmlParseURI((const char *) URL);
9435
1.48k
                if (uri == NULL) {
9436
365
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
365
                             "xmlns: '%s' is not a valid URI\n",
9438
365
                                       URL, NULL, NULL);
9439
1.12k
                } else {
9440
1.12k
                    if (uri->scheme == NULL) {
9441
95
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
95
                                  "xmlns: URI %s is not absolute\n",
9443
95
                                  URL, NULL, NULL);
9444
95
                    }
9445
1.12k
                    xmlFreeURI(uri);
9446
1.12k
                }
9447
1.48k
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
1.48k
                if ((len == 29) &&
9456
1.48k
                    (xmlStrEqual(URL,
9457
18
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
0
                         "reuse of the xmlns namespace name is forbidden\n",
9460
0
                             NULL, NULL, NULL);
9461
0
                    goto next_attr;
9462
0
                }
9463
1.48k
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
1.81k
            for (j = 1;j <= nbNs;j++)
9468
365
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
58
                    break;
9470
1.50k
            if (j <= nbNs)
9471
58
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
1.45k
            else
9473
1.45k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
685k
        } else if (aprefix == ctxt->str_xmlns) {
9476
2.75k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
2.75k
            xmlURIPtr uri;
9478
9479
2.75k
            if (attname == ctxt->str_xml) {
9480
28
                if (URL != ctxt->str_xml_ns) {
9481
28
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
28
                             "xml namespace prefix mapped to wrong URI\n",
9483
28
                             NULL, NULL, NULL);
9484
28
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
28
                goto next_attr;
9489
28
            }
9490
2.73k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
2.73k
            if (attname == ctxt->str_xmlns) {
9499
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
0
                         "redefinition of the xmlns prefix is forbidden\n",
9501
0
                         NULL, NULL, NULL);
9502
0
                goto next_attr;
9503
0
            }
9504
2.73k
            if ((len == 29) &&
9505
2.73k
                (xmlStrEqual(URL,
9506
58
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
0
                         "reuse of the xmlns namespace name is forbidden\n",
9509
0
                         NULL, NULL, NULL);
9510
0
                goto next_attr;
9511
0
            }
9512
2.73k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
14
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
14
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
14
                              attname, NULL, NULL);
9516
14
                goto next_attr;
9517
2.71k
            } else {
9518
2.71k
                uri = xmlParseURI((const char *) URL);
9519
2.71k
                if (uri == NULL) {
9520
415
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
415
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
415
                                       attname, URL, NULL);
9523
2.30k
                } else {
9524
2.30k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
18
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
18
                                  "xmlns:%s: URI %s is not absolute\n",
9527
18
                                  attname, URL, NULL);
9528
18
                    }
9529
2.30k
                    xmlFreeURI(uri);
9530
2.30k
                }
9531
2.71k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
4.05k
            for (j = 1;j <= nbNs;j++)
9537
1.37k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
41
                    break;
9539
2.71k
            if (j <= nbNs)
9540
41
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
2.67k
            else
9542
2.67k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
682k
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
682k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
5.49k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
5.49k
                maxatts = ctxt->maxatts;
9553
5.49k
                atts = ctxt->atts;
9554
5.49k
            }
9555
682k
            ctxt->attallocs[nratts++] = alloc;
9556
682k
            atts[nbatts++] = attname;
9557
682k
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
682k
            if (alloc)
9565
25.2k
                atts[nbatts++] = NULL;
9566
657k
            else
9567
657k
                atts[nbatts++] = ctxt->input->base;
9568
682k
            atts[nbatts++] = attvalue;
9569
682k
            attvalue += len;
9570
682k
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
682k
            if (alloc != 0) attval = 1;
9575
682k
            attvalue = NULL; /* moved into atts */
9576
682k
        }
9577
9578
688k
next_attr:
9579
688k
        if ((attvalue != NULL) && (alloc != 0)) {
9580
847
            xmlFree(attvalue);
9581
847
            attvalue = NULL;
9582
847
        }
9583
9584
688k
  GROW
9585
688k
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
688k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
550k
      break;
9589
137k
  if (SKIP_BLANKS == 0) {
9590
3.89k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
3.89k
         "attributes construct error\n");
9592
3.89k
      break;
9593
3.89k
  }
9594
133k
        GROW;
9595
133k
    }
9596
9597
1.22M
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
1.91M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
682k
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
657k
            const xmlChar *old = atts[i+2];
9612
657k
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
657k
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
657k
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
657k
        }
9616
682k
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
1.22M
    if (ctxt->attsDefault != NULL) {
9622
1.05M
        xmlDefAttrsPtr defaults;
9623
9624
1.05M
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
1.05M
  if (defaults != NULL) {
9626
212k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
148k
          attname = defaults->values[5 * i];
9628
148k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
148k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
44
        for (j = 1;j <= nbNs;j++)
9638
33
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
17
          break;
9640
28
              if (j <= nbNs) continue;
9641
9642
11
        nsname = xmlGetNamespace(ctxt, NULL);
9643
11
        if (nsname != defaults->values[5 * i + 2]) {
9644
11
      if (nsPush(ctxt, NULL,
9645
11
                 defaults->values[5 * i + 2]) > 0)
9646
11
          nbNs++;
9647
11
        }
9648
148k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
192
        for (j = 1;j <= nbNs;j++)
9653
92
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
85
          break;
9655
185
              if (j <= nbNs) continue;
9656
9657
100
        nsname = xmlGetNamespace(ctxt, attname);
9658
100
        if (nsname != defaults->values[5 * i + 2]) {
9659
75
      if (nsPush(ctxt, attname,
9660
75
                 defaults->values[5 * i + 2]) > 0)
9661
75
          nbNs++;
9662
75
        }
9663
148k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
416k
        for (j = 0;j < nbatts;j+=5) {
9668
270k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
2.99k
          break;
9670
270k
        }
9671
148k
        if (j < nbatts) continue;
9672
9673
145k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
112
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
112
      maxatts = ctxt->maxatts;
9679
112
      atts = ctxt->atts;
9680
112
        }
9681
145k
        atts[nbatts++] = attname;
9682
145k
        atts[nbatts++] = aprefix;
9683
145k
        if (aprefix == NULL)
9684
121k
      atts[nbatts++] = NULL;
9685
24.1k
        else
9686
24.1k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
145k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
145k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
145k
        if ((ctxt->standalone == 1) &&
9690
145k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
145k
        nbdef++;
9696
145k
    }
9697
148k
      }
9698
63.3k
  }
9699
1.05M
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
2.05M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
828k
  if (atts[i + 1] != NULL) {
9709
28.2k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
28.2k
      if (nsname == NULL) {
9711
649
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
649
        "Namespace prefix %s for %s on %s is not defined\n",
9713
649
        atts[i + 1], atts[i], localname);
9714
649
      }
9715
28.2k
      atts[i + 2] = nsname;
9716
28.2k
  } else
9717
799k
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
1.23M
        for (j = 0; j < i;j += 5) {
9725
405k
      if (atts[i] == atts[j]) {
9726
226
          if (atts[i+1] == atts[j+1]) {
9727
39
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
39
        break;
9729
39
    }
9730
187
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
12
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
12
           "Namespaced Attribute %s in '%s' redefined\n",
9733
12
           atts[i], nsname, NULL);
9734
12
        break;
9735
12
    }
9736
187
      }
9737
405k
  }
9738
828k
    }
9739
9740
1.22M
    nsname = xmlGetNamespace(ctxt, prefix);
9741
1.22M
    if ((prefix != NULL) && (nsname == NULL)) {
9742
1.77k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
1.77k
           "Namespace prefix %s on %s is not defined\n",
9744
1.77k
     prefix, localname, NULL);
9745
1.77k
    }
9746
1.22M
    *pref = prefix;
9747
1.22M
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
1.22M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
1.22M
  (!ctxt->disableSAX)) {
9754
918k
  if (nbNs > 0)
9755
1.94k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
1.94k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
1.94k
        nbatts / 5, nbdef, atts);
9758
916k
  else
9759
916k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
916k
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
918k
    }
9762
9763
1.22M
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
1.22M
    if (attval != 0) {
9768
55.8k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
31.1k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
25.2k
          xmlFree((xmlChar *) atts[i]);
9771
24.7k
    }
9772
9773
1.22M
    return(localname);
9774
1.22M
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
1.10M
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
1.10M
    const xmlChar *name;
9794
9795
1.10M
    GROW;
9796
1.10M
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
1.10M
    SKIP(2);
9801
9802
1.10M
    if (tag->prefix == NULL)
9803
1.10M
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
5.85k
    else
9805
5.85k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
1.10M
    GROW;
9811
1.10M
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
1.10M
    SKIP_BLANKS;
9814
1.10M
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
1.12k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
1.12k
    } else
9817
1.10M
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
1.10M
    if (name != (xmlChar*)1) {
9826
4.61k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
4.61k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
4.61k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
4.61k
                    ctxt->name, tag->line, name);
9830
4.61k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
1.10M
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
1.10M
  (!ctxt->disableSAX))
9837
811k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
811k
                                tag->URI);
9839
9840
1.10M
    spacePop(ctxt);
9841
1.10M
    if (tag->nsNr != 0)
9842
931
  nsPop(ctxt, tag->nsNr);
9843
1.10M
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
4.14k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
4.14k
    xmlChar *buf = NULL;
9864
4.14k
    int len = 0;
9865
4.14k
    int size = XML_PARSER_BUFFER_SIZE;
9866
4.14k
    int r, rl;
9867
4.14k
    int s, sl;
9868
4.14k
    int cur, l;
9869
4.14k
    int count = 0;
9870
4.14k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
65
                    XML_MAX_HUGE_LENGTH :
9872
4.14k
                    XML_MAX_TEXT_LENGTH;
9873
9874
4.14k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
4.14k
    SKIP(3);
9877
9878
4.14k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
4.14k
    SKIP(6);
9881
9882
4.14k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
4.14k
    r = CUR_CHAR(rl);
9884
4.14k
    if (!IS_CHAR(r)) {
9885
10
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
10
        goto out;
9887
10
    }
9888
4.13k
    NEXTL(rl);
9889
4.13k
    s = CUR_CHAR(sl);
9890
4.13k
    if (!IS_CHAR(s)) {
9891
4
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
4
        goto out;
9893
4
    }
9894
4.12k
    NEXTL(sl);
9895
4.12k
    cur = CUR_CHAR(l);
9896
4.12k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
4.12k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
1.37M
    while (IS_CHAR(cur) &&
9902
1.37M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
1.36M
  if (len + 5 >= size) {
9904
4.90k
      xmlChar *tmp;
9905
9906
4.90k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
4.90k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
4.90k
      buf = tmp;
9912
4.90k
      size *= 2;
9913
4.90k
  }
9914
1.36M
  COPY_BUF(rl,buf,len,r);
9915
1.36M
  r = s;
9916
1.36M
  rl = sl;
9917
1.36M
  s = cur;
9918
1.36M
  sl = l;
9919
1.36M
  count++;
9920
1.36M
  if (count > 50) {
9921
24.7k
      SHRINK;
9922
24.7k
      GROW;
9923
24.7k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
24.7k
      count = 0;
9927
24.7k
  }
9928
1.36M
  NEXTL(l);
9929
1.36M
  cur = CUR_CHAR(l);
9930
1.36M
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
1.36M
    }
9936
4.12k
    buf[len] = 0;
9937
4.12k
    if (cur != '>') {
9938
333
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
333
                       "CData section not finished\n%.50s\n", buf);
9940
333
        goto out;
9941
333
    }
9942
3.79k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
3.79k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
1.86k
  if (ctxt->sax->cdataBlock != NULL)
9949
1.64k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
215
  else if (ctxt->sax->characters != NULL)
9951
215
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
1.86k
    }
9953
9954
4.14k
out:
9955
4.14k
    if (ctxt->instate != XML_PARSER_EOF)
9956
4.14k
        ctxt->instate = XML_PARSER_CONTENT;
9957
4.14k
    xmlFree(buf);
9958
4.14k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
15.2k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
15.2k
    int nameNr = ctxt->nameNr;
9971
9972
15.2k
    GROW;
9973
2.86M
    while ((RAW != 0) &&
9974
2.86M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
2.85M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
2.85M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
1.35k
      xmlParsePI(ctxt);
9982
1.35k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
2.85M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
4.14k
      xmlParseCDSect(ctxt);
9990
4.14k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
2.85M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
2.85M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
20.3k
      xmlParseComment(ctxt);
9998
20.3k
      ctxt->instate = XML_PARSER_CONTENT;
9999
20.3k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
2.83M
  else if (*cur == '<') {
10005
1.29M
            if (NXT(1) == '/') {
10006
616k
                if (ctxt->nameNr <= nameNr)
10007
2.50k
                    break;
10008
613k
          xmlParseElementEnd(ctxt);
10009
675k
            } else {
10010
675k
          xmlParseElementStart(ctxt);
10011
675k
            }
10012
1.29M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
1.53M
  else if (*cur == '&') {
10020
293k
      xmlParseReference(ctxt);
10021
293k
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
1.24M
  else {
10027
1.24M
      xmlParseCharData(ctxt, 0);
10028
1.24M
  }
10029
10030
2.85M
  GROW;
10031
2.85M
  SHRINK;
10032
2.85M
    }
10033
15.2k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
10.8k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
10.8k
    int nameNr = ctxt->nameNr;
10047
10048
10.8k
    xmlParseContentInternal(ctxt);
10049
10050
10.8k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
30
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
30
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
30
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
30
                "Premature end of data in tag %s line %d\n",
10055
30
    name, line, NULL);
10056
30
    }
10057
10.8k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
6.40k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
6.40k
    if (xmlParseElementStart(ctxt) != 0)
10078
1.98k
        return;
10079
10080
4.41k
    xmlParseContentInternal(ctxt);
10081
4.41k
    if (ctxt->instate == XML_PARSER_EOF)
10082
25
  return;
10083
10084
4.39k
    if (CUR == 0) {
10085
1.91k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
1.91k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
1.91k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
1.91k
                "Premature end of data in tag %s line %d\n",
10089
1.91k
    name, line, NULL);
10090
1.91k
        return;
10091
1.91k
    }
10092
10093
2.47k
    xmlParseElementEnd(ctxt);
10094
2.47k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
681k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
681k
    const xmlChar *name;
10108
681k
    const xmlChar *prefix = NULL;
10109
681k
    const xmlChar *URI = NULL;
10110
681k
    xmlParserNodeInfo node_info;
10111
681k
    int line, tlen = 0;
10112
681k
    xmlNodePtr ret;
10113
681k
    int nsNr = ctxt->nsNr;
10114
10115
681k
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
681k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
681k
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
681k
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
681k
    else if (*ctxt->space == -2)
10134
18.4k
  spacePush(ctxt, -1);
10135
663k
    else
10136
663k
  spacePush(ctxt, *ctxt->space);
10137
10138
681k
    line = ctxt->input->line;
10139
681k
#ifdef LIBXML_SAX1_ENABLED
10140
681k
    if (ctxt->sax2)
10141
623k
#endif /* LIBXML_SAX1_ENABLED */
10142
623k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
58.0k
#ifdef LIBXML_SAX1_ENABLED
10144
58.0k
    else
10145
58.0k
  name = xmlParseStartTag(ctxt);
10146
681k
#endif /* LIBXML_SAX1_ENABLED */
10147
681k
    if (ctxt->instate == XML_PARSER_EOF)
10148
16
  return(-1);
10149
681k
    if (name == NULL) {
10150
11.8k
  spacePop(ctxt);
10151
11.8k
        return(-1);
10152
11.8k
    }
10153
669k
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
669k
    ret = ctxt->node;
10155
10156
669k
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
669k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
669k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
669k
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
669k
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
44.5k
        SKIP(2);
10172
44.5k
  if (ctxt->sax2) {
10173
42.3k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
42.3k
    (!ctxt->disableSAX))
10175
30.2k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
42.3k
#ifdef LIBXML_SAX1_ENABLED
10177
42.3k
  } else {
10178
2.21k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
2.21k
    (!ctxt->disableSAX))
10180
1.59k
    ctxt->sax->endElement(ctxt->userData, name);
10181
2.21k
#endif /* LIBXML_SAX1_ENABLED */
10182
2.21k
  }
10183
44.5k
  namePop(ctxt);
10184
44.5k
  spacePop(ctxt);
10185
44.5k
  if (nsNr != ctxt->nsNr)
10186
229
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
44.5k
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
44.5k
  return(1);
10195
44.5k
    }
10196
625k
    if (RAW == '>') {
10197
620k
        NEXT1;
10198
620k
    } else {
10199
4.59k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
4.59k
         "Couldn't find end of Start Tag %s line %d\n",
10201
4.59k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
4.59k
  nodePop(ctxt);
10207
4.59k
  namePop(ctxt);
10208
4.59k
  spacePop(ctxt);
10209
4.59k
  if (nsNr != ctxt->nsNr)
10210
369
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
4.59k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
4.59k
  return(-1);
10223
4.59k
    }
10224
10225
620k
    return(0);
10226
625k
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
616k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
616k
    xmlParserNodeInfo node_info;
10237
616k
    xmlNodePtr ret = ctxt->node;
10238
10239
616k
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
616k
    if (ctxt->sax2) {
10249
565k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
565k
  namePop(ctxt);
10251
565k
    }
10252
50.5k
#ifdef LIBXML_SAX1_ENABLED
10253
50.5k
    else
10254
50.5k
  xmlParseEndTag1(ctxt, 0);
10255
616k
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
616k
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
616k
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
14.1k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
14.1k
    xmlChar *buf = NULL;
10286
14.1k
    int len = 0;
10287
14.1k
    int size = 10;
10288
14.1k
    xmlChar cur;
10289
10290
14.1k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
14.1k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
14.1k
    cur = CUR;
10296
14.1k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
136
  xmlFree(buf);
10298
136
  return(NULL);
10299
136
    }
10300
13.9k
    buf[len++] = cur;
10301
13.9k
    NEXT;
10302
13.9k
    cur=CUR;
10303
13.9k
    if (cur != '.') {
10304
207
  xmlFree(buf);
10305
207
  return(NULL);
10306
207
    }
10307
13.7k
    buf[len++] = cur;
10308
13.7k
    NEXT;
10309
13.7k
    cur=CUR;
10310
27.5k
    while ((cur >= '0') && (cur <= '9')) {
10311
13.7k
  if (len + 1 >= size) {
10312
12
      xmlChar *tmp;
10313
10314
12
      size *= 2;
10315
12
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
12
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
12
      buf = tmp;
10322
12
  }
10323
13.7k
  buf[len++] = cur;
10324
13.7k
  NEXT;
10325
13.7k
  cur=CUR;
10326
13.7k
    }
10327
13.7k
    buf[len] = 0;
10328
13.7k
    return(buf);
10329
13.7k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
16.1k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
16.1k
    xmlChar *version = NULL;
10349
10350
16.1k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
14.4k
  SKIP(7);
10352
14.4k
  SKIP_BLANKS;
10353
14.4k
  if (RAW != '=') {
10354
176
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
176
      return(NULL);
10356
176
        }
10357
14.2k
  NEXT;
10358
14.2k
  SKIP_BLANKS;
10359
14.2k
  if (RAW == '"') {
10360
12.0k
      NEXT;
10361
12.0k
      version = xmlParseVersionNum(ctxt);
10362
12.0k
      if (RAW != '"') {
10363
610
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
610
      } else
10365
11.4k
          NEXT;
10366
12.0k
  } else if (RAW == '\''){
10367
2.05k
      NEXT;
10368
2.05k
      version = xmlParseVersionNum(ctxt);
10369
2.05k
      if (RAW != '\'') {
10370
27
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
27
      } else
10372
2.02k
          NEXT;
10373
2.05k
  } else {
10374
171
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
171
  }
10376
14.2k
    }
10377
15.9k
    return(version);
10378
16.1k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
8.06k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
8.06k
    xmlChar *buf = NULL;
10395
8.06k
    int len = 0;
10396
8.06k
    int size = 10;
10397
8.06k
    xmlChar cur;
10398
10399
8.06k
    cur = CUR;
10400
8.06k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
8.06k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
8.04k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
8.04k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
8.04k
  buf[len++] = cur;
10409
8.04k
  NEXT;
10410
8.04k
  cur = CUR;
10411
59.1k
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
59.1k
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
59.1k
         ((cur >= '0') && (cur <= '9')) ||
10414
59.1k
         (cur == '.') || (cur == '_') ||
10415
59.1k
         (cur == '-')) {
10416
51.0k
      if (len + 1 >= size) {
10417
3.37k
          xmlChar *tmp;
10418
10419
3.37k
    size *= 2;
10420
3.37k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
3.37k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
3.37k
    buf = tmp;
10427
3.37k
      }
10428
51.0k
      buf[len++] = cur;
10429
51.0k
      NEXT;
10430
51.0k
      cur = CUR;
10431
51.0k
      if (cur == 0) {
10432
32
          SHRINK;
10433
32
    GROW;
10434
32
    cur = CUR;
10435
32
      }
10436
51.0k
        }
10437
8.04k
  buf[len] = 0;
10438
8.04k
    } else {
10439
29
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
29
    }
10441
8.06k
    return(buf);
10442
8.06k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
12.5k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
12.5k
    xmlChar *encoding = NULL;
10462
10463
12.5k
    SKIP_BLANKS;
10464
12.5k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
8.22k
  SKIP(8);
10466
8.22k
  SKIP_BLANKS;
10467
8.22k
  if (RAW != '=') {
10468
77
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
77
      return(NULL);
10470
77
        }
10471
8.14k
  NEXT;
10472
8.14k
  SKIP_BLANKS;
10473
8.14k
  if (RAW == '"') {
10474
6.34k
      NEXT;
10475
6.34k
      encoding = xmlParseEncName(ctxt);
10476
6.34k
      if (RAW != '"') {
10477
249
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
249
    xmlFree((xmlChar *) encoding);
10479
249
    return(NULL);
10480
249
      } else
10481
6.09k
          NEXT;
10482
6.34k
  } else if (RAW == '\''){
10483
1.72k
      NEXT;
10484
1.72k
      encoding = xmlParseEncName(ctxt);
10485
1.72k
      if (RAW != '\'') {
10486
9
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
9
    xmlFree((xmlChar *) encoding);
10488
9
    return(NULL);
10489
9
      } else
10490
1.71k
          NEXT;
10491
1.72k
  } else {
10492
74
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
74
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
7.88k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
268
      xmlFree((xmlChar *) encoding);
10500
268
            return(NULL);
10501
268
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
7.61k
        if ((encoding != NULL) &&
10508
7.61k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
7.54k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
0
      if ((ctxt->encoding == NULL) &&
10517
0
          (ctxt->input->buf != NULL) &&
10518
0
          (ctxt->input->buf->encoder == NULL)) {
10519
0
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
0
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
0
      }
10522
0
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
0
      ctxt->encoding = encoding;
10525
0
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
7.61k
        else if ((encoding != NULL) &&
10530
7.61k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
7.54k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
3.95k
      if (ctxt->encoding != NULL)
10533
0
    xmlFree((xmlChar *) ctxt->encoding);
10534
3.95k
      ctxt->encoding = encoding;
10535
3.95k
  }
10536
3.66k
  else if (encoding != NULL) {
10537
3.59k
      xmlCharEncodingHandlerPtr handler;
10538
10539
3.59k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
3.59k
      ctxt->input->encoding = encoding;
10542
10543
3.59k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
3.59k
      if (handler != NULL) {
10545
3.52k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
0
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
0
        return(NULL);
10549
0
    }
10550
3.52k
      } else {
10551
62
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
62
      "Unsupported encoding %s\n", encoding);
10553
62
    return(NULL);
10554
62
      }
10555
3.59k
  }
10556
7.61k
    }
10557
11.9k
    return(encoding);
10558
12.5k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
9.88k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
9.88k
    int standalone = -2;
10596
10597
9.88k
    SKIP_BLANKS;
10598
9.88k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
2.40k
  SKIP(10);
10600
2.40k
        SKIP_BLANKS;
10601
2.40k
  if (RAW != '=') {
10602
12
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
12
      return(standalone);
10604
12
        }
10605
2.39k
  NEXT;
10606
2.39k
  SKIP_BLANKS;
10607
2.39k
        if (RAW == '\''){
10608
1.52k
      NEXT;
10609
1.52k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
1.46k
          standalone = 0;
10611
1.46k
                SKIP(2);
10612
1.46k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
63
                 (NXT(2) == 's')) {
10614
48
          standalone = 1;
10615
48
    SKIP(3);
10616
48
            } else {
10617
15
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
15
      }
10619
1.52k
      if (RAW != '\'') {
10620
21
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
21
      } else
10622
1.50k
          NEXT;
10623
1.52k
  } else if (RAW == '"'){
10624
861
      NEXT;
10625
861
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
477
          standalone = 0;
10627
477
    SKIP(2);
10628
477
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
384
                 (NXT(2) == 's')) {
10630
360
          standalone = 1;
10631
360
                SKIP(3);
10632
360
            } else {
10633
24
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
24
      }
10635
861
      if (RAW != '"') {
10636
45
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
45
      } else
10638
816
          NEXT;
10639
861
  } else {
10640
6
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
6
        }
10642
2.39k
    }
10643
9.86k
    return(standalone);
10644
9.88k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
15.4k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
15.4k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
15.4k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
15.4k
    SKIP(5);
10672
10673
15.4k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
15.4k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
15.4k
    version = xmlParseVersionInfo(ctxt);
10683
15.4k
    if (version == NULL) {
10684
2.31k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
13.1k
    } else {
10686
13.1k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
206
      if (ctxt->options & XML_PARSE_OLD10) {
10691
31
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
31
                "Unsupported version '%s'\n",
10693
31
                version);
10694
175
      } else {
10695
175
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
155
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
155
                      "Unsupported version '%s'\n",
10698
155
          version, NULL);
10699
155
    } else {
10700
20
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
20
              "Unsupported version '%s'\n",
10702
20
              version);
10703
20
    }
10704
175
      }
10705
206
  }
10706
13.1k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
13.1k
  ctxt->version = version;
10709
13.1k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
15.4k
    if (!IS_BLANK_CH(RAW)) {
10715
6.25k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
3.58k
      SKIP(2);
10717
3.58k
      return;
10718
3.58k
  }
10719
2.66k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
2.66k
    }
10721
11.8k
    xmlParseEncodingDecl(ctxt);
10722
11.8k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
11.8k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
53
        return;
10728
53
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
11.8k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
2.00k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
1.93k
      SKIP(2);
10736
1.93k
      return;
10737
1.93k
  }
10738
70
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
70
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
9.88k
    GROW;
10745
10746
9.88k
    SKIP_BLANKS;
10747
9.88k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
9.88k
    SKIP_BLANKS;
10750
9.88k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
6.03k
        SKIP(2);
10752
6.03k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
28
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
28
  NEXT;
10756
3.81k
    } else {
10757
3.81k
        int c;
10758
10759
3.81k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
104k
        while ((c = CUR) != 0) {
10761
104k
            NEXT;
10762
104k
            if (c == '>')
10763
3.34k
                break;
10764
104k
        }
10765
3.81k
    }
10766
9.88k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
19.4k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
22.8k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
22.8k
        SKIP_BLANKS;
10783
22.8k
        GROW;
10784
22.8k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
1.74k
      xmlParsePI(ctxt);
10786
21.0k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
1.63k
      xmlParseComment(ctxt);
10788
19.4k
        } else {
10789
19.4k
            break;
10790
19.4k
        }
10791
22.8k
    }
10792
19.4k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
10.0k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
10.0k
    xmlChar start[4];
10812
10.0k
    xmlCharEncoding enc;
10813
10814
10.0k
    xmlInitParser();
10815
10816
10.0k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
10.0k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
10.0k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
10.0k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
10.0k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
10.0k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
10.0k
    if ((ctxt->encoding == NULL) &&
10835
10.0k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
9.93k
  start[0] = RAW;
10842
9.93k
  start[1] = NXT(1);
10843
9.93k
  start[2] = NXT(2);
10844
9.93k
  start[3] = NXT(3);
10845
9.93k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
9.93k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
5.53k
      xmlSwitchEncoding(ctxt, enc);
10848
5.53k
  }
10849
9.93k
    }
10850
10851
10852
10.0k
    if (CUR == 0) {
10853
36
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
36
  return(-1);
10855
36
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
10.0k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
603
       GROW;
10865
603
    }
10866
10.0k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
5.21k
  xmlParseXMLDecl(ctxt);
10872
5.21k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
5.21k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
19
      return(-1);
10878
19
  }
10879
5.19k
  ctxt->standalone = ctxt->input->standalone;
10880
5.19k
  SKIP_BLANKS;
10881
5.19k
    } else {
10882
4.82k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
4.82k
    }
10884
10.0k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
8.83k
        ctxt->sax->startDocument(ctxt->userData);
10886
10.0k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
10.0k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
10.0k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
10.0k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
10.0k
    GROW;
10903
10.0k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
5.15k
  ctxt->inSubset = 1;
10906
5.15k
  xmlParseDocTypeDecl(ctxt);
10907
5.15k
  if (RAW == '[') {
10908
4.03k
      ctxt->instate = XML_PARSER_DTD;
10909
4.03k
      xmlParseInternalSubset(ctxt);
10910
4.03k
      if (ctxt->instate == XML_PARSER_EOF)
10911
1.54k
    return(-1);
10912
4.03k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
3.61k
  ctxt->inSubset = 2;
10918
3.61k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
3.61k
      (!ctxt->disableSAX))
10920
2.98k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
2.98k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
3.61k
  if (ctxt->instate == XML_PARSER_EOF)
10923
571
      return(-1);
10924
3.04k
  ctxt->inSubset = 0;
10925
10926
3.04k
        xmlCleanSpecialAttr(ctxt);
10927
10928
3.04k
  ctxt->instate = XML_PARSER_PROLOG;
10929
3.04k
  xmlParseMisc(ctxt);
10930
3.04k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
7.90k
    GROW;
10936
7.90k
    if (RAW != '<') {
10937
1.50k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
1.50k
           "Start tag expected, '<' not found\n");
10939
6.40k
    } else {
10940
6.40k
  ctxt->instate = XML_PARSER_CONTENT;
10941
6.40k
  xmlParseElement(ctxt);
10942
6.40k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
6.40k
  xmlParseMisc(ctxt);
10949
10950
6.40k
  if (RAW != 0) {
10951
1.90k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
1.90k
  }
10953
6.40k
  ctxt->instate = XML_PARSER_EOF;
10954
6.40k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
7.90k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
7.90k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
7.90k
    if ((ctxt->myDoc != NULL) &&
10966
7.90k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
43
  xmlFreeDoc(ctxt->myDoc);
10968
43
  ctxt->myDoc = NULL;
10969
43
    }
10970
10971
7.90k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
940
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
940
  if (ctxt->valid)
10974
812
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
940
  if (ctxt->nsWellFormed)
10976
864
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
940
  if (ctxt->options & XML_PARSE_OLD10)
10978
39
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
940
    }
10980
7.90k
    if (! ctxt->wellFormed) {
10981
6.96k
  ctxt->valid = 0;
10982
6.96k
  return(-1);
10983
6.96k
    }
10984
940
    return(0);
10985
7.90k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
1.36M
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
1.36M
    const xmlChar *cur;
11110
11111
1.36M
    if (ctxt->checkIndex == 0) {
11112
1.34M
        cur = ctxt->input->cur + 1;
11113
1.34M
    } else {
11114
18.4k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
18.4k
    }
11116
11117
1.36M
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
18.7k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
18.7k
        return(0);
11120
1.34M
    } else {
11121
1.34M
        ctxt->checkIndex = 0;
11122
1.34M
        return(1);
11123
1.34M
    }
11124
1.36M
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
80.3k
                     const char *str, size_t strLen) {
11138
80.3k
    const xmlChar *cur, *term;
11139
11140
80.3k
    if (ctxt->checkIndex == 0) {
11141
46.6k
        cur = ctxt->input->cur + startDelta;
11142
46.6k
    } else {
11143
33.6k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
33.6k
    }
11145
11146
80.3k
    term = BAD_CAST strstr((const char *) cur, str);
11147
80.3k
    if (term == NULL) {
11148
41.2k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
41.2k
        if ((size_t) (end - cur) < strLen)
11152
744
            end = cur;
11153
40.5k
        else
11154
40.5k
            end -= strLen - 1;
11155
41.2k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
41.2k
    } else {
11157
39.0k
        ctxt->checkIndex = 0;
11158
39.0k
    }
11159
11160
80.3k
    return(term);
11161
80.3k
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
1.15M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
1.15M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
1.15M
    const xmlChar *end = ctxt->input->end;
11173
11174
23.4M
    while (cur < end) {
11175
23.2M
        if ((*cur == '<') || (*cur == '&')) {
11176
1.00M
            ctxt->checkIndex = 0;
11177
1.00M
            return(1);
11178
1.00M
        }
11179
22.2M
        cur++;
11180
22.2M
    }
11181
11182
148k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
148k
    return(0);
11184
1.15M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
11192
 */
11193
static int
11194
900k
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
900k
    const xmlChar *cur;
11196
900k
    const xmlChar *end = ctxt->input->end;
11197
900k
    int state = ctxt->endCheckState;
11198
11199
900k
    if (ctxt->checkIndex == 0)
11200
753k
        cur = ctxt->input->cur + 1;
11201
146k
    else
11202
146k
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
30.5M
    while (cur < end) {
11205
30.3M
        if (state) {
11206
18.8M
            if (*cur == state)
11207
541k
                state = 0;
11208
18.8M
        } else if (*cur == '\'' || *cur == '"') {
11209
542k
            state = *cur;
11210
11.0M
        } else if (*cur == '>') {
11211
751k
            ctxt->checkIndex = 0;
11212
751k
            ctxt->endCheckState = 0;
11213
751k
            return(1);
11214
751k
        }
11215
29.6M
        cur++;
11216
29.6M
    }
11217
11218
148k
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
148k
    ctxt->endCheckState = state;
11220
148k
    return(0);
11221
900k
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
11229
 */
11230
static int
11231
54.4k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
54.4k
    const xmlChar *cur, *start;
11240
54.4k
    const xmlChar *end = ctxt->input->end;
11241
54.4k
    int state = ctxt->endCheckState;
11242
11243
54.4k
    if (ctxt->checkIndex == 0) {
11244
6.79k
        cur = ctxt->input->cur + 1;
11245
47.6k
    } else {
11246
47.6k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
47.6k
    }
11248
54.4k
    start = cur;
11249
11250
10.9M
    while (cur < end) {
11251
10.8M
        if (state == '-') {
11252
1.45M
            if ((*cur == '-') &&
11253
1.45M
                (cur[1] == '-') &&
11254
1.45M
                (cur[2] == '>')) {
11255
28.0k
                state = 0;
11256
28.0k
                cur += 3;
11257
28.0k
                start = cur;
11258
28.0k
                continue;
11259
28.0k
            }
11260
1.45M
        }
11261
9.41M
        else if (state == ']') {
11262
7.28k
            if (*cur == '>') {
11263
5.94k
                ctxt->checkIndex = 0;
11264
5.94k
                ctxt->endCheckState = 0;
11265
5.94k
                return(1);
11266
5.94k
            }
11267
1.33k
            if (IS_BLANK_CH(*cur)) {
11268
562
                state = ' ';
11269
777
            } else if (*cur != ']') {
11270
411
                state = 0;
11271
411
                start = cur;
11272
411
                continue;
11273
411
            }
11274
1.33k
        }
11275
9.40M
        else if (state == ' ') {
11276
903
            if (*cur == '>') {
11277
14
                ctxt->checkIndex = 0;
11278
14
                ctxt->endCheckState = 0;
11279
14
                return(1);
11280
14
            }
11281
889
            if (!IS_BLANK_CH(*cur)) {
11282
546
                state = 0;
11283
546
                start = cur;
11284
546
                continue;
11285
546
            }
11286
889
        }
11287
9.40M
        else if (state != 0) {
11288
5.12M
            if (*cur == state) {
11289
90.3k
                state = 0;
11290
90.3k
                start = cur + 1;
11291
90.3k
            }
11292
5.12M
        }
11293
4.28M
        else if (*cur == '<') {
11294
129k
            if ((cur[1] == '!') &&
11295
129k
                (cur[2] == '-') &&
11296
129k
                (cur[3] == '-')) {
11297
28.0k
                state = '-';
11298
28.0k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
28.0k
                start = cur;
11301
28.0k
                continue;
11302
28.0k
            }
11303
129k
        }
11304
4.15M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
97.6k
            state = *cur;
11306
97.6k
        }
11307
11308
10.8M
        cur++;
11309
10.8M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
48.4k
    if ((state == 0) || (state == '-')) {
11316
22.7k
        if (cur - start < 3)
11317
2.75k
            cur = start;
11318
19.9k
        else
11319
19.9k
            cur -= 3;
11320
22.7k
    }
11321
48.4k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
48.4k
    ctxt->endCheckState = state;
11323
48.4k
    return(0);
11324
54.4k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @cur: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as SCdata content [20]
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
11336
 */
11337
static int
11338
12.2k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
12.2k
    int ix;
11340
12.2k
    unsigned char c;
11341
12.2k
    int codepoint;
11342
11343
12.2k
    if ((utf == NULL) || (len <= 0))
11344
11
        return(0);
11345
11346
1.55M
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11347
1.54M
        c = utf[ix];
11348
1.54M
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11349
1.35M
      if (c >= 0x20)
11350
1.30M
    ix++;
11351
43.0k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
42.0k
          ix++;
11353
1.00k
      else
11354
1.00k
          return(-ix);
11355
1.35M
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
116k
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
116k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
2.14k
          return(-ix);
11359
113k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
113k
      codepoint |= utf[ix+1] & 0x3f;
11361
113k
      if (!xmlIsCharQ(codepoint))
11362
6
          return(-ix);
11363
113k
      ix += 2;
11364
113k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
6.59k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
6.57k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
6.57k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
28
        return(-ix);
11369
6.54k
      codepoint = (utf[ix] & 0xf) << 12;
11370
6.54k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
6.54k
      codepoint |= utf[ix+2] & 0x3f;
11372
6.54k
      if (!xmlIsCharQ(codepoint))
11373
0
          return(-ix);
11374
6.54k
      ix += 3;
11375
74.3k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
73.9k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
73.7k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
73.7k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
73.7k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
414
        return(-ix);
11381
73.3k
      codepoint = (utf[ix] & 0x7) << 18;
11382
73.3k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
73.3k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
73.3k
      codepoint |= utf[ix+3] & 0x3f;
11385
73.3k
      if (!xmlIsCharQ(codepoint))
11386
44
          return(-ix);
11387
73.3k
      ix += 4;
11388
73.3k
  } else       /* unknown encoding */
11389
391
      return(-ix);
11390
1.54M
      }
11391
7.93k
      return(ix);
11392
12.2k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
459k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
459k
    int ret = 0;
11406
459k
    int avail, tlen;
11407
459k
    xmlChar cur, next;
11408
11409
459k
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
459k
    if ((ctxt->input != NULL) &&
11466
459k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
11.9k
        xmlParserInputShrink(ctxt->input);
11468
11.9k
    }
11469
11470
5.47M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
5.47M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
10.2k
      return(0);
11473
11474
5.46M
  if (ctxt->input == NULL) break;
11475
5.46M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
5.46M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
5.46M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
5.46M
          (ctxt->input->buf->raw != NULL) &&
11488
5.46M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
4.31k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
4.31k
                                                 ctxt->input);
11491
4.31k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
4.31k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
4.31k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
4.31k
                                      base, current);
11496
4.31k
      }
11497
5.46M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
5.46M
        (ctxt->input->cur - ctxt->input->base);
11499
5.46M
  }
11500
5.46M
        if (avail < 1)
11501
21.5k
      goto done;
11502
5.44M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
41.7k
            case XML_PARSER_START:
11509
41.7k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
14.9k
        xmlChar start[4];
11511
14.9k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
14.9k
        if (avail < 4)
11517
658
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
14.3k
        start[0] = RAW;
11527
14.3k
        start[1] = NXT(1);
11528
14.3k
        start[2] = NXT(2);
11529
14.3k
        start[3] = NXT(3);
11530
14.3k
        enc = xmlDetectCharEncoding(start, 4);
11531
14.3k
        xmlSwitchEncoding(ctxt, enc);
11532
14.3k
        break;
11533
14.9k
    }
11534
11535
26.8k
    if (avail < 2)
11536
14
        goto done;
11537
26.7k
    cur = ctxt->input->cur[0];
11538
26.7k
    next = ctxt->input->cur[1];
11539
26.7k
    if (cur == 0) {
11540
70
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
70
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
70
                  &xmlDefaultSAXLocator);
11543
70
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
70
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
70
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
70
      ctxt->sax->endDocument(ctxt->userData);
11551
70
        goto done;
11552
70
    }
11553
26.7k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
18.5k
        if (avail < 5) goto done;
11556
18.4k
        if ((!terminate) &&
11557
18.4k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
7.11k
      goto done;
11559
11.3k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
11.3k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
11.3k
                  &xmlDefaultSAXLocator);
11562
11.3k
        if ((ctxt->input->cur[2] == 'x') &&
11563
11.3k
      (ctxt->input->cur[3] == 'm') &&
11564
11.3k
      (ctxt->input->cur[4] == 'l') &&
11565
11.3k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
10.2k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
10.2k
      xmlParseXMLDecl(ctxt);
11572
10.2k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
34
          xmlHaltParser(ctxt);
11578
34
          return(0);
11579
34
      }
11580
10.2k
      ctxt->standalone = ctxt->input->standalone;
11581
10.2k
      if ((ctxt->encoding == NULL) &&
11582
10.2k
          (ctxt->input->encoding != NULL))
11583
2.34k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
10.2k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
10.2k
          (!ctxt->disableSAX))
11586
8.01k
          ctxt->sax->startDocument(ctxt->userData);
11587
10.2k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
10.2k
        } else {
11593
1.12k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
1.12k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
1.12k
          (!ctxt->disableSAX))
11596
1.12k
          ctxt->sax->startDocument(ctxt->userData);
11597
1.12k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
1.12k
        }
11603
11.3k
    } else {
11604
8.20k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
8.20k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
8.20k
                  &xmlDefaultSAXLocator);
11607
8.20k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
8.20k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
8.20k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
8.20k
            (!ctxt->disableSAX))
11614
8.20k
      ctxt->sax->startDocument(ctxt->userData);
11615
8.20k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
8.20k
    }
11621
19.5k
    break;
11622
885k
            case XML_PARSER_START_TAG: {
11623
885k
          const xmlChar *name;
11624
885k
    const xmlChar *prefix = NULL;
11625
885k
    const xmlChar *URI = NULL;
11626
885k
                int line = ctxt->input->line;
11627
885k
    int nsNr = ctxt->nsNr;
11628
11629
885k
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
885k
    cur = ctxt->input->cur[0];
11632
885k
          if (cur != '<') {
11633
859
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
859
        xmlHaltParser(ctxt);
11635
859
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
859
      ctxt->sax->endDocument(ctxt->userData);
11637
859
        goto done;
11638
859
    }
11639
884k
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
138k
                    goto done;
11641
745k
    if (ctxt->spaceNr == 0)
11642
518
        spacePush(ctxt, -1);
11643
745k
    else if (*ctxt->space == -2)
11644
29.9k
        spacePush(ctxt, -1);
11645
715k
    else
11646
715k
        spacePush(ctxt, *ctxt->space);
11647
745k
#ifdef LIBXML_SAX1_ENABLED
11648
745k
    if (ctxt->sax2)
11649
615k
#endif /* LIBXML_SAX1_ENABLED */
11650
615k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
130k
#ifdef LIBXML_SAX1_ENABLED
11652
130k
    else
11653
130k
        name = xmlParseStartTag(ctxt);
11654
745k
#endif /* LIBXML_SAX1_ENABLED */
11655
745k
    if (ctxt->instate == XML_PARSER_EOF)
11656
30
        goto done;
11657
745k
    if (name == NULL) {
11658
866
        spacePop(ctxt);
11659
866
        xmlHaltParser(ctxt);
11660
866
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
866
      ctxt->sax->endDocument(ctxt->userData);
11662
866
        goto done;
11663
866
    }
11664
745k
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
745k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
745k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
745k
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
745k
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
65.2k
        SKIP(2);
11680
11681
65.2k
        if (ctxt->sax2) {
11682
60.6k
      if ((ctxt->sax != NULL) &&
11683
60.6k
          (ctxt->sax->endElementNs != NULL) &&
11684
60.6k
          (!ctxt->disableSAX))
11685
60.4k
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
60.4k
                                  prefix, URI);
11687
60.6k
      if (ctxt->nsNr - nsNr > 0)
11688
294
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
60.6k
#ifdef LIBXML_SAX1_ENABLED
11690
60.6k
        } else {
11691
4.61k
      if ((ctxt->sax != NULL) &&
11692
4.61k
          (ctxt->sax->endElement != NULL) &&
11693
4.61k
          (!ctxt->disableSAX))
11694
4.59k
          ctxt->sax->endElement(ctxt->userData, name);
11695
4.61k
#endif /* LIBXML_SAX1_ENABLED */
11696
4.61k
        }
11697
65.2k
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
65.2k
        spacePop(ctxt);
11700
65.2k
        if (ctxt->nameNr == 0) {
11701
602
      ctxt->instate = XML_PARSER_EPILOG;
11702
64.6k
        } else {
11703
64.6k
      ctxt->instate = XML_PARSER_CONTENT;
11704
64.6k
        }
11705
65.2k
        break;
11706
65.2k
    }
11707
679k
    if (RAW == '>') {
11708
672k
        NEXT;
11709
672k
    } else {
11710
7.03k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
7.03k
           "Couldn't find end of Start Tag %s\n",
11712
7.03k
           name);
11713
7.03k
        nodePop(ctxt);
11714
7.03k
        spacePop(ctxt);
11715
7.03k
    }
11716
679k
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
679k
    ctxt->instate = XML_PARSER_CONTENT;
11719
679k
                break;
11720
745k
      }
11721
3.71M
            case XML_PARSER_CONTENT: {
11722
3.71M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
15.2k
        goto done;
11724
3.69M
    cur = ctxt->input->cur[0];
11725
3.69M
    next = ctxt->input->cur[1];
11726
11727
3.69M
    if ((cur == '<') && (next == '/')) {
11728
662k
        ctxt->instate = XML_PARSER_END_TAG;
11729
662k
        break;
11730
3.03M
          } else if ((cur == '<') && (next == '?')) {
11731
3.27k
        if ((!terminate) &&
11732
3.27k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
1.34k
      goto done;
11734
1.92k
        xmlParsePI(ctxt);
11735
1.92k
        ctxt->instate = XML_PARSER_CONTENT;
11736
3.03M
    } else if ((cur == '<') && (next != '!')) {
11737
735k
        ctxt->instate = XML_PARSER_START_TAG;
11738
735k
        break;
11739
2.29M
    } else if ((cur == '<') && (next == '!') &&
11740
2.29M
               (ctxt->input->cur[2] == '-') &&
11741
2.29M
         (ctxt->input->cur[3] == '-')) {
11742
25.8k
        if ((!terminate) &&
11743
25.8k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
9.15k
      goto done;
11745
16.7k
        xmlParseComment(ctxt);
11746
16.7k
        ctxt->instate = XML_PARSER_CONTENT;
11747
2.27M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
2.27M
        (ctxt->input->cur[2] == '[') &&
11749
2.27M
        (ctxt->input->cur[3] == 'C') &&
11750
2.27M
        (ctxt->input->cur[4] == 'D') &&
11751
2.27M
        (ctxt->input->cur[5] == 'A') &&
11752
2.27M
        (ctxt->input->cur[6] == 'T') &&
11753
2.27M
        (ctxt->input->cur[7] == 'A') &&
11754
2.27M
        (ctxt->input->cur[8] == '[')) {
11755
4.47k
        SKIP(9);
11756
4.47k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
4.47k
        break;
11758
2.26M
    } else if ((cur == '<') && (next == '!') &&
11759
2.26M
               (avail < 9)) {
11760
387
        goto done;
11761
2.26M
    } else if (cur == '<') {
11762
6.10k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
6.10k
                    "detected an error in element content\n");
11764
6.10k
                    SKIP(1);
11765
2.25M
    } else if (cur == '&') {
11766
742k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
3.10k
      goto done;
11768
739k
        xmlParseReference(ctxt);
11769
1.51M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
1.51M
        if ((ctxt->inputNr == 1) &&
11783
1.51M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
1.15M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
148k
          goto done;
11786
1.15M
                    }
11787
1.36M
                    ctxt->checkIndex = 0;
11788
1.36M
        xmlParseCharData(ctxt, 0);
11789
1.36M
    }
11790
2.13M
    break;
11791
3.69M
      }
11792
2.13M
            case XML_PARSER_END_TAG:
11793
677k
    if (avail < 2)
11794
0
        goto done;
11795
677k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
15.6k
        goto done;
11797
662k
    if (ctxt->sax2) {
11798
541k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
541k
        nameNsPop(ctxt);
11800
541k
    }
11801
120k
#ifdef LIBXML_SAX1_ENABLED
11802
120k
      else
11803
120k
        xmlParseEndTag1(ctxt, 0);
11804
662k
#endif /* LIBXML_SAX1_ENABLED */
11805
662k
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
662k
    } else if (ctxt->nameNr == 0) {
11808
1.94k
        ctxt->instate = XML_PARSER_EPILOG;
11809
660k
    } else {
11810
660k
        ctxt->instate = XML_PARSER_CONTENT;
11811
660k
    }
11812
662k
    break;
11813
21.9k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
21.9k
    const xmlChar *term;
11819
11820
21.9k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
137
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
137
                                           "]]>");
11827
21.8k
                } else {
11828
21.8k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
21.8k
                }
11830
11831
21.9k
    if (term == NULL) {
11832
15.8k
        int tmp, size;
11833
11834
15.8k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
97
                        size = ctxt->input->end - ctxt->input->cur;
11837
15.7k
                    } else {
11838
15.7k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
9.75k
                            goto done;
11840
5.99k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
5.99k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
5.99k
                    }
11844
6.09k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
6.09k
                    if (tmp <= 0) {
11846
1.77k
                        tmp = -tmp;
11847
1.77k
                        ctxt->input->cur += tmp;
11848
1.77k
                        goto encoding_error;
11849
1.77k
                    }
11850
4.31k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
4.31k
                        if (ctxt->sax->cdataBlock != NULL)
11852
4.00k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
4.00k
                                                  ctxt->input->cur, tmp);
11854
314
                        else if (ctxt->sax->characters != NULL)
11855
314
                            ctxt->sax->characters(ctxt->userData,
11856
314
                                                  ctxt->input->cur, tmp);
11857
4.31k
                    }
11858
4.31k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
4.31k
                    SKIPL(tmp);
11861
6.12k
    } else {
11862
6.12k
                    int base = term - CUR_PTR;
11863
6.12k
        int tmp;
11864
11865
6.12k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
6.12k
        if ((tmp < 0) || (tmp != base)) {
11867
2.26k
      tmp = -tmp;
11868
2.26k
      ctxt->input->cur += tmp;
11869
2.26k
      goto encoding_error;
11870
2.26k
        }
11871
3.85k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
3.85k
            (ctxt->sax->cdataBlock != NULL) &&
11873
3.85k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
7
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
7
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
7
                     "<![CDATA[", 9)))
11882
7
           ctxt->sax->cdataBlock(ctxt->userData,
11883
7
                                 BAD_CAST "", 0);
11884
3.85k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
3.85k
      (!ctxt->disableSAX)) {
11886
3.84k
      if (ctxt->sax->cdataBlock != NULL)
11887
3.26k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
3.26k
              ctxt->input->cur, base);
11889
582
      else if (ctxt->sax->characters != NULL)
11890
582
          ctxt->sax->characters(ctxt->userData,
11891
582
              ctxt->input->cur, base);
11892
3.84k
        }
11893
3.85k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
3.85k
        SKIPL(base + 3);
11896
3.85k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
3.85k
    }
11902
8.17k
    break;
11903
21.9k
      }
11904
35.9k
            case XML_PARSER_MISC:
11905
43.3k
            case XML_PARSER_PROLOG:
11906
45.9k
            case XML_PARSER_EPILOG:
11907
45.9k
    SKIP_BLANKS;
11908
45.9k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
45.9k
    else
11912
45.9k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
45.9k
                (ctxt->input->cur - ctxt->input->base);
11914
45.9k
    if (avail < 2)
11915
2.07k
        goto done;
11916
43.8k
    cur = ctxt->input->cur[0];
11917
43.8k
    next = ctxt->input->cur[1];
11918
43.8k
          if ((cur == '<') && (next == '?')) {
11919
3.76k
        if ((!terminate) &&
11920
3.76k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
1.17k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
2.59k
        xmlParsePI(ctxt);
11927
2.59k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
40.0k
    } else if ((cur == '<') && (next == '!') &&
11930
40.0k
        (ctxt->input->cur[2] == '-') &&
11931
40.0k
        (ctxt->input->cur[3] == '-')) {
11932
9.23k
        if ((!terminate) &&
11933
9.23k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
6.73k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
2.50k
        xmlParseComment(ctxt);
11940
2.50k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
30.8k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
30.8k
                    (cur == '<') && (next == '!') &&
11944
30.8k
        (ctxt->input->cur[2] == 'D') &&
11945
30.8k
        (ctxt->input->cur[3] == 'O') &&
11946
30.8k
        (ctxt->input->cur[4] == 'C') &&
11947
30.8k
        (ctxt->input->cur[5] == 'T') &&
11948
30.8k
        (ctxt->input->cur[6] == 'Y') &&
11949
30.8k
        (ctxt->input->cur[7] == 'P') &&
11950
30.8k
        (ctxt->input->cur[8] == 'E')) {
11951
19.0k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
10.0k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
9.06k
        ctxt->inSubset = 1;
11958
9.06k
        xmlParseDocTypeDecl(ctxt);
11959
9.06k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
9.06k
        if (RAW == '[') {
11962
7.23k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
7.23k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
1.82k
      ctxt->inSubset = 2;
11972
1.82k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
1.82k
          (ctxt->sax->externalSubset != NULL))
11974
1.54k
          ctxt->sax->externalSubset(ctxt->userData,
11975
1.54k
            ctxt->intSubName, ctxt->extSubSystem,
11976
1.54k
            ctxt->extSubURI);
11977
1.82k
      ctxt->inSubset = 0;
11978
1.82k
      xmlCleanSpecialAttr(ctxt);
11979
1.82k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
1.82k
        }
11985
11.7k
    } else if ((cur == '<') && (next == '!') &&
11986
11.7k
               (avail <
11987
287
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
93
        goto done;
11989
11.6k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
203
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
203
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
203
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
203
      ctxt->sax->endDocument(ctxt->userData);
11998
203
        goto done;
11999
11.4k
                } else {
12000
11.4k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
11.4k
    }
12006
25.6k
    break;
12007
55.3k
            case XML_PARSER_DTD: {
12008
55.3k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
48.4k
                    goto done;
12010
6.86k
    xmlParseInternalSubset(ctxt);
12011
6.86k
    if (ctxt->instate == XML_PARSER_EOF)
12012
2.08k
        goto done;
12013
4.77k
    ctxt->inSubset = 2;
12014
4.77k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
4.77k
        (ctxt->sax->externalSubset != NULL))
12016
4.40k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
4.40k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
4.77k
    ctxt->inSubset = 0;
12019
4.77k
    xmlCleanSpecialAttr(ctxt);
12020
4.77k
    if (ctxt->instate == XML_PARSER_EOF)
12021
721
        goto done;
12022
4.05k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
4.05k
                break;
12028
4.77k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
5.44M
  }
12102
5.44M
    }
12103
444k
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
444k
    return(ret);
12108
4.04k
encoding_error:
12109
4.04k
    {
12110
4.04k
        char buffer[150];
12111
12112
4.04k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
4.04k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
4.04k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
4.04k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
4.04k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
4.04k
         BAD_CAST buffer, NULL);
12118
4.04k
    }
12119
4.04k
    return(0);
12120
459k
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
1.10M
              int terminate) {
12136
1.10M
    int end_in_lf = 0;
12137
1.10M
    int remain = 0;
12138
12139
1.10M
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
1.10M
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
643k
        return(ctxt->errNo);
12143
459k
    if (ctxt->instate == XML_PARSER_EOF)
12144
47
        return(-1);
12145
459k
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
459k
    ctxt->progressive = 1;
12149
459k
    if (ctxt->instate == XML_PARSER_START)
12150
27.7k
        xmlDetectSAX2(ctxt);
12151
459k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
459k
        (chunk[size - 1] == '\r')) {
12153
438
  end_in_lf = 1;
12154
438
  size--;
12155
438
    }
12156
12157
459k
xmldecl_done:
12158
12159
459k
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
459k
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
449k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
449k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
449k
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
449k
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
449k
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
371
            unsigned int len = 45;
12173
12174
371
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
371
                               BAD_CAST "UTF-16")) ||
12176
371
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
314
                               BAD_CAST "UTF16")))
12178
57
                len = 90;
12179
314
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
314
                                    BAD_CAST "UCS-4")) ||
12181
314
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
291
                                    BAD_CAST "UCS4")))
12183
23
                len = 180;
12184
12185
371
            if (ctxt->input->buf->rawconsumed < len)
12186
308
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
371
            if ((unsigned int) size > len) {
12194
244
                remain = size - len;
12195
244
                size = len;
12196
244
            } else {
12197
127
                remain = 0;
12198
127
            }
12199
371
        }
12200
449k
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
449k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
449k
  if (res < 0) {
12203
106
      ctxt->errNo = XML_PARSER_EOF;
12204
106
      xmlHaltParser(ctxt);
12205
106
      return (XML_PARSER_EOF);
12206
106
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
449k
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
9.73k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
9.73k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
9.73k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
9.73k
        (in->raw != NULL)) {
12216
934
    int nbchars;
12217
934
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
934
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
934
    nbchars = xmlCharEncInput(in, terminate);
12221
934
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
934
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
136
        xmlGenericError(xmlGenericErrorContext,
12225
136
            "xmlParseChunk: encoder error\n");
12226
136
                    xmlHaltParser(ctxt);
12227
136
        return(XML_ERR_INVALID_ENCODING);
12228
136
    }
12229
934
      }
12230
9.73k
  }
12231
9.73k
    }
12232
12233
459k
    if (remain != 0) {
12234
180
        xmlParseTryOrFinish(ctxt, 0);
12235
458k
    } else {
12236
458k
        xmlParseTryOrFinish(ctxt, terminate);
12237
458k
    }
12238
459k
    if (ctxt->instate == XML_PARSER_EOF)
12239
4.92k
        return(ctxt->errNo);
12240
12241
454k
    if ((ctxt->input != NULL) &&
12242
454k
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
454k
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
454k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
454k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
10.7k
        return(ctxt->errNo);
12250
12251
443k
    if (remain != 0) {
12252
144
        chunk += size;
12253
144
        size = remain;
12254
144
        remain = 0;
12255
144
        goto xmldecl_done;
12256
144
    }
12257
443k
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
443k
        (ctxt->input->buf != NULL)) {
12259
429
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
429
           ctxt->input);
12261
429
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
429
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
429
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
429
            base, current);
12267
429
    }
12268
443k
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
3.39k
  int cur_avail = 0;
12273
12274
3.39k
  if (ctxt->input != NULL) {
12275
3.39k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
3.39k
      else
12279
3.39k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
3.39k
                    (ctxt->input->cur - ctxt->input->base);
12281
3.39k
  }
12282
12283
3.39k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
3.39k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
1.39k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
1.39k
  }
12287
3.39k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
9
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
9
  }
12290
3.39k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
3.39k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
3.39k
    ctxt->sax->endDocument(ctxt->userData);
12293
3.39k
  }
12294
3.39k
  ctxt->instate = XML_PARSER_EOF;
12295
3.39k
    }
12296
443k
    if (ctxt->wellFormed == 0)
12297
63.6k
  return((xmlParserErrors) ctxt->errNo);
12298
379k
    else
12299
379k
        return(0);
12300
443k
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
20.2k
                        const char *chunk, int size, const char *filename) {
12330
20.2k
    xmlParserCtxtPtr ctxt;
12331
20.2k
    xmlParserInputPtr inputStream;
12332
20.2k
    xmlParserInputBufferPtr buf;
12333
20.2k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
20.2k
    if ((chunk != NULL) && (size >= 4))
12339
9.93k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
20.2k
    buf = xmlAllocParserInputBuffer(enc);
12342
20.2k
    if (buf == NULL) return(NULL);
12343
12344
20.2k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
20.2k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
20.2k
    ctxt->dictNames = 1;
12351
20.2k
    if (filename == NULL) {
12352
10.1k
  ctxt->directory = NULL;
12353
10.1k
    } else {
12354
10.1k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
10.1k
    }
12356
12357
20.2k
    inputStream = xmlNewInputStream(ctxt);
12358
20.2k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
20.2k
    if (filename == NULL)
12365
10.1k
  inputStream->filename = NULL;
12366
10.1k
    else {
12367
10.1k
  inputStream->filename = (char *)
12368
10.1k
      xmlCanonicPath((const xmlChar *) filename);
12369
10.1k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
10.1k
    }
12376
20.2k
    inputStream->buf = buf;
12377
20.2k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
20.2k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
20.2k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
20.2k
    if ((size != 0) && (chunk != NULL) &&
12388
20.2k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
9.93k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
9.93k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
9.93k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
9.93k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
9.93k
    }
12399
12400
20.2k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
5.53k
        xmlSwitchEncoding(ctxt, enc);
12402
5.53k
    }
12403
12404
20.2k
    return(ctxt);
12405
20.2k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
18.2k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
18.2k
    if (ctxt == NULL)
12418
0
        return;
12419
18.2k
    ctxt->instate = XML_PARSER_EOF;
12420
18.2k
    ctxt->disableSAX = 1;
12421
20.5k
    while (ctxt->inputNr > 1)
12422
2.33k
        xmlFreeInputStream(inputPop(ctxt));
12423
18.2k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
18.2k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
18.2k
        if (ctxt->input->buf != NULL) {
12433
16.1k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
16.1k
            ctxt->input->buf = NULL;
12435
16.1k
        }
12436
18.2k
  ctxt->input->cur = BAD_CAST"";
12437
18.2k
        ctxt->input->length = 0;
12438
18.2k
  ctxt->input->base = ctxt->input->cur;
12439
18.2k
        ctxt->input->end = ctxt->input->cur;
12440
18.2k
    }
12441
18.2k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
10.1k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
10.1k
    if (ctxt == NULL)
12452
0
        return;
12453
10.1k
    xmlHaltParser(ctxt);
12454
10.1k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
10.1k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
10.0k
          const xmlChar *ID, xmlNodePtr *list) {
12832
10.0k
    xmlParserCtxtPtr ctxt;
12833
10.0k
    xmlDocPtr newDoc;
12834
10.0k
    xmlNodePtr newRoot;
12835
10.0k
    xmlParserErrors ret = XML_ERR_OK;
12836
10.0k
    xmlChar start[4];
12837
10.0k
    xmlCharEncoding enc;
12838
12839
10.0k
    if (((depth > 40) &&
12840
10.0k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
10.0k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
10.0k
    if (list != NULL)
12848
1.76k
        *list = NULL;
12849
10.0k
    if ((URL == NULL) && (ID == NULL))
12850
9
  return(XML_ERR_INTERNAL_ERROR);
12851
10.0k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
10.0k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
10.0k
                                             oldctxt);
12856
10.0k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
1.46k
    if (oldctxt != NULL) {
12858
1.46k
        ctxt->nbErrors = oldctxt->nbErrors;
12859
1.46k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
1.46k
    }
12861
1.46k
    xmlDetectSAX2(ctxt);
12862
12863
1.46k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
1.46k
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
1.46k
    newDoc->properties = XML_DOC_INTERNAL;
12869
1.46k
    if (doc) {
12870
1.46k
        newDoc->intSubset = doc->intSubset;
12871
1.46k
        newDoc->extSubset = doc->extSubset;
12872
1.46k
        if (doc->dict) {
12873
1.40k
            newDoc->dict = doc->dict;
12874
1.40k
            xmlDictReference(newDoc->dict);
12875
1.40k
        }
12876
1.46k
        if (doc->URL != NULL) {
12877
976
            newDoc->URL = xmlStrdup(doc->URL);
12878
976
        }
12879
1.46k
    }
12880
1.46k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
1.46k
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
1.46k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
1.46k
    nodePush(ctxt, newDoc->children);
12891
1.46k
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
1.46k
    } else {
12894
1.46k
        ctxt->myDoc = doc;
12895
1.46k
        newRoot->doc = doc;
12896
1.46k
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
1.46k
    GROW;
12904
1.46k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
1.44k
  start[0] = RAW;
12906
1.44k
  start[1] = NXT(1);
12907
1.44k
  start[2] = NXT(2);
12908
1.44k
  start[3] = NXT(3);
12909
1.44k
  enc = xmlDetectCharEncoding(start, 4);
12910
1.44k
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
78
      xmlSwitchEncoding(ctxt, enc);
12912
78
  }
12913
1.44k
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
1.46k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
51
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
51
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
51
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
3
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
3
                           "Version mismatch between document and entity\n");
12927
3
        }
12928
51
    }
12929
12930
1.46k
    ctxt->instate = XML_PARSER_CONTENT;
12931
1.46k
    ctxt->depth = depth;
12932
1.46k
    if (oldctxt != NULL) {
12933
1.46k
  ctxt->_private = oldctxt->_private;
12934
1.46k
  ctxt->loadsubset = oldctxt->loadsubset;
12935
1.46k
  ctxt->validate = oldctxt->validate;
12936
1.46k
  ctxt->valid = oldctxt->valid;
12937
1.46k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
1.46k
        if (oldctxt->validate) {
12939
52
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
52
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
52
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
52
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
52
        }
12944
1.46k
  ctxt->external = oldctxt->external;
12945
1.46k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
1.46k
        ctxt->dict = oldctxt->dict;
12947
1.46k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
1.46k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
1.46k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
1.46k
        ctxt->dictNames = oldctxt->dictNames;
12951
1.46k
        ctxt->attsDefault = oldctxt->attsDefault;
12952
1.46k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
1.46k
        ctxt->linenumbers = oldctxt->linenumbers;
12954
1.46k
  ctxt->record_info = oldctxt->record_info;
12955
1.46k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
1.46k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
1.46k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
1.46k
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
1.46k
    xmlParseContent(ctxt);
12970
12971
1.46k
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
18
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
1.44k
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
1.46k
    if (ctxt->node != newDoc->children) {
12977
115
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
115
    }
12979
12980
1.46k
    if (!ctxt->wellFormed) {
12981
1.14k
  ret = (xmlParserErrors)ctxt->errNo;
12982
1.14k
        if (oldctxt != NULL) {
12983
1.14k
            oldctxt->errNo = ctxt->errNo;
12984
1.14k
            oldctxt->wellFormed = 0;
12985
1.14k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
1.14k
        }
12987
1.14k
    } else {
12988
324
  if (list != NULL) {
12989
315
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
315
      cur = newDoc->children->children;
12996
315
      *list = cur;
12997
95.7k
      while (cur != NULL) {
12998
95.4k
    cur->parent = NULL;
12999
95.4k
    cur = cur->next;
13000
95.4k
      }
13001
315
            newDoc->children->children = NULL;
13002
315
  }
13003
324
  ret = XML_ERR_OK;
13004
324
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
1.46k
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
1.46k
        unsigned long consumed = ctxt->input->consumed;
13011
13012
1.46k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
1.46k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
1.46k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
1.46k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
1.46k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
1.46k
    }
13020
13021
1.46k
    if (oldctxt != NULL) {
13022
1.46k
        ctxt->dict = NULL;
13023
1.46k
        ctxt->attsDefault = NULL;
13024
1.46k
        ctxt->attsSpecial = NULL;
13025
1.46k
        oldctxt->nbErrors = ctxt->nbErrors;
13026
1.46k
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
1.46k
        oldctxt->validate = ctxt->validate;
13028
1.46k
        oldctxt->valid = ctxt->valid;
13029
1.46k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
1.46k
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
1.46k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
1.46k
    }
13033
1.46k
    ctxt->node_seq.maximum = 0;
13034
1.46k
    ctxt->node_seq.length = 0;
13035
1.46k
    ctxt->node_seq.buffer = NULL;
13036
1.46k
    xmlFreeParserCtxt(ctxt);
13037
1.46k
    newDoc->intSubset = NULL;
13038
1.46k
    newDoc->extSubset = NULL;
13039
1.46k
    xmlFreeDoc(newDoc);
13040
13041
1.46k
    return(ret);
13042
1.46k
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
9.41k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
9.41k
    xmlParserCtxtPtr ctxt;
13125
9.41k
    xmlDocPtr newDoc = NULL;
13126
9.41k
    xmlNodePtr newRoot;
13127
9.41k
    xmlSAXHandlerPtr oldsax = NULL;
13128
9.41k
    xmlNodePtr content = NULL;
13129
9.41k
    xmlNodePtr last = NULL;
13130
9.41k
    int size;
13131
9.41k
    xmlParserErrors ret = XML_ERR_OK;
13132
9.41k
#ifdef SAX2
13133
9.41k
    int i;
13134
9.41k
#endif
13135
13136
9.41k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
9.41k
        (oldctxt->depth >  100)) {
13138
6
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
6
                       "Maximum entity nesting depth exceeded");
13140
6
  return(XML_ERR_ENTITY_LOOP);
13141
6
    }
13142
13143
13144
9.41k
    if (lst != NULL)
13145
9.41k
        *lst = NULL;
13146
9.41k
    if (string == NULL)
13147
0
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
9.41k
    size = xmlStrlen(string);
13150
13151
9.41k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
9.41k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
9.41k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
9.41k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
9.41k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
9.41k
    else
13158
9.41k
  ctxt->userData = ctxt;
13159
9.41k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
9.41k
    ctxt->dict = oldctxt->dict;
13161
9.41k
    ctxt->input_id = oldctxt->input_id;
13162
9.41k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
9.41k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
9.41k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
9.41k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
9.41k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
0
    }
13171
9.41k
#endif
13172
13173
9.41k
    oldsax = ctxt->sax;
13174
9.41k
    ctxt->sax = oldctxt->sax;
13175
9.41k
    xmlDetectSAX2(ctxt);
13176
9.41k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
9.41k
    ctxt->options = oldctxt->options;
13178
13179
9.41k
    ctxt->_private = oldctxt->_private;
13180
9.41k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
9.41k
    } else {
13193
9.41k
  ctxt->myDoc = oldctxt->myDoc;
13194
9.41k
        content = ctxt->myDoc->children;
13195
9.41k
  last = ctxt->myDoc->last;
13196
9.41k
    }
13197
9.41k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
9.41k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
9.41k
    ctxt->myDoc->children = NULL;
13208
9.41k
    ctxt->myDoc->last = NULL;
13209
9.41k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
9.41k
    nodePush(ctxt, ctxt->myDoc->children);
13211
9.41k
    ctxt->instate = XML_PARSER_CONTENT;
13212
9.41k
    ctxt->depth = oldctxt->depth;
13213
13214
9.41k
    ctxt->validate = 0;
13215
9.41k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
9.41k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
8.73k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
8.73k
    }
13222
9.41k
    ctxt->dictNames = oldctxt->dictNames;
13223
9.41k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
9.41k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
9.41k
    xmlParseContent(ctxt);
13227
9.41k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
15
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
9.39k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
9.41k
    if (ctxt->node != ctxt->myDoc->children) {
13233
30
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
30
    }
13235
13236
9.41k
    if (!ctxt->wellFormed) {
13237
519
  ret = (xmlParserErrors)ctxt->errNo;
13238
519
        oldctxt->errNo = ctxt->errNo;
13239
519
        oldctxt->wellFormed = 0;
13240
519
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
8.89k
    } else {
13242
8.89k
        ret = XML_ERR_OK;
13243
8.89k
    }
13244
13245
9.41k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
8.89k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
8.89k
  cur = ctxt->myDoc->children->children;
13253
8.89k
  *lst = cur;
13254
21.2k
  while (cur != NULL) {
13255
12.3k
#ifdef LIBXML_VALID_ENABLED
13256
12.3k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
12.3k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
12.3k
    (cur->type == XML_ELEMENT_NODE)) {
13259
3
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
3
      oldctxt->myDoc, cur);
13261
3
      }
13262
12.3k
#endif /* LIBXML_VALID_ENABLED */
13263
12.3k
      cur->parent = NULL;
13264
12.3k
      cur = cur->next;
13265
12.3k
  }
13266
8.89k
  ctxt->myDoc->children->children = NULL;
13267
8.89k
    }
13268
9.41k
    if (ctxt->myDoc != NULL) {
13269
9.41k
  xmlFreeNode(ctxt->myDoc->children);
13270
9.41k
        ctxt->myDoc->children = content;
13271
9.41k
        ctxt->myDoc->last = last;
13272
9.41k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
9.41k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
9.41k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
9.41k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
9.41k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
9.41k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
9.41k
    }
13285
13286
9.41k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
9.41k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
9.41k
    ctxt->sax = oldsax;
13289
9.41k
    ctxt->dict = NULL;
13290
9.41k
    ctxt->attsDefault = NULL;
13291
9.41k
    ctxt->attsSpecial = NULL;
13292
9.41k
    xmlFreeParserCtxt(ctxt);
13293
9.41k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
9.41k
    return(ret);
13298
9.41k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
10.0k
        xmlParserCtxtPtr pctx) {
13783
10.0k
    xmlParserCtxtPtr ctxt;
13784
10.0k
    xmlParserInputPtr inputStream;
13785
10.0k
    char *directory = NULL;
13786
10.0k
    xmlChar *uri;
13787
13788
10.0k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
10.0k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
10.0k
    if (pctx != NULL) {
13794
10.0k
        ctxt->options = pctx->options;
13795
10.0k
        ctxt->_private = pctx->_private;
13796
10.0k
  ctxt->input_id = pctx->input_id;
13797
10.0k
    }
13798
13799
    /* Don't read from stdin. */
13800
10.0k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
10.0k
    uri = xmlBuildURI(URL, base);
13804
13805
10.0k
    if (uri == NULL) {
13806
6
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
6
  if (inputStream == NULL) {
13808
6
      xmlFreeParserCtxt(ctxt);
13809
6
      return(NULL);
13810
6
  }
13811
13812
0
  inputPush(ctxt, inputStream);
13813
13814
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
0
      directory = xmlParserGetDirectory((char *)URL);
13816
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
0
      ctxt->directory = directory;
13818
10.0k
    } else {
13819
10.0k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
10.0k
  if (inputStream == NULL) {
13821
8.60k
      xmlFree(uri);
13822
8.60k
      xmlFreeParserCtxt(ctxt);
13823
8.60k
      return(NULL);
13824
8.60k
  }
13825
13826
1.46k
  inputPush(ctxt, inputStream);
13827
13828
1.46k
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
1.46k
      directory = xmlParserGetDirectory((char *)uri);
13830
1.46k
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
1.46k
      ctxt->directory = directory;
13832
1.46k
  xmlFree(uri);
13833
1.46k
    }
13834
1.46k
    return(ctxt);
13835
10.0k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
19.5k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
19.5k
    xmlParserCtxtPtr ctxt;
14178
19.5k
    xmlParserInputPtr input;
14179
19.5k
    xmlParserInputBufferPtr buf;
14180
14181
19.5k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
19.5k
    if (size <= 0)
14184
67
  return(NULL);
14185
14186
19.4k
    ctxt = xmlNewParserCtxt();
14187
19.4k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
19.4k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
19.4k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
19.4k
    input = xmlNewInputStream(ctxt);
14197
19.4k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
19.4k
    input->filename = NULL;
14204
19.4k
    input->buf = buf;
14205
19.4k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
19.4k
    inputPush(ctxt, input);
14208
19.4k
    return(ctxt);
14209
19.4k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Once the library is marked as initialized, further calls return
 * immediately.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Re-check under the lock: the flag may have been set meanwhile. */
    if (xmlParserInitialized != 0) {
        __xmlGlobalInitMutexUnlock();
        return;
    }
#endif

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    /* Register the cleanup at exit only with the default deallocator. */
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

    xmlInitThreadsInternal();
    xmlInitGlobalsInternal();
    xmlInitMemoryInternal();
    __xmlInitializeDict();
    xmlInitEncodingInternal();
    xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
    xmlInitXPathInternal();
#endif
    xmlParserInitialized = 1;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
    !defined(_WIN32)
/*
 * xmlDestructor:
 *
 * Function carrying ATTRIBUTE_DESTRUCTOR that runs the library cleanup,
 * but only while the default deallocator is installed.
 */
static void
ATTRIBUTE_DESTRUCTOR
xmlDestructor(void) {
    /*
     * Calling custom deallocation functions in a destructor can cause
     * problems, for example with Nokogiri.
     */
    if (xmlFree != free)
        return;

    xmlCleanupParser();
}
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro acts
 * as a single statement and can be used safely inside unbraced
 * if/else bodies. Callers must terminate the invocation with ';'.
 * Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
30.3k
{
14843
30.3k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
30.3k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
30.3k
    if (options & XML_PARSE_RECOVER) {
14851
4.64k
        ctxt->recovery = 1;
14852
4.64k
        options -= XML_PARSE_RECOVER;
14853
4.64k
  ctxt->options |= XML_PARSE_RECOVER;
14854
4.64k
    } else
14855
25.6k
        ctxt->recovery = 0;
14856
30.3k
    if (options & XML_PARSE_DTDLOAD) {
14857
28.7k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
28.7k
        options -= XML_PARSE_DTDLOAD;
14859
28.7k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
28.7k
    } else
14861
1.61k
        ctxt->loadsubset = 0;
14862
30.3k
    if (options & XML_PARSE_DTDATTR) {
14863
2.22k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
2.22k
        options -= XML_PARSE_DTDATTR;
14865
2.22k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
2.22k
    }
14867
30.3k
    if (options & XML_PARSE_NOENT) {
14868
26.2k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
26.2k
        options -= XML_PARSE_NOENT;
14871
26.2k
  ctxt->options |= XML_PARSE_NOENT;
14872
26.2k
    } else
14873
4.10k
        ctxt->replaceEntities = 0;
14874
30.3k
    if (options & XML_PARSE_PEDANTIC) {
14875
1.24k
        ctxt->pedantic = 1;
14876
1.24k
        options -= XML_PARSE_PEDANTIC;
14877
1.24k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
1.24k
    } else
14879
29.0k
        ctxt->pedantic = 0;
14880
30.3k
    if (options & XML_PARSE_NOBLANKS) {
14881
3.76k
        ctxt->keepBlanks = 0;
14882
3.76k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
3.76k
        options -= XML_PARSE_NOBLANKS;
14884
3.76k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
3.76k
    } else
14886
26.5k
        ctxt->keepBlanks = 1;
14887
30.3k
    if (options & XML_PARSE_DTDVALID) {
14888
2.96k
        ctxt->validate = 1;
14889
2.96k
        if (options & XML_PARSE_NOWARNING)
14890
1.97k
            ctxt->vctxt.warning = NULL;
14891
2.96k
        if (options & XML_PARSE_NOERROR)
14892
1.80k
            ctxt->vctxt.error = NULL;
14893
2.96k
        options -= XML_PARSE_DTDVALID;
14894
2.96k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
2.96k
    } else
14896
27.3k
        ctxt->validate = 0;
14897
30.3k
    if (options & XML_PARSE_NOWARNING) {
14898
2.87k
        ctxt->sax->warning = NULL;
14899
2.87k
        options -= XML_PARSE_NOWARNING;
14900
2.87k
    }
14901
30.3k
    if (options & XML_PARSE_NOERROR) {
14902
2.80k
        ctxt->sax->error = NULL;
14903
2.80k
        ctxt->sax->fatalError = NULL;
14904
2.80k
        options -= XML_PARSE_NOERROR;
14905
2.80k
    }
14906
30.3k
#ifdef LIBXML_SAX1_ENABLED
14907
30.3k
    if (options & XML_PARSE_SAX1) {
14908
4.29k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
4.29k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
4.29k
        ctxt->sax->startElementNs = NULL;
14911
4.29k
        ctxt->sax->endElementNs = NULL;
14912
4.29k
        ctxt->sax->initialized = 1;
14913
4.29k
        options -= XML_PARSE_SAX1;
14914
4.29k
  ctxt->options |= XML_PARSE_SAX1;
14915
4.29k
    }
14916
30.3k
#endif /* LIBXML_SAX1_ENABLED */
14917
30.3k
    if (options & XML_PARSE_NODICT) {
14918
2.92k
        ctxt->dictNames = 0;
14919
2.92k
        options -= XML_PARSE_NODICT;
14920
2.92k
  ctxt->options |= XML_PARSE_NODICT;
14921
27.4k
    } else {
14922
27.4k
        ctxt->dictNames = 1;
14923
27.4k
    }
14924
30.3k
    if (options & XML_PARSE_NOCDATA) {
14925
4.03k
        ctxt->sax->cdataBlock = NULL;
14926
4.03k
        options -= XML_PARSE_NOCDATA;
14927
4.03k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
4.03k
    }
14929
30.3k
    if (options & XML_PARSE_NSCLEAN) {
14930
4.01k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
4.01k
        options -= XML_PARSE_NSCLEAN;
14932
4.01k
    }
14933
30.3k
    if (options & XML_PARSE_NONET) {
14934
2.36k
  ctxt->options |= XML_PARSE_NONET;
14935
2.36k
        options -= XML_PARSE_NONET;
14936
2.36k
    }
14937
30.3k
    if (options & XML_PARSE_COMPACT) {
14938
12.2k
  ctxt->options |= XML_PARSE_COMPACT;
14939
12.2k
        options -= XML_PARSE_COMPACT;
14940
12.2k
    }
14941
30.3k
    if (options & XML_PARSE_OLD10) {
14942
3.22k
  ctxt->options |= XML_PARSE_OLD10;
14943
3.22k
        options -= XML_PARSE_OLD10;
14944
3.22k
    }
14945
30.3k
    if (options & XML_PARSE_NOBASEFIX) {
14946
2.44k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
2.44k
        options -= XML_PARSE_NOBASEFIX;
14948
2.44k
    }
14949
30.3k
    if (options & XML_PARSE_HUGE) {
14950
2.68k
  ctxt->options |= XML_PARSE_HUGE;
14951
2.68k
        options -= XML_PARSE_HUGE;
14952
2.68k
        if (ctxt->dict != NULL)
14953
2.68k
            xmlDictSetLimit(ctxt->dict, 0);
14954
2.68k
    }
14955
30.3k
    if (options & XML_PARSE_OLDSAX) {
14956
2.94k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
2.94k
        options -= XML_PARSE_OLDSAX;
14958
2.94k
    }
14959
30.3k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
3.37k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
3.37k
        options -= XML_PARSE_IGNORE_ENC;
14962
3.37k
    }
14963
30.3k
    if (options & XML_PARSE_BIG_LINES) {
14964
3.73k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
3.73k
        options -= XML_PARSE_BIG_LINES;
14966
3.73k
    }
14967
30.3k
    ctxt->linenumbers = 1;
14968
30.3k
    return (options);
14969
30.3k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
20.2k
{
14984
20.2k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
20.2k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
10.0k
{
15003
10.0k
    xmlDocPtr ret;
15004
15005
10.0k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
10.0k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
10.0k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
10.0k
        (ctxt->input->filename == NULL))
15015
10.0k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
10.0k
    xmlParseDocument(ctxt);
15017
10.0k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
2.43k
        ret = ctxt->myDoc;
15019
7.63k
    else {
15020
7.63k
        ret = NULL;
15021
7.63k
  if (ctxt->myDoc != NULL) {
15022
6.47k
      xmlFreeDoc(ctxt->myDoc);
15023
6.47k
  }
15024
7.63k
    }
15025
10.0k
    ctxt->myDoc = NULL;
15026
10.0k
    if (!reuse) {
15027
10.0k
  xmlFreeParserCtxt(ctxt);
15028
10.0k
    }
15029
15030
10.0k
    return (ret);
15031
10.0k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
10.1k
{
15096
10.1k
    xmlParserCtxtPtr ctxt;
15097
15098
10.1k
    xmlInitParser();
15099
10.1k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
10.1k
    if (ctxt == NULL)
15101
64
        return (NULL);
15102
10.0k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
10.1k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387