Coverage Report

Created: 2024-08-17 06:43

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record kept per element start tag.
 * NOTE(review): judging by the field names this holds the element's
 * namespace prefix and URI, the source line of the tag and a namespace
 * count; no code using the struct is visible in this part of the file,
 * so confirm against the code that fills it in.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static void xmlHaltParser(xmlParserCtxtPtr ctxt);
104
105
static int
106
xmlParseElementStart(xmlParserCtxtPtr ctxt);
107
108
static void
109
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
110
111
/************************************************************************
112
 *                  *
113
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
114
 *                  *
115
 ************************************************************************/
116
117
49.4k
/*
 * NOTE(review): presumably the length cap that applies when the
 * XML_PARSE_HUGE option is set; not used in the visible code - confirm.
 */
#define XML_MAX_HUGE_LENGTH 1000000000
118
119
/* NOTE(review): entity size thresholds, not referenced in the visible code. */
#define XML_PARSER_BIG_ENTITY 1000
120
#define XML_PARSER_LOT_ENTITY 5000
121
122
/*
123
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
124
 *    replacement over the size in bytes of the input indicates that you have
125
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
126
 *    replacements per byte of input.
127
 */
128
136
#define XML_PARSER_NON_LINEAR 10
129
130
12.7M
/* Fixed cost that xmlParserEntityCheck adds to sizeentcopy on every call. */
#define XML_ENT_FIXED_COST 50
131
132
/**
133
 * xmlParserMaxDepth:
134
 *
135
 * Arbitrary depth limit for the XML documents that we allow to be
136
 * processed. This is not a limitation of the parser but a safety
137
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
138
 * parser option.
139
 */
140
unsigned int xmlParserMaxDepth = 256;
141
142
143
144
#define SAX2 1
145
14.7M
/* NOTE(review): buffer sizes; their users are outside the visible code. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
146
875M
#define XML_PARSER_BUFFER_SIZE 100
147
105k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
148
149
/**
150
 * XML_PARSER_CHUNK_SIZE
151
 *
152
 * When calling GROW that's the minimal amount of data
153
 * the parser expects to have received. It is not a hard
154
 * limit but an optimization when reading strings like Names.
155
 * It is not strictly needed as long as the input's available characters
156
 * are followed by 0, which should be provided by the I/O level.
157
 */
158
2.99M
#define XML_PARSER_CHUNK_SIZE 100
159
160
/*
161
 * List of XML prefixed PI targets allowed by W3C specs.
 * The array is terminated by a NULL entry.
162
 */
163
164
static const char* const xmlW3CPIs[] = {
165
    "xml-stylesheet",
166
    "xml-model",
167
    NULL
168
};
169
170
171
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
172
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
173
                                              const xmlChar **str);
174
175
static xmlParserErrors
176
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
177
                xmlSAXHandlerPtr sax,
178
          void *user_data, int depth, const xmlChar *URL,
179
          const xmlChar *ID, xmlNodePtr *list);
180
181
static int
182
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
183
                          const char *encoding);
184
#ifdef LIBXML_LEGACY_ENABLED
185
static void
186
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
187
                      xmlNodePtr lastNode);
188
#endif /* LIBXML_LEGACY_ENABLED */
189
190
static xmlParserErrors
191
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
192
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
193
194
static int
195
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
196
197
/************************************************************************
198
 *                  *
199
 *    Some factorized error routines        *
200
 *                  *
201
 ************************************************************************/
202
203
/**
204
 * xmlErrAttributeDup:
205
 * @ctxt:  an XML parser context
206
 * @prefix:  the attribute prefix
207
 * @localname:  the attribute localname
208
 *
209
 * Handle a redefinition of attribute error
210
 */
211
static void
212
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
213
                   const xmlChar * localname)
214
117
{
215
117
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
216
117
        (ctxt->instate == XML_PARSER_EOF))
217
0
  return;
218
117
    if (ctxt != NULL)
219
117
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
220
221
117
    if (prefix == NULL)
222
92
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
223
92
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
224
92
                        (const char *) localname, NULL, NULL, 0, 0,
225
92
                        "Attribute %s redefined\n", localname);
226
25
    else
227
25
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
228
25
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
229
25
                        (const char *) prefix, (const char *) localname,
230
25
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
231
25
                        localname);
232
117
    if (ctxt != NULL) {
233
117
  ctxt->wellFormed = 0;
234
117
  if (ctxt->recovery == 0)
235
68
      ctxt->disableSAX = 1;
236
117
    }
237
117
}
238
239
/**
240
 * xmlFatalErr:
241
 * @ctxt:  an XML parser context
242
 * @error:  the error number
243
 * @extra:  extra information string
244
 *
245
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
246
 */
247
static void
248
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
249
52.1k
{
250
52.1k
    const char *errmsg;
251
252
52.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
253
52.1k
        (ctxt->instate == XML_PARSER_EOF))
254
911
  return;
255
51.2k
    switch (error) {
256
181
        case XML_ERR_INVALID_HEX_CHARREF:
257
181
            errmsg = "CharRef: invalid hexadecimal value";
258
181
            break;
259
438
        case XML_ERR_INVALID_DEC_CHARREF:
260
438
            errmsg = "CharRef: invalid decimal value";
261
438
            break;
262
0
        case XML_ERR_INVALID_CHARREF:
263
0
            errmsg = "CharRef: invalid value";
264
0
            break;
265
12.9k
        case XML_ERR_INTERNAL_ERROR:
266
12.9k
            errmsg = "internal error";
267
12.9k
            break;
268
0
        case XML_ERR_PEREF_AT_EOF:
269
0
            errmsg = "PEReference at end of document";
270
0
            break;
271
0
        case XML_ERR_PEREF_IN_PROLOG:
272
0
            errmsg = "PEReference in prolog";
273
0
            break;
274
0
        case XML_ERR_PEREF_IN_EPILOG:
275
0
            errmsg = "PEReference in epilog";
276
0
            break;
277
0
        case XML_ERR_PEREF_NO_NAME:
278
0
            errmsg = "PEReference: no name";
279
0
            break;
280
184
        case XML_ERR_PEREF_SEMICOL_MISSING:
281
184
            errmsg = "PEReference: expecting ';'";
282
184
            break;
283
239
        case XML_ERR_ENTITY_LOOP:
284
239
            errmsg = "Detected an entity reference loop";
285
239
            break;
286
0
        case XML_ERR_ENTITY_NOT_STARTED:
287
0
            errmsg = "EntityValue: \" or ' expected";
288
0
            break;
289
11
        case XML_ERR_ENTITY_PE_INTERNAL:
290
11
            errmsg = "PEReferences forbidden in internal subset";
291
11
            break;
292
239
        case XML_ERR_ENTITY_NOT_FINISHED:
293
239
            errmsg = "EntityValue: \" or ' expected";
294
239
            break;
295
918
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
296
918
            errmsg = "AttValue: \" or ' expected";
297
918
            break;
298
1.46k
        case XML_ERR_LT_IN_ATTRIBUTE:
299
1.46k
            errmsg = "Unescaped '<' not allowed in attributes values";
300
1.46k
            break;
301
242
        case XML_ERR_LITERAL_NOT_STARTED:
302
242
            errmsg = "SystemLiteral \" or ' expected";
303
242
            break;
304
250
        case XML_ERR_LITERAL_NOT_FINISHED:
305
250
            errmsg = "Unfinished System or Public ID \" or ' expected";
306
250
            break;
307
894
        case XML_ERR_MISPLACED_CDATA_END:
308
894
            errmsg = "Sequence ']]>' not allowed in content";
309
894
            break;
310
200
        case XML_ERR_URI_REQUIRED:
311
200
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
312
200
            break;
313
42
        case XML_ERR_PUBID_REQUIRED:
314
42
            errmsg = "PUBLIC, the Public Identifier is missing";
315
42
            break;
316
412
        case XML_ERR_HYPHEN_IN_COMMENT:
317
412
            errmsg = "Comment must not contain '--' (double-hyphen)";
318
412
            break;
319
217
        case XML_ERR_PI_NOT_STARTED:
320
217
            errmsg = "xmlParsePI : no target name";
321
217
            break;
322
46
        case XML_ERR_RESERVED_XML_NAME:
323
46
            errmsg = "Invalid PI name";
324
46
            break;
325
13
        case XML_ERR_NOTATION_NOT_STARTED:
326
13
            errmsg = "NOTATION: Name expected here";
327
13
            break;
328
47
        case XML_ERR_NOTATION_NOT_FINISHED:
329
47
            errmsg = "'>' required to close NOTATION declaration";
330
47
            break;
331
431
        case XML_ERR_VALUE_REQUIRED:
332
431
            errmsg = "Entity value required";
333
431
            break;
334
15
        case XML_ERR_URI_FRAGMENT:
335
15
            errmsg = "Fragment not allowed";
336
15
            break;
337
361
        case XML_ERR_ATTLIST_NOT_STARTED:
338
361
            errmsg = "'(' required to start ATTLIST enumeration";
339
361
            break;
340
26
        case XML_ERR_NMTOKEN_REQUIRED:
341
26
            errmsg = "NmToken expected in ATTLIST enumeration";
342
26
            break;
343
68
        case XML_ERR_ATTLIST_NOT_FINISHED:
344
68
            errmsg = "')' required to finish ATTLIST enumeration";
345
68
            break;
346
123
        case XML_ERR_MIXED_NOT_STARTED:
347
123
            errmsg = "MixedContentDecl : '|' or ')*' expected";
348
123
            break;
349
0
        case XML_ERR_PCDATA_REQUIRED:
350
0
            errmsg = "MixedContentDecl : '#PCDATA' expected";
351
0
            break;
352
504
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
353
504
            errmsg = "ContentDecl : Name or '(' expected";
354
504
            break;
355
466
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
356
466
            errmsg = "ContentDecl : ',' '|' or ')' expected";
357
466
            break;
358
0
        case XML_ERR_PEREF_IN_INT_SUBSET:
359
0
            errmsg =
360
0
                "PEReference: forbidden within markup decl in internal subset";
361
0
            break;
362
3.43k
        case XML_ERR_GT_REQUIRED:
363
3.43k
            errmsg = "expected '>'";
364
3.43k
            break;
365
12
        case XML_ERR_CONDSEC_INVALID:
366
12
            errmsg = "XML conditional section '[' expected";
367
12
            break;
368
1.49k
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
369
1.49k
            errmsg = "Content error in the external subset";
370
1.49k
            break;
371
50
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
372
50
            errmsg =
373
50
                "conditional section INCLUDE or IGNORE keyword expected";
374
50
            break;
375
3
        case XML_ERR_CONDSEC_NOT_FINISHED:
376
3
            errmsg = "XML conditional section not closed";
377
3
            break;
378
3
        case XML_ERR_XMLDECL_NOT_STARTED:
379
3
            errmsg = "Text declaration '<?xml' required";
380
3
            break;
381
6.40k
        case XML_ERR_XMLDECL_NOT_FINISHED:
382
6.40k
            errmsg = "parsing XML declaration: '?>' expected";
383
6.40k
            break;
384
0
        case XML_ERR_EXT_ENTITY_STANDALONE:
385
0
            errmsg = "external parsed entities cannot be standalone";
386
0
            break;
387
1.04k
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
388
1.04k
            errmsg = "EntityRef: expecting ';'";
389
1.04k
            break;
390
1.71k
        case XML_ERR_DOCTYPE_NOT_FINISHED:
391
1.71k
            errmsg = "DOCTYPE improperly terminated";
392
1.71k
            break;
393
0
        case XML_ERR_LTSLASH_REQUIRED:
394
0
            errmsg = "EndTag: '</' not found";
395
0
            break;
396
478
        case XML_ERR_EQUAL_REQUIRED:
397
478
            errmsg = "expected '='";
398
478
            break;
399
1.42k
        case XML_ERR_STRING_NOT_CLOSED:
400
1.42k
            errmsg = "String not closed expecting \" or '";
401
1.42k
            break;
402
358
        case XML_ERR_STRING_NOT_STARTED:
403
358
            errmsg = "String not started expecting ' or \"";
404
358
            break;
405
45
        case XML_ERR_ENCODING_NAME:
406
45
            errmsg = "Invalid XML encoding name";
407
45
            break;
408
243
        case XML_ERR_STANDALONE_VALUE:
409
243
            errmsg = "standalone accepts only 'yes' or 'no'";
410
243
            break;
411
1.41k
        case XML_ERR_DOCUMENT_EMPTY:
412
1.41k
            errmsg = "Document is empty";
413
1.41k
            break;
414
8.50k
        case XML_ERR_DOCUMENT_END:
415
8.50k
            errmsg = "Extra content at the end of the document";
416
8.50k
            break;
417
57
        case XML_ERR_NOT_WELL_BALANCED:
418
57
            errmsg = "chunk is not well balanced";
419
57
            break;
420
0
        case XML_ERR_EXTRA_CONTENT:
421
0
            errmsg = "extra content at the end of well balanced chunk";
422
0
            break;
423
3.58k
        case XML_ERR_VERSION_MISSING:
424
3.58k
            errmsg = "Malformed declaration expecting version";
425
3.58k
            break;
426
0
        case XML_ERR_NAME_TOO_LONG:
427
0
            errmsg = "Name too long";
428
0
            break;
429
#if 0
430
        case:
431
            errmsg = "";
432
            break;
433
#endif
434
21
        default:
435
21
            errmsg = "Unregistered error message";
436
51.2k
    }
437
51.2k
    if (ctxt != NULL)
438
51.2k
  ctxt->errNo = error;
439
51.2k
    if (info == NULL) {
440
38.3k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
441
38.3k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
442
38.3k
                        errmsg);
443
38.3k
    } else {
444
12.9k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
445
12.9k
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
446
12.9k
                        errmsg, info);
447
12.9k
    }
448
51.2k
    if (ctxt != NULL) {
449
51.2k
  ctxt->wellFormed = 0;
450
51.2k
  if (ctxt->recovery == 0)
451
24.8k
      ctxt->disableSAX = 1;
452
51.2k
    }
453
51.2k
}
454
455
/**
456
 * xmlFatalErrMsg:
457
 * @ctxt:  an XML parser context
458
 * @error:  the error number
459
 * @msg:  the error message
460
 *
461
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
462
 */
463
static void LIBXML_ATTR_FORMAT(3,0)
464
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
465
               const char *msg)
466
45.5k
{
467
45.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
468
45.5k
        (ctxt->instate == XML_PARSER_EOF))
469
20
  return;
470
45.5k
    if (ctxt != NULL)
471
45.5k
  ctxt->errNo = error;
472
45.5k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
473
45.5k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
474
45.5k
    if (ctxt != NULL) {
475
45.5k
  ctxt->wellFormed = 0;
476
45.5k
  if (ctxt->recovery == 0)
477
17.7k
      ctxt->disableSAX = 1;
478
45.5k
    }
479
45.5k
}
480
481
/**
482
 * xmlWarningMsg:
483
 * @ctxt:  an XML parser context
484
 * @error:  the error number
485
 * @msg:  the error message
486
 * @str1:  extra data
487
 * @str2:  extra data
488
 *
489
 * Handle a warning.
490
 */
491
static void LIBXML_ATTR_FORMAT(3,0)
492
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
493
              const char *msg, const xmlChar *str1, const xmlChar *str2)
494
24.1k
{
495
24.1k
    xmlStructuredErrorFunc schannel = NULL;
496
497
24.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
498
24.1k
        (ctxt->instate == XML_PARSER_EOF))
499
0
  return;
500
24.1k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
501
24.1k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
502
22.2k
        schannel = ctxt->sax->serror;
503
24.1k
    if (ctxt != NULL) {
504
24.1k
        __xmlRaiseError(schannel,
505
24.1k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
506
24.1k
                    ctxt->userData,
507
24.1k
                    ctxt, NULL, XML_FROM_PARSER, error,
508
24.1k
                    XML_ERR_WARNING, NULL, 0,
509
24.1k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
510
24.1k
        msg, (const char *) str1, (const char *) str2);
511
24.1k
    } else {
512
0
        __xmlRaiseError(schannel, NULL, NULL,
513
0
                    ctxt, NULL, XML_FROM_PARSER, error,
514
0
                    XML_ERR_WARNING, NULL, 0,
515
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
516
0
        msg, (const char *) str1, (const char *) str2);
517
0
    }
518
24.1k
}
519
520
/**
521
 * xmlValidityError:
522
 * @ctxt:  an XML parser context
523
 * @error:  the error number
524
 * @msg:  the error message
525
 * @str1:  extra data
526
 *
527
 * Handle a validity error.
528
 */
529
static void LIBXML_ATTR_FORMAT(3,0)
530
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
531
              const char *msg, const xmlChar *str1, const xmlChar *str2)
532
388
{
533
388
    xmlStructuredErrorFunc schannel = NULL;
534
535
388
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
536
388
        (ctxt->instate == XML_PARSER_EOF))
537
0
  return;
538
388
    if (ctxt != NULL) {
539
388
  ctxt->errNo = error;
540
388
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
541
360
      schannel = ctxt->sax->serror;
542
388
    }
543
388
    if (ctxt != NULL) {
544
388
        __xmlRaiseError(schannel,
545
388
                    ctxt->vctxt.error, ctxt->vctxt.userData,
546
388
                    ctxt, NULL, XML_FROM_DTD, error,
547
388
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
548
388
        (const char *) str2, NULL, 0, 0,
549
388
        msg, (const char *) str1, (const char *) str2);
550
388
  ctxt->valid = 0;
551
388
    } else {
552
0
        __xmlRaiseError(schannel, NULL, NULL,
553
0
                    ctxt, NULL, XML_FROM_DTD, error,
554
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
555
0
        (const char *) str2, NULL, 0, 0,
556
0
        msg, (const char *) str1, (const char *) str2);
557
0
    }
558
388
}
559
560
/**
561
 * xmlFatalErrMsgInt:
562
 * @ctxt:  an XML parser context
563
 * @error:  the error number
564
 * @msg:  the error message
565
 * @val:  an integer value
566
 *
567
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
568
 */
569
static void LIBXML_ATTR_FORMAT(3,0)
570
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
571
                  const char *msg, int val)
572
56.2k
{
573
56.2k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
574
56.2k
        (ctxt->instate == XML_PARSER_EOF))
575
0
  return;
576
56.2k
    if (ctxt != NULL)
577
56.2k
  ctxt->errNo = error;
578
56.2k
    __xmlRaiseError(NULL, NULL, NULL,
579
56.2k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
580
56.2k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
581
56.2k
    if (ctxt != NULL) {
582
56.2k
  ctxt->wellFormed = 0;
583
56.2k
  if (ctxt->recovery == 0)
584
3.21k
      ctxt->disableSAX = 1;
585
56.2k
    }
586
56.2k
}
587
588
/**
589
 * xmlFatalErrMsgStrIntStr:
590
 * @ctxt:  an XML parser context
591
 * @error:  the error number
592
 * @msg:  the error message
593
 * @str1:  an string info
594
 * @val:  an integer value
595
 * @str2:  an string info
596
 *
597
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
598
 */
599
static void LIBXML_ATTR_FORMAT(3,0)
600
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
601
                  const char *msg, const xmlChar *str1, int val,
602
      const xmlChar *str2)
603
18.7k
{
604
18.7k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605
18.7k
        (ctxt->instate == XML_PARSER_EOF))
606
0
  return;
607
18.7k
    if (ctxt != NULL)
608
18.7k
  ctxt->errNo = error;
609
18.7k
    __xmlRaiseError(NULL, NULL, NULL,
610
18.7k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
611
18.7k
                    NULL, 0, (const char *) str1, (const char *) str2,
612
18.7k
        NULL, val, 0, msg, str1, val, str2);
613
18.7k
    if (ctxt != NULL) {
614
18.7k
  ctxt->wellFormed = 0;
615
18.7k
  if (ctxt->recovery == 0)
616
7.48k
      ctxt->disableSAX = 1;
617
18.7k
    }
618
18.7k
}
619
620
/**
621
 * xmlFatalErrMsgStr:
622
 * @ctxt:  an XML parser context
623
 * @error:  the error number
624
 * @msg:  the error message
625
 * @val:  a string value
626
 *
627
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
628
 */
629
static void LIBXML_ATTR_FORMAT(3,0)
630
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
631
                  const char *msg, const xmlChar * val)
632
2.25M
{
633
2.25M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634
2.25M
        (ctxt->instate == XML_PARSER_EOF))
635
0
  return;
636
2.25M
    if (ctxt != NULL)
637
2.25M
  ctxt->errNo = error;
638
2.25M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
639
2.25M
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
640
2.25M
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
641
2.25M
                    val);
642
2.25M
    if (ctxt != NULL) {
643
2.25M
  ctxt->wellFormed = 0;
644
2.25M
  if (ctxt->recovery == 0)
645
1.11M
      ctxt->disableSAX = 1;
646
2.25M
    }
647
2.25M
}
648
649
/**
650
 * xmlErrMsgStr:
651
 * @ctxt:  an XML parser context
652
 * @error:  the error number
653
 * @msg:  the error message
654
 * @val:  a string value
655
 *
656
 * Handle a non fatal parser error
657
 */
658
static void LIBXML_ATTR_FORMAT(3,0)
659
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
660
                  const char *msg, const xmlChar * val)
661
1.38k
{
662
1.38k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663
1.38k
        (ctxt->instate == XML_PARSER_EOF))
664
0
  return;
665
1.38k
    if (ctxt != NULL)
666
1.38k
  ctxt->errNo = error;
667
1.38k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
668
1.38k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
669
1.38k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670
1.38k
                    val);
671
1.38k
}
672
673
/**
674
 * xmlNsErr:
675
 * @ctxt:  an XML parser context
676
 * @error:  the error number
677
 * @msg:  the message
678
 * @info1:  extra information string
679
 * @info2:  extra information string
680
 *
681
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
682
 */
683
static void LIBXML_ATTR_FORMAT(3,0)
684
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
685
         const char *msg,
686
         const xmlChar * info1, const xmlChar * info2,
687
         const xmlChar * info3)
688
16.2k
{
689
16.2k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
690
16.2k
        (ctxt->instate == XML_PARSER_EOF))
691
3
  return;
692
16.2k
    if (ctxt != NULL)
693
16.2k
  ctxt->errNo = error;
694
16.2k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
695
16.2k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
696
16.2k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
697
16.2k
                    info1, info2, info3);
698
16.2k
    if (ctxt != NULL)
699
16.2k
  ctxt->nsWellFormed = 0;
700
16.2k
}
701
702
/**
703
 * xmlNsWarn
704
 * @ctxt:  an XML parser context
705
 * @error:  the error number
706
 * @msg:  the message
707
 * @info1:  extra information string
708
 * @info2:  extra information string
709
 *
710
 * Handle a namespace warning error
711
 */
712
static void LIBXML_ATTR_FORMAT(3,0)
713
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714
         const char *msg,
715
         const xmlChar * info1, const xmlChar * info2,
716
         const xmlChar * info3)
717
77
{
718
77
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719
77
        (ctxt->instate == XML_PARSER_EOF))
720
0
  return;
721
77
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
722
77
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
723
77
                    (const char *) info2, (const char *) info3, 0, 0, msg,
724
77
                    info1, info2, info3);
725
77
}
726
727
/*
 * Add val to *dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
734
735
/*
 * Add a size_t amount to *dst, clamping the result to ULONG_MAX.
 *
 * The amount is taken as size_t so that the saturation check sees the
 * full value: where size_t is wider than unsigned long, an unsigned long
 * parameter would truncate the argument before it could be compared.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is done in the wider of the two unsigned types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;   /* fits: val <= ULONG_MAX - *dst */
}
742
743
/**
744
 * xmlParserEntityCheck:
745
 * @ctxt:  parser context
746
 * @extra:  sum of unexpanded entity sizes
747
 *
748
 * Check for non-linear entity expansion behaviour.
749
 *
750
 * In some cases like xmlStringDecodeEntities, this function is called
751
 * for each, possibly nested entity and its unexpanded content length.
752
 *
753
 * In other cases like xmlParseReference, it's only called for each
754
 * top-level entity with its unexpanded content length plus the sum of
755
 * the unexpanded content lengths (plus fixed cost) of all nested
756
 * entities.
757
 *
758
 * Summing the unexpanded lengths also adds the length of the reference.
759
 * This is by design. Taking the length of the entity name into account
760
 * discourages attacks that try to waste CPU time with abusively long
761
 * entity names. See test/recurse/lol6.xml for example. Each call also
762
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
763
 * short entities.
764
 *
765
 * Returns 1 on error, 0 on success.
766
 */
767
static int
768
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
769
12.7M
{
770
12.7M
    unsigned long consumed;
771
12.7M
    xmlParserInputPtr input = ctxt->input;
772
12.7M
    xmlEntityPtr entity = input->entity;
773
774
    /*
775
     * Compute total consumed bytes so far, including input streams of
776
     * external entities.
777
     */
778
12.7M
    consumed = input->parentConsumed;
779
12.7M
    if ((entity == NULL) ||
780
12.7M
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
781
11.1M
         ((entity->flags & XML_ENT_PARSED) == 0))) {
782
11.1M
        xmlSaturatedAdd(&consumed, input->consumed);
783
11.1M
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
784
11.1M
    }
785
12.7M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
786
787
    /*
788
     * Add extra cost and some fixed cost.
789
     */
790
12.7M
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
791
12.7M
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
792
793
    /*
794
     * It's important to always use saturation arithmetic when tracking
795
     * entity sizes to make the size checks reliable. If "sizeentcopy"
796
     * overflows, we have to abort.
797
     */
798
12.7M
    if ((ctxt->sizeentcopy > XML_MAX_TEXT_LENGTH) &&
799
12.7M
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
800
136
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
801
136
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
802
136
                       "Maximum entity amplification factor exceeded");
803
136
        xmlHaltParser(ctxt);
804
136
        return(1);
805
136
    }
806
807
12.7M
    return(0);
808
12.7M
}
809
810
/************************************************************************
811
 *                  *
812
 *    Library wide options          *
813
 *                  *
814
 ************************************************************************/
815
816
/**
817
  * xmlHasFeature:
818
  * @feature: the feature to be examined
819
  *
820
  * Examines if the library has been compiled with a given feature.
821
  *
822
  * Returns a non-zero value if the feature exists, otherwise zero.
823
  * Returns zero (0) if the feature does not exist or an unknown
824
  * feature is requested, non-zero otherwise.
825
  */
826
int
827
xmlHasFeature(xmlFeature feature)
828
0
{
829
0
    switch (feature) {
830
0
  case XML_WITH_THREAD:
831
0
#ifdef LIBXML_THREAD_ENABLED
832
0
      return(1);
833
#else
834
      return(0);
835
#endif
836
0
        case XML_WITH_TREE:
837
0
#ifdef LIBXML_TREE_ENABLED
838
0
            return(1);
839
#else
840
            return(0);
841
#endif
842
0
        case XML_WITH_OUTPUT:
843
0
#ifdef LIBXML_OUTPUT_ENABLED
844
0
            return(1);
845
#else
846
            return(0);
847
#endif
848
0
        case XML_WITH_PUSH:
849
0
#ifdef LIBXML_PUSH_ENABLED
850
0
            return(1);
851
#else
852
            return(0);
853
#endif
854
0
        case XML_WITH_READER:
855
0
#ifdef LIBXML_READER_ENABLED
856
0
            return(1);
857
#else
858
            return(0);
859
#endif
860
0
        case XML_WITH_PATTERN:
861
0
#ifdef LIBXML_PATTERN_ENABLED
862
0
            return(1);
863
#else
864
            return(0);
865
#endif
866
0
        case XML_WITH_WRITER:
867
0
#ifdef LIBXML_WRITER_ENABLED
868
0
            return(1);
869
#else
870
            return(0);
871
#endif
872
0
        case XML_WITH_SAX1:
873
0
#ifdef LIBXML_SAX1_ENABLED
874
0
            return(1);
875
#else
876
            return(0);
877
#endif
878
0
        case XML_WITH_FTP:
879
#ifdef LIBXML_FTP_ENABLED
880
            return(1);
881
#else
882
0
            return(0);
883
0
#endif
884
0
        case XML_WITH_HTTP:
885
#ifdef LIBXML_HTTP_ENABLED
886
            return(1);
887
#else
888
0
            return(0);
889
0
#endif
890
0
        case XML_WITH_VALID:
891
0
#ifdef LIBXML_VALID_ENABLED
892
0
            return(1);
893
#else
894
            return(0);
895
#endif
896
0
        case XML_WITH_HTML:
897
0
#ifdef LIBXML_HTML_ENABLED
898
0
            return(1);
899
#else
900
            return(0);
901
#endif
902
0
        case XML_WITH_LEGACY:
903
#ifdef LIBXML_LEGACY_ENABLED
904
            return(1);
905
#else
906
0
            return(0);
907
0
#endif
908
0
        case XML_WITH_C14N:
909
0
#ifdef LIBXML_C14N_ENABLED
910
0
            return(1);
911
#else
912
            return(0);
913
#endif
914
0
        case XML_WITH_CATALOG:
915
0
#ifdef LIBXML_CATALOG_ENABLED
916
0
            return(1);
917
#else
918
            return(0);
919
#endif
920
0
        case XML_WITH_XPATH:
921
0
#ifdef LIBXML_XPATH_ENABLED
922
0
            return(1);
923
#else
924
            return(0);
925
#endif
926
0
        case XML_WITH_XPTR:
927
0
#ifdef LIBXML_XPTR_ENABLED
928
0
            return(1);
929
#else
930
            return(0);
931
#endif
932
0
        case XML_WITH_XINCLUDE:
933
0
#ifdef LIBXML_XINCLUDE_ENABLED
934
0
            return(1);
935
#else
936
            return(0);
937
#endif
938
0
        case XML_WITH_ICONV:
939
0
#ifdef LIBXML_ICONV_ENABLED
940
0
            return(1);
941
#else
942
            return(0);
943
#endif
944
0
        case XML_WITH_ISO8859X:
945
0
#ifdef LIBXML_ISO8859X_ENABLED
946
0
            return(1);
947
#else
948
            return(0);
949
#endif
950
0
        case XML_WITH_UNICODE:
951
0
#ifdef LIBXML_UNICODE_ENABLED
952
0
            return(1);
953
#else
954
            return(0);
955
#endif
956
0
        case XML_WITH_REGEXP:
957
0
#ifdef LIBXML_REGEXP_ENABLED
958
0
            return(1);
959
#else
960
            return(0);
961
#endif
962
0
        case XML_WITH_AUTOMATA:
963
0
#ifdef LIBXML_AUTOMATA_ENABLED
964
0
            return(1);
965
#else
966
            return(0);
967
#endif
968
0
        case XML_WITH_EXPR:
969
#ifdef LIBXML_EXPR_ENABLED
970
            return(1);
971
#else
972
0
            return(0);
973
0
#endif
974
0
        case XML_WITH_SCHEMAS:
975
0
#ifdef LIBXML_SCHEMAS_ENABLED
976
0
            return(1);
977
#else
978
            return(0);
979
#endif
980
0
        case XML_WITH_SCHEMATRON:
981
0
#ifdef LIBXML_SCHEMATRON_ENABLED
982
0
            return(1);
983
#else
984
            return(0);
985
#endif
986
0
        case XML_WITH_MODULES:
987
0
#ifdef LIBXML_MODULES_ENABLED
988
0
            return(1);
989
#else
990
            return(0);
991
#endif
992
0
        case XML_WITH_DEBUG:
993
#ifdef LIBXML_DEBUG_ENABLED
994
            return(1);
995
#else
996
0
            return(0);
997
0
#endif
998
0
        case XML_WITH_DEBUG_MEM:
999
#ifdef DEBUG_MEMORY_LOCATION
1000
            return(1);
1001
#else
1002
0
            return(0);
1003
0
#endif
1004
0
        case XML_WITH_DEBUG_RUN:
1005
0
            return(0);
1006
0
        case XML_WITH_ZLIB:
1007
0
#ifdef LIBXML_ZLIB_ENABLED
1008
0
            return(1);
1009
#else
1010
            return(0);
1011
#endif
1012
0
        case XML_WITH_LZMA:
1013
0
#ifdef LIBXML_LZMA_ENABLED
1014
0
            return(1);
1015
#else
1016
            return(0);
1017
#endif
1018
0
        case XML_WITH_ICU:
1019
#ifdef LIBXML_ICU_ENABLED
1020
            return(1);
1021
#else
1022
0
            return(0);
1023
0
#endif
1024
0
        default:
1025
0
      break;
1026
0
     }
1027
0
     return(0);
1028
0
}
1029
1030
/************************************************************************
1031
 *                  *
1032
 *    SAX2 defaulted attributes handling      *
1033
 *                  *
1034
 ************************************************************************/
1035
1036
/**
1037
 * xmlDetectSAX2:
1038
 * @ctxt:  an XML parser context
1039
 *
1040
 * Do the SAX2 detection and specific initialization
1041
 */
1042
static void
1043
60.3k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1044
60.3k
    xmlSAXHandlerPtr sax;
1045
1046
    /* Avoid unused variable warning if features are disabled. */
1047
60.3k
    (void) sax;
1048
1049
60.3k
    if (ctxt == NULL) return;
1050
60.3k
    sax = ctxt->sax;
1051
60.3k
#ifdef LIBXML_SAX1_ENABLED
1052
60.3k
    if ((sax) &&  (sax->initialized == XML_SAX2_MAGIC) &&
1053
60.3k
        ((sax->startElementNs != NULL) ||
1054
42.9k
         (sax->endElementNs != NULL) ||
1055
42.9k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
1056
42.9k
        ctxt->sax2 = 1;
1057
#else
1058
    ctxt->sax2 = 1;
1059
#endif /* LIBXML_SAX1_ENABLED */
1060
1061
60.3k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1062
60.3k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1063
60.3k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
1064
60.3k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1065
60.3k
    (ctxt->str_xml_ns == NULL)) {
1066
0
        xmlErrMemory(ctxt, NULL);
1067
0
    }
1068
60.3k
}
1069
1070
typedef struct _xmlDefAttrs xmlDefAttrs;
1071
typedef xmlDefAttrs *xmlDefAttrsPtr;
1072
struct _xmlDefAttrs {
1073
    int nbAttrs;  /* number of defaulted attributes on that element */
1074
    int maxAttrs;       /* the size of the array */
1075
#if __STDC_VERSION__ >= 199901L
1076
    /* Using a C99 flexible array member avoids UBSan errors. */
1077
    const xmlChar *values[]; /* array of localname/prefix/values/external */
1078
#else
1079
    const xmlChar *values[5];
1080
#endif
1081
};
1082
1083
/**
1084
 * xmlAttrNormalizeSpace:
1085
 * @src: the source string
1086
 * @dst: the target string
1087
 *
1088
 * Normalize the space in non CDATA attribute values:
1089
 * If the attribute type is not CDATA, then the XML processor MUST further
1090
 * process the normalized attribute value by discarding any leading and
1091
 * trailing space (#x20) characters, and by replacing sequences of space
1092
 * (#x20) characters by a single space (#x20) character.
1093
 * Note that the size of dst need to be at least src, and if one doesn't need
1094
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1095
 * passing src as dst is just fine.
1096
 *
1097
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1098
 *         is needed.
1099
 */
1100
static xmlChar *
1101
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1102
27.1k
{
1103
27.1k
    if ((src == NULL) || (dst == NULL))
1104
0
        return(NULL);
1105
1106
27.3k
    while (*src == 0x20) src++;
1107
136k
    while (*src != 0) {
1108
109k
  if (*src == 0x20) {
1109
8.80k
      while (*src == 0x20) src++;
1110
3.55k
      if (*src != 0)
1111
3.41k
    *dst++ = 0x20;
1112
105k
  } else {
1113
105k
      *dst++ = *src++;
1114
105k
  }
1115
109k
    }
1116
27.1k
    *dst = 0;
1117
27.1k
    if (dst == src)
1118
26.9k
       return(NULL);
1119
196
    return(dst);
1120
27.1k
}
1121
1122
/**
1123
 * xmlAttrNormalizeSpace2:
1124
 * @src: the source string
1125
 *
1126
 * Normalize the space in non CDATA attribute values, a slightly more complex
1127
 * front end to avoid allocation problems when running on attribute values
1128
 * coming from the input.
1129
 *
1130
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1131
 *         is needed.
1132
 */
1133
static const xmlChar *
1134
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
1135
362
{
1136
362
    int i;
1137
362
    int remove_head = 0;
1138
362
    int need_realloc = 0;
1139
362
    const xmlChar *cur;
1140
1141
362
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1142
0
        return(NULL);
1143
362
    i = *len;
1144
362
    if (i <= 0)
1145
16
        return(NULL);
1146
1147
346
    cur = src;
1148
370
    while (*cur == 0x20) {
1149
24
        cur++;
1150
24
  remove_head++;
1151
24
    }
1152
7.14k
    while (*cur != 0) {
1153
6.83k
  if (*cur == 0x20) {
1154
715
      cur++;
1155
715
      if ((*cur == 0x20) || (*cur == 0)) {
1156
38
          need_realloc = 1;
1157
38
    break;
1158
38
      }
1159
715
  } else
1160
6.11k
      cur++;
1161
6.83k
    }
1162
346
    if (need_realloc) {
1163
38
        xmlChar *ret;
1164
1165
38
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1166
38
  if (ret == NULL) {
1167
0
      xmlErrMemory(ctxt, NULL);
1168
0
      return(NULL);
1169
0
  }
1170
38
  xmlAttrNormalizeSpace(ret, ret);
1171
38
  *len = strlen((const char *)ret);
1172
38
        return(ret);
1173
308
    } else if (remove_head) {
1174
0
        *len -= remove_head;
1175
0
        memmove(src, src + remove_head, 1 + *len);
1176
0
  return(src);
1177
0
    }
1178
308
    return(NULL);
1179
346
}
1180
1181
/**
1182
 * xmlAddDefAttrs:
1183
 * @ctxt:  an XML parser context
1184
 * @fullname:  the element fullname
1185
 * @fullattr:  the attribute fullname
1186
 * @value:  the attribute value
1187
 *
1188
 * Add a defaulted attribute for an element
1189
 */
1190
static void
1191
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1192
               const xmlChar *fullname,
1193
               const xmlChar *fullattr,
1194
65.4k
               const xmlChar *value) {
1195
65.4k
    xmlDefAttrsPtr defaults;
1196
65.4k
    int len;
1197
65.4k
    const xmlChar *name;
1198
65.4k
    const xmlChar *prefix;
1199
1200
    /*
1201
     * Allows to detect attribute redefinitions
1202
     */
1203
65.4k
    if (ctxt->attsSpecial != NULL) {
1204
63.3k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1205
15
      return;
1206
63.3k
    }
1207
1208
65.4k
    if (ctxt->attsDefault == NULL) {
1209
4.64k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1210
4.64k
  if (ctxt->attsDefault == NULL)
1211
0
      goto mem_error;
1212
4.64k
    }
1213
1214
    /*
1215
     * split the element name into prefix:localname , the string found
1216
     * are within the DTD and then not associated to namespace names.
1217
     */
1218
65.4k
    name = xmlSplitQName3(fullname, &len);
1219
65.4k
    if (name == NULL) {
1220
64.2k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1221
64.2k
  prefix = NULL;
1222
64.2k
    } else {
1223
1.22k
        name = xmlDictLookup(ctxt->dict, name, -1);
1224
1.22k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1225
1.22k
    }
1226
1227
    /*
1228
     * make sure there is some storage
1229
     */
1230
65.4k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1231
65.4k
    if (defaults == NULL) {
1232
30.2k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1233
30.2k
                     (4 * 5) * sizeof(const xmlChar *));
1234
30.2k
  if (defaults == NULL)
1235
0
      goto mem_error;
1236
30.2k
  defaults->nbAttrs = 0;
1237
30.2k
  defaults->maxAttrs = 4;
1238
30.2k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1239
30.2k
                          defaults, NULL) < 0) {
1240
0
      xmlFree(defaults);
1241
0
      goto mem_error;
1242
0
  }
1243
35.2k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1244
344
        xmlDefAttrsPtr temp;
1245
1246
344
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1247
344
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1248
344
  if (temp == NULL)
1249
0
      goto mem_error;
1250
344
  defaults = temp;
1251
344
  defaults->maxAttrs *= 2;
1252
344
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1253
344
                          defaults, NULL) < 0) {
1254
0
      xmlFree(defaults);
1255
0
      goto mem_error;
1256
0
  }
1257
344
    }
1258
1259
    /*
1260
     * Split the element name into prefix:localname , the string found
1261
     * are within the DTD and hen not associated to namespace names.
1262
     */
1263
65.4k
    name = xmlSplitQName3(fullattr, &len);
1264
65.4k
    if (name == NULL) {
1265
45.9k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1266
45.9k
  prefix = NULL;
1267
45.9k
    } else {
1268
19.5k
        name = xmlDictLookup(ctxt->dict, name, -1);
1269
19.5k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1270
19.5k
    }
1271
1272
65.4k
    defaults->values[5 * defaults->nbAttrs] = name;
1273
65.4k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1274
    /* intern the string and precompute the end */
1275
65.4k
    len = xmlStrlen(value);
1276
65.4k
    value = xmlDictLookup(ctxt->dict, value, len);
1277
65.4k
    if (value == NULL)
1278
0
        goto mem_error;
1279
65.4k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1280
65.4k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1281
65.4k
    if (ctxt->external)
1282
53.5k
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1283
11.8k
    else
1284
11.8k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1285
65.4k
    defaults->nbAttrs++;
1286
1287
65.4k
    return;
1288
1289
0
mem_error:
1290
0
    xmlErrMemory(ctxt, NULL);
1291
0
    return;
1292
65.4k
}
1293
1294
/**
1295
 * xmlAddSpecialAttr:
1296
 * @ctxt:  an XML parser context
1297
 * @fullname:  the element fullname
1298
 * @fullattr:  the attribute fullname
1299
 * @type:  the attribute type
1300
 *
1301
 * Register this attribute type
1302
 */
1303
static void
1304
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1305
      const xmlChar *fullname,
1306
      const xmlChar *fullattr,
1307
      int type)
1308
883k
{
1309
883k
    if (ctxt->attsSpecial == NULL) {
1310
5.44k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1311
5.44k
  if (ctxt->attsSpecial == NULL)
1312
0
      goto mem_error;
1313
5.44k
    }
1314
1315
883k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1316
103
        return;
1317
1318
883k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1319
883k
                     (void *) (ptrdiff_t) type);
1320
883k
    return;
1321
1322
0
mem_error:
1323
0
    xmlErrMemory(ctxt, NULL);
1324
0
    return;
1325
883k
}
1326
1327
/**
1328
 * xmlCleanSpecialAttrCallback:
1329
 *
1330
 * Removes CDATA attributes from the special attribute table
1331
 */
1332
static void
1333
xmlCleanSpecialAttrCallback(void *payload, void *data,
1334
                            const xmlChar *fullname, const xmlChar *fullattr,
1335
834k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1336
834k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1337
1338
834k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1339
293k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1340
293k
    }
1341
834k
}
1342
1343
/**
1344
 * xmlCleanSpecialAttr:
1345
 * @ctxt:  an XML parser context
1346
 *
1347
 * Trim the list of attributes defined to remove all those of type
1348
 * CDATA as they are not special. This call should be done when finishing
1349
 * to parse the DTD and before starting to parse the document root.
1350
 */
1351
static void
1352
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1353
16.2k
{
1354
16.2k
    if (ctxt->attsSpecial == NULL)
1355
11.7k
        return;
1356
1357
4.50k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1358
1359
4.50k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1360
437
        xmlHashFree(ctxt->attsSpecial, NULL);
1361
437
        ctxt->attsSpecial = NULL;
1362
437
    }
1363
4.50k
    return;
1364
16.2k
}
1365
1366
/**
1367
 * xmlCheckLanguageID:
1368
 * @lang:  pointer to the string value
1369
 *
1370
 * Checks that the value conforms to the LanguageID production:
1371
 *
1372
 * NOTE: this is somewhat deprecated, those productions were removed from
1373
 *       the XML Second edition.
1374
 *
1375
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1376
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1377
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1378
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1379
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1380
 * [38] Subcode ::= ([a-z] | [A-Z])+
1381
 *
1382
 * The current REC reference the successors of RFC 1766, currently 5646
1383
 *
1384
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1385
 * langtag       = language
1386
 *                 ["-" script]
1387
 *                 ["-" region]
1388
 *                 *("-" variant)
1389
 *                 *("-" extension)
1390
 *                 ["-" privateuse]
1391
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1392
 *                 ["-" extlang]       ; sometimes followed by
1393
 *                                     ; extended language subtags
1394
 *               / 4ALPHA              ; or reserved for future use
1395
 *               / 5*8ALPHA            ; or registered language subtag
1396
 *
1397
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1398
 *                 *2("-" 3ALPHA)      ; permanently reserved
1399
 *
1400
 * script        = 4ALPHA              ; ISO 15924 code
1401
 *
1402
 * region        = 2ALPHA              ; ISO 3166-1 code
1403
 *               / 3DIGIT              ; UN M.49 code
1404
 *
1405
 * variant       = 5*8alphanum         ; registered variants
1406
 *               / (DIGIT 3alphanum)
1407
 *
1408
 * extension     = singleton 1*("-" (2*8alphanum))
1409
 *
1410
 *                                     ; Single alphanumerics
1411
 *                                     ; "x" reserved for private use
1412
 * singleton     = DIGIT               ; 0 - 9
1413
 *               / %x41-57             ; A - W
1414
 *               / %x59-5A             ; Y - Z
1415
 *               / %x61-77             ; a - w
1416
 *               / %x79-7A             ; y - z
1417
 *
1418
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1419
 * The parser below doesn't try to cope with extension or privateuse
1420
 * that could be added but that's not interoperable anyway
1421
 *
1422
 * Returns 1 if correct 0 otherwise
1423
 **/
1424
int
1425
xmlCheckLanguageID(const xmlChar * lang)
1426
0
{
1427
0
    const xmlChar *cur = lang, *nxt;
1428
1429
0
    if (cur == NULL)
1430
0
        return (0);
1431
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1432
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1433
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1434
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1435
        /*
1436
         * Still allow IANA code and user code which were coming
1437
         * from the previous version of the XML-1.0 specification
1438
         * it's deprecated but we should not fail
1439
         */
1440
0
        cur += 2;
1441
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1442
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1443
0
            cur++;
1444
0
        return(cur[0] == 0);
1445
0
    }
1446
0
    nxt = cur;
1447
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1448
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1449
0
           nxt++;
1450
0
    if (nxt - cur >= 4) {
1451
        /*
1452
         * Reserved
1453
         */
1454
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1455
0
            return(0);
1456
0
        return(1);
1457
0
    }
1458
0
    if (nxt - cur < 2)
1459
0
        return(0);
1460
    /* we got an ISO 639 code */
1461
0
    if (nxt[0] == 0)
1462
0
        return(1);
1463
0
    if (nxt[0] != '-')
1464
0
        return(0);
1465
1466
0
    nxt++;
1467
0
    cur = nxt;
1468
    /* now we can have extlang or script or region or variant */
1469
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1470
0
        goto region_m49;
1471
1472
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1473
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1474
0
           nxt++;
1475
0
    if (nxt - cur == 4)
1476
0
        goto script;
1477
0
    if (nxt - cur == 2)
1478
0
        goto region;
1479
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1480
0
        goto variant;
1481
0
    if (nxt - cur != 3)
1482
0
        return(0);
1483
    /* we parsed an extlang */
1484
0
    if (nxt[0] == 0)
1485
0
        return(1);
1486
0
    if (nxt[0] != '-')
1487
0
        return(0);
1488
1489
0
    nxt++;
1490
0
    cur = nxt;
1491
    /* now we can have script or region or variant */
1492
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1493
0
        goto region_m49;
1494
1495
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1496
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1497
0
           nxt++;
1498
0
    if (nxt - cur == 2)
1499
0
        goto region;
1500
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1501
0
        goto variant;
1502
0
    if (nxt - cur != 4)
1503
0
        return(0);
1504
    /* we parsed a script */
1505
0
script:
1506
0
    if (nxt[0] == 0)
1507
0
        return(1);
1508
0
    if (nxt[0] != '-')
1509
0
        return(0);
1510
1511
0
    nxt++;
1512
0
    cur = nxt;
1513
    /* now we can have region or variant */
1514
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1515
0
        goto region_m49;
1516
1517
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1518
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1519
0
           nxt++;
1520
1521
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1522
0
        goto variant;
1523
0
    if (nxt - cur != 2)
1524
0
        return(0);
1525
    /* we parsed a region */
1526
0
region:
1527
0
    if (nxt[0] == 0)
1528
0
        return(1);
1529
0
    if (nxt[0] != '-')
1530
0
        return(0);
1531
1532
0
    nxt++;
1533
0
    cur = nxt;
1534
    /* now we can just have a variant */
1535
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1536
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1537
0
           nxt++;
1538
1539
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1540
0
        return(0);
1541
1542
    /* we parsed a variant */
1543
0
variant:
1544
0
    if (nxt[0] == 0)
1545
0
        return(1);
1546
0
    if (nxt[0] != '-')
1547
0
        return(0);
1548
    /* extensions and private use subtags not checked */
1549
0
    return (1);
1550
1551
0
region_m49:
1552
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1553
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1554
0
        nxt += 3;
1555
0
        goto region;
1556
0
    }
1557
0
    return(0);
1558
0
}
1559
1560
/************************************************************************
1561
 *                  *
1562
 *    Parser stacks related functions and macros    *
1563
 *                  *
1564
 ************************************************************************/
1565
1566
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1567
                                            const xmlChar ** str);
1568
1569
#ifdef SAX2
1570
/**
1571
 * nsPush:
1572
 * @ctxt:  an XML parser context
1573
 * @prefix:  the namespace prefix or NULL
1574
 * @URL:  the namespace name
1575
 *
1576
 * Pushes a new parser namespace on top of the ns stack
1577
 *
1578
 * Returns -1 in case of error, -2 if the namespace should be discarded
1579
 *     and the index in the stack otherwise.
1580
 */
1581
static int
1582
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1583
4.64k
{
1584
4.64k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1585
495
        int i;
1586
815
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1587
336
      if (ctxt->nsTab[i] == prefix) {
1588
    /* in scope */
1589
16
          if (ctxt->nsTab[i + 1] == URL)
1590
6
        return(-2);
1591
    /* out of scope keep it */
1592
10
    break;
1593
16
      }
1594
336
  }
1595
495
    }
1596
4.63k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1597
2.77k
  ctxt->nsMax = 10;
1598
2.77k
  ctxt->nsNr = 0;
1599
2.77k
  ctxt->nsTab = (const xmlChar **)
1600
2.77k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1601
2.77k
  if (ctxt->nsTab == NULL) {
1602
0
      xmlErrMemory(ctxt, NULL);
1603
0
      ctxt->nsMax = 0;
1604
0
            return (-1);
1605
0
  }
1606
2.77k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1607
15
        const xmlChar ** tmp;
1608
15
        ctxt->nsMax *= 2;
1609
15
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1610
15
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1611
15
        if (tmp == NULL) {
1612
0
            xmlErrMemory(ctxt, NULL);
1613
0
      ctxt->nsMax /= 2;
1614
0
            return (-1);
1615
0
        }
1616
15
  ctxt->nsTab = tmp;
1617
15
    }
1618
4.63k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1619
4.63k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1620
4.63k
    return (ctxt->nsNr);
1621
4.63k
}
1622
/**
1623
 * nsPop:
1624
 * @ctxt: an XML parser context
1625
 * @nr:  the number to pop
1626
 *
1627
 * Pops the top @nr parser prefix/namespace from the ns stack
1628
 *
1629
 * Returns the number of namespaces removed
1630
 */
1631
static int
1632
nsPop(xmlParserCtxtPtr ctxt, int nr)
1633
1.56k
{
1634
1.56k
    int i;
1635
1636
1.56k
    if (ctxt->nsTab == NULL) return(0);
1637
1.56k
    if (ctxt->nsNr < nr) {
1638
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1639
0
        nr = ctxt->nsNr;
1640
0
    }
1641
1.56k
    if (ctxt->nsNr <= 0)
1642
0
        return (0);
1643
1644
6.25k
    for (i = 0;i < nr;i++) {
1645
4.68k
         ctxt->nsNr--;
1646
4.68k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1647
4.68k
    }
1648
1.56k
    return(nr);
1649
1.56k
}
1650
#endif
1651
1652
static int
1653
6.94k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1654
6.94k
    const xmlChar **atts;
1655
6.94k
    int *attallocs;
1656
6.94k
    int maxatts;
1657
1658
6.94k
    if (nr + 5 > ctxt->maxatts) {
1659
6.94k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1660
6.94k
  atts = (const xmlChar **) xmlMalloc(
1661
6.94k
             maxatts * sizeof(const xmlChar *));
1662
6.94k
  if (atts == NULL) goto mem_error;
1663
6.94k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1664
6.94k
                               (maxatts / 5) * sizeof(int));
1665
6.94k
  if (attallocs == NULL) {
1666
0
            xmlFree(atts);
1667
0
            goto mem_error;
1668
0
        }
1669
6.94k
        if (ctxt->maxatts > 0)
1670
8
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1671
6.94k
        xmlFree(ctxt->atts);
1672
6.94k
  ctxt->atts = atts;
1673
6.94k
  ctxt->attallocs = attallocs;
1674
6.94k
  ctxt->maxatts = maxatts;
1675
6.94k
    }
1676
6.94k
    return(ctxt->maxatts);
1677
0
mem_error:
1678
0
    xmlErrMemory(ctxt, NULL);
1679
0
    return(-1);
1680
6.94k
}
1681
1682
/**
1683
 * inputPush:
1684
 * @ctxt:  an XML parser context
1685
 * @value:  the parser input
1686
 *
1687
 * Pushes a new parser input on top of the input stack
1688
 *
1689
 * Returns -1 in case of error, the index in the stack otherwise
1690
 */
1691
int
1692
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1693
1.56M
{
1694
1.56M
    if ((ctxt == NULL) || (value == NULL))
1695
0
        return(-1);
1696
1.56M
    if (ctxt->inputNr >= ctxt->inputMax) {
1697
18
        size_t newSize = ctxt->inputMax * 2;
1698
18
        xmlParserInputPtr *tmp;
1699
1700
18
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1701
18
                                               newSize * sizeof(*tmp));
1702
18
        if (tmp == NULL) {
1703
0
            xmlErrMemory(ctxt, NULL);
1704
0
            return (-1);
1705
0
        }
1706
18
        ctxt->inputTab = tmp;
1707
18
        ctxt->inputMax = newSize;
1708
18
    }
1709
1.56M
    ctxt->inputTab[ctxt->inputNr] = value;
1710
1.56M
    ctxt->input = value;
1711
1.56M
    return (ctxt->inputNr++);
1712
1.56M
}
1713
/**
1714
 * inputPop:
1715
 * @ctxt: an XML parser context
1716
 *
1717
 * Pops the top parser input from the input stack
1718
 *
1719
 * Returns the input just removed
1720
 */
1721
xmlParserInputPtr
1722
inputPop(xmlParserCtxtPtr ctxt)
1723
1.66M
{
1724
1.66M
    xmlParserInputPtr ret;
1725
1726
1.66M
    if (ctxt == NULL)
1727
0
        return(NULL);
1728
1.66M
    if (ctxt->inputNr <= 0)
1729
102k
        return (NULL);
1730
1.56M
    ctxt->inputNr--;
1731
1.56M
    if (ctxt->inputNr > 0)
1732
1.51M
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1733
49.9k
    else
1734
49.9k
        ctxt->input = NULL;
1735
1.56M
    ret = ctxt->inputTab[ctxt->inputNr];
1736
1.56M
    ctxt->inputTab[ctxt->inputNr] = NULL;
1737
1.56M
    return (ret);
1738
1.66M
}
1739
/**
1740
 * nodePush:
1741
 * @ctxt:  an XML parser context
1742
 * @value:  the element node
1743
 *
1744
 * Pushes a new element node on top of the node stack
1745
 *
1746
 * Returns -1 in case of error, the index in the stack otherwise
1747
 */
1748
int
1749
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1750
1.22M
{
1751
1.22M
    if (ctxt == NULL) return(0);
1752
1.22M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1753
46
        xmlNodePtr *tmp;
1754
1755
46
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1756
46
                                      ctxt->nodeMax * 2 *
1757
46
                                      sizeof(ctxt->nodeTab[0]));
1758
46
        if (tmp == NULL) {
1759
0
            xmlErrMemory(ctxt, NULL);
1760
0
            return (-1);
1761
0
        }
1762
46
        ctxt->nodeTab = tmp;
1763
46
  ctxt->nodeMax *= 2;
1764
46
    }
1765
1.22M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1766
1.22M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1767
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1768
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1769
0
        xmlParserMaxDepth);
1770
0
  xmlHaltParser(ctxt);
1771
0
  return(-1);
1772
0
    }
1773
1.22M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1774
1.22M
    ctxt->node = value;
1775
1.22M
    return (ctxt->nodeNr++);
1776
1.22M
}
1777
1778
/**
1779
 * nodePop:
1780
 * @ctxt: an XML parser context
1781
 *
1782
 * Pops the top element node from the node stack
1783
 *
1784
 * Returns the node just removed
1785
 */
1786
xmlNodePtr
1787
nodePop(xmlParserCtxtPtr ctxt)
1788
1.19M
{
1789
1.19M
    xmlNodePtr ret;
1790
1791
1.19M
    if (ctxt == NULL) return(NULL);
1792
1.19M
    if (ctxt->nodeNr <= 0)
1793
4.38k
        return (NULL);
1794
1.19M
    ctxt->nodeNr--;
1795
1.19M
    if (ctxt->nodeNr > 0)
1796
1.17M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1797
19.5k
    else
1798
19.5k
        ctxt->node = NULL;
1799
1.19M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1800
1.19M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1801
1.19M
    return (ret);
1802
1.19M
}
1803
1804
/**
1805
 * nameNsPush:
1806
 * @ctxt:  an XML parser context
1807
 * @value:  the element name
1808
 * @prefix:  the element prefix
1809
 * @URI:  the element namespace name
1810
 * @line:  the current line number for error messages
1811
 * @nsNr:  the number of namespaces pushed on the namespace table
1812
 *
1813
 * Pushes a new element name/prefix/URL on top of the name stack
1814
 *
1815
 * Returns -1 in case of error, the index in the stack otherwise
1816
 */
1817
static int
1818
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1819
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1820
1.34M
{
1821
1.34M
    xmlStartTag *tag;
1822
1823
1.34M
    if (ctxt->nameNr >= ctxt->nameMax) {
1824
144
        const xmlChar * *tmp;
1825
144
        xmlStartTag *tmp2;
1826
144
        ctxt->nameMax *= 2;
1827
144
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1828
144
                                    ctxt->nameMax *
1829
144
                                    sizeof(ctxt->nameTab[0]));
1830
144
        if (tmp == NULL) {
1831
0
      ctxt->nameMax /= 2;
1832
0
      goto mem_error;
1833
0
        }
1834
144
  ctxt->nameTab = tmp;
1835
144
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1836
144
                                    ctxt->nameMax *
1837
144
                                    sizeof(ctxt->pushTab[0]));
1838
144
        if (tmp2 == NULL) {
1839
0
      ctxt->nameMax /= 2;
1840
0
      goto mem_error;
1841
0
        }
1842
144
  ctxt->pushTab = tmp2;
1843
1.34M
    } else if (ctxt->pushTab == NULL) {
1844
25.7k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1845
25.7k
                                            sizeof(ctxt->pushTab[0]));
1846
25.7k
        if (ctxt->pushTab == NULL)
1847
0
            goto mem_error;
1848
25.7k
    }
1849
1.34M
    ctxt->nameTab[ctxt->nameNr] = value;
1850
1.34M
    ctxt->name = value;
1851
1.34M
    tag = &ctxt->pushTab[ctxt->nameNr];
1852
1.34M
    tag->prefix = prefix;
1853
1.34M
    tag->URI = URI;
1854
1.34M
    tag->line = line;
1855
1.34M
    tag->nsNr = nsNr;
1856
1.34M
    return (ctxt->nameNr++);
1857
0
mem_error:
1858
0
    xmlErrMemory(ctxt, NULL);
1859
0
    return (-1);
1860
1.34M
}
1861
#ifdef LIBXML_PUSH_ENABLED
1862
/**
1863
 * nameNsPop:
1864
 * @ctxt: an XML parser context
1865
 *
1866
 * Pops the top element/prefix/URI name from the name stack
1867
 *
1868
 * Returns the name just removed
1869
 */
1870
static const xmlChar *
1871
nameNsPop(xmlParserCtxtPtr ctxt)
1872
330k
{
1873
330k
    const xmlChar *ret;
1874
1875
330k
    if (ctxt->nameNr <= 0)
1876
0
        return (NULL);
1877
330k
    ctxt->nameNr--;
1878
330k
    if (ctxt->nameNr > 0)
1879
327k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1880
2.63k
    else
1881
2.63k
        ctxt->name = NULL;
1882
330k
    ret = ctxt->nameTab[ctxt->nameNr];
1883
330k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1884
330k
    return (ret);
1885
330k
}
1886
#endif /* LIBXML_PUSH_ENABLED */
1887
1888
/**
1889
 * namePush:
1890
 * @ctxt:  an XML parser context
1891
 * @value:  the element name
1892
 *
1893
 * Pushes a new element name on top of the name stack
1894
 *
1895
 * Returns -1 in case of error, the index in the stack otherwise
1896
 */
1897
int
1898
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1899
0
{
1900
0
    if (ctxt == NULL) return (-1);
1901
1902
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1903
0
        const xmlChar * *tmp;
1904
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1905
0
                                    ctxt->nameMax * 2 *
1906
0
                                    sizeof(ctxt->nameTab[0]));
1907
0
        if (tmp == NULL) {
1908
0
      goto mem_error;
1909
0
        }
1910
0
  ctxt->nameTab = tmp;
1911
0
        ctxt->nameMax *= 2;
1912
0
    }
1913
0
    ctxt->nameTab[ctxt->nameNr] = value;
1914
0
    ctxt->name = value;
1915
0
    return (ctxt->nameNr++);
1916
0
mem_error:
1917
0
    xmlErrMemory(ctxt, NULL);
1918
0
    return (-1);
1919
0
}
1920
/**
1921
 * namePop:
1922
 * @ctxt: an XML parser context
1923
 *
1924
 * Pops the top element name from the name stack
1925
 *
1926
 * Returns the name just removed
1927
 */
1928
const xmlChar *
1929
namePop(xmlParserCtxtPtr ctxt)
1930
986k
{
1931
986k
    const xmlChar *ret;
1932
1933
986k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1934
0
        return (NULL);
1935
986k
    ctxt->nameNr--;
1936
986k
    if (ctxt->nameNr > 0)
1937
977k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1938
8.76k
    else
1939
8.76k
        ctxt->name = NULL;
1940
986k
    ret = ctxt->nameTab[ctxt->nameNr];
1941
986k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1942
986k
    return (ret);
1943
986k
}
1944
1945
1.41M
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to record on the space stack
 *
 * Pushes @val on the space stack, doubling the table when it is full,
 * and points ctxt->space at the new top entry.
 *
 * Returns -1 in case of error (allocation failure or capacity overflow),
 * the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *tmp;
        int newMax;

        /*
         * Doubling a signed int beyond INT_MAX / 2 is undefined, and the
         * byte count handed to xmlRealloc must not wrap around size_t.
         */
        if ((ctxt->spaceMax > INT_MAX / 2) ||
            ((size_t) (ctxt->spaceMax * 2) >
             ((size_t) -1) / sizeof(ctxt->spaceTab[0]))) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        newMax = ctxt->spaceMax * 2;

        tmp = (int *) xmlRealloc(ctxt->spaceTab,
                                 newMax * sizeof(ctxt->spaceTab[0]));
        if (tmp == NULL) {
            xmlErrMemory(ctxt, NULL);
            return(-1);
        }
        ctxt->spaceTab = tmp;
        ctxt->spaceMax = newMax;
    }
    ctxt->spaceTab[ctxt->spaceNr] = val;
    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    return(ctxt->spaceNr++);
}
1963
1964
1.39M
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the space stack. ctxt->space is moved to the
 * entry below it, or to slot 0 when the stack becomes empty, and the
 * vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int removed;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    removed = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(removed);
}
1976
1977
/*
1978
 * Macros for accessing the content. Those should be used only by the parser,
1979
 * and not exported.
1980
 *
1981
 * Dirty macros, i.e. one often need to make assumption on the context to
1982
 * use them
1983
 *
1984
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1985
 *           To be used with extreme caution since operations consuming
1986
 *           characters may move the input buffer to a different location !
1987
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1988
 *           This should be used internally by the parser
1989
 *           only to compare to ASCII values otherwise it would break when
1990
 *           running with UTF-8 encoding.
1991
 *   RAW     same as CUR but in the input buffer, bypass any token
1992
 *           extraction that may have been done
1993
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1994
 *           to compare on ASCII based substring.
1995
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1996
 *           strings without newlines within the parser.
1997
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1998
 *           defined char within the parser.
1999
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2000
 *
2001
 *   NEXT    Skip to the next character, this does the proper decoding
2002
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2003
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2004
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2005
 *           to the number of xmlChars used for the encoding [0-5].
2006
 *   CUR_SCHAR  same but operate on a string instead of the context
2007
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2008
 *            the index
2009
 *   GROW, SHRINK  handling of input buffers
2010
 */
2011
2012
42.1M
/*
 * Raw accessors on the current input. Every one of them expands to an
 * expression on a variable named `ctxt`, which must be in scope at the
 * point of use. RAW and CUR expand to the same expression (the byte at
 * the current position); NXT(val) indexes relative to the current
 * position and performs no bounds check of its own.
 */
#define RAW (*ctxt->input->cur)
2013
40.2M
#define CUR (*ctxt->input->cur)
2014
21.2M
#define NXT(val) ctxt->input->cur[(val)]
2015
1.29M
#define CUR_PTR ctxt->input->cur
2016
16.1k
#define BASE_PTR ctxt->input->base
2017
2018
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * The bytes are compared as unsigned char and the comparison stops at
 * the first mismatch, so bytes past a mismatch are never read.
 * Note that s is expanded once per compared byte: do not pass an
 * expression with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2035
2036
10.1M
/*
 * SKIP(val): advance the current input position and the column counter
 * by val bytes, then ask for more input if the new position holds a 0
 * byte. The line counter is not touched, so the skipped bytes must not
 * contain a newline.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)

/*
 * SKIPL(val): advance byte by byte for val bytes, keeping both the line
 * and the column counters up to date, then ask for more input if the
 * new position holds a 0 byte. val is re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  } while (0)
2053
2054
11.4M
#define SHRINK if ((ctxt->progressive == 0) &&       \
2055
11.4M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2056
11.4M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2057
11.4M
  xmlSHRINK (ctxt);
2058
2059
96.1k
static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2060
    /* Don't shrink memory buffers. */
2061
96.1k
    if ((ctxt->input->buf) &&
2062
96.1k
        ((ctxt->input->buf->encoder) || (ctxt->input->buf->readcallback)))
2063
464
        xmlParserInputShrink(ctxt->input);
2064
96.1k
    if (*ctxt->input->cur == 0)
2065
1.27k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2066
96.1k
}
2067
2068
55.7M
#define GROW if ((ctxt->progressive == 0) &&       \
2069
55.7M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2070
55.7M
  xmlGROW (ctxt);
2071
2072
11.2M
static void xmlGROW (xmlParserCtxtPtr ctxt) {
2073
11.2M
    ptrdiff_t curEnd = ctxt->input->end - ctxt->input->cur;
2074
11.2M
    ptrdiff_t curBase = ctxt->input->cur - ctxt->input->base;
2075
2076
11.2M
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
2077
11.2M
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
2078
11.2M
         ((ctxt->input->buf) &&
2079
0
          (ctxt->input->buf->readcallback != NULL)) &&
2080
11.2M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2081
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
2082
0
        xmlHaltParser(ctxt);
2083
0
  return;
2084
0
    }
2085
11.2M
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2086
11.2M
    if ((ctxt->input->cur > ctxt->input->end) ||
2087
11.2M
        (ctxt->input->cur < ctxt->input->base)) {
2088
0
        xmlHaltParser(ctxt);
2089
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "cur index out of bound");
2090
0
  return;
2091
0
    }
2092
11.2M
    if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0))
2093
388k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2094
11.2M
}
2095
2096
18.7M
/* Skip blanks at the current position; see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte and one column, then ask for more
 * input if the new position holds a 0 byte. The line counter is not
 * touched. Expands to a brace block, not a do/while.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * updating the line counter on '\n' and the column counter otherwise.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
  } while (0)

/* Decode the character at the current position / at s; l receives its length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i: a plain byte store when l is 1, xmlCopyCharMultiByte()
 * otherwise. Expands to an unbraced if/else without a trailing ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2120
2121
/**
2122
 * xmlSkipBlankChars:
2123
 * @ctxt:  the XML parser context
2124
 *
2125
 * skip all blanks character found at that point in the input streams.
2126
 * It pops up finished entities in the process if allowable at that point.
2127
 *
2128
 * Returns the number of space chars skipped
2129
 */
2130
2131
int
2132
18.7M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2133
18.7M
    int res = 0;
2134
2135
    /*
2136
     * It's Okay to use CUR/NEXT here since all the blanks are on
2137
     * the ASCII range.
2138
     */
2139
18.7M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2140
18.7M
        (ctxt->instate == XML_PARSER_START)) {
2141
4.65M
  const xmlChar *cur;
2142
  /*
2143
   * if we are in the document content, go really fast
2144
   */
2145
4.65M
  cur = ctxt->input->cur;
2146
4.65M
  while (IS_BLANK_CH(*cur)) {
2147
1.08M
      if (*cur == '\n') {
2148
90.2k
    ctxt->input->line++; ctxt->input->col = 1;
2149
995k
      } else {
2150
995k
    ctxt->input->col++;
2151
995k
      }
2152
1.08M
      cur++;
2153
1.08M
      if (res < INT_MAX)
2154
1.08M
    res++;
2155
1.08M
      if (*cur == 0) {
2156
7.76k
    ctxt->input->cur = cur;
2157
7.76k
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2158
7.76k
    cur = ctxt->input->cur;
2159
7.76k
      }
2160
1.08M
  }
2161
4.65M
  ctxt->input->cur = cur;
2162
14.0M
    } else {
2163
14.0M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2164
2165
74.5M
  while (ctxt->instate != XML_PARSER_EOF) {
2166
74.5M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2167
57.4M
    NEXT;
2168
57.4M
      } else if (CUR == '%') {
2169
                /*
2170
                 * Need to handle support of entities branching here
2171
                 */
2172
1.71M
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2173
186k
                    break;
2174
1.53M
          xmlParsePEReference(ctxt);
2175
15.3M
            } else if (CUR == 0) {
2176
1.51M
                unsigned long consumed;
2177
1.51M
                xmlEntityPtr ent;
2178
2179
1.51M
                if (ctxt->inputNr <= 1)
2180
3.84k
                    break;
2181
2182
1.51M
                consumed = ctxt->input->consumed;
2183
1.51M
                xmlSaturatedAddSizeT(&consumed,
2184
1.51M
                                     ctxt->input->cur - ctxt->input->base);
2185
2186
                /*
2187
                 * Add to sizeentities when parsing an external entity
2188
                 * for the first time.
2189
                 */
2190
1.51M
                ent = ctxt->input->entity;
2191
1.51M
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2192
1.51M
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2193
437
                    ent->flags |= XML_ENT_PARSED;
2194
2195
437
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2196
437
                }
2197
2198
1.51M
                xmlParserEntityCheck(ctxt, consumed);
2199
2200
1.51M
                xmlPopInput(ctxt);
2201
13.8M
            } else {
2202
13.8M
                break;
2203
13.8M
            }
2204
2205
            /*
2206
             * Also increase the counter when entering or exiting a PERef.
2207
             * The spec says: "When a parameter-entity reference is recognized
2208
             * in the DTD and included, its replacement text MUST be enlarged
2209
             * by the attachment of one leading and one following space (#x20)
2210
             * character."
2211
             */
2212
60.5M
      if (res < INT_MAX)
2213
60.5M
    res++;
2214
60.5M
        }
2215
14.0M
    }
2216
18.7M
    return(res);
2217
18.7M
}
2218
2219
/************************************************************************
2220
 *                  *
2221
 *    Commodity functions to handle entities      *
2222
 *                  *
2223
 ************************************************************************/
2224
2225
/**
2226
 * xmlPopInput:
2227
 * @ctxt:  an XML parser context
2228
 *
2229
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2230
 *          pop it and return the next char.
2231
 *
2232
 * Returns the current xmlChar in the parser context
2233
 */
2234
xmlChar
2235
1.51M
xmlPopInput(xmlParserCtxtPtr ctxt) {
2236
1.51M
    xmlParserInputPtr input;
2237
2238
1.51M
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2239
1.51M
    if (xmlParserDebugEntities)
2240
0
  xmlGenericError(xmlGenericErrorContext,
2241
0
    "Popping input %d\n", ctxt->inputNr);
2242
1.51M
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2243
1.51M
        (ctxt->instate != XML_PARSER_EOF))
2244
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2245
0
                    "Unfinished entity outside the DTD");
2246
1.51M
    input = inputPop(ctxt);
2247
1.51M
    if (input->entity != NULL)
2248
1.51M
        input->entity->flags &= ~XML_ENT_EXPANDING;
2249
1.51M
    xmlFreeInputStream(input);
2250
1.51M
    if (*ctxt->input->cur == 0)
2251
476k
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2252
1.51M
    return(CUR);
2253
1.51M
}
2254
2255
/**
2256
 * xmlPushInput:
2257
 * @ctxt:  an XML parser context
2258
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2259
 *
2260
 * xmlPushInput: switch to a new input stream which is stacked on top
2261
 *               of the previous one(s).
2262
 * Returns -1 in case of error or the index in the input stack
2263
 */
2264
int
2265
1.51M
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2266
1.51M
    int ret;
2267
1.51M
    if (input == NULL) return(-1);
2268
2269
1.51M
    if (xmlParserDebugEntities) {
2270
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2271
0
      xmlGenericError(xmlGenericErrorContext,
2272
0
        "%s(%d): ", ctxt->input->filename,
2273
0
        ctxt->input->line);
2274
0
  xmlGenericError(xmlGenericErrorContext,
2275
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2276
0
    }
2277
1.51M
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2278
1.51M
        (ctxt->inputNr > 100)) {
2279
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2280
0
        while (ctxt->inputNr > 1)
2281
0
            xmlFreeInputStream(inputPop(ctxt));
2282
0
  return(-1);
2283
0
    }
2284
1.51M
    ret = inputPush(ctxt, input);
2285
1.51M
    if (ctxt->instate == XML_PARSER_EOF)
2286
0
        return(-1);
2287
1.51M
    GROW;
2288
1.51M
    return(ret);
2289
1.51M
}
2290
2291
/**
2292
 * xmlParseCharRef:
2293
 * @ctxt:  an XML parser context
2294
 *
2295
 * DEPRECATED: Internal function, don't use.
2296
 *
2297
 * Parse a numeric character reference. Always consumes '&'.
2298
 *
2299
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2300
 *                  '&#x' [0-9a-fA-F]+ ';'
2301
 *
2302
 * [ WFC: Legal Character ]
2303
 * Characters referred to using character references must match the
2304
 * production for Char.
2305
 *
2306
 * Returns the value parsed (as an int), 0 in case of error
2307
 */
2308
int
2309
2.26k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2310
2.26k
    int val = 0;
2311
2.26k
    int count = 0;
2312
2313
    /*
2314
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2315
     */
2316
2.26k
    if ((RAW == '&') && (NXT(1) == '#') &&
2317
2.26k
        (NXT(2) == 'x')) {
2318
549
  SKIP(3);
2319
549
  GROW;
2320
1.41k
  while (RAW != ';') { /* loop blocked by count */
2321
1.03k
      if (count++ > 20) {
2322
12
    count = 0;
2323
12
    GROW;
2324
12
                if (ctxt->instate == XML_PARSER_EOF)
2325
0
                    return(0);
2326
12
      }
2327
1.03k
      if ((RAW >= '0') && (RAW <= '9'))
2328
397
          val = val * 16 + (CUR - '0');
2329
636
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2330
408
          val = val * 16 + (CUR - 'a') + 10;
2331
228
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2332
61
          val = val * 16 + (CUR - 'A') + 10;
2333
167
      else {
2334
167
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2335
167
    val = 0;
2336
167
    break;
2337
167
      }
2338
866
      if (val > 0x110000)
2339
168
          val = 0x110000;
2340
2341
866
      NEXT;
2342
866
      count++;
2343
866
  }
2344
549
  if (RAW == ';') {
2345
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2346
382
      ctxt->input->col++;
2347
382
      ctxt->input->cur++;
2348
382
  }
2349
1.71k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2350
1.71k
  SKIP(2);
2351
1.71k
  GROW;
2352
15.1k
  while (RAW != ';') { /* loop blocked by count */
2353
13.7k
      if (count++ > 20) {
2354
864
    count = 0;
2355
864
    GROW;
2356
864
                if (ctxt->instate == XML_PARSER_EOF)
2357
0
                    return(0);
2358
864
      }
2359
13.7k
      if ((RAW >= '0') && (RAW <= '9'))
2360
13.4k
          val = val * 10 + (CUR - '0');
2361
337
      else {
2362
337
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2363
337
    val = 0;
2364
337
    break;
2365
337
      }
2366
13.4k
      if (val > 0x110000)
2367
9.50k
          val = 0x110000;
2368
2369
13.4k
      NEXT;
2370
13.4k
      count++;
2371
13.4k
  }
2372
1.71k
  if (RAW == ';') {
2373
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2374
1.38k
      ctxt->input->col++;
2375
1.38k
      ctxt->input->cur++;
2376
1.38k
  }
2377
1.71k
    } else {
2378
0
        if (RAW == '&')
2379
0
            SKIP(1);
2380
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2381
0
    }
2382
2383
    /*
2384
     * [ WFC: Legal Character ]
2385
     * Characters referred to using character references must match the
2386
     * production for Char.
2387
     */
2388
2.26k
    if (val >= 0x110000) {
2389
29
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2390
29
                "xmlParseCharRef: character reference out of bounds\n",
2391
29
          val);
2392
2.23k
    } else if (IS_CHAR(val)) {
2393
1.68k
        return(val);
2394
1.68k
    } else {
2395
550
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2396
550
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2397
550
                    val);
2398
550
    }
2399
579
    return(0);
2400
2.26k
}
2401
2402
/**
2403
 * xmlParseStringCharRef:
2404
 * @ctxt:  an XML parser context
2405
 * @str:  a pointer to an index in the string
2406
 *
2407
 * parse Reference declarations, variant parsing from a string rather
2408
 * than an an input flow.
2409
 *
2410
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2411
 *                  '&#x' [0-9a-fA-F]+ ';'
2412
 *
2413
 * [ WFC: Legal Character ]
2414
 * Characters referred to using character references must match the
2415
 * production for Char.
2416
 *
2417
 * Returns the value parsed (as an int), 0 in case of error, str will be
2418
 *         updated to the current value of the index
2419
 */
2420
static int
2421
38.4k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2422
38.4k
    const xmlChar *ptr;
2423
38.4k
    xmlChar cur;
2424
38.4k
    int val = 0;
2425
2426
38.4k
    if ((str == NULL) || (*str == NULL)) return(0);
2427
38.4k
    ptr = *str;
2428
38.4k
    cur = *ptr;
2429
38.4k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2430
145
  ptr += 3;
2431
145
  cur = *ptr;
2432
421
  while (cur != ';') { /* Non input consuming loop */
2433
290
      if ((cur >= '0') && (cur <= '9'))
2434
228
          val = val * 16 + (cur - '0');
2435
62
      else if ((cur >= 'a') && (cur <= 'f'))
2436
15
          val = val * 16 + (cur - 'a') + 10;
2437
47
      else if ((cur >= 'A') && (cur <= 'F'))
2438
33
          val = val * 16 + (cur - 'A') + 10;
2439
14
      else {
2440
14
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2441
14
    val = 0;
2442
14
    break;
2443
14
      }
2444
276
      if (val > 0x110000)
2445
0
          val = 0x110000;
2446
2447
276
      ptr++;
2448
276
      cur = *ptr;
2449
276
  }
2450
145
  if (cur == ';')
2451
131
      ptr++;
2452
38.2k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2453
38.2k
  ptr += 2;
2454
38.2k
  cur = *ptr;
2455
132k
  while (cur != ';') { /* Non input consuming loops */
2456
94.0k
      if ((cur >= '0') && (cur <= '9'))
2457
93.9k
          val = val * 10 + (cur - '0');
2458
101
      else {
2459
101
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2460
101
    val = 0;
2461
101
    break;
2462
101
      }
2463
93.9k
      if (val > 0x110000)
2464
210
          val = 0x110000;
2465
2466
93.9k
      ptr++;
2467
93.9k
      cur = *ptr;
2468
93.9k
  }
2469
38.2k
  if (cur == ';')
2470
38.1k
      ptr++;
2471
38.2k
    } else {
2472
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2473
0
  return(0);
2474
0
    }
2475
38.4k
    *str = ptr;
2476
2477
    /*
2478
     * [ WFC: Legal Character ]
2479
     * Characters referred to using character references must match the
2480
     * production for Char.
2481
     */
2482
38.4k
    if (val >= 0x110000) {
2483
18
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2484
18
                "xmlParseStringCharRef: character reference out of bounds\n",
2485
18
                val);
2486
38.3k
    } else if (IS_CHAR(val)) {
2487
38.2k
        return(val);
2488
38.2k
    } else {
2489
127
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2490
127
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2491
127
        val);
2492
127
    }
2493
145
    return(0);
2494
38.4k
}
2495
2496
/**
2497
 * xmlParserHandlePEReference:
2498
 * @ctxt:  the parser context
2499
 *
2500
 * [69] PEReference ::= '%' Name ';'
2501
 *
2502
 * [ WFC: No Recursion ]
2503
 * A parsed entity must not contain a recursive
2504
 * reference to itself, either directly or indirectly.
2505
 *
2506
 * [ WFC: Entity Declared ]
2507
 * In a document without any DTD, a document with only an internal DTD
2508
 * subset which contains no parameter entity references, or a document
2509
 * with "standalone='yes'", ...  ... The declaration of a parameter
2510
 * entity must precede any reference to it...
2511
 *
2512
 * [ VC: Entity Declared ]
2513
 * In a document with an external subset or external parameter entities
2514
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2515
 * must precede any reference to it...
2516
 *
2517
 * [ WFC: In DTD ]
2518
 * Parameter-entity references may only appear in the DTD.
2519
 * NOTE: misleading but this is handled.
2520
 *
2521
 * A PEReference may have been detected in the current input stream
2522
 * the handling is done accordingly to
2523
 *      http://www.w3.org/TR/REC-xml#entproc
2524
 * i.e.
2525
 *   - Included in literal in entity values
2526
 *   - Included as Parameter Entity reference within DTDs
2527
 */
2528
void
2529
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2530
0
    switch(ctxt->instate) {
2531
0
  case XML_PARSER_CDATA_SECTION:
2532
0
      return;
2533
0
        case XML_PARSER_COMMENT:
2534
0
      return;
2535
0
  case XML_PARSER_START_TAG:
2536
0
      return;
2537
0
  case XML_PARSER_END_TAG:
2538
0
      return;
2539
0
        case XML_PARSER_EOF:
2540
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2541
0
      return;
2542
0
        case XML_PARSER_PROLOG:
2543
0
  case XML_PARSER_START:
2544
0
  case XML_PARSER_MISC:
2545
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2546
0
      return;
2547
0
  case XML_PARSER_ENTITY_DECL:
2548
0
        case XML_PARSER_CONTENT:
2549
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2550
0
        case XML_PARSER_PI:
2551
0
  case XML_PARSER_SYSTEM_LITERAL:
2552
0
  case XML_PARSER_PUBLIC_LITERAL:
2553
      /* we just ignore it there */
2554
0
      return;
2555
0
        case XML_PARSER_EPILOG:
2556
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2557
0
      return;
2558
0
  case XML_PARSER_ENTITY_VALUE:
2559
      /*
2560
       * NOTE: in the case of entity values, we don't do the
2561
       *       substitution here since we need the literal
2562
       *       entity value to be able to save the internal
2563
       *       subset of the document.
2564
       *       This will be handled by xmlStringDecodeEntities
2565
       */
2566
0
      return;
2567
0
        case XML_PARSER_DTD:
2568
      /*
2569
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2570
       * In the internal DTD subset, parameter-entity references
2571
       * can occur only where markup declarations can occur, not
2572
       * within markup declarations.
2573
       * In that case this is handled in xmlParseMarkupDecl
2574
       */
2575
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2576
0
    return;
2577
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2578
0
    return;
2579
0
            break;
2580
0
        case XML_PARSER_IGNORE:
2581
0
            return;
2582
0
    }
2583
2584
0
    xmlParsePEReference(ctxt);
2585
0
}
2586
2587
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer##_size is expected to be a size_t and is updated on success.
 * mem_error: is a label the enclosing function must provide; it is
 * jumped to when the new size wraps around or the reallocation fails
 * (the old buffer is left untouched in both cases).
 * Expands to a brace block declaring its own `tmp` and `new_size`.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2601
2602
/**
2603
 * xmlStringDecodeEntitiesInt:
2604
 * @ctxt:  the parser context
2605
 * @str:  the input string
2606
 * @len: the string length
2607
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2608
 * @end:  an end marker xmlChar, 0 if none
2609
 * @end2:  an end marker xmlChar, 0 if none
2610
 * @end3:  an end marker xmlChar, 0 if none
2611
 * @check:  whether to perform entity checks
2612
 */
2613
static xmlChar *
2614
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2615
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2616
11.4M
                           int check) {
2617
11.4M
    xmlChar *buffer = NULL;
2618
11.4M
    size_t buffer_size = 0;
2619
11.4M
    size_t nbchars = 0;
2620
2621
11.4M
    xmlChar *current = NULL;
2622
11.4M
    xmlChar *rep = NULL;
2623
11.4M
    const xmlChar *last;
2624
11.4M
    xmlEntityPtr ent;
2625
11.4M
    int c,l;
2626
2627
11.4M
    if (str == NULL)
2628
24
        return(NULL);
2629
11.4M
    last = str + len;
2630
2631
11.4M
    if (((ctxt->depth > 40) &&
2632
11.4M
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2633
11.4M
  (ctxt->depth > 100)) {
2634
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2635
0
                       "Maximum entity nesting depth exceeded");
2636
0
  return(NULL);
2637
0
    }
2638
2639
    /*
2640
     * allocate a translation buffer.
2641
     */
2642
11.4M
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2643
11.4M
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2644
11.4M
    if (buffer == NULL) goto mem_error;
2645
2646
    /*
2647
     * OK loop until we reach one of the ending char or a size limit.
2648
     * we are operating on already parsed values.
2649
     */
2650
11.4M
    if (str < last)
2651
11.3M
  c = CUR_SCHAR(str, l);
2652
76.8k
    else
2653
76.8k
        c = 0;
2654
227M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2655
227M
           (c != end2) && (c != end3) &&
2656
227M
           (ctxt->instate != XML_PARSER_EOF)) {
2657
2658
216M
  if (c == 0) break;
2659
216M
        if ((c == '&') && (str[1] == '#')) {
2660
38.4k
      int val = xmlParseStringCharRef(ctxt, &str);
2661
38.4k
      if (val == 0)
2662
145
                goto int_error;
2663
38.2k
      COPY_BUF(0,buffer,nbchars,val);
2664
38.2k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2665
0
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2666
0
      }
2667
216M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2668
13.5M
      if (xmlParserDebugEntities)
2669
0
    xmlGenericError(xmlGenericErrorContext,
2670
0
      "String decoding Entity Reference: %.30s\n",
2671
0
      str);
2672
13.5M
      ent = xmlParseStringEntityRef(ctxt, &str);
2673
13.5M
      if ((ent != NULL) &&
2674
13.5M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2675
28
    if (ent->content != NULL) {
2676
28
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2677
28
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2678
0
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2679
0
        }
2680
28
    } else {
2681
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2682
0
          "predefined entity has no content\n");
2683
0
                    goto int_error;
2684
0
    }
2685
13.5M
      } else if ((ent != NULL) && (ent->content != NULL)) {
2686
10.8M
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2687
74
                    goto int_error;
2688
2689
10.8M
                if (ent->flags & XML_ENT_EXPANDING) {
2690
191
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2691
191
                    xmlHaltParser(ctxt);
2692
191
                    ent->content[0] = 0;
2693
191
                    goto int_error;
2694
191
                }
2695
2696
10.8M
                ent->flags |= XML_ENT_EXPANDING;
2697
10.8M
    ctxt->depth++;
2698
10.8M
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2699
10.8M
                        ent->length, what, 0, 0, 0, check);
2700
10.8M
    ctxt->depth--;
2701
10.8M
                ent->flags &= ~XML_ENT_EXPANDING;
2702
2703
10.8M
    if (rep == NULL) {
2704
1.22k
                    ent->content[0] = 0;
2705
1.22k
                    goto int_error;
2706
1.22k
                }
2707
2708
10.8M
                current = rep;
2709
568M
                while (*current != 0) { /* non input consuming loop */
2710
557M
                    buffer[nbchars++] = *current++;
2711
557M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2712
293k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2713
293k
                    }
2714
557M
                }
2715
10.8M
                xmlFree(rep);
2716
10.8M
                rep = NULL;
2717
10.8M
      } else if (ent != NULL) {
2718
423k
    int i = xmlStrlen(ent->name);
2719
423k
    const xmlChar *cur = ent->name;
2720
2721
423k
    buffer[nbchars++] = '&';
2722
423k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2723
1.50k
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2724
1.50k
    }
2725
2.99M
    for (;i > 0;i--)
2726
2.56M
        buffer[nbchars++] = *cur++;
2727
423k
    buffer[nbchars++] = ';';
2728
423k
      }
2729
202M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730
207k
      if (xmlParserDebugEntities)
2731
0
    xmlGenericError(xmlGenericErrorContext,
2732
0
      "String decoding PE Reference: %.30s\n", str);
2733
207k
      ent = xmlParseStringPEReference(ctxt, &str);
2734
207k
      if (ent != NULL) {
2735
201k
                if (ent->content == NULL) {
2736
        /*
2737
         * Note: external parsed entities will not be loaded,
2738
         * it is not required for a non-validating parser to
2739
         * complete external PEReferences coming from the
2740
         * internal subset
2741
         */
2742
6
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2743
6
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2744
6
      (ctxt->validate != 0)) {
2745
6
      xmlLoadEntityContent(ctxt, ent);
2746
6
        } else {
2747
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2748
0
      "not validating will not read content for PE entity %s\n",
2749
0
                          ent->name, NULL);
2750
0
        }
2751
6
    }
2752
2753
201k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2754
12
                    goto int_error;
2755
2756
201k
                if (ent->flags & XML_ENT_EXPANDING) {
2757
0
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2758
0
                    xmlHaltParser(ctxt);
2759
0
                    if (ent->content != NULL)
2760
0
                        ent->content[0] = 0;
2761
0
                    goto int_error;
2762
0
                }
2763
2764
201k
                ent->flags |= XML_ENT_EXPANDING;
2765
201k
    ctxt->depth++;
2766
201k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2767
201k
                        ent->length, what, 0, 0, 0, check);
2768
201k
    ctxt->depth--;
2769
201k
                ent->flags &= ~XML_ENT_EXPANDING;
2770
2771
201k
    if (rep == NULL) {
2772
0
                    if (ent->content != NULL)
2773
0
                        ent->content[0] = 0;
2774
0
                    goto int_error;
2775
0
                }
2776
201k
                current = rep;
2777
112M
                while (*current != 0) { /* non input consuming loop */
2778
112M
                    buffer[nbchars++] = *current++;
2779
112M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2780
44.0k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2781
44.0k
                    }
2782
112M
                }
2783
201k
                xmlFree(rep);
2784
201k
                rep = NULL;
2785
201k
      }
2786
202M
  } else {
2787
202M
      COPY_BUF(l,buffer,nbchars,c);
2788
202M
      str += l;
2789
202M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2790
64.9k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2791
64.9k
      }
2792
202M
  }
2793
216M
  if (str < last)
2794
204M
      c = CUR_SCHAR(str, l);
2795
11.3M
  else
2796
11.3M
      c = 0;
2797
216M
    }
2798
11.4M
    buffer[nbchars] = 0;
2799
11.4M
    return(buffer);
2800
2801
0
mem_error:
2802
0
    xmlErrMemory(ctxt, NULL);
2803
1.64k
int_error:
2804
1.64k
    if (rep != NULL)
2805
0
        xmlFree(rep);
2806
1.64k
    if (buffer != NULL)
2807
1.64k
        xmlFree(buffer);
2808
1.64k
    return(NULL);
2809
0
}
2810
2811
/**
2812
 * xmlStringLenDecodeEntities:
2813
 * @ctxt:  the parser context
2814
 * @str:  the input string
2815
 * @len: the string length
2816
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2817
 * @end:  an end marker xmlChar, 0 if none
2818
 * @end2:  an end marker xmlChar, 0 if none
2819
 * @end3:  an end marker xmlChar, 0 if none
2820
 *
2821
 * DEPRECATED: Internal function, don't use.
2822
 *
2823
 * Takes a entity string content and process to do the adequate substitutions.
2824
 *
2825
 * [67] Reference ::= EntityRef | CharRef
2826
 *
2827
 * [69] PEReference ::= '%' Name ';'
2828
 *
2829
 * Returns A newly allocated string with the substitution done. The caller
2830
 *      must deallocate it !
2831
 */
2832
xmlChar *
2833
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2834
                           int what, xmlChar end, xmlChar  end2,
2835
186
                           xmlChar end3) {
2836
186
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2837
0
        return(NULL);
2838
186
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2839
186
                                      end, end2, end3, 0));
2840
186
}
2841
2842
/**
2843
 * xmlStringDecodeEntities:
2844
 * @ctxt:  the parser context
2845
 * @str:  the input string
2846
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2847
 * @end:  an end marker xmlChar, 0 if none
2848
 * @end2:  an end marker xmlChar, 0 if none
2849
 * @end3:  an end marker xmlChar, 0 if none
2850
 *
2851
 * DEPRECATED: Internal function, don't use.
2852
 *
2853
 * Takes a entity string content and process to do the adequate substitutions.
2854
 *
2855
 * [67] Reference ::= EntityRef | CharRef
2856
 *
2857
 * [69] PEReference ::= '%' Name ';'
2858
 *
2859
 * Returns A newly allocated string with the substitution done. The caller
2860
 *      must deallocate it !
2861
 */
2862
xmlChar *
2863
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2864
18.2k
            xmlChar end, xmlChar  end2, xmlChar end3) {
2865
18.2k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2866
18.2k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2867
18.2k
                                      end, end2, end3, 0));
2868
18.2k
}
2869
2870
/************************************************************************
2871
 *                  *
2872
 *    Commodity functions, cleanup needed ?     *
2873
 *                  *
2874
 ************************************************************************/
2875
2876
/**
2877
 * areBlanks:
2878
 * @ctxt:  an XML parser context
2879
 * @str:  a xmlChar *
2880
 * @len:  the size of @str
2881
 * @blank_chars: we know the chars are blanks
2882
 *
2883
 * Is this a sequence of blank chars that one can ignore ?
2884
 *
2885
 * Returns 1 if ignorable 0 otherwise.
2886
 */
2887
2888
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2889
764k
                     int blank_chars) {
2890
764k
    int i, ret;
2891
764k
    xmlNodePtr lastChild;
2892
2893
    /*
2894
     * Don't spend time trying to differentiate them, the same callback is
2895
     * used !
2896
     */
2897
764k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2898
9.65k
  return(0);
2899
2900
    /*
2901
     * Check for xml:space value.
2902
     */
2903
754k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2904
754k
        (*(ctxt->space) == -2))
2905
121k
  return(0);
2906
2907
    /*
2908
     * Check that the string is made of blanks
2909
     */
2910
633k
    if (blank_chars == 0) {
2911
1.96M
  for (i = 0;i < len;i++)
2912
1.56M
      if (!(IS_BLANK_CH(str[i]))) return(0);
2913
499k
    }
2914
2915
    /*
2916
     * Look if the element is mixed content in the DTD if available
2917
     */
2918
529k
    if (ctxt->node == NULL) return(0);
2919
525k
    if (ctxt->myDoc != NULL) {
2920
525k
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2921
525k
        if (ret == 0) return(1);
2922
403k
        if (ret == 1) return(0);
2923
403k
    }
2924
2925
    /*
2926
     * Otherwise, heuristic :-\
2927
     */
2928
399k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2929
396k
    if ((ctxt->node->children == NULL) &&
2930
396k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2931
2932
395k
    lastChild = xmlGetLastChild(ctxt->node);
2933
395k
    if (lastChild == NULL) {
2934
83.6k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2935
83.6k
            (ctxt->node->content != NULL)) return(0);
2936
312k
    } else if (xmlNodeIsText(lastChild))
2937
7.88k
        return(0);
2938
304k
    else if ((ctxt->node->children != NULL) &&
2939
304k
             (xmlNodeIsText(ctxt->node->children)))
2940
8.16k
        return(0);
2941
379k
    return(1);
2942
395k
}
2943
2944
/************************************************************************
2945
 *                  *
2946
 *    Extra stuff for namespace support     *
2947
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2948
 *                  *
2949
 ************************************************************************/
2950
2951
/**
2952
 * xmlSplitQName:
2953
 * @ctxt:  an XML parser context
2954
 * @name:  an XML parser context
2955
 * @prefix:  a xmlChar **
2956
 *
2957
 * parse an UTF8 encoded XML qualified name string
2958
 *
2959
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2960
 *
2961
 * [NS 6] Prefix ::= NCName
2962
 *
2963
 * [NS 7] LocalPart ::= NCName
2964
 *
2965
 * Returns the local part, and prefix is updated
2966
 *   to get the Prefix if any.
2967
 */
2968
2969
xmlChar *
2970
1.86M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2971
1.86M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2972
1.86M
    xmlChar *buffer = NULL;
2973
1.86M
    int len = 0;
2974
1.86M
    int max = XML_MAX_NAMELEN;
2975
1.86M
    xmlChar *ret = NULL;
2976
1.86M
    const xmlChar *cur = name;
2977
1.86M
    int c;
2978
2979
1.86M
    if (prefix == NULL) return(NULL);
2980
1.86M
    *prefix = NULL;
2981
2982
1.86M
    if (cur == NULL) return(NULL);
2983
2984
#ifndef XML_XML_NAMESPACE
2985
    /* xml: prefix is not really a namespace */
2986
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2987
        (cur[2] == 'l') && (cur[3] == ':'))
2988
  return(xmlStrdup(name));
2989
#endif
2990
2991
    /* nasty but well=formed */
2992
1.86M
    if (cur[0] == ':')
2993
65
  return(xmlStrdup(name));
2994
2995
1.86M
    c = *cur++;
2996
9.73M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2997
7.86M
  buf[len++] = c;
2998
7.86M
  c = *cur++;
2999
7.86M
    }
3000
1.86M
    if (len >= max) {
3001
  /*
3002
   * Okay someone managed to make a huge name, so he's ready to pay
3003
   * for the processing speed.
3004
   */
3005
377
  max = len * 2;
3006
3007
377
  buffer = (xmlChar *) xmlMallocAtomic(max);
3008
377
  if (buffer == NULL) {
3009
0
      xmlErrMemory(ctxt, NULL);
3010
0
      return(NULL);
3011
0
  }
3012
377
  memcpy(buffer, buf, len);
3013
679k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3014
679k
      if (len + 10 > max) {
3015
1.16k
          xmlChar *tmp;
3016
3017
1.16k
    max *= 2;
3018
1.16k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3019
1.16k
    if (tmp == NULL) {
3020
0
        xmlFree(buffer);
3021
0
        xmlErrMemory(ctxt, NULL);
3022
0
        return(NULL);
3023
0
    }
3024
1.16k
    buffer = tmp;
3025
1.16k
      }
3026
679k
      buffer[len++] = c;
3027
679k
      c = *cur++;
3028
679k
  }
3029
377
  buffer[len] = 0;
3030
377
    }
3031
3032
1.86M
    if ((c == ':') && (*cur == 0)) {
3033
926
        if (buffer != NULL)
3034
9
      xmlFree(buffer);
3035
926
  *prefix = NULL;
3036
926
  return(xmlStrdup(name));
3037
926
    }
3038
3039
1.86M
    if (buffer == NULL)
3040
1.86M
  ret = xmlStrndup(buf, len);
3041
368
    else {
3042
368
  ret = buffer;
3043
368
  buffer = NULL;
3044
368
  max = XML_MAX_NAMELEN;
3045
368
    }
3046
3047
3048
1.86M
    if (c == ':') {
3049
82.0k
  c = *cur;
3050
82.0k
        *prefix = ret;
3051
82.0k
  if (c == 0) {
3052
0
      return(xmlStrndup(BAD_CAST "", 0));
3053
0
  }
3054
82.0k
  len = 0;
3055
3056
  /*
3057
   * Check that the first character is proper to start
3058
   * a new name
3059
   */
3060
82.0k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061
82.0k
        ((c >= 0x41) && (c <= 0x5A)) ||
3062
82.0k
        (c == '_') || (c == ':'))) {
3063
110
      int l;
3064
110
      int first = CUR_SCHAR(cur, l);
3065
3066
110
      if (!IS_LETTER(first) && (first != '_')) {
3067
65
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3068
65
          "Name %s is not XML Namespace compliant\n",
3069
65
          name);
3070
65
      }
3071
110
  }
3072
82.0k
  cur++;
3073
3074
466k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075
384k
      buf[len++] = c;
3076
384k
      c = *cur++;
3077
384k
  }
3078
82.0k
  if (len >= max) {
3079
      /*
3080
       * Okay someone managed to make a huge name, so he's ready to pay
3081
       * for the processing speed.
3082
       */
3083
31
      max = len * 2;
3084
3085
31
      buffer = (xmlChar *) xmlMallocAtomic(max);
3086
31
      if (buffer == NULL) {
3087
0
          xmlErrMemory(ctxt, NULL);
3088
0
    return(NULL);
3089
0
      }
3090
31
      memcpy(buffer, buf, len);
3091
72.4k
      while (c != 0) { /* tested bigname2.xml */
3092
72.3k
    if (len + 10 > max) {
3093
101
        xmlChar *tmp;
3094
3095
101
        max *= 2;
3096
101
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3097
101
        if (tmp == NULL) {
3098
0
      xmlErrMemory(ctxt, NULL);
3099
0
      xmlFree(buffer);
3100
0
      return(NULL);
3101
0
        }
3102
101
        buffer = tmp;
3103
101
    }
3104
72.3k
    buffer[len++] = c;
3105
72.3k
    c = *cur++;
3106
72.3k
      }
3107
31
      buffer[len] = 0;
3108
31
  }
3109
3110
82.0k
  if (buffer == NULL)
3111
82.0k
      ret = xmlStrndup(buf, len);
3112
31
  else {
3113
31
      ret = buffer;
3114
31
  }
3115
82.0k
    }
3116
3117
1.86M
    return(ret);
3118
1.86M
}
3119
3120
/************************************************************************
3121
 *                  *
3122
 *      The parser itself       *
3123
 *  Relates to http://www.w3.org/TR/REC-xml       *
3124
 *                  *
3125
 ************************************************************************/
3126
3127
/************************************************************************
3128
 *                  *
3129
 *  Routines to parse Name, NCName and NmToken      *
3130
 *                  *
3131
 ************************************************************************/
3132
/*
 * Call counters, only compiled in when DEBUG is defined. The parsing
 * routines of this section that own a counter increment it on entry,
 * inside a matching #ifdef DEBUG.
 */
#ifdef DEBUG
3133
static unsigned long nbParseName = 0;
3134
static unsigned long nbParseNmToken = 0;
3135
static unsigned long nbParseNCName = 0;
3136
static unsigned long nbParseNCNameComplex = 0;
3137
static unsigned long nbParseNameComplex = 0;
3138
static unsigned long nbParseStringName = 0;
3139
#endif
3140
3141
/*
3142
 * The two following functions are related to the change of accepted
3143
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3144
 * They correspond to the modified production [4] and the new production [4a]
3145
 * changes in that revision. Also note that the macros used for the
3146
 * productions Letter, Digit, CombiningChar and Extender are not needed
3147
 * anymore.
3148
 * We still keep compatibility to pre-revision5 parsing semantic if the
3149
 * new XML_PARSE_OLD10 option is given to the parser.
3150
 */
3151
static int
3152
13.9M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3153
13.9M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3154
        /*
3155
   * Use the new checks of production [4] [4a] amd [5] of the
3156
   * Update 5 of XML-1.0
3157
   */
3158
1.88M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3159
1.88M
      (((c >= 'a') && (c <= 'z')) ||
3160
1.88M
       ((c >= 'A') && (c <= 'Z')) ||
3161
1.88M
       (c == '_') || (c == ':') ||
3162
1.88M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3163
1.88M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3164
1.88M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3165
1.88M
       ((c >= 0x370) && (c <= 0x37D)) ||
3166
1.88M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3167
1.88M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3168
1.88M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3169
1.88M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3170
1.88M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3171
1.88M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3172
1.88M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3173
1.88M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3174
1.88M
      return(1);
3175
12.0M
    } else {
3176
12.0M
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3177
12.0M
      return(1);
3178
12.0M
    }
3179
8.24k
    return(0);
3180
13.9M
}
3181
3182
static int
3183
138M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3184
138M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3185
        /*
3186
   * Use the new checks of production [4] [4a] amd [5] of the
3187
   * Update 5 of XML-1.0
3188
   */
3189
65.3M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3190
65.3M
      (((c >= 'a') && (c <= 'z')) ||
3191
65.3M
       ((c >= 'A') && (c <= 'Z')) ||
3192
65.3M
       ((c >= '0') && (c <= '9')) || /* !start */
3193
65.3M
       (c == '_') || (c == ':') ||
3194
65.3M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3195
65.3M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3196
65.3M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3197
65.3M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3198
65.3M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3199
65.3M
       ((c >= 0x370) && (c <= 0x37D)) ||
3200
65.3M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3201
65.3M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3202
65.3M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3203
65.3M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3204
65.3M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3205
65.3M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3206
65.3M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3207
65.3M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3208
65.3M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3209
63.1M
       return(1);
3210
73.4M
    } else {
3211
73.4M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3212
73.4M
            (c == '.') || (c == '-') ||
3213
73.4M
      (c == '_') || (c == ':') ||
3214
73.4M
      (IS_COMBINING(c)) ||
3215
73.4M
      (IS_EXTENDER(c)))
3216
61.3M
      return(1);
3217
73.4M
    }
3218
14.2M
    return(0);
3219
138M
}
3220
3221
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3222
                                          int *len, int *alloc, int normalize);
3223
3224
static const xmlChar *
3225
32.6k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3226
32.6k
    int len = 0, l;
3227
32.6k
    int c;
3228
32.6k
    int count = 0;
3229
32.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3230
6.77k
                    XML_MAX_TEXT_LENGTH :
3231
32.6k
                    XML_MAX_NAME_LENGTH;
3232
3233
#ifdef DEBUG
3234
    nbParseNameComplex++;
3235
#endif
3236
3237
    /*
3238
     * Handler for more complex cases
3239
     */
3240
32.6k
    GROW;
3241
32.6k
    if (ctxt->instate == XML_PARSER_EOF)
3242
0
        return(NULL);
3243
32.6k
    c = CUR_CHAR(l);
3244
32.6k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3245
        /*
3246
   * Use the new checks of production [4] [4a] amd [5] of the
3247
   * Update 5 of XML-1.0
3248
   */
3249
25.2k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3250
25.2k
      (!(((c >= 'a') && (c <= 'z')) ||
3251
24.7k
         ((c >= 'A') && (c <= 'Z')) ||
3252
24.7k
         (c == '_') || (c == ':') ||
3253
24.7k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3254
24.7k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3255
24.7k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3256
24.7k
         ((c >= 0x370) && (c <= 0x37D)) ||
3257
24.7k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258
24.7k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3259
24.7k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3260
24.7k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3261
24.7k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3262
24.7k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3263
24.7k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3264
24.7k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3265
6.40k
      return(NULL);
3266
6.40k
  }
3267
18.8k
  len += l;
3268
18.8k
  NEXTL(l);
3269
18.8k
  c = CUR_CHAR(l);
3270
330k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3271
330k
         (((c >= 'a') && (c <= 'z')) ||
3272
329k
          ((c >= 'A') && (c <= 'Z')) ||
3273
329k
          ((c >= '0') && (c <= '9')) || /* !start */
3274
329k
          (c == '_') || (c == ':') ||
3275
329k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3276
329k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3277
329k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3278
329k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3279
329k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3280
329k
          ((c >= 0x370) && (c <= 0x37D)) ||
3281
329k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3282
329k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3283
329k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3284
329k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3285
329k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3286
329k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3287
329k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3288
329k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3289
329k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3290
329k
    )) {
3291
311k
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3292
1.79k
    count = 0;
3293
1.79k
    GROW;
3294
1.79k
                if (ctxt->instate == XML_PARSER_EOF)
3295
0
                    return(NULL);
3296
1.79k
      }
3297
311k
            if (len <= INT_MAX - l)
3298
311k
          len += l;
3299
311k
      NEXTL(l);
3300
311k
      c = CUR_CHAR(l);
3301
311k
  }
3302
18.8k
    } else {
3303
7.46k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3304
7.46k
      (!IS_LETTER(c) && (c != '_') &&
3305
6.71k
       (c != ':'))) {
3306
4.75k
      return(NULL);
3307
4.75k
  }
3308
2.71k
  len += l;
3309
2.71k
  NEXTL(l);
3310
2.71k
  c = CUR_CHAR(l);
3311
3312
498k
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3313
498k
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3314
498k
    (c == '.') || (c == '-') ||
3315
498k
    (c == '_') || (c == ':') ||
3316
498k
    (IS_COMBINING(c)) ||
3317
498k
    (IS_EXTENDER(c)))) {
3318
496k
      if (count++ > XML_PARSER_CHUNK_SIZE) {
3319
4.53k
    count = 0;
3320
4.53k
    GROW;
3321
4.53k
                if (ctxt->instate == XML_PARSER_EOF)
3322
0
                    return(NULL);
3323
4.53k
      }
3324
496k
            if (len <= INT_MAX - l)
3325
496k
          len += l;
3326
496k
      NEXTL(l);
3327
496k
      c = CUR_CHAR(l);
3328
496k
  }
3329
2.71k
    }
3330
21.5k
    if (len > maxLength) {
3331
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3332
0
        return(NULL);
3333
0
    }
3334
21.5k
    if (ctxt->input->cur - ctxt->input->base < len) {
3335
        /*
3336
         * There were a couple of bugs where PERefs lead to to a change
3337
         * of the buffer. Check the buffer size to avoid passing an invalid
3338
         * pointer to xmlDictLookup.
3339
         */
3340
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3341
0
                    "unexpected change of input buffer");
3342
0
        return (NULL);
3343
0
    }
3344
21.5k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3345
22
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3346
21.5k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3347
21.5k
}
3348
3349
/**
3350
 * xmlParseName:
3351
 * @ctxt:  an XML parser context
3352
 *
3353
 * DEPRECATED: Internal function, don't use.
3354
 *
3355
 * parse an XML name.
3356
 *
3357
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3358
 *                  CombiningChar | Extender
3359
 *
3360
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3361
 *
3362
 * [6] Names ::= Name (#x20 Name)*
3363
 *
3364
 * Returns the Name parsed or NULL
3365
 */
3366
3367
const xmlChar *
3368
6.83M
xmlParseName(xmlParserCtxtPtr ctxt) {
3369
6.83M
    const xmlChar *in;
3370
6.83M
    const xmlChar *ret;
3371
6.83M
    size_t count = 0;
3372
6.83M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3373
181k
                       XML_MAX_TEXT_LENGTH :
3374
6.83M
                       XML_MAX_NAME_LENGTH;
3375
3376
6.83M
    GROW;
3377
3378
#ifdef DEBUG
3379
    nbParseName++;
3380
#endif
3381
3382
    /*
3383
     * Accelerator for simple ASCII names
3384
     */
3385
6.83M
    in = ctxt->input->cur;
3386
6.83M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387
6.83M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3388
6.83M
  (*in == '_') || (*in == ':')) {
3389
6.82M
  in++;
3390
39.0M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391
39.0M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3392
39.0M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3393
39.0M
         (*in == '_') || (*in == '-') ||
3394
39.0M
         (*in == ':') || (*in == '.'))
3395
32.2M
      in++;
3396
6.82M
  if ((*in > 0) && (*in < 0x80)) {
3397
6.80M
      count = in - ctxt->input->cur;
3398
6.80M
            if (count > maxLength) {
3399
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3400
0
                return(NULL);
3401
0
            }
3402
6.80M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3403
6.80M
      ctxt->input->cur = in;
3404
6.80M
      ctxt->input->col += count;
3405
6.80M
      if (ret == NULL)
3406
0
          xmlErrMemory(ctxt, NULL);
3407
6.80M
      return(ret);
3408
6.80M
  }
3409
6.82M
    }
3410
    /* accelerator for special cases */
3411
32.6k
    return(xmlParseNameComplex(ctxt));
3412
6.83M
}
3413
3414
static const xmlChar *
3415
13.0k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3416
13.0k
    int len = 0, l;
3417
13.0k
    int c;
3418
13.0k
    int count = 0;
3419
13.0k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3420
1.15k
                    XML_MAX_TEXT_LENGTH :
3421
13.0k
                    XML_MAX_NAME_LENGTH;
3422
13.0k
    size_t startPosition = 0;
3423
3424
#ifdef DEBUG
3425
    nbParseNCNameComplex++;
3426
#endif
3427
3428
    /*
3429
     * Handler for more complex cases
3430
     */
3431
13.0k
    GROW;
3432
13.0k
    startPosition = CUR_PTR - BASE_PTR;
3433
13.0k
    c = CUR_CHAR(l);
3434
13.0k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3435
13.0k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3436
9.99k
  return(NULL);
3437
9.99k
    }
3438
3439
342k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3440
342k
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3441
339k
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3442
3.00k
      count = 0;
3443
3.00k
      GROW;
3444
3.00k
            if (ctxt->instate == XML_PARSER_EOF)
3445
0
                return(NULL);
3446
3.00k
  }
3447
339k
        if (len <= INT_MAX - l)
3448
339k
      len += l;
3449
339k
  NEXTL(l);
3450
339k
  c = CUR_CHAR(l);
3451
339k
  if (c == 0) {
3452
870
      count = 0;
3453
      /*
3454
       * when shrinking to extend the buffer we really need to preserve
3455
       * the part of the name we already parsed. Hence rolling back
3456
       * by current length.
3457
       */
3458
870
      ctxt->input->cur -= l;
3459
870
      GROW;
3460
870
            if (ctxt->instate == XML_PARSER_EOF)
3461
0
                return(NULL);
3462
870
      ctxt->input->cur += l;
3463
870
      c = CUR_CHAR(l);
3464
870
  }
3465
339k
    }
3466
3.10k
    if (len > maxLength) {
3467
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3468
0
        return(NULL);
3469
0
    }
3470
3.10k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3471
3.10k
}
3472
3473
/**
3474
 * xmlParseNCName:
3475
 * @ctxt:  an XML parser context
3476
 * @len:  length of the string parsed
3477
 *
3478
 * parse an XML name.
3479
 *
3480
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3481
 *                      CombiningChar | Extender
3482
 *
3483
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3484
 *
3485
 * Returns the Name parsed or NULL
3486
 */
3487
3488
static const xmlChar *
3489
1.20M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3490
1.20M
    const xmlChar *in, *e;
3491
1.20M
    const xmlChar *ret;
3492
1.20M
    size_t count = 0;
3493
1.20M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3494
17.6k
                       XML_MAX_TEXT_LENGTH :
3495
1.20M
                       XML_MAX_NAME_LENGTH;
3496
3497
#ifdef DEBUG
3498
    nbParseNCName++;
3499
#endif
3500
3501
    /*
3502
     * Accelerator for simple ASCII names
3503
     */
3504
1.20M
    in = ctxt->input->cur;
3505
1.20M
    e = ctxt->input->end;
3506
1.20M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3507
1.20M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3508
1.20M
   (*in == '_')) && (in < e)) {
3509
1.19M
  in++;
3510
5.65M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3511
5.65M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3512
5.65M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3513
5.65M
          (*in == '_') || (*in == '-') ||
3514
5.65M
          (*in == '.')) && (in < e))
3515
4.46M
      in++;
3516
1.19M
  if (in >= e)
3517
356
      goto complex;
3518
1.19M
  if ((*in > 0) && (*in < 0x80)) {
3519
1.19M
      count = in - ctxt->input->cur;
3520
1.19M
            if (count > maxLength) {
3521
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3522
0
                return(NULL);
3523
0
            }
3524
1.19M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3525
1.19M
      ctxt->input->cur = in;
3526
1.19M
      ctxt->input->col += count;
3527
1.19M
      if (ret == NULL) {
3528
0
          xmlErrMemory(ctxt, NULL);
3529
0
      }
3530
1.19M
      return(ret);
3531
1.19M
  }
3532
1.19M
    }
3533
13.0k
complex:
3534
13.0k
    return(xmlParseNCNameComplex(ctxt));
3535
1.20M
}
3536
3537
/**
3538
 * xmlParseNameAndCompare:
3539
 * @ctxt:  an XML parser context
3540
 *
3541
 * parse an XML name and compares for match
3542
 * (specialized for endtag parsing)
3543
 *
3544
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3545
 * and the name for mismatch
3546
 */
3547
3548
static const xmlChar *
3549
1.25M
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3550
1.25M
    register const xmlChar *cmp = other;
3551
1.25M
    register const xmlChar *in;
3552
1.25M
    const xmlChar *ret;
3553
3554
1.25M
    GROW;
3555
1.25M
    if (ctxt->instate == XML_PARSER_EOF)
3556
0
        return(NULL);
3557
3558
1.25M
    in = ctxt->input->cur;
3559
6.57M
    while (*in != 0 && *in == *cmp) {
3560
5.31M
  ++in;
3561
5.31M
  ++cmp;
3562
5.31M
    }
3563
1.25M
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3564
  /* success */
3565
1.24M
  ctxt->input->col += in - ctxt->input->cur;
3566
1.24M
  ctxt->input->cur = in;
3567
1.24M
  return (const xmlChar*) 1;
3568
1.24M
    }
3569
    /* failure (or end of input buffer), check with full function */
3570
8.07k
    ret = xmlParseName (ctxt);
3571
    /* strings coming from the dictionary direct compare possible */
3572
8.07k
    if (ret == other) {
3573
192
  return (const xmlChar*) 1;
3574
192
    }
3575
7.88k
    return ret;
3576
8.07k
}
3577
3578
/**
3579
 * xmlParseStringName:
3580
 * @ctxt:  an XML parser context
3581
 * @str:  a pointer to the string pointer (IN/OUT)
3582
 *
3583
 * parse an XML name.
3584
 *
3585
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3586
 *                  CombiningChar | Extender
3587
 *
3588
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3589
 *
3590
 * [6] Names ::= Name (#x20 Name)*
3591
 *
3592
 * Returns the Name parsed or NULL. The @str pointer
3593
 * is updated to the current location in the string.
3594
 */
3595
3596
static xmlChar *
3597
13.9M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3598
13.9M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3599
13.9M
    const xmlChar *cur = *str;
3600
13.9M
    int len = 0, l;
3601
13.9M
    int c;
3602
13.9M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3603
5.00M
                    XML_MAX_TEXT_LENGTH :
3604
13.9M
                    XML_MAX_NAME_LENGTH;
3605
3606
#ifdef DEBUG
3607
    nbParseStringName++;
3608
#endif
3609
3610
13.9M
    c = CUR_SCHAR(cur, l);
3611
13.9M
    if (!xmlIsNameStartChar(ctxt, c)) {
3612
113
  return(NULL);
3613
113
    }
3614
3615
13.9M
    COPY_BUF(l,buf,len,c);
3616
13.9M
    cur += l;
3617
13.9M
    c = CUR_SCHAR(cur, l);
3618
100M
    while (xmlIsNameChar(ctxt, c)) {
3619
86.4M
  COPY_BUF(l,buf,len,c);
3620
86.4M
  cur += l;
3621
86.4M
  c = CUR_SCHAR(cur, l);
3622
86.4M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3623
      /*
3624
       * Okay someone managed to make a huge name, so he's ready to pay
3625
       * for the processing speed.
3626
       */
3627
158k
      xmlChar *buffer;
3628
158k
      int max = len * 2;
3629
3630
158k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3631
158k
      if (buffer == NULL) {
3632
0
          xmlErrMemory(ctxt, NULL);
3633
0
    return(NULL);
3634
0
      }
3635
158k
      memcpy(buffer, buf, len);
3636
36.1M
      while (xmlIsNameChar(ctxt, c)) {
3637
35.9M
    if (len + 10 > max) {
3638
158k
        xmlChar *tmp;
3639
3640
158k
        max *= 2;
3641
158k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3642
158k
        if (tmp == NULL) {
3643
0
      xmlErrMemory(ctxt, NULL);
3644
0
      xmlFree(buffer);
3645
0
      return(NULL);
3646
0
        }
3647
158k
        buffer = tmp;
3648
158k
    }
3649
35.9M
    COPY_BUF(l,buffer,len,c);
3650
35.9M
    cur += l;
3651
35.9M
    c = CUR_SCHAR(cur, l);
3652
35.9M
                if (len > maxLength) {
3653
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3654
0
                    xmlFree(buffer);
3655
0
                    return(NULL);
3656
0
                }
3657
35.9M
      }
3658
158k
      buffer[len] = 0;
3659
158k
      *str = cur;
3660
158k
      return(buffer);
3661
158k
  }
3662
86.4M
    }
3663
13.8M
    if (len > maxLength) {
3664
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3665
0
        return(NULL);
3666
0
    }
3667
13.8M
    *str = cur;
3668
13.8M
    return(xmlStrndup(buf, len));
3669
13.8M
}
3670
3671
/**
3672
 * xmlParseNmtoken:
3673
 * @ctxt:  an XML parser context
3674
 *
3675
 * DEPRECATED: Internal function, don't use.
3676
 *
3677
 * parse an XML Nmtoken.
3678
 *
3679
 * [7] Nmtoken ::= (NameChar)+
3680
 *
3681
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3682
 *
3683
 * Returns the Nmtoken parsed or NULL
3684
 */
3685
3686
xmlChar *
3687
313k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688
313k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3689
313k
    int len = 0, l;
3690
313k
    int c;
3691
313k
    int count = 0;
3692
313k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3693
9.50k
                    XML_MAX_TEXT_LENGTH :
3694
313k
                    XML_MAX_NAME_LENGTH;
3695
3696
#ifdef DEBUG
3697
    nbParseNmToken++;
3698
#endif
3699
3700
313k
    GROW;
3701
313k
    if (ctxt->instate == XML_PARSER_EOF)
3702
0
        return(NULL);
3703
313k
    c = CUR_CHAR(l);
3704
3705
1.92M
    while (xmlIsNameChar(ctxt, c)) {
3706
1.61M
  if (count++ > XML_PARSER_CHUNK_SIZE) {
3707
0
      count = 0;
3708
0
      GROW;
3709
0
  }
3710
1.61M
  COPY_BUF(l,buf,len,c);
3711
1.61M
  NEXTL(l);
3712
1.61M
  c = CUR_CHAR(l);
3713
1.61M
  if (c == 0) {
3714
43
      count = 0;
3715
43
      GROW;
3716
43
      if (ctxt->instate == XML_PARSER_EOF)
3717
0
    return(NULL);
3718
43
            c = CUR_CHAR(l);
3719
43
  }
3720
1.61M
  if (len >= XML_MAX_NAMELEN) {
3721
      /*
3722
       * Okay someone managed to make a huge token, so he's ready to pay
3723
       * for the processing speed.
3724
       */
3725
54
      xmlChar *buffer;
3726
54
      int max = len * 2;
3727
3728
54
      buffer = (xmlChar *) xmlMallocAtomic(max);
3729
54
      if (buffer == NULL) {
3730
0
          xmlErrMemory(ctxt, NULL);
3731
0
    return(NULL);
3732
0
      }
3733
54
      memcpy(buffer, buf, len);
3734
229k
      while (xmlIsNameChar(ctxt, c)) {
3735
229k
    if (count++ > XML_PARSER_CHUNK_SIZE) {
3736
2.28k
        count = 0;
3737
2.28k
        GROW;
3738
2.28k
                    if (ctxt->instate == XML_PARSER_EOF) {
3739
0
                        xmlFree(buffer);
3740
0
                        return(NULL);
3741
0
                    }
3742
2.28k
    }
3743
229k
    if (len + 10 > max) {
3744
226
        xmlChar *tmp;
3745
3746
226
        max *= 2;
3747
226
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3748
226
        if (tmp == NULL) {
3749
0
      xmlErrMemory(ctxt, NULL);
3750
0
      xmlFree(buffer);
3751
0
      return(NULL);
3752
0
        }
3753
226
        buffer = tmp;
3754
226
    }
3755
229k
    COPY_BUF(l,buffer,len,c);
3756
229k
    NEXTL(l);
3757
229k
    c = CUR_CHAR(l);
3758
229k
                if (len > maxLength) {
3759
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3760
0
                    xmlFree(buffer);
3761
0
                    return(NULL);
3762
0
                }
3763
229k
      }
3764
54
      buffer[len] = 0;
3765
54
      return(buffer);
3766
54
  }
3767
1.61M
    }
3768
313k
    if (len == 0)
3769
1.46k
        return(NULL);
3770
311k
    if (len > maxLength) {
3771
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772
0
        return(NULL);
3773
0
    }
3774
311k
    return(xmlStrndup(buf, len));
3775
311k
}
3776
3777
/**
3778
 * xmlParseEntityValue:
3779
 * @ctxt:  an XML parser context
3780
 * @orig:  if non-NULL store a copy of the original entity value
3781
 *
3782
 * DEPRECATED: Internal function, don't use.
3783
 *
3784
 * parse a value for ENTITY declarations
3785
 *
3786
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3787
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3788
 *
3789
 * Returns the EntityValue parsed with reference substituted or NULL
3790
 */
3791
3792
xmlChar *
3793
277k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3794
277k
    xmlChar *buf = NULL;
3795
277k
    int len = 0;
3796
277k
    int size = XML_PARSER_BUFFER_SIZE;
3797
277k
    int c, l;
3798
277k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3799
16.3k
                    XML_MAX_HUGE_LENGTH :
3800
277k
                    XML_MAX_TEXT_LENGTH;
3801
277k
    xmlChar stop;
3802
277k
    xmlChar *ret = NULL;
3803
277k
    const xmlChar *cur = NULL;
3804
277k
    xmlParserInputPtr input;
3805
3806
277k
    if (RAW == '"') stop = '"';
3807
63.8k
    else if (RAW == '\'') stop = '\'';
3808
0
    else {
3809
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3810
0
  return(NULL);
3811
0
    }
3812
277k
    buf = (xmlChar *) xmlMallocAtomic(size);
3813
277k
    if (buf == NULL) {
3814
0
  xmlErrMemory(ctxt, NULL);
3815
0
  return(NULL);
3816
0
    }
3817
3818
    /*
3819
     * The content of the entity definition is copied in a buffer.
3820
     */
3821
3822
277k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3823
277k
    input = ctxt->input;
3824
277k
    GROW;
3825
277k
    if (ctxt->instate == XML_PARSER_EOF)
3826
0
        goto error;
3827
277k
    NEXT;
3828
277k
    c = CUR_CHAR(l);
3829
    /*
3830
     * NOTE: 4.4.5 Included in Literal
3831
     * When a parameter entity reference appears in a literal entity
3832
     * value, ... a single or double quote character in the replacement
3833
     * text is always treated as a normal data character and will not
3834
     * terminate the literal.
3835
     * In practice it means we stop the loop only when back at parsing
3836
     * the initial entity and the quote is found
3837
     */
3838
14.7M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3839
14.7M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3840
14.5M
  if (len + 5 >= size) {
3841
55.4k
      xmlChar *tmp;
3842
3843
55.4k
      size *= 2;
3844
55.4k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3845
55.4k
      if (tmp == NULL) {
3846
0
    xmlErrMemory(ctxt, NULL);
3847
0
                goto error;
3848
0
      }
3849
55.4k
      buf = tmp;
3850
55.4k
  }
3851
14.5M
  COPY_BUF(l,buf,len,c);
3852
14.5M
  NEXTL(l);
3853
3854
14.5M
  GROW;
3855
14.5M
  c = CUR_CHAR(l);
3856
14.5M
  if (c == 0) {
3857
163
      GROW;
3858
163
      c = CUR_CHAR(l);
3859
163
  }
3860
3861
14.5M
        if (len > maxLength) {
3862
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3863
0
                           "entity value too long\n");
3864
0
            goto error;
3865
0
        }
3866
14.5M
    }
3867
277k
    buf[len] = 0;
3868
277k
    if (ctxt->instate == XML_PARSER_EOF)
3869
0
        goto error;
3870
277k
    if (c != stop) {
3871
239
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3872
239
        goto error;
3873
239
    }
3874
277k
    NEXT;
3875
3876
    /*
3877
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3878
     * reference constructs. Note Charref will be handled in
3879
     * xmlStringDecodeEntities()
3880
     */
3881
277k
    cur = buf;
3882
11.4M
    while (*cur != 0) { /* non input consuming */
3883
11.1M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3884
236k
      xmlChar *name;
3885
236k
      xmlChar tmp = *cur;
3886
236k
            int nameOk = 0;
3887
3888
236k
      cur++;
3889
236k
      name = xmlParseStringName(ctxt, &cur);
3890
236k
            if (name != NULL) {
3891
236k
                nameOk = 1;
3892
236k
                xmlFree(name);
3893
236k
            }
3894
236k
            if ((nameOk == 0) || (*cur != ';')) {
3895
346
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3896
346
      "EntityValue: '%c' forbidden except for entities references\n",
3897
346
                            tmp);
3898
346
                goto error;
3899
346
      }
3900
236k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3901
236k
    (ctxt->inputNr == 1)) {
3902
11
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3903
11
                goto error;
3904
11
      }
3905
236k
      if (*cur == 0)
3906
0
          break;
3907
236k
  }
3908
11.1M
  cur++;
3909
11.1M
    }
3910
3911
    /*
3912
     * Then PEReference entities are substituted.
3913
     *
3914
     * NOTE: 4.4.7 Bypassed
3915
     * When a general entity reference appears in the EntityValue in
3916
     * an entity declaration, it is bypassed and left as is.
3917
     * so XML_SUBSTITUTE_REF is not set here.
3918
     */
3919
277k
    ++ctxt->depth;
3920
277k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3921
277k
                                     0, 0, 0, /* check */ 1);
3922
277k
    --ctxt->depth;
3923
3924
277k
    if (orig != NULL) {
3925
277k
        *orig = buf;
3926
277k
        buf = NULL;
3927
277k
    }
3928
3929
277k
error:
3930
277k
    if (buf != NULL)
3931
596
        xmlFree(buf);
3932
277k
    return(ret);
3933
277k
}
3934
3935
/**
3936
 * xmlParseAttValueComplex:
3937
 * @ctxt:  an XML parser context
3938
 * @len:   the resulting attribute len
3939
 * @normalize:  whether to apply the inner normalization
3940
 *
3941
 * parse a value for an attribute, this is the fallback function
3942
 * of xmlParseAttValue() when the attribute parsing requires handling
3943
 * of non-ASCII characters, or normalization compaction.
3944
 *
3945
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3946
 */
3947
static xmlChar *
3948
12.7k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3949
12.7k
    xmlChar limit = 0;
3950
12.7k
    xmlChar *buf = NULL;
3951
12.7k
    xmlChar *rep = NULL;
3952
12.7k
    size_t len = 0;
3953
12.7k
    size_t buf_size = 0;
3954
12.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3955
3.02k
                       XML_MAX_HUGE_LENGTH :
3956
12.7k
                       XML_MAX_TEXT_LENGTH;
3957
12.7k
    int c, l, in_space = 0;
3958
12.7k
    xmlChar *current = NULL;
3959
12.7k
    xmlEntityPtr ent;
3960
3961
12.7k
    if (NXT(0) == '"') {
3962
12.0k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3963
12.0k
  limit = '"';
3964
12.0k
        NEXT;
3965
12.0k
    } else if (NXT(0) == '\'') {
3966
774
  limit = '\'';
3967
774
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3968
774
        NEXT;
3969
774
    } else {
3970
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3971
0
  return(NULL);
3972
0
    }
3973
3974
    /*
3975
     * allocate a translation buffer.
3976
     */
3977
12.7k
    buf_size = XML_PARSER_BUFFER_SIZE;
3978
12.7k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3979
12.7k
    if (buf == NULL) goto mem_error;
3980
3981
    /*
3982
     * OK loop until we reach one of the ending char or a size limit.
3983
     */
3984
12.7k
    c = CUR_CHAR(l);
3985
648k
    while (((NXT(0) != limit) && /* checked */
3986
648k
            (IS_CHAR(c)) && (c != '<')) &&
3987
648k
            (ctxt->instate != XML_PARSER_EOF)) {
3988
635k
  if (c == '&') {
3989
56.5k
      in_space = 0;
3990
56.5k
      if (NXT(1) == '#') {
3991
981
    int val = xmlParseCharRef(ctxt);
3992
3993
981
    if (val == '&') {
3994
30
        if (ctxt->replaceEntities) {
3995
10
      if (len + 10 > buf_size) {
3996
0
          growBuffer(buf, 10);
3997
0
      }
3998
10
      buf[len++] = '&';
3999
20
        } else {
4000
      /*
4001
       * The reparsing will be done in xmlStringGetNodeList()
4002
       * called by the attribute() function in SAX.c
4003
       */
4004
20
      if (len + 10 > buf_size) {
4005
0
          growBuffer(buf, 10);
4006
0
      }
4007
20
      buf[len++] = '&';
4008
20
      buf[len++] = '#';
4009
20
      buf[len++] = '3';
4010
20
      buf[len++] = '8';
4011
20
      buf[len++] = ';';
4012
20
        }
4013
951
    } else if (val != 0) {
4014
706
        if (len + 10 > buf_size) {
4015
12
      growBuffer(buf, 10);
4016
12
        }
4017
706
        len += xmlCopyChar(0, &buf[len], val);
4018
706
    }
4019
55.5k
      } else {
4020
55.5k
    ent = xmlParseEntityRef(ctxt);
4021
55.5k
    if ((ent != NULL) &&
4022
55.5k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4023
855
        if (len + 10 > buf_size) {
4024
0
      growBuffer(buf, 10);
4025
0
        }
4026
855
        if ((ctxt->replaceEntities == 0) &&
4027
855
            (ent->content[0] == '&')) {
4028
187
      buf[len++] = '&';
4029
187
      buf[len++] = '#';
4030
187
      buf[len++] = '3';
4031
187
      buf[len++] = '8';
4032
187
      buf[len++] = ';';
4033
668
        } else {
4034
668
      buf[len++] = ent->content[0];
4035
668
        }
4036
54.6k
    } else if ((ent != NULL) &&
4037
54.6k
               (ctxt->replaceEntities != 0)) {
4038
51.8k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4039
51.8k
                        if (xmlParserEntityCheck(ctxt, ent->length))
4040
0
                            goto error;
4041
4042
51.8k
      ++ctxt->depth;
4043
51.8k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4044
51.8k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4045
51.8k
                                /* check */ 1);
4046
51.8k
      --ctxt->depth;
4047
51.8k
      if (rep != NULL) {
4048
51.6k
          current = rep;
4049
10.1M
          while (*current != 0) { /* non input consuming */
4050
10.1M
                                if ((*current == 0xD) || (*current == 0xA) ||
4051
10.1M
                                    (*current == 0x9)) {
4052
119
                                    buf[len++] = 0x20;
4053
119
                                    current++;
4054
119
                                } else
4055
10.1M
                                    buf[len++] = *current++;
4056
10.1M
        if (len + 10 > buf_size) {
4057
180
            growBuffer(buf, 10);
4058
180
        }
4059
10.1M
          }
4060
51.6k
          xmlFree(rep);
4061
51.6k
          rep = NULL;
4062
51.6k
      }
4063
51.8k
        } else {
4064
0
      if (len + 10 > buf_size) {
4065
0
          growBuffer(buf, 10);
4066
0
      }
4067
0
      if (ent->content != NULL)
4068
0
          buf[len++] = ent->content[0];
4069
0
        }
4070
51.8k
    } else if (ent != NULL) {
4071
1.87k
        int i = xmlStrlen(ent->name);
4072
1.87k
        const xmlChar *cur = ent->name;
4073
4074
        /*
4075
                     * We also check for recursion and amplification
4076
                     * when entities are not substituted. They're
4077
                     * often expanded later.
4078
         */
4079
1.87k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4080
1.87k
      (ent->content != NULL)) {
4081
1.86k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4082
631
                            unsigned long oldCopy = ctxt->sizeentcopy;
4083
4084
631
                            ctxt->sizeentcopy = ent->length;
4085
4086
631
                            ++ctxt->depth;
4087
631
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4088
631
                                    ent->content, ent->length,
4089
631
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4090
631
                                    /* check */ 1);
4091
631
                            --ctxt->depth;
4092
4093
                            /*
4094
                             * If we're parsing DTD content, the entity
4095
                             * might reference other entities which
4096
                             * weren't defined yet, so the check isn't
4097
                             * reliable.
4098
                             */
4099
631
                            if (ctxt->inSubset == 0) {
4100
512
                                ent->flags |= XML_ENT_CHECKED;
4101
512
                                ent->expandedSize = ctxt->sizeentcopy;
4102
512
                            }
4103
4104
631
                            if (rep != NULL) {
4105
551
                                xmlFree(rep);
4106
551
                                rep = NULL;
4107
551
                            } else {
4108
80
                                ent->content[0] = 0;
4109
80
                            }
4110
4111
631
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4112
20
                                goto error;
4113
1.23k
                        } else {
4114
1.23k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4115
0
                                goto error;
4116
1.23k
                        }
4117
1.86k
        }
4118
4119
        /*
4120
         * Just output the reference
4121
         */
4122
1.85k
        buf[len++] = '&';
4123
1.88k
        while (len + i + 10 > buf_size) {
4124
74
      growBuffer(buf, i + 10);
4125
74
        }
4126
22.2k
        for (;i > 0;i--)
4127
20.3k
      buf[len++] = *cur++;
4128
1.85k
        buf[len++] = ';';
4129
1.85k
    }
4130
55.5k
      }
4131
579k
  } else {
4132
579k
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4133
21.8k
          if ((len != 0) || (!normalize)) {
4134
21.6k
        if ((!normalize) || (!in_space)) {
4135
21.0k
      COPY_BUF(l,buf,len,0x20);
4136
21.1k
      while (len + 10 > buf_size) {
4137
188
          growBuffer(buf, 10);
4138
188
      }
4139
21.0k
        }
4140
21.6k
        in_space = 1;
4141
21.6k
    }
4142
557k
      } else {
4143
557k
          in_space = 0;
4144
557k
    COPY_BUF(l,buf,len,c);
4145
557k
    if (len + 10 > buf_size) {
4146
1.17k
        growBuffer(buf, 10);
4147
1.17k
    }
4148
557k
      }
4149
579k
      NEXTL(l);
4150
579k
  }
4151
635k
  GROW;
4152
635k
  c = CUR_CHAR(l);
4153
635k
        if (len > maxLength) {
4154
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4155
0
                           "AttValue length too long\n");
4156
0
            goto mem_error;
4157
0
        }
4158
635k
    }
4159
12.7k
    if (ctxt->instate == XML_PARSER_EOF)
4160
245
        goto error;
4161
4162
12.5k
    if ((in_space) && (normalize)) {
4163
404
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4164
199
    }
4165
12.5k
    buf[len] = 0;
4166
12.5k
    if (RAW == '<') {
4167
1.46k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4168
11.0k
    } else if (RAW != limit) {
4169
2.12k
  if ((c != 0) && (!IS_CHAR(c))) {
4170
680
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4171
680
         "invalid character in attribute value\n");
4172
1.44k
  } else {
4173
1.44k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4174
1.44k
         "AttValue: ' expected\n");
4175
1.44k
        }
4176
2.12k
    } else
4177
8.92k
  NEXT;
4178
4179
12.5k
    if (attlen != NULL) *attlen = len;
4180
12.5k
    return(buf);
4181
4182
0
mem_error:
4183
0
    xmlErrMemory(ctxt, NULL);
4184
265
error:
4185
265
    if (buf != NULL)
4186
265
        xmlFree(buf);
4187
265
    if (rep != NULL)
4188
0
        xmlFree(rep);
4189
265
    return(NULL);
4190
0
}
4191
4192
/**
4193
 * xmlParseAttValue:
4194
 * @ctxt:  an XML parser context
4195
 *
4196
 * DEPRECATED: Internal function, don't use.
4197
 *
4198
 * parse a value for an attribute
4199
 * Note: the parser won't do substitution of entities here, this
4200
 * will be handled later in xmlStringGetNodeList
4201
 *
4202
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4203
 *                   "'" ([^<&'] | Reference)* "'"
4204
 *
4205
 * 3.3.3 Attribute-Value Normalization:
4206
 * Before the value of an attribute is passed to the application or
4207
 * checked for validity, the XML processor must normalize it as follows:
4208
 * - a character reference is processed by appending the referenced
4209
 *   character to the attribute value
4210
 * - an entity reference is processed by recursively processing the
4211
 *   replacement text of the entity
4212
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4213
 *   appending #x20 to the normalized value, except that only a single
4214
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4215
 *   parsed entity or the literal entity value of an internal parsed entity
4216
 * - other characters are processed by appending them to the normalized value
4217
 * If the declared value is not CDATA, then the XML processor must further
4218
 * process the normalized attribute value by discarding any leading and
4219
 * trailing space (#x20) characters, and by replacing sequences of space
4220
 * (#x20) characters by a single space (#x20) character.
4221
 * All attributes for which no declaration has been read should be treated
4222
 * by a non-validating parser as if declared CDATA.
4223
 *
4224
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4225
 */
4226
4227
4228
xmlChar *
4229
291k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4230
291k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4231
291k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4232
291k
}
4233
4234
/**
4235
 * xmlParseSystemLiteral:
4236
 * @ctxt:  an XML parser context
4237
 *
4238
 * DEPRECATED: Internal function, don't use.
4239
 *
4240
 * parse an XML Literal
4241
 *
4242
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4243
 *
4244
 * Returns the SystemLiteral parsed or NULL
4245
 */
4246
4247
xmlChar *
4248
12.9k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4249
12.9k
    xmlChar *buf = NULL;
4250
12.9k
    int len = 0;
4251
12.9k
    int size = XML_PARSER_BUFFER_SIZE;
4252
12.9k
    int cur, l;
4253
12.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4254
1.62k
                    XML_MAX_TEXT_LENGTH :
4255
12.9k
                    XML_MAX_NAME_LENGTH;
4256
12.9k
    xmlChar stop;
4257
12.9k
    int state = ctxt->instate;
4258
12.9k
    int count = 0;
4259
4260
12.9k
    SHRINK;
4261
12.9k
    if (RAW == '"') {
4262
12.3k
        NEXT;
4263
12.3k
  stop = '"';
4264
12.3k
    } else if (RAW == '\'') {
4265
356
        NEXT;
4266
356
  stop = '\'';
4267
356
    } else {
4268
200
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4269
200
  return(NULL);
4270
200
    }
4271
4272
12.7k
    buf = (xmlChar *) xmlMallocAtomic(size);
4273
12.7k
    if (buf == NULL) {
4274
0
        xmlErrMemory(ctxt, NULL);
4275
0
  return(NULL);
4276
0
    }
4277
12.7k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4278
12.7k
    cur = CUR_CHAR(l);
4279
238k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4280
226k
  if (len + 5 >= size) {
4281
82
      xmlChar *tmp;
4282
4283
82
      size *= 2;
4284
82
      tmp = (xmlChar *) xmlRealloc(buf, size);
4285
82
      if (tmp == NULL) {
4286
0
          xmlFree(buf);
4287
0
    xmlErrMemory(ctxt, NULL);
4288
0
    ctxt->instate = (xmlParserInputState) state;
4289
0
    return(NULL);
4290
0
      }
4291
82
      buf = tmp;
4292
82
  }
4293
226k
  count++;
4294
226k
  if (count > 50) {
4295
344
      SHRINK;
4296
344
      GROW;
4297
344
      count = 0;
4298
344
            if (ctxt->instate == XML_PARSER_EOF) {
4299
0
          xmlFree(buf);
4300
0
    return(NULL);
4301
0
            }
4302
344
  }
4303
226k
  COPY_BUF(l,buf,len,cur);
4304
226k
  NEXTL(l);
4305
226k
  cur = CUR_CHAR(l);
4306
226k
  if (cur == 0) {
4307
122
      GROW;
4308
122
      SHRINK;
4309
122
      cur = CUR_CHAR(l);
4310
122
  }
4311
226k
        if (len > maxLength) {
4312
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4313
0
            xmlFree(buf);
4314
0
            ctxt->instate = (xmlParserInputState) state;
4315
0
            return(NULL);
4316
0
        }
4317
226k
    }
4318
12.7k
    buf[len] = 0;
4319
12.7k
    ctxt->instate = (xmlParserInputState) state;
4320
12.7k
    if (!IS_CHAR(cur)) {
4321
175
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4322
12.5k
    } else {
4323
12.5k
  NEXT;
4324
12.5k
    }
4325
12.7k
    return(buf);
4326
12.7k
}
4327
4328
/**
4329
 * xmlParsePubidLiteral:
4330
 * @ctxt:  an XML parser context
4331
 *
4332
 * DEPRECATED: Internal function, don't use.
4333
 *
4334
 * parse an XML public literal
4335
 *
4336
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4337
 *
4338
 * Returns the PubidLiteral parsed or NULL.
4339
 */
4340
4341
xmlChar *
4342
6.02k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4343
6.02k
    xmlChar *buf = NULL;
4344
6.02k
    int len = 0;
4345
6.02k
    int size = XML_PARSER_BUFFER_SIZE;
4346
6.02k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4347
370
                    XML_MAX_TEXT_LENGTH :
4348
6.02k
                    XML_MAX_NAME_LENGTH;
4349
6.02k
    xmlChar cur;
4350
6.02k
    xmlChar stop;
4351
6.02k
    int count = 0;
4352
6.02k
    xmlParserInputState oldstate = ctxt->instate;
4353
4354
6.02k
    SHRINK;
4355
6.02k
    if (RAW == '"') {
4356
5.81k
        NEXT;
4357
5.81k
  stop = '"';
4358
5.81k
    } else if (RAW == '\'') {
4359
172
        NEXT;
4360
172
  stop = '\'';
4361
172
    } else {
4362
42
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4363
42
  return(NULL);
4364
42
    }
4365
5.98k
    buf = (xmlChar *) xmlMallocAtomic(size);
4366
5.98k
    if (buf == NULL) {
4367
0
  xmlErrMemory(ctxt, NULL);
4368
0
  return(NULL);
4369
0
    }
4370
5.98k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4371
5.98k
    cur = CUR;
4372
221k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4373
215k
  if (len + 1 >= size) {
4374
3
      xmlChar *tmp;
4375
4376
3
      size *= 2;
4377
3
      tmp = (xmlChar *) xmlRealloc(buf, size);
4378
3
      if (tmp == NULL) {
4379
0
    xmlErrMemory(ctxt, NULL);
4380
0
    xmlFree(buf);
4381
0
    return(NULL);
4382
0
      }
4383
3
      buf = tmp;
4384
3
  }
4385
215k
  buf[len++] = cur;
4386
215k
  count++;
4387
215k
  if (count > 50) {
4388
25
      SHRINK;
4389
25
      GROW;
4390
25
      count = 0;
4391
25
            if (ctxt->instate == XML_PARSER_EOF) {
4392
0
    xmlFree(buf);
4393
0
    return(NULL);
4394
0
            }
4395
25
  }
4396
215k
  NEXT;
4397
215k
  cur = CUR;
4398
215k
  if (cur == 0) {
4399
22
      GROW;
4400
22
      SHRINK;
4401
22
      cur = CUR;
4402
22
  }
4403
215k
        if (len > maxLength) {
4404
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4405
0
            xmlFree(buf);
4406
0
            return(NULL);
4407
0
        }
4408
215k
    }
4409
5.98k
    buf[len] = 0;
4410
5.98k
    if (cur != stop) {
4411
75
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4412
5.91k
    } else {
4413
5.91k
  NEXT;
4414
5.91k
    }
4415
5.98k
    ctxt->instate = oldstate;
4416
5.98k
    return(buf);
4417
5.98k
}
4418
4419
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt);
4420
4421
/*
4422
 * used for the test in the inner loop of the char data testing
4423
 */
4424
/*
 * Lookup table indexed by byte value. A non-zero entry means the byte is
 * consumed by the fast scanning loop in xmlParseCharData(); a zero entry
 * stops that loop. Zero entries are the control bytes other than 0x09
 * (so LF and CR stop the scan), '&', '<', ']' and every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
4425
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4426
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4427
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4428
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4429
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4430
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4431
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4432
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4433
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4434
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4435
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4436
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4437
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4438
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4439
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4440
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4441
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4442
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4443
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4444
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4445
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4446
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4447
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4448
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4449
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4450
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4451
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4452
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4453
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4454
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4455
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4456
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4457
};
4458
4459
/**
4460
 * xmlParseCharData:
4461
 * @ctxt:  an XML parser context
4462
 * @cdata:  unused
4463
 *
4464
 * DEPRECATED: Internal function, don't use.
4465
 *
4466
 * Parse character data. Always makes progress if the first char isn't
4467
 * '<' or '&'.
4468
 *
4469
 * if we are within a CDATA section ']]>' marks an end of section.
4470
 *
4471
 * The right angle bracket (>) may be represented using the string "&gt;",
4472
 * and must, for compatibility, be escaped using "&gt;" or a character
4473
 * reference when it appears in the string "]]>" in content, when that
4474
 * string is not marking the end of a CDATA section.
4475
 *
4476
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4477
 */
4478
4479
/*
 * Implementation note: this is the fast path. It scans plain ASCII text
 * directly in the input buffer and passes pointers into that buffer to the
 * SAX callbacks. As soon as the next byte is not printable ASCII, tab or LF
 * it hands over to xmlParseCharDataComplex().
 */
void
4480
2.51M
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4481
2.51M
    const xmlChar *in;
4482
2.51M
    int nbchar = 0;
    /*
     * line/col remember the position after the last delivered run; they are
     * written back to ctxt->input before falling back to the complex parser.
     */
4483
2.51M
    int line = ctxt->input->line;
4484
2.51M
    int col = ctxt->input->col;
4485
2.51M
    int ccol;
4486
4487
2.51M
    SHRINK;
4488
2.51M
    GROW;
4489
    /*
4490
     * Accelerated common case where input doesn't need to be
4491
     * modified before passing it to the handler.
4492
     */
4493
2.51M
    in = ctxt->input->cur;
4494
2.57M
    do {
        /* Skip a leading run of spaces and line feeds, updating line/col. */
4495
3.68M
get_more_space:
4496
4.73M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4497
3.68M
        if (*in == 0xA) {
4498
1.17M
            do {
4499
1.17M
                ctxt->input->line++; ctxt->input->col = 1;
4500
1.17M
                in++;
4501
1.17M
            } while (*in == 0xA);
4502
1.10M
            goto get_more_space;
4503
1.10M
        }
        /*
         * Only spaces/line feeds were seen before the next '<': deliver
         * them (if any) and return with cur on the '<'.
         */
4504
2.57M
        if (*in == '<') {
4505
358k
            nbchar = in - ctxt->input->cur;
4506
358k
            if (nbchar > 0) {
4507
358k
                const xmlChar *tmp = ctxt->input->cur;
4508
358k
                ctxt->input->cur = in;
4509
                /*
                 * When the two handlers differ, areBlanks() (defined
                 * elsewhere) selects which one receives the text.
                 */
4510
358k
                if ((ctxt->sax != NULL) &&
4511
358k
                    (ctxt->sax->ignorableWhitespace !=
4512
358k
                     ctxt->sax->characters)) {
4513
152k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4514
124k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4515
124k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4516
124k
                                                   tmp, nbchar);
4517
124k
                    } else {
4518
28.3k
                        if (ctxt->sax->characters != NULL)
4519
28.3k
                            ctxt->sax->characters(ctxt->userData,
4520
28.3k
                                                  tmp, nbchar);
4521
28.3k
                        if (*ctxt->space == -1)
4522
9.72k
                            *ctxt->space = -2;
4523
28.3k
                    }
4524
206k
                } else if ((ctxt->sax != NULL) &&
4525
206k
                           (ctxt->sax->characters != NULL)) {
4526
206k
                    ctxt->sax->characters(ctxt->userData,
4527
206k
                                          tmp, nbchar);
4528
206k
                }
4529
358k
            }
4530
358k
            return;
4531
358k
        }
4532
        /*
         * Consume every byte accepted by test_char_data, counting columns
         * in a local and storing the result once the run ends.
         */
4533
2.55M
get_more:
4534
2.55M
        ccol = ctxt->input->col;
4535
73.2M
        while (test_char_data[*in]) {
4536
70.6M
            in++;
4537
70.6M
            ccol++;
4538
70.6M
        }
4539
2.55M
        ctxt->input->col = ccol;
4540
2.55M
        if (*in == 0xA) {
4541
335k
            do {
4542
335k
                ctxt->input->line++; ctxt->input->col = 1;
4543
335k
                in++;
4544
335k
            } while (*in == 0xA);
4545
327k
            goto get_more;
4546
327k
        }
        /*
         * A ']' is ordinary text unless it starts "]]>", which is reported
         * as an error; in that case cur is moved just past the first ']'
         * and the function returns without delivering the pending run.
         */
4547
2.23M
        if (*in == ']') {
4548
12.4k
            if ((in[1] == ']') && (in[2] == '>')) {
4549
666
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4550
666
                ctxt->input->cur = in + 1;
4551
666
                return;
4552
666
            }
4553
11.7k
            in++;
4554
11.7k
            ctxt->input->col++;
4555
11.7k
            goto get_more;
4556
12.4k
        }
        /* Deliver the run scanned so far, straight from the input buffer. */
4557
2.21M
        nbchar = in - ctxt->input->cur;
4558
2.21M
        if (nbchar > 0) {
4559
2.10M
            if ((ctxt->sax != NULL) &&
4560
2.10M
                (ctxt->sax->ignorableWhitespace !=
4561
2.10M
                 ctxt->sax->characters) &&
4562
2.10M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4563
593k
                const xmlChar *tmp = ctxt->input->cur;
4564
593k
                ctxt->input->cur = in;
4565
4566
593k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4567
378k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4568
378k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4569
378k
                                                       tmp, nbchar);
4570
378k
                } else {
4571
215k
                    if (ctxt->sax->characters != NULL)
4572
215k
                        ctxt->sax->characters(ctxt->userData,
4573
215k
                                              tmp, nbchar);
4574
215k
                    if (*ctxt->space == -1)
4575
117k
                        *ctxt->space = -2;
4576
215k
                }
4577
593k
                line = ctxt->input->line;
4578
593k
                col = ctxt->input->col;
4579
1.51M
            } else if (ctxt->sax != NULL) {
4580
1.51M
                if (ctxt->sax->characters != NULL)
4581
1.51M
                    ctxt->sax->characters(ctxt->userData,
4582
1.51M
                                          ctxt->input->cur, nbchar);
4583
1.51M
                line = ctxt->input->line;
4584
1.51M
                col = ctxt->input->col;
4585
1.51M
            }
4586
2.10M
        }
4587
2.21M
        ctxt->input->cur = in;
        /*
         * CR immediately followed by LF: cur is moved onto the LF, so the
         * CR is left out of the next run, and scanning resumes after the
         * LF. A lone CR is left in place (in is stepped back).
         */
4588
2.21M
        if (*in == 0xD) {
4589
63.8k
            in++;
4590
63.8k
            if (*in == 0xA) {
4591
62.9k
                ctxt->input->cur = in;
4592
62.9k
                in++;
4593
62.9k
                ctxt->input->line++; ctxt->input->col = 1;
4594
62.9k
                continue; /* while */
4595
62.9k
            }
4596
937
            in--;
4597
937
        }
4598
2.15M
        if (*in == '<') {
4599
1.99M
            return;
4600
1.99M
        }
4601
160k
        if (*in == '&') {
4602
79.9k
            return;
4603
79.9k
        }
        /*
         * Neither '<' nor '&': run the SHRINK/GROW input macros (defined
         * elsewhere) and re-read cur, which they may have changed.
         */
4604
80.6k
        SHRINK;
4605
80.6k
        GROW;
4606
80.6k
        if (ctxt->instate == XML_PARSER_EOF)
4607
0
            return;
4608
80.6k
        in = ctxt->input->cur;
        /* Stay on the fast path only for printable ASCII, tab or LF. */
4609
143k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4610
143k
             (*in == 0x09) || (*in == 0x0a));
    /* Restore the saved position and let the slow path take over. */
4611
81.1k
    ctxt->input->line = line;
4612
81.1k
    ctxt->input->col = col;
4613
81.1k
    xmlParseCharDataComplex(ctxt);
4614
81.1k
}
4615
4616
/**
4617
 * xmlParseCharDataComplex:
4618
 * @ctxt:  an XML parser context
4619
 * @cdata:  int indicating whether we are within a CDATA section
4620
 *
4621
 * Always makes progress if the first char isn't '<' or '&'.
4622
 *
4623
 * Parse a CharData section. This is the fallback function
4624
 * of xmlParseCharData() when the parsing requires handling
4625
 * of non-ASCII characters.
4626
 */
4627
/*
 * Implementation note: this is the slow path. Characters are decoded one
 * at a time with CUR_CHAR and copied into a local buffer, which is flushed
 * to the SAX callbacks when it reaches XML_PARSER_BIG_BUFFER_SIZE and once
 * more when the loop ends. The function takes no cdata argument.
 */
static void
4628
81.1k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt) {
4629
81.1k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4630
81.1k
    int nbchar = 0;
4631
81.1k
    int cur, l;
4632
81.1k
    int count = 0;
4633
4634
81.1k
    SHRINK;
4635
81.1k
    GROW;
4636
81.1k
    cur = CUR_CHAR(l);
4637
1.76M
    while ((cur != '<') && /* checked */
4638
1.76M
           (cur != '&') &&
4639
1.76M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
  /*
   * "]]>" in content is reported as an error, but the loop carries on
   * and the characters are still copied into the buffer.
   */
4640
1.68M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4641
228
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4642
228
  }
4643
1.68M
  COPY_BUF(l,buf,nbchar,cur);
4644
  /* move current position before possible calling of ctxt->sax->characters */
4645
1.68M
  NEXTL(l);
4646
1.68M
  cur = CUR_CHAR(l);
4647
1.68M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4648
6.03k
      buf[nbchar] = 0;
4649
4650
      /*
4651
       * OK the segment is to be consumed as chars.
4652
       */
4653
6.03k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4654
3.59k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4655
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4656
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4657
0
                                     buf, nbchar);
4658
3.59k
    } else {
4659
3.59k
        if (ctxt->sax->characters != NULL)
4660
3.59k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4661
3.59k
        if ((ctxt->sax->characters !=
4662
3.59k
             ctxt->sax->ignorableWhitespace) &&
4663
3.59k
      (*ctxt->space == -1))
4664
226
      *ctxt->space = -2;
4665
3.59k
    }
4666
3.59k
      }
4667
6.03k
      nbchar = 0;
4668
            /* something really bad happened in the SAX callback */
4669
6.03k
            if (ctxt->instate != XML_PARSER_CONTENT)
4670
0
                return;
4671
6.03k
  }
  /*
   * Once count exceeds 50, run the SHRINK/GROW input macros (defined
   * elsewhere), reset count, and stop if the parser reached EOF state.
   */
4672
1.68M
  count++;
4673
1.68M
  if (count > 50) {
4674
29.1k
      SHRINK;
4675
29.1k
      GROW;
4676
29.1k
      count = 0;
4677
29.1k
            if (ctxt->instate == XML_PARSER_EOF)
4678
0
    return;
4679
29.1k
  }
4680
1.68M
    }
    /* Flush whatever is left in the buffer after the loop. */
4681
81.1k
    if (nbchar != 0) {
4682
16.6k
        buf[nbchar] = 0;
4683
  /*
4684
   * OK the segment is to be consumed as chars.
4685
   */
4686
16.6k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4687
14.3k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4688
40
    if (ctxt->sax->ignorableWhitespace != NULL)
4689
40
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4690
14.3k
      } else {
4691
14.3k
    if (ctxt->sax->characters != NULL)
4692
14.3k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4693
14.3k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4694
14.3k
        (*ctxt->space == -1))
4695
2.89k
        *ctxt->space = -2;
4696
14.3k
      }
4697
14.3k
  }
4698
16.6k
    }
    /*
     * The loop ended on a character failing IS_CHAR while input remains,
     * i.e. not on '<', '&' or the end of the buffer.
     */
4699
81.1k
    if ((ctxt->input->cur < ctxt->input->end) && (!IS_CHAR(cur))) {
4700
  /* Generate the error and skip the offending character */
4701
54.8k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4702
54.8k
                          "PCDATA invalid Char value %d\n",
4703
54.8k
                    cur ? cur : CUR);
4704
54.8k
  NEXT;
4705
54.8k
    }
4706
81.1k
}
4707
4708
/**
4709
 * xmlParseExternalID:
4710
 * @ctxt:  an XML parser context
4711
 * @publicID:  a xmlChar** receiving PubidLiteral
4712
 * @strict: indicate whether we should restrict parsing to only
4713
 *          production [75], see NOTE below
4714
 *
4715
 * DEPRECATED: Internal function, don't use.
4716
 *
4717
 * Parse an External ID or a Public ID
4718
 *
4719
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4720
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4721
 *
4722
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4723
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4724
 *
4725
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4726
 *
4727
 * Returns the SystemLiteral. In the second (PUBLIC) case publicID also
4727
 *                receives the PubidLiteral; if strict is off
4728
 *                it is possible to return NULL and have publicID set.
4730
 */
4731
4732
/*
 * Implementation note: syntax errors (missing space, missing literal) are
 * reported through xmlFatalErr*() but parsing continues; the function
 * returns whatever system literal it managed to parse, which may be NULL.
 */
xmlChar *
4733
23.8k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4734
23.8k
    xmlChar *URI = NULL;
4735
4736
23.8k
    SHRINK;
4737
    /* *publicID only receives a value in the 'PUBLIC' branch below. */
4738
23.8k
    *publicID = NULL;
4739
23.8k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4740
6.97k
        SKIP(6);
4741
6.97k
  if (SKIP_BLANKS == 0) {
4742
29
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4743
29
                     "Space required after 'SYSTEM'\n");
4744
29
  }
4745
6.97k
  URI = xmlParseSystemLiteral(ctxt);
4746
6.97k
  if (URI == NULL) {
4747
34
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4748
34
        }
4749
16.8k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4750
6.02k
        SKIP(6);
4751
6.02k
  if (SKIP_BLANKS == 0) {
4752
30
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4753
30
        "Space required after 'PUBLIC'\n");
4754
30
  }
4755
6.02k
  *publicID = xmlParsePubidLiteral(ctxt);
4756
6.02k
  if (*publicID == NULL) {
4757
42
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4758
42
  }
4759
6.02k
  if (strict) {
4760
      /*
4761
       * We don't handle [83] so "S SystemLiteral" is required.
4762
       */
4763
5.96k
      if (SKIP_BLANKS == 0) {
4764
152
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4765
152
      "Space required after the Public Identifier\n");
4766
152
      }
4767
5.96k
  } else {
4768
      /*
4769
       * We handle [83] so we return immediately, if
4770
       * "S SystemLiteral" is not detected. We skip blanks if no
4771
             * system literal was found, but this is harmless since we must
4772
             * be at the end of a NotationDecl.
4773
       */
4774
62
      if (SKIP_BLANKS == 0) return(NULL);
4775
6
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4776
6
  }
4777
5.96k
  URI = xmlParseSystemLiteral(ctxt);
4778
5.96k
  if (URI == NULL) {
4779
166
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4780
166
        }
4781
5.96k
    }
    /*
     * URI is still NULL here when neither keyword matched or when
     * xmlParseSystemLiteral() returned NULL.
     */
4782
23.7k
    return(URI);
4783
23.8k
}
4784
4785
/**
4786
 * xmlParseCommentComplex:
4787
 * @ctxt:  an XML parser context
4788
 * @buf:  the already parsed part of the buffer
4789
 * @len:  number of bytes in the buffer
4790
 * @size:  allocated size of the buffer
4791
 *
4792
 * Skip an XML (SGML) comment <!-- .... -->
4793
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4794
 *  must not occur within comments. "
4795
 * This is the slow routine in case the accelerator for ascii didn't work
4796
 *
4797
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4798
 */
4799
/*
 * Implementation note: q, r and cur form a three-character sliding window
 * (q is the oldest). Each iteration appends q to buf and shifts the window,
 * so when the loop stops on "-->" the two hyphens are not part of buf.
 * buf is freed on every path that returns after it has been allocated; the
 * caller must not use it afterwards.
 */
static void
4800
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4801
4.19k
                       size_t len, size_t size) {
4802
4.19k
    int q, ql;
4803
4.19k
    int r, rl;
4804
4.19k
    int cur, l;
4805
4.19k
    size_t count = 0;
4806
4.19k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4807
940
                       XML_MAX_HUGE_LENGTH :
4808
4.19k
                       XML_MAX_TEXT_LENGTH;
4809
4.19k
    int inputid;
4810
4811
4.19k
    inputid = ctxt->input->id;
4812
    /* The caller collected nothing yet: start with a fresh buffer. */
4813
4.19k
    if (buf == NULL) {
4814
66
        len = 0;
4815
66
  size = XML_PARSER_BUFFER_SIZE;
4816
66
  buf = (xmlChar *) xmlMallocAtomic(size);
4817
66
  if (buf == NULL) {
4818
0
      xmlErrMemory(ctxt, NULL);
4819
0
      return;
4820
0
  }
4821
66
    }
4822
4.19k
    GROW; /* Assure there's enough input data */
    /*
     * Prime the window: q and r are the first two characters. A zero
     * value jumps to not_terminated; any other value failing IS_CHAR is
     * reported as an invalid character.
     */
4823
4.19k
    q = CUR_CHAR(ql);
4824
4.19k
    if (q == 0)
4825
403
        goto not_terminated;
4826
3.78k
    if (!IS_CHAR(q)) {
4827
161
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4828
161
                          "xmlParseComment: invalid xmlChar value %d\n",
4829
161
                    q);
4830
161
  xmlFree (buf);
4831
161
  return;
4832
161
    }
4833
3.62k
    NEXTL(ql);
4834
3.62k
    r = CUR_CHAR(rl);
4835
3.62k
    if (r == 0)
4836
29
        goto not_terminated;
4837
3.59k
    if (!IS_CHAR(r)) {
4838
22
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4839
22
                          "xmlParseComment: invalid xmlChar value %d\n",
4840
22
                    r);
4841
22
  xmlFree (buf);
4842
22
  return;
4843
22
    }
4844
3.57k
    NEXTL(rl);
4845
3.57k
    cur = CUR_CHAR(l);
4846
3.57k
    if (cur == 0)
4847
24
        goto not_terminated;
    /* Loop until the window reads "-->" or cur is not a valid Char. */
4848
1.11M
    while (IS_CHAR(cur) && /* checked */
4849
1.11M
           ((cur != '>') ||
4850
1.11M
      (r != '-') || (q != '-'))) {
  /* "--" not followed by '>' is an error, but parsing goes on. */
4851
1.11M
  if ((r == '-') && (q == '-')) {
4852
412
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4853
412
  }
  /*
   * Double the buffer when fewer than 5 bytes of headroom remain
   * (presumably room for one COPY_BUF write plus the terminator --
   * TODO confirm against the macro).
   */
4854
1.11M
  if (len + 5 >= size) {
4855
2.50k
      xmlChar *new_buf;
4856
2.50k
            size_t new_size;
4857
4858
2.50k
      new_size = size * 2;
4859
2.50k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4860
2.50k
      if (new_buf == NULL) {
4861
0
    xmlFree (buf);
4862
0
    xmlErrMemory(ctxt, NULL);
4863
0
    return;
4864
0
      }
4865
2.50k
      buf = new_buf;
4866
2.50k
            size = new_size;
4867
2.50k
  }
  /* Append the oldest character and slide the window by one. */
4868
1.11M
  COPY_BUF(ql,buf,len,q);
4869
1.11M
  q = r;
4870
1.11M
  ql = rl;
4871
1.11M
  r = cur;
4872
1.11M
  rl = l;
4873
4874
1.11M
  count++;
4875
1.11M
  if (count > 50) {
4876
20.8k
      SHRINK;
4877
20.8k
      GROW;
4878
20.8k
      count = 0;
4879
20.8k
            if (ctxt->instate == XML_PARSER_EOF) {
4880
0
    xmlFree(buf);
4881
0
    return;
4882
0
            }
4883
20.8k
  }
4884
1.11M
  NEXTL(l);
4885
1.11M
  cur = CUR_CHAR(l);
  /* A zero value gets one retry after running SHRINK/GROW. */
4886
1.11M
  if (cur == 0) {
4887
250
      SHRINK;
4888
250
      GROW;
4889
250
      cur = CUR_CHAR(l);
4890
250
  }
4891
4892
1.11M
        if (len > maxLength) {
4893
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4894
0
                         "Comment too big found", NULL);
4895
0
            xmlFree (buf);
4896
0
            return;
4897
0
        }
4898
1.11M
    }
    /*
     * Work out why the loop stopped: zero value (unterminated), invalid
     * character, or a proper "-->" terminator.
     */
4899
3.55k
    buf[len] = 0;
4900
3.55k
    if (cur == 0) {
4901
250
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4902
250
                       "Comment not terminated \n<!--%.50s\n", buf);
4903
3.30k
    } else if (!IS_CHAR(cur)) {
4904
86
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4905
86
                          "xmlParseComment: invalid xmlChar value %d\n",
4906
86
                    cur);
4907
3.21k
    } else {
4908
3.21k
  if (inputid != ctxt->input->id) {
4909
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4910
0
               "Comment doesn't start and stop in the same"
4911
0
                           " entity\n");
4912
0
  }
4913
3.21k
        NEXT;
4914
3.21k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4915
3.21k
      (!ctxt->disableSAX))
4916
2.13k
      ctxt->sax->comment(ctxt->userData, buf);
4917
3.21k
    }
4918
3.55k
    xmlFree(buf);
4919
3.55k
    return;
4920
456
not_terminated:
4921
456
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4922
456
       "Comment not terminated\n", NULL);
4923
456
    xmlFree(buf);
4924
456
    return;
4925
3.55k
}
4926
4927
/**
4928
 * xmlParseComment:
4929
 * @ctxt:  an XML parser context
4930
 *
4931
 * DEPRECATED: Internal function, don't use.
4932
 *
4933
 * Parse an XML (SGML) comment. Always consumes '<!'.
4934
 *
4935
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4936
 *  must not occur within comments. "
4937
 *
4938
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4939
 */
4940
/*
 * Implementation note: this is the fast path. Runs of printable ASCII, tab
 * and LF are copied from the input buffer into buf (only when a SAX comment
 * handler is set). When the next byte is anything else, the data collected
 * so far is handed to xmlParseCommentComplex(), which frees buf.
 */
void
4941
1.40M
xmlParseComment(xmlParserCtxtPtr ctxt) {
4942
1.40M
    xmlChar *buf = NULL;
4943
1.40M
    size_t size = XML_PARSER_BUFFER_SIZE;
4944
1.40M
    size_t len = 0;
4945
1.40M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4946
9.50k
                       XML_MAX_HUGE_LENGTH :
4947
1.40M
                       XML_MAX_TEXT_LENGTH;
4948
1.40M
    xmlParserInputState state;
4949
1.40M
    const xmlChar *in;
4950
1.40M
    size_t nbchar = 0;
4951
1.40M
    int ccol;
4952
1.40M
    int inputid;
4953
4954
    /*
4955
     * Check that there is a comment right here.
4956
     */
4957
1.40M
    if ((RAW != '<') || (NXT(1) != '!'))
4958
0
        return;
    /* "<!" is consumed even when "--" does not follow. */
4959
1.40M
    SKIP(2);
4960
1.40M
    if ((RAW != '-') || (NXT(1) != '-'))
4961
34
        return;
    /* Remember the caller's state so it can be restored on exit. */
4962
1.40M
    state = ctxt->instate;
4963
1.40M
    ctxt->instate = XML_PARSER_COMMENT;
4964
1.40M
    inputid = ctxt->input->id;
4965
1.40M
    SKIP(2);
4966
1.40M
    SHRINK;
4967
1.40M
    GROW;
4968
4969
    /*
4970
     * Accelerated common case where input doesn't need to be
4971
     * modified before passing it to the handler.
4972
     */
4973
1.40M
    in = ctxt->input->cur;
4974
1.40M
    do {
4975
1.40M
  if (*in == 0xA) {
4976
84.4k
      do {
4977
84.4k
    ctxt->input->line++; ctxt->input->col = 1;
4978
84.4k
    in++;
4979
84.4k
      } while (*in == 0xA);
4980
84.2k
  }
  /* Scan printable ASCII other than '-', plus tab, counting columns. */
4981
3.34M
get_more:
4982
3.34M
        ccol = ctxt->input->col;
4983
79.4M
  while (((*in > '-') && (*in <= 0x7F)) ||
4984
79.4M
         ((*in >= 0x20) && (*in < '-')) ||
4985
79.4M
         (*in == 0x09)) {
4986
76.0M
        in++;
4987
76.0M
        ccol++;
4988
76.0M
  }
4989
3.34M
  ctxt->input->col = ccol;
4990
3.34M
  if (*in == 0xA) {
4991
823k
      do {
4992
823k
    ctxt->input->line++; ctxt->input->col = 1;
4993
823k
    in++;
4994
823k
      } while (*in == 0xA);
4995
796k
      goto get_more;
4996
796k
  }
4997
2.54M
  nbchar = in - ctxt->input->cur;
4998
  /*
4999
   * save current set of data
5000
   */
5001
2.54M
  if (nbchar > 0) {
5002
1.59M
      if ((ctxt->sax != NULL) &&
5003
1.59M
    (ctxt->sax->comment != NULL)) {
5004
1.59M
    if (buf == NULL) {
        /*
         * If the run is directly followed by "--", allocate just
         * enough for it; otherwise leave room for more data.
         */
5005
454k
        if ((*in == '-') && (in[1] == '-'))
5006
240k
            size = nbchar + 1;
5007
213k
        else
5008
213k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
5009
454k
        buf = (xmlChar *) xmlMallocAtomic(size);
5010
454k
        if (buf == NULL) {
5011
0
            xmlErrMemory(ctxt, NULL);
5012
0
      ctxt->instate = state;
5013
0
      return;
5014
0
        }
5015
454k
        len = 0;
5016
1.13M
    } else if (len + nbchar + 1 >= size) {
5017
147k
        xmlChar *new_buf;
5018
147k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5019
147k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
5020
147k
        if (new_buf == NULL) {
5021
0
            xmlFree (buf);
5022
0
      xmlErrMemory(ctxt, NULL);
5023
0
      ctxt->instate = state;
5024
0
      return;
5025
0
        }
5026
147k
        buf = new_buf;
5027
147k
    }
5028
1.59M
    memcpy(&buf[len], ctxt->input->cur, nbchar);
5029
1.59M
    len += nbchar;
5030
1.59M
    buf[len] = 0;
5031
1.59M
      }
5032
1.59M
  }
  /*
   * NOTE(review): unlike the allocation-failure paths above, this
   * return does not restore ctxt->instate from `state` -- confirm
   * whether that is intended.
   */
5033
2.54M
        if (len > maxLength) {
5034
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5035
0
                         "Comment too big found", NULL);
5036
0
            xmlFree (buf);
5037
0
            return;
5038
0
        }
5039
2.54M
  ctxt->input->cur = in;
  /*
   * NOTE(review): this LF check looks unreachable -- `in` has not moved
   * since the scan above, which already consumes any LF and jumps back
   * to get_more.
   */
5040
2.54M
  if (*in == 0xA) {
5041
0
      in++;
5042
0
      ctxt->input->line++; ctxt->input->col = 1;
5043
0
  }
  /*
   * CR immediately followed by LF: cur is moved onto the LF, so the CR
   * is left out of the next copied run. A lone CR is left in place.
   */
5044
2.54M
  if (*in == 0xD) {
5045
401k
      in++;
5046
401k
      if (*in == 0xA) {
5047
401k
    ctxt->input->cur = in;
5048
401k
    in++;
5049
401k
    ctxt->input->line++; ctxt->input->col = 1;
5050
401k
    goto get_more;
5051
401k
      }
5052
55
      in--;
5053
55
  }
5054
2.14M
  SHRINK;
5055
2.14M
  GROW;
5056
2.14M
        if (ctxt->instate == XML_PARSER_EOF) {
5057
0
            xmlFree(buf);
5058
0
            return;
5059
0
        }
5060
2.14M
  in = ctxt->input->cur;
  /*
   * Hyphen handling: "-->" ends the comment and reports it to the SAX
   * comment handler; "--" followed by anything else is an error but
   * parsing continues; a single '-' is ordinary comment text.
   */
5061
2.14M
  if (*in == '-') {
5062
2.14M
      if (in[1] == '-') {
5063
1.40M
          if (in[2] == '>') {
5064
1.40M
        if (ctxt->input->id != inputid) {
5065
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5066
0
                     "comment doesn't start and stop in the"
5067
0
                                       " same entity\n");
5068
0
        }
5069
1.40M
        SKIP(3);
5070
1.40M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5071
1.40M
            (!ctxt->disableSAX)) {
5072
1.36M
      if (buf != NULL)
5073
412k
          ctxt->sax->comment(ctxt->userData, buf);
5074
952k
      else
5075
952k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5076
1.36M
        }
5077
1.40M
        if (buf != NULL)
5078
450k
            xmlFree(buf);
5079
1.40M
        if (ctxt->instate != XML_PARSER_EOF)
5080
1.40M
      ctxt->instate = state;
5081
1.40M
        return;
5082
1.40M
    }
5083
695
    if (buf != NULL) {
5084
669
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5085
669
                          "Double hyphen within comment: "
5086
669
                                      "<!--%.50s\n",
5087
669
              buf);
5088
669
    } else
5089
26
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5090
26
                          "Double hyphen within comment\n", NULL);
5091
695
                if (ctxt->instate == XML_PARSER_EOF) {
5092
0
                    xmlFree(buf);
5093
0
                    return;
5094
0
                }
5095
695
    in++;
5096
695
    ctxt->input->col++;
5097
695
      }
      /* Step over the hyphen and keep scanning on the fast path. */
5098
737k
      in++;
5099
737k
      ctxt->input->col++;
5100
737k
      goto get_more;
5101
2.14M
  }
5102
2.14M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /* Not plain ASCII: hand the collected data to the slow path. */
5103
4.19k
    xmlParseCommentComplex(ctxt, buf, len, size);
5104
4.19k
    ctxt->instate = state;
5105
4.19k
    return;
5106
1.40M
}
5107
5108
5109
/**
5110
 * xmlParsePITarget:
5111
 * @ctxt:  an XML parser context
5112
 *
5113
 * DEPRECATED: Internal function, don't use.
5114
 *
5115
 * parse the name of a PI
5116
 *
5117
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5118
 *
5119
 * Returns the PITarget name or NULL
5120
 */
5121
5122
/*
 * Implementation note: the name parsed by xmlParseName() is returned on
 * every path (possibly NULL); the checks below only emit diagnostics.
 */
const xmlChar *
5123
26.1k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5124
26.1k
    const xmlChar *name;
5125
5126
26.1k
    name = xmlParseName(ctxt);
    /* Names starting with "xml" in any letter case get extra checks. */
5127
26.1k
    if ((name != NULL) &&
5128
26.1k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5129
26.1k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5130
26.1k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5131
4.37k
  int i;
  /*
   * Exactly "xml" (lower case) is a misplaced XML declaration; any
   * other three-letter case variant is a reserved name. Both are
   * fatal errors, and the name is still returned.
   */
5132
4.37k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5133
4.37k
      (name[2] == 'l') && (name[3] == 0)) {
5134
626
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5135
626
     "XML declaration allowed only at the start of the document\n");
5136
626
      return(name);
5137
3.75k
  } else if (name[3] == 0) {
5138
46
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5139
46
      return(name);
5140
46
  }
  /*
   * Longer names found in xmlW3CPIs (a NULL-terminated array defined
   * elsewhere) are accepted silently; any other gets a warning.
   */
5141
4.72k
  for (i = 0;;i++) {
5142
4.72k
      if (xmlW3CPIs[i] == NULL) break;
5143
4.21k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5144
3.19k
          return(name);
5145
4.21k
  }
5146
509
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5147
509
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5148
509
          NULL, NULL);
5149
509
    }
    /* A colon in the target is reported as a namespace error. */
5150
22.2k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5151
273
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5152
273
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5153
273
    }
5154
22.2k
    return(name);
5155
26.1k
}
5156
5157
#ifdef LIBXML_CATALOG_ENABLED
5158
/**
5159
 * xmlParseCatalogPI:
5160
 * @ctxt:  an XML parser context
5161
 * @catalog:  the PI value string
5162
 *
5163
 * parse an XML Catalog Processing Instruction.
5164
 *
5165
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5166
 *
5167
 * Occurs only if allowed by the user and if happening in the Misc
5168
 * part of the document before any doctype information
5169
 * This will add the given catalog to the parsing context in order
5170
 * to be used if there is a resolution need further down in the document
5171
 */
5172
5173
/*
 * Implementation note: the PI value must have the form
 *     catalog = "URL"      (single or double quotes)
 * with optional blanks around each token and nothing but blanks after the
 * closing quote. On success the URL is passed to xmlCatalogAddLocal().
 */
static void
5174
3
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5175
3
    xmlChar *URL = NULL;
5176
3
    const xmlChar *tmp, *base;
5177
3
    xmlChar marker;
5178
5179
3
    tmp = catalog;
5180
3
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* Presumably xmlStrncmp() is non-zero on mismatch -- TODO confirm. */
5181
3
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5182
3
  goto error;
5183
0
    tmp += 7;
5184
0
    while (IS_BLANK_CH(*tmp)) tmp++;
    /*
     * NOTE(review): a missing '=' returns silently, whereas every other
     * syntax problem goes through the warning at `error` -- confirm
     * whether that is intended.
     */
5185
0
    if (*tmp != '=') {
5186
0
  return;
5187
0
    }
5188
0
    tmp++;
5189
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5190
0
    marker = *tmp;
5191
0
    if ((marker != '\'') && (marker != '"'))
5192
0
  goto error;
5193
0
    tmp++;
5194
0
    base = tmp;
    /* Scan to the matching closing quote; end of string is an error. */
5195
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5196
0
    if (*tmp == 0)
5197
0
  goto error;
5198
0
    URL = xmlStrndup(base, tmp - base);
5199
0
    tmp++;
    /* Only blanks may follow the closing quote. */
5200
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5201
0
    if (*tmp != 0)
5202
0
  goto error;
5203
5204
0
    if (URL != NULL) {
5205
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5206
0
  xmlFree(URL);
5207
0
    }
5208
0
    return;
5209
    /* Syntax error: warn with the whole PI value and release the URL. */
5210
3
error:
5211
3
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5212
3
            "Catalog PI syntax error: %s\n",
5213
3
      catalog, NULL);
5214
3
    if (URL != NULL)
5215
0
  xmlFree(URL);
5216
3
}
5217
#endif
5218
5219
/**
5220
 * xmlParsePI:
5221
 * @ctxt:  an XML parser context
5222
 *
5223
 * DEPRECATED: Internal function, don't use.
5224
 *
5225
 * parse an XML Processing Instruction.
5226
 *
5227
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5228
 *
5229
 * The processing is transferred to SAX once parsed.
5230
 */
5231
5232
void
5233
26.1k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5234
26.1k
    xmlChar *buf = NULL;
5235
26.1k
    size_t len = 0;
5236
26.1k
    size_t size = XML_PARSER_BUFFER_SIZE;
5237
26.1k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5238
1.22k
                       XML_MAX_HUGE_LENGTH :
5239
26.1k
                       XML_MAX_TEXT_LENGTH;
5240
26.1k
    int cur, l;
5241
26.1k
    const xmlChar *target;
5242
26.1k
    xmlParserInputState state;
5243
26.1k
    int count = 0;
5244
5245
26.1k
    if ((RAW == '<') && (NXT(1) == '?')) {
5246
26.1k
  int inputid = ctxt->input->id;
5247
26.1k
  state = ctxt->instate;
5248
26.1k
        ctxt->instate = XML_PARSER_PI;
5249
  /*
5250
   * this is a Processing Instruction.
5251
   */
5252
26.1k
  SKIP(2);
5253
26.1k
  SHRINK;
5254
5255
  /*
5256
   * Parse the target name and check for special support like
5257
   * namespace.
5258
   */
5259
26.1k
        target = xmlParsePITarget(ctxt);
5260
26.1k
  if (target != NULL) {
5261
25.8k
      if ((RAW == '?') && (NXT(1) == '>')) {
5262
284
    if (inputid != ctxt->input->id) {
5263
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5264
0
                             "PI declaration doesn't start and stop in"
5265
0
                                   " the same entity\n");
5266
0
    }
5267
284
    SKIP(2);
5268
5269
    /*
5270
     * SAX: PI detected.
5271
     */
5272
284
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5273
284
        (ctxt->sax->processingInstruction != NULL))
5274
253
        ctxt->sax->processingInstruction(ctxt->userData,
5275
253
                                         target, NULL);
5276
284
    if (ctxt->instate != XML_PARSER_EOF)
5277
284
        ctxt->instate = state;
5278
284
    return;
5279
284
      }
5280
25.6k
      buf = (xmlChar *) xmlMallocAtomic(size);
5281
25.6k
      if (buf == NULL) {
5282
0
    xmlErrMemory(ctxt, NULL);
5283
0
    ctxt->instate = state;
5284
0
    return;
5285
0
      }
5286
25.6k
      if (SKIP_BLANKS == 0) {
5287
1.29k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5288
1.29k
        "ParsePI: PI %s space expected\n", target);
5289
1.29k
      }
5290
25.6k
      cur = CUR_CHAR(l);
5291
1.63M
      while (IS_CHAR(cur) && /* checked */
5292
1.63M
       ((cur != '?') || (NXT(1) != '>'))) {
5293
1.61M
    if (len + 5 >= size) {
5294
1.03k
        xmlChar *tmp;
5295
1.03k
                    size_t new_size = size * 2;
5296
1.03k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5297
1.03k
        if (tmp == NULL) {
5298
0
      xmlErrMemory(ctxt, NULL);
5299
0
      xmlFree(buf);
5300
0
      ctxt->instate = state;
5301
0
      return;
5302
0
        }
5303
1.03k
        buf = tmp;
5304
1.03k
                    size = new_size;
5305
1.03k
    }
5306
1.61M
    count++;
5307
1.61M
    if (count > 50) {
5308
25.7k
        SHRINK;
5309
25.7k
        GROW;
5310
25.7k
                    if (ctxt->instate == XML_PARSER_EOF) {
5311
0
                        xmlFree(buf);
5312
0
                        return;
5313
0
                    }
5314
25.7k
        count = 0;
5315
25.7k
    }
5316
1.61M
    COPY_BUF(l,buf,len,cur);
5317
1.61M
    NEXTL(l);
5318
1.61M
    cur = CUR_CHAR(l);
5319
1.61M
    if (cur == 0) {
5320
565
        SHRINK;
5321
565
        GROW;
5322
565
        cur = CUR_CHAR(l);
5323
565
    }
5324
1.61M
                if (len > maxLength) {
5325
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5326
0
                                      "PI %s too big found", target);
5327
0
                    xmlFree(buf);
5328
0
                    ctxt->instate = state;
5329
0
                    return;
5330
0
                }
5331
1.61M
      }
5332
25.6k
      buf[len] = 0;
5333
25.6k
      if (cur != '?') {
5334
991
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5335
991
          "ParsePI: PI %s never end ...\n", target);
5336
24.6k
      } else {
5337
24.6k
    if (inputid != ctxt->input->id) {
5338
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5339
0
                             "PI declaration doesn't start and stop in"
5340
0
                                   " the same entity\n");
5341
0
    }
5342
24.6k
    SKIP(2);
5343
5344
24.6k
#ifdef LIBXML_CATALOG_ENABLED
5345
24.6k
    if (((state == XML_PARSER_MISC) ||
5346
24.6k
               (state == XML_PARSER_START)) &&
5347
24.6k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5348
3
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5349
3
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5350
3
      (allow == XML_CATA_ALLOW_ALL))
5351
3
      xmlParseCatalogPI(ctxt, buf);
5352
3
    }
5353
24.6k
#endif
5354
5355
5356
    /*
5357
     * SAX: PI detected.
5358
     */
5359
24.6k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
24.6k
        (ctxt->sax->processingInstruction != NULL))
5361
21.9k
        ctxt->sax->processingInstruction(ctxt->userData,
5362
21.9k
                                         target, buf);
5363
24.6k
      }
5364
25.6k
      xmlFree(buf);
5365
25.6k
  } else {
5366
217
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5367
217
  }
5368
25.8k
  if (ctxt->instate != XML_PARSER_EOF)
5369
25.8k
      ctxt->instate = state;
5370
25.8k
    }
5371
26.1k
}
5372
5373
/**
5374
 * xmlParseNotationDecl:
5375
 * @ctxt:  an XML parser context
5376
 *
5377
 * DEPRECATED: Internal function, don't use.
5378
 *
5379
 * Parse a notation declaration. Always consumes '<!'.
5380
 *
5381
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5382
 *
5383
 * Hence there is actually 3 choices:
5384
 *     'PUBLIC' S PubidLiteral
5385
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5386
 * and 'SYSTEM' S SystemLiteral
5387
 *
5388
 * See the NOTE on xmlParseExternalID().
5389
 */
5390
5391
void
5392
256
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5393
256
    const xmlChar *name;
5394
256
    xmlChar *Pubid;
5395
256
    xmlChar *Systemid;
5396
5397
256
    if ((CUR != '<') || (NXT(1) != '!'))
5398
0
        return;
5399
256
    SKIP(2);
5400
5401
256
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5402
171
  int inputid = ctxt->input->id;
5403
171
  SHRINK;
5404
171
  SKIP(8);
5405
171
  if (SKIP_BLANKS == 0) {
5406
23
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5407
23
         "Space required after '<!NOTATION'\n");
5408
23
      return;
5409
23
  }
5410
5411
148
        name = xmlParseName(ctxt);
5412
148
  if (name == NULL) {
5413
10
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5414
10
      return;
5415
10
  }
5416
138
  if (xmlStrchr(name, ':') != NULL) {
5417
5
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5418
5
         "colons are forbidden from notation names '%s'\n",
5419
5
         name, NULL, NULL);
5420
5
  }
5421
138
  if (SKIP_BLANKS == 0) {
5422
14
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5423
14
         "Space required after the NOTATION name'\n");
5424
14
      return;
5425
14
  }
5426
5427
  /*
5428
   * Parse the IDs.
5429
   */
5430
124
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5431
124
  SKIP_BLANKS;
5432
5433
124
  if (RAW == '>') {
5434
77
      if (inputid != ctxt->input->id) {
5435
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5436
0
                         "Notation declaration doesn't start and stop"
5437
0
                               " in the same entity\n");
5438
0
      }
5439
77
      NEXT;
5440
77
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5441
77
    (ctxt->sax->notationDecl != NULL))
5442
46
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5443
77
  } else {
5444
47
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5445
47
  }
5446
124
  if (Systemid != NULL) xmlFree(Systemid);
5447
124
  if (Pubid != NULL) xmlFree(Pubid);
5448
124
    }
5449
256
}
5450
5451
/**
5452
 * xmlParseEntityDecl:
5453
 * @ctxt:  an XML parser context
5454
 *
5455
 * DEPRECATED: Internal function, don't use.
5456
 *
5457
 * Parse an entity declaration. Always consumes '<!'.
5458
 *
5459
 * [70] EntityDecl ::= GEDecl | PEDecl
5460
 *
5461
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5462
 *
5463
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5464
 *
5465
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5466
 *
5467
 * [74] PEDef ::= EntityValue | ExternalID
5468
 *
5469
 * [76] NDataDecl ::= S 'NDATA' S Name
5470
 *
5471
 * [ VC: Notation Declared ]
5472
 * The Name must match the declared name of a notation.
5473
 */
5474
5475
void
5476
280k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5477
280k
    const xmlChar *name = NULL;
5478
280k
    xmlChar *value = NULL;
5479
280k
    xmlChar *URI = NULL, *literal = NULL;
5480
280k
    const xmlChar *ndata = NULL;
5481
280k
    int isParameter = 0;
5482
280k
    xmlChar *orig = NULL;
5483
5484
280k
    if ((CUR != '<') || (NXT(1) != '!'))
5485
0
        return;
5486
280k
    SKIP(2);
5487
5488
    /* GROW; done in the caller */
5489
280k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5490
280k
  int inputid = ctxt->input->id;
5491
280k
  SHRINK;
5492
280k
  SKIP(6);
5493
280k
  if (SKIP_BLANKS == 0) {
5494
280
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5495
280
         "Space required after '<!ENTITY'\n");
5496
280
  }
5497
5498
280k
  if (RAW == '%') {
5499
185k
      NEXT;
5500
185k
      if (SKIP_BLANKS == 0) {
5501
24
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5502
24
             "Space required after '%%'\n");
5503
24
      }
5504
185k
      isParameter = 1;
5505
185k
  }
5506
5507
280k
        name = xmlParseName(ctxt);
5508
280k
  if (name == NULL) {
5509
244
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5510
244
                     "xmlParseEntityDecl: no name\n");
5511
244
            return;
5512
244
  }
5513
279k
  if (xmlStrchr(name, ':') != NULL) {
5514
22
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5515
22
         "colons are forbidden from entities names '%s'\n",
5516
22
         name, NULL, NULL);
5517
22
  }
5518
279k
  if (SKIP_BLANKS == 0) {
5519
341
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5520
341
         "Space required after the entity name\n");
5521
341
  }
5522
5523
279k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5524
  /*
5525
   * handle the various case of definitions...
5526
   */
5527
279k
  if (isParameter) {
5528
185k
      if ((RAW == '"') || (RAW == '\'')) {
5529
184k
          value = xmlParseEntityValue(ctxt, &orig);
5530
184k
    if (value) {
5531
184k
        if ((ctxt->sax != NULL) &&
5532
184k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5533
177k
      ctxt->sax->entityDecl(ctxt->userData, name,
5534
177k
                        XML_INTERNAL_PARAMETER_ENTITY,
5535
177k
            NULL, NULL, value);
5536
184k
    }
5537
184k
      } else {
5538
692
          URI = xmlParseExternalID(ctxt, &literal, 1);
5539
692
    if ((URI == NULL) && (literal == NULL)) {
5540
87
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5541
87
    }
5542
692
    if (URI) {
5543
602
        xmlURIPtr uri;
5544
5545
602
        uri = xmlParseURI((const char *) URI);
5546
602
        if (uri == NULL) {
5547
4
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5548
4
             "Invalid URI: %s\n", URI);
5549
      /*
5550
       * This really ought to be a well formedness error
5551
       * but the XML Core WG decided otherwise c.f. issue
5552
       * E26 of the XML erratas.
5553
       */
5554
598
        } else {
5555
598
      if (uri->fragment != NULL) {
5556
          /*
5557
           * Okay this is foolish to block those but not
5558
           * invalid URIs.
5559
           */
5560
0
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5561
598
      } else {
5562
598
          if ((ctxt->sax != NULL) &&
5563
598
        (!ctxt->disableSAX) &&
5564
598
        (ctxt->sax->entityDecl != NULL))
5565
578
        ctxt->sax->entityDecl(ctxt->userData, name,
5566
578
              XML_EXTERNAL_PARAMETER_ENTITY,
5567
578
              literal, URI, NULL);
5568
598
      }
5569
598
      xmlFreeURI(uri);
5570
598
        }
5571
602
    }
5572
692
      }
5573
185k
  } else {
5574
94.4k
      if ((RAW == '"') || (RAW == '\'')) {
5575
92.9k
          value = xmlParseEntityValue(ctxt, &orig);
5576
92.9k
    if ((ctxt->sax != NULL) &&
5577
92.9k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578
86.4k
        ctxt->sax->entityDecl(ctxt->userData, name,
5579
86.4k
        XML_INTERNAL_GENERAL_ENTITY,
5580
86.4k
        NULL, NULL, value);
5581
    /*
5582
     * For expat compatibility in SAX mode.
5583
     */
5584
92.9k
    if ((ctxt->myDoc == NULL) ||
5585
92.9k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5586
748
        if (ctxt->myDoc == NULL) {
5587
176
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5588
176
      if (ctxt->myDoc == NULL) {
5589
0
          xmlErrMemory(ctxt, "New Doc failed");
5590
0
          return;
5591
0
      }
5592
176
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5593
176
        }
5594
748
        if (ctxt->myDoc->intSubset == NULL)
5595
176
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5596
176
              BAD_CAST "fake", NULL, NULL);
5597
5598
748
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5599
748
                    NULL, NULL, value);
5600
748
    }
5601
92.9k
      } else {
5602
1.52k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5603
1.52k
    if ((URI == NULL) && (literal == NULL)) {
5604
344
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5605
344
    }
5606
1.52k
    if (URI) {
5607
1.17k
        xmlURIPtr uri;
5608
5609
1.17k
        uri = xmlParseURI((const char *)URI);
5610
1.17k
        if (uri == NULL) {
5611
71
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5612
71
             "Invalid URI: %s\n", URI);
5613
      /*
5614
       * This really ought to be a well formedness error
5615
       * but the XML Core WG decided otherwise c.f. issue
5616
       * E26 of the XML erratas.
5617
       */
5618
1.10k
        } else {
5619
1.10k
      if (uri->fragment != NULL) {
5620
          /*
5621
           * Okay this is foolish to block those but not
5622
           * invalid URIs.
5623
           */
5624
15
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5625
15
      }
5626
1.10k
      xmlFreeURI(uri);
5627
1.10k
        }
5628
1.17k
    }
5629
1.52k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5630
359
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5631
359
           "Space required before 'NDATA'\n");
5632
359
    }
5633
1.52k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5634
63
        SKIP(5);
5635
63
        if (SKIP_BLANKS == 0) {
5636
8
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5637
8
               "Space required after 'NDATA'\n");
5638
8
        }
5639
63
        ndata = xmlParseName(ctxt);
5640
63
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5641
63
            (ctxt->sax->unparsedEntityDecl != NULL))
5642
50
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5643
50
            literal, URI, ndata);
5644
1.46k
    } else {
5645
1.46k
        if ((ctxt->sax != NULL) &&
5646
1.46k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5647
1.10k
      ctxt->sax->entityDecl(ctxt->userData, name,
5648
1.10k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5649
1.10k
            literal, URI, NULL);
5650
        /*
5651
         * For expat compatibility in SAX mode.
5652
         * assuming the entity replacement was asked for
5653
         */
5654
1.46k
        if ((ctxt->replaceEntities != 0) &&
5655
1.46k
      ((ctxt->myDoc == NULL) ||
5656
860
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5657
34
      if (ctxt->myDoc == NULL) {
5658
21
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5659
21
          if (ctxt->myDoc == NULL) {
5660
0
              xmlErrMemory(ctxt, "New Doc failed");
5661
0
        return;
5662
0
          }
5663
21
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5664
21
      }
5665
5666
34
      if (ctxt->myDoc->intSubset == NULL)
5667
21
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5668
21
            BAD_CAST "fake", NULL, NULL);
5669
34
      xmlSAX2EntityDecl(ctxt, name,
5670
34
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5671
34
                  literal, URI, NULL);
5672
34
        }
5673
1.46k
    }
5674
1.52k
      }
5675
94.4k
  }
5676
279k
  if (ctxt->instate == XML_PARSER_EOF)
5677
12
      goto done;
5678
279k
  SKIP_BLANKS;
5679
279k
  if (RAW != '>') {
5680
995
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5681
995
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5682
995
      xmlHaltParser(ctxt);
5683
278k
  } else {
5684
278k
      if (inputid != ctxt->input->id) {
5685
3
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5686
3
                         "Entity declaration doesn't start and stop in"
5687
3
                               " the same entity\n");
5688
3
      }
5689
278k
      NEXT;
5690
278k
  }
5691
279k
  if (orig != NULL) {
5692
      /*
5693
       * Ugly mechanism to save the raw entity value.
5694
       */
5695
276k
      xmlEntityPtr cur = NULL;
5696
5697
276k
      if (isParameter) {
5698
184k
          if ((ctxt->sax != NULL) &&
5699
184k
        (ctxt->sax->getParameterEntity != NULL))
5700
184k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5701
184k
      } else {
5702
92.4k
          if ((ctxt->sax != NULL) &&
5703
92.4k
        (ctxt->sax->getEntity != NULL))
5704
92.4k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5705
92.4k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5706
5.18k
        cur = xmlSAX2GetEntity(ctxt, name);
5707
5.18k
    }
5708
92.4k
      }
5709
276k
            if ((cur != NULL) && (cur->orig == NULL)) {
5710
264k
    cur->orig = orig;
5711
264k
                orig = NULL;
5712
264k
      }
5713
276k
  }
5714
5715
279k
done:
5716
279k
  if (value != NULL) xmlFree(value);
5717
279k
  if (URI != NULL) xmlFree(URI);
5718
279k
  if (literal != NULL) xmlFree(literal);
5719
279k
        if (orig != NULL) xmlFree(orig);
5720
279k
    }
5721
280k
}
5722
5723
/**
5724
 * xmlParseDefaultDecl:
5725
 * @ctxt:  an XML parser context
5726
 * @value:  Receive a possible fixed default value for the attribute
5727
 *
5728
 * DEPRECATED: Internal function, don't use.
5729
 *
5730
 * Parse an attribute default declaration
5731
 *
5732
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5733
 *
5734
 * [ VC: Required Attribute ]
5735
 * if the default declaration is the keyword #REQUIRED, then the
5736
 * attribute must be specified for all elements of the type in the
5737
 * attribute-list declaration.
5738
 *
5739
 * [ VC: Attribute Default Legal ]
5740
 * The declared default value must meet the lexical constraints of
5741
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5742
 *
5743
 * [ VC: Fixed Attribute Default ]
5744
 * if an attribute has a default value declared with the #FIXED
5745
 * keyword, instances of that attribute must match the default value.
5746
 *
5747
 * [ WFC: No < in Attribute Values ]
5748
 * handled in xmlParseAttValue()
5749
 *
5750
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5751
 *          or XML_ATTRIBUTE_FIXED.
5752
 */
5753
5754
int
5755
1.08M
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5756
1.08M
    int val;
5757
1.08M
    xmlChar *ret;
5758
5759
1.08M
    *value = NULL;
5760
1.08M
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5761
84.9k
  SKIP(9);
5762
84.9k
  return(XML_ATTRIBUTE_REQUIRED);
5763
84.9k
    }
5764
996k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5765
911k
  SKIP(8);
5766
911k
  return(XML_ATTRIBUTE_IMPLIED);
5767
911k
    }
5768
84.6k
    val = XML_ATTRIBUTE_NONE;
5769
84.6k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5770
61.8k
  SKIP(6);
5771
61.8k
  val = XML_ATTRIBUTE_FIXED;
5772
61.8k
  if (SKIP_BLANKS == 0) {
5773
17
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5774
17
         "Space required after '#FIXED'\n");
5775
17
  }
5776
61.8k
    }
5777
84.6k
    ret = xmlParseAttValue(ctxt);
5778
84.6k
    ctxt->instate = XML_PARSER_DTD;
5779
84.6k
    if (ret == NULL) {
5780
363
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5781
363
           "Attribute default value declaration error\n");
5782
363
    } else
5783
84.2k
        *value = ret;
5784
84.6k
    return(val);
5785
996k
}
5786
5787
/**
5788
 * xmlParseNotationType:
5789
 * @ctxt:  an XML parser context
5790
 *
5791
 * DEPRECATED: Internal function, don't use.
5792
 *
5793
 * parse an Notation attribute type.
5794
 *
5795
 * Note: the leading 'NOTATION' S part has already being parsed...
5796
 *
5797
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5798
 *
5799
 * [ VC: Notation Attributes ]
5800
 * Values of this type must match one of the notation names included
5801
 * in the declaration; all notation names in the declaration must be declared.
5802
 *
5803
 * Returns: the notation attribute tree built while parsing
5804
 */
5805
5806
xmlEnumerationPtr
5807
44
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5808
44
    const xmlChar *name;
5809
44
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5810
5811
44
    if (RAW != '(') {
5812
3
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5813
3
  return(NULL);
5814
3
    }
5815
41
    SHRINK;
5816
41
    do {
5817
41
        NEXT;
5818
41
  SKIP_BLANKS;
5819
41
        name = xmlParseName(ctxt);
5820
41
  if (name == NULL) {
5821
0
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5822
0
         "Name expected in NOTATION declaration\n");
5823
0
            xmlFreeEnumeration(ret);
5824
0
      return(NULL);
5825
0
  }
5826
41
  tmp = ret;
5827
41
  while (tmp != NULL) {
5828
0
      if (xmlStrEqual(name, tmp->name)) {
5829
0
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5830
0
    "standalone: attribute notation value token %s duplicated\n",
5831
0
         name, NULL);
5832
0
    if (!xmlDictOwns(ctxt->dict, name))
5833
0
        xmlFree((xmlChar *) name);
5834
0
    break;
5835
0
      }
5836
0
      tmp = tmp->next;
5837
0
  }
5838
41
  if (tmp == NULL) {
5839
41
      cur = xmlCreateEnumeration(name);
5840
41
      if (cur == NULL) {
5841
0
                xmlFreeEnumeration(ret);
5842
0
                return(NULL);
5843
0
            }
5844
41
      if (last == NULL) ret = last = cur;
5845
0
      else {
5846
0
    last->next = cur;
5847
0
    last = cur;
5848
0
      }
5849
41
  }
5850
41
  SKIP_BLANKS;
5851
41
    } while (RAW == '|');
5852
41
    if (RAW != ')') {
5853
0
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5854
0
        xmlFreeEnumeration(ret);
5855
0
  return(NULL);
5856
0
    }
5857
41
    NEXT;
5858
41
    return(ret);
5859
41
}
5860
5861
/**
5862
 * xmlParseEnumerationType:
5863
 * @ctxt:  an XML parser context
5864
 *
5865
 * DEPRECATED: Internal function, don't use.
5866
 *
5867
 * parse an Enumeration attribute type.
5868
 *
5869
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5870
 *
5871
 * [ VC: Enumeration ]
5872
 * Values of this type must match one of the Nmtoken tokens in
5873
 * the declaration
5874
 *
5875
 * Returns: the enumeration attribute tree built while parsing
5876
 */
5877
5878
xmlEnumerationPtr
5879
86.0k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5880
86.0k
    xmlChar *name;
5881
86.0k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5882
5883
86.0k
    if (RAW != '(') {
5884
361
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5885
361
  return(NULL);
5886
361
    }
5887
85.6k
    SHRINK;
5888
311k
    do {
5889
311k
        NEXT;
5890
311k
  SKIP_BLANKS;
5891
311k
        name = xmlParseNmtoken(ctxt);
5892
311k
  if (name == NULL) {
5893
26
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5894
26
      return(ret);
5895
26
  }
5896
311k
  tmp = ret;
5897
860k
  while (tmp != NULL) {
5898
548k
      if (xmlStrEqual(name, tmp->name)) {
5899
3
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5900
3
    "standalone: attribute enumeration value token %s duplicated\n",
5901
3
         name, NULL);
5902
3
    if (!xmlDictOwns(ctxt->dict, name))
5903
3
        xmlFree(name);
5904
3
    break;
5905
3
      }
5906
548k
      tmp = tmp->next;
5907
548k
  }
5908
311k
  if (tmp == NULL) {
5909
311k
      cur = xmlCreateEnumeration(name);
5910
311k
      if (!xmlDictOwns(ctxt->dict, name))
5911
311k
    xmlFree(name);
5912
311k
      if (cur == NULL) {
5913
0
                xmlFreeEnumeration(ret);
5914
0
                return(NULL);
5915
0
            }
5916
311k
      if (last == NULL) ret = last = cur;
5917
225k
      else {
5918
225k
    last->next = cur;
5919
225k
    last = cur;
5920
225k
      }
5921
311k
  }
5922
311k
  SKIP_BLANKS;
5923
311k
    } while (RAW == '|');
5924
85.6k
    if (RAW != ')') {
5925
68
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5926
68
  return(ret);
5927
68
    }
5928
85.5k
    NEXT;
5929
85.5k
    return(ret);
5930
85.6k
}
5931
5932
/**
5933
 * xmlParseEnumeratedType:
5934
 * @ctxt:  an XML parser context
5935
 * @tree:  the enumeration tree built while parsing
5936
 *
5937
 * DEPRECATED: Internal function, don't use.
5938
 *
5939
 * parse an Enumerated attribute type.
5940
 *
5941
 * [57] EnumeratedType ::= NotationType | Enumeration
5942
 *
5943
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5944
 *
5945
 *
5946
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5947
 */
5948
5949
int
5950
86.0k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5951
86.0k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5952
47
  SKIP(8);
5953
47
  if (SKIP_BLANKS == 0) {
5954
3
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5955
3
         "Space required after 'NOTATION'\n");
5956
3
      return(0);
5957
3
  }
5958
44
  *tree = xmlParseNotationType(ctxt);
5959
44
  if (*tree == NULL) return(0);
5960
41
  return(XML_ATTRIBUTE_NOTATION);
5961
44
    }
5962
86.0k
    *tree = xmlParseEnumerationType(ctxt);
5963
86.0k
    if (*tree == NULL) return(0);
5964
85.6k
    return(XML_ATTRIBUTE_ENUMERATION);
5965
86.0k
}
5966
5967
/**
5968
 * xmlParseAttributeType:
5969
 * @ctxt:  an XML parser context
5970
 * @tree:  the enumeration tree built while parsing
5971
 *
5972
 * DEPRECATED: Internal function, don't use.
5973
 *
5974
 * parse the Attribute list def for an element
5975
 *
5976
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5977
 *
5978
 * [55] StringType ::= 'CDATA'
5979
 *
5980
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5981
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5982
 *
5983
 * Validity constraints for attribute values syntax are checked in
5984
 * xmlValidateAttributeValue()
5985
 *
5986
 * [ VC: ID ]
5987
 * Values of type ID must match the Name production. A name must not
5988
 * appear more than once in an XML document as a value of this type;
5989
 * i.e., ID values must uniquely identify the elements which bear them.
5990
 *
5991
 * [ VC: One ID per Element Type ]
5992
 * No element type may have more than one ID attribute specified.
5993
 *
5994
 * [ VC: ID Attribute Default ]
5995
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5996
 *
5997
 * [ VC: IDREF ]
5998
 * Values of type IDREF must match the Name production, and values
5999
 * of type IDREFS must match Names; each IDREF Name must match the value
6000
 * of an ID attribute on some element in the XML document; i.e. IDREF
6001
 * values must match the value of some ID attribute.
6002
 *
6003
 * [ VC: Entity Name ]
6004
 * Values of type ENTITY must match the Name production, values
6005
 * of type ENTITIES must match Names; each Entity Name must match the
6006
 * name of an unparsed entity declared in the DTD.
6007
 *
6008
 * [ VC: Name Token ]
6009
 * Values of type NMTOKEN must match the Nmtoken production; values
6010
 * of type NMTOKENS must match Nmtokens.
6011
 *
6012
 * Returns the attribute type
6013
 */
6014
int
6015
1.08M
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6016
1.08M
    SHRINK;
6017
1.08M
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6018
361k
  SKIP(5);
6019
361k
  return(XML_ATTRIBUTE_CDATA);
6020
720k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6021
4.55k
  SKIP(6);
6022
4.55k
  return(XML_ATTRIBUTE_IDREFS);
6023
716k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6024
19.7k
  SKIP(5);
6025
19.7k
  return(XML_ATTRIBUTE_IDREF);
6026
696k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
6027
314k
        SKIP(2);
6028
314k
  return(XML_ATTRIBUTE_ID);
6029
381k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6030
1.33k
  SKIP(6);
6031
1.33k
  return(XML_ATTRIBUTE_ENTITY);
6032
380k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6033
29
  SKIP(8);
6034
29
  return(XML_ATTRIBUTE_ENTITIES);
6035
380k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6036
20.3k
  SKIP(8);
6037
20.3k
  return(XML_ATTRIBUTE_NMTOKENS);
6038
359k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6039
273k
  SKIP(7);
6040
273k
  return(XML_ATTRIBUTE_NMTOKEN);
6041
273k
     }
6042
86.0k
     return(xmlParseEnumeratedType(ctxt, tree));
6043
1.08M
}
6044
6045
/**
6046
 * xmlParseAttributeListDecl:
6047
 * @ctxt:  an XML parser context
6048
 *
6049
 * DEPRECATED: Internal function, don't use.
6050
 *
6051
 * Parse an attribute list declaration for an element. Always consumes '<!'.
6052
 *
6053
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6054
 *
6055
 * [53] AttDef ::= S Name S AttType S DefaultDecl
6056
 *
6057
 */
6058
void
6059
347k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6060
347k
    const xmlChar *elemName;
6061
347k
    const xmlChar *attrName;
6062
347k
    xmlEnumerationPtr tree;
6063
6064
347k
    if ((CUR != '<') || (NXT(1) != '!'))
6065
0
        return;
6066
347k
    SKIP(2);
6067
6068
347k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6069
347k
  int inputid = ctxt->input->id;
6070
6071
347k
  SKIP(7);
6072
347k
  if (SKIP_BLANKS == 0) {
6073
102
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6074
102
                     "Space required after '<!ATTLIST'\n");
6075
102
  }
6076
347k
        elemName = xmlParseName(ctxt);
6077
347k
  if (elemName == NULL) {
6078
102
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6079
102
         "ATTLIST: no name for Element\n");
6080
102
      return;
6081
102
  }
6082
347k
  SKIP_BLANKS;
6083
347k
  GROW;
6084
1.42M
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6085
1.08M
      int type;
6086
1.08M
      int def;
6087
1.08M
      xmlChar *defaultValue = NULL;
6088
6089
1.08M
      GROW;
6090
1.08M
            tree = NULL;
6091
1.08M
      attrName = xmlParseName(ctxt);
6092
1.08M
      if (attrName == NULL) {
6093
244
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6094
244
             "ATTLIST: no name for Attribute\n");
6095
244
    break;
6096
244
      }
6097
1.08M
      GROW;
6098
1.08M
      if (SKIP_BLANKS == 0) {
6099
137
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6100
137
            "Space required after the attribute name\n");
6101
137
    break;
6102
137
      }
6103
6104
1.08M
      type = xmlParseAttributeType(ctxt, &tree);
6105
1.08M
      if (type <= 0) {
6106
387
          break;
6107
387
      }
6108
6109
1.08M
      GROW;
6110
1.08M
      if (SKIP_BLANKS == 0) {
6111
182
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6112
182
             "Space required after the attribute type\n");
6113
182
          if (tree != NULL)
6114
78
        xmlFreeEnumeration(tree);
6115
182
    break;
6116
182
      }
6117
6118
1.08M
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
6119
1.08M
      if (def <= 0) {
6120
0
                if (defaultValue != NULL)
6121
0
        xmlFree(defaultValue);
6122
0
          if (tree != NULL)
6123
0
        xmlFreeEnumeration(tree);
6124
0
          break;
6125
0
      }
6126
1.08M
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6127
27.0k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
6128
6129
1.08M
      GROW;
6130
1.08M
            if (RAW != '>') {
6131
1.04M
    if (SKIP_BLANKS == 0) {
6132
594
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6133
594
      "Space required after the attribute default value\n");
6134
594
        if (defaultValue != NULL)
6135
213
      xmlFree(defaultValue);
6136
594
        if (tree != NULL)
6137
35
      xmlFreeEnumeration(tree);
6138
594
        break;
6139
594
    }
6140
1.04M
      }
6141
1.08M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6142
1.08M
    (ctxt->sax->attributeDecl != NULL))
6143
1.01M
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6144
1.01M
                          type, def, defaultValue, tree);
6145
62.8k
      else if (tree != NULL)
6146
4.40k
    xmlFreeEnumeration(tree);
6147
6148
1.08M
      if ((ctxt->sax2) && (defaultValue != NULL) &&
6149
1.08M
          (def != XML_ATTRIBUTE_IMPLIED) &&
6150
1.08M
    (def != XML_ATTRIBUTE_REQUIRED)) {
6151
65.4k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6152
65.4k
      }
6153
1.08M
      if (ctxt->sax2) {
6154
883k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6155
883k
      }
6156
1.08M
      if (defaultValue != NULL)
6157
84.0k
          xmlFree(defaultValue);
6158
1.08M
      GROW;
6159
1.08M
  }
6160
347k
  if (RAW == '>') {
6161
345k
      if (inputid != ctxt->input->id) {
6162
8
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6163
8
                               "Attribute list declaration doesn't start and"
6164
8
                               " stop in the same entity\n");
6165
8
      }
6166
345k
      NEXT;
6167
345k
  }
6168
347k
    }
6169
347k
}
6170
6171
/**
6172
 * xmlParseElementMixedContentDecl:
6173
 * @ctxt:  an XML parser context
6174
 * @inputchk:  the input used for the current entity, needed for boundary checks
6175
 *
6176
 * DEPRECATED: Internal function, don't use.
6177
 *
6178
 * parse the declaration for a Mixed Element content
6179
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180
 *
6181
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6182
 *                '(' S? '#PCDATA' S? ')'
6183
 *
6184
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6185
 *
6186
 * [ VC: No Duplicate Types ]
6187
 * The same name must not appear more than once in a single
6188
 * mixed-content declaration.
6189
 *
6190
 * returns: the list of the xmlElementContentPtr describing the element choices
6191
 */
6192
xmlElementContentPtr
6193
168k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6194
168k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
6195
168k
    const xmlChar *elem = NULL;
6196
6197
168k
    GROW;
6198
168k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6199
168k
  SKIP(7);
6200
168k
  SKIP_BLANKS;
6201
168k
  SHRINK;
6202
168k
  if (RAW == ')') {
6203
90.8k
      if (ctxt->input->id != inputchk) {
6204
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6205
0
                               "Element content declaration doesn't start and"
6206
0
                               " stop in the same entity\n");
6207
0
      }
6208
90.8k
      NEXT;
6209
90.8k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6210
90.8k
      if (ret == NULL)
6211
0
          return(NULL);
6212
90.8k
      if (RAW == '*') {
6213
3
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6214
3
    NEXT;
6215
3
      }
6216
90.8k
      return(ret);
6217
90.8k
  }
6218
77.8k
  if ((RAW == '(') || (RAW == '|')) {
6219
77.8k
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6220
77.8k
      if (ret == NULL) return(NULL);
6221
77.8k
  }
6222
979k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6223
901k
      NEXT;
6224
901k
      if (elem == NULL) {
6225
77.8k
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6226
77.8k
    if (ret == NULL) {
6227
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
6228
0
                    return(NULL);
6229
0
                }
6230
77.8k
    ret->c1 = cur;
6231
77.8k
    if (cur != NULL)
6232
77.8k
        cur->parent = ret;
6233
77.8k
    cur = ret;
6234
823k
      } else {
6235
823k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6236
823k
    if (n == NULL) {
6237
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6238
0
                    return(NULL);
6239
0
                }
6240
823k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6241
823k
    if (n->c1 != NULL)
6242
823k
        n->c1->parent = n;
6243
823k
          cur->c2 = n;
6244
823k
    if (n != NULL)
6245
823k
        n->parent = cur;
6246
823k
    cur = n;
6247
823k
      }
6248
901k
      SKIP_BLANKS;
6249
901k
      elem = xmlParseName(ctxt);
6250
901k
      if (elem == NULL) {
6251
57
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6252
57
      "xmlParseElementMixedContentDecl : Name expected\n");
6253
57
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6254
57
    return(NULL);
6255
57
      }
6256
901k
      SKIP_BLANKS;
6257
901k
      GROW;
6258
901k
  }
6259
77.8k
  if ((RAW == ')') && (NXT(1) == '*')) {
6260
77.7k
      if (elem != NULL) {
6261
77.7k
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6262
77.7k
                                   XML_ELEMENT_CONTENT_ELEMENT);
6263
77.7k
    if (cur->c2 != NULL)
6264
77.7k
        cur->c2->parent = cur;
6265
77.7k
            }
6266
77.7k
            if (ret != NULL)
6267
77.7k
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
6268
77.7k
      if (ctxt->input->id != inputchk) {
6269
0
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6270
0
                               "Element content declaration doesn't start and"
6271
0
                               " stop in the same entity\n");
6272
0
      }
6273
77.7k
      SKIP(2);
6274
77.7k
  } else {
6275
123
      xmlFreeDocElementContent(ctxt->myDoc, ret);
6276
123
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6277
123
      return(NULL);
6278
123
  }
6279
6280
77.8k
    } else {
6281
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6282
0
    }
6283
77.7k
    return(ret);
6284
168k
}
6285
6286
/**
6287
 * xmlParseElementChildrenContentDeclPriv:
6288
 * @ctxt:  an XML parser context
6289
 * @inputchk:  the input used for the current entity, needed for boundary checks
6290
 * @depth: the level of recursion
6291
 *
6292
 * parse the declaration for a Mixed Element content
6293
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6294
 *
6295
 *
6296
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6297
 *
6298
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6299
 *
6300
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6301
 *
6302
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6303
 *
6304
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6305
 * TODO Parameter-entity replacement text must be properly nested
6306
 *  with parenthesized groups. That is to say, if either of the
6307
 *  opening or closing parentheses in a choice, seq, or Mixed
6308
 *  construct is contained in the replacement text for a parameter
6309
 *  entity, both must be contained in the same replacement text. For
6310
 *  interoperability, if a parameter-entity reference appears in a
6311
 *  choice, seq, or Mixed construct, its replacement text should not
6312
 *  be empty, and neither the first nor last non-blank character of
6313
 *  the replacement text should be a connector (| or ,).
6314
 *
6315
 * Returns the tree of xmlElementContentPtr describing the element
6316
 *          hierarchy.
6317
 */
6318
static xmlElementContentPtr
6319
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6320
227k
                                       int depth) {
6321
227k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6322
227k
    const xmlChar *elem;
6323
227k
    xmlChar type = 0;
6324
6325
227k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6326
227k
        (depth >  2048)) {
6327
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6328
0
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6329
0
                          depth);
6330
0
  return(NULL);
6331
0
    }
6332
227k
    SKIP_BLANKS;
6333
227k
    GROW;
6334
227k
    if (RAW == '(') {
6335
9.07k
  int inputid = ctxt->input->id;
6336
6337
        /* Recurse on first child */
6338
9.07k
  NEXT;
6339
9.07k
  SKIP_BLANKS;
6340
9.07k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6341
9.07k
                                                           depth + 1);
6342
9.07k
        if (cur == NULL)
6343
443
            return(NULL);
6344
8.62k
  SKIP_BLANKS;
6345
8.62k
  GROW;
6346
218k
    } else {
6347
218k
  elem = xmlParseName(ctxt);
6348
218k
  if (elem == NULL) {
6349
362
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6350
362
      return(NULL);
6351
362
  }
6352
217k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6353
217k
  if (cur == NULL) {
6354
0
      xmlErrMemory(ctxt, NULL);
6355
0
      return(NULL);
6356
0
  }
6357
217k
  GROW;
6358
217k
  if (RAW == '?') {
6359
17.4k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6360
17.4k
      NEXT;
6361
200k
  } else if (RAW == '*') {
6362
12.3k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6363
12.3k
      NEXT;
6364
187k
  } else if (RAW == '+') {
6365
43.7k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6366
43.7k
      NEXT;
6367
144k
  } else {
6368
144k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6369
144k
  }
6370
217k
  GROW;
6371
217k
    }
6372
226k
    SKIP_BLANKS;
6373
226k
    SHRINK;
6374
1.18M
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6375
        /*
6376
   * Each loop we parse one separator and one element.
6377
   */
6378
962k
        if (RAW == ',') {
6379
191k
      if (type == 0) type = CUR;
6380
6381
      /*
6382
       * Detect "Name | Name , Name" error
6383
       */
6384
110k
      else if (type != CUR) {
6385
14
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6386
14
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6387
14
                      type);
6388
14
    if ((last != NULL) && (last != ret))
6389
14
        xmlFreeDocElementContent(ctxt->myDoc, last);
6390
14
    if (ret != NULL)
6391
14
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6392
14
    return(NULL);
6393
14
      }
6394
191k
      NEXT;
6395
6396
191k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6397
191k
      if (op == NULL) {
6398
0
    if ((last != NULL) && (last != ret))
6399
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6400
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6401
0
    return(NULL);
6402
0
      }
6403
191k
      if (last == NULL) {
6404
80.7k
    op->c1 = ret;
6405
80.7k
    if (ret != NULL)
6406
80.7k
        ret->parent = op;
6407
80.7k
    ret = cur = op;
6408
110k
      } else {
6409
110k
          cur->c2 = op;
6410
110k
    if (op != NULL)
6411
110k
        op->parent = cur;
6412
110k
    op->c1 = last;
6413
110k
    if (last != NULL)
6414
110k
        last->parent = op;
6415
110k
    cur =op;
6416
110k
    last = NULL;
6417
110k
      }
6418
771k
  } else if (RAW == '|') {
6419
770k
      if (type == 0) type = CUR;
6420
6421
      /*
6422
       * Detect "Name , Name | Name" error
6423
       */
6424
688k
      else if (type != CUR) {
6425
12
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6426
12
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6427
12
          type);
6428
12
    if ((last != NULL) && (last != ret))
6429
12
        xmlFreeDocElementContent(ctxt->myDoc, last);
6430
12
    if (ret != NULL)
6431
12
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6432
12
    return(NULL);
6433
12
      }
6434
770k
      NEXT;
6435
6436
770k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6437
770k
      if (op == NULL) {
6438
0
    if ((last != NULL) && (last != ret))
6439
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6440
0
    if (ret != NULL)
6441
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6442
0
    return(NULL);
6443
0
      }
6444
770k
      if (last == NULL) {
6445
82.4k
    op->c1 = ret;
6446
82.4k
    if (ret != NULL)
6447
82.4k
        ret->parent = op;
6448
82.4k
    ret = cur = op;
6449
688k
      } else {
6450
688k
          cur->c2 = op;
6451
688k
    if (op != NULL)
6452
688k
        op->parent = cur;
6453
688k
    op->c1 = last;
6454
688k
    if (last != NULL)
6455
688k
        last->parent = op;
6456
688k
    cur =op;
6457
688k
    last = NULL;
6458
688k
      }
6459
770k
  } else {
6460
466
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6461
466
      if ((last != NULL) && (last != ret))
6462
182
          xmlFreeDocElementContent(ctxt->myDoc, last);
6463
466
      if (ret != NULL)
6464
466
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6465
466
      return(NULL);
6466
466
  }
6467
961k
  GROW;
6468
961k
  SKIP_BLANKS;
6469
961k
  GROW;
6470
961k
  if (RAW == '(') {
6471
44.5k
      int inputid = ctxt->input->id;
6472
      /* Recurse on second child */
6473
44.5k
      NEXT;
6474
44.5k
      SKIP_BLANKS;
6475
44.5k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6476
44.5k
                                                          depth + 1);
6477
44.5k
            if (last == NULL) {
6478
105
    if (ret != NULL)
6479
105
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6480
105
    return(NULL);
6481
105
            }
6482
44.4k
      SKIP_BLANKS;
6483
917k
  } else {
6484
917k
      elem = xmlParseName(ctxt);
6485
917k
      if (elem == NULL) {
6486
142
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6487
142
    if (ret != NULL)
6488
142
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6489
142
    return(NULL);
6490
142
      }
6491
917k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6492
917k
      if (last == NULL) {
6493
0
    if (ret != NULL)
6494
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6495
0
    return(NULL);
6496
0
      }
6497
917k
      if (RAW == '?') {
6498
63.6k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6499
63.6k
    NEXT;
6500
853k
      } else if (RAW == '*') {
6501
38.8k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6502
38.8k
    NEXT;
6503
814k
      } else if (RAW == '+') {
6504
14.1k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6505
14.1k
    NEXT;
6506
800k
      } else {
6507
800k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6508
800k
      }
6509
917k
  }
6510
961k
  SKIP_BLANKS;
6511
961k
  GROW;
6512
961k
    }
6513
225k
    if ((cur != NULL) && (last != NULL)) {
6514
162k
        cur->c2 = last;
6515
162k
  if (last != NULL)
6516
162k
      last->parent = cur;
6517
162k
    }
6518
225k
    if (ctxt->input->id != inputchk) {
6519
87
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6520
87
                       "Element content declaration doesn't start and stop in"
6521
87
                       " the same entity\n");
6522
87
    }
6523
225k
    NEXT;
6524
225k
    if (RAW == '?') {
6525
5.14k
  if (ret != NULL) {
6526
5.14k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6527
5.14k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6528
6
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6529
5.14k
      else
6530
5.14k
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6531
5.14k
  }
6532
5.14k
  NEXT;
6533
220k
    } else if (RAW == '*') {
6534
45.1k
  if (ret != NULL) {
6535
45.1k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6536
45.1k
      cur = ret;
6537
      /*
6538
       * Some normalization:
6539
       * (a | b* | c?)* == (a | b | c)*
6540
       */
6541
430k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6542
385k
    if ((cur->c1 != NULL) &&
6543
385k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6544
385k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6545
2.48k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6546
385k
    if ((cur->c2 != NULL) &&
6547
385k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6548
385k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6549
406
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6550
385k
    cur = cur->c2;
6551
385k
      }
6552
45.1k
  }
6553
45.1k
  NEXT;
6554
175k
    } else if (RAW == '+') {
6555
46.3k
  if (ret != NULL) {
6556
46.3k
      int found = 0;
6557
6558
46.3k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6559
46.3k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6560
3
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6561
46.3k
      else
6562
46.3k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6563
      /*
6564
       * Some normalization:
6565
       * (a | b*)+ == (a | b)*
6566
       * (a | b?)+ == (a | b)*
6567
       */
6568
79.6k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6569
33.3k
    if ((cur->c1 != NULL) &&
6570
33.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6571
33.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6572
4
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6573
4
        found = 1;
6574
4
    }
6575
33.3k
    if ((cur->c2 != NULL) &&
6576
33.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6577
33.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6578
3
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6579
3
        found = 1;
6580
3
    }
6581
33.3k
    cur = cur->c2;
6582
33.3k
      }
6583
46.3k
      if (found)
6584
7
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6585
46.3k
  }
6586
46.3k
  NEXT;
6587
46.3k
    }
6588
225k
    return(ret);
6589
226k
}
6590
6591
/**
6592
 * xmlParseElementChildrenContentDecl:
6593
 * @ctxt:  an XML parser context
6594
 * @inputchk:  the input used for the current entity, needed for boundary checks
6595
 *
6596
 * DEPRECATED: Internal function, don't use.
6597
 *
6598
 * parse the declaration for a Mixed Element content
6599
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6600
 *
6601
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6602
 *
6603
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6604
 *
6605
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6606
 *
6607
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6608
 *
6609
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6610
 * TODO Parameter-entity replacement text must be properly nested
6611
 *  with parenthesized groups. That is to say, if either of the
6612
 *  opening or closing parentheses in a choice, seq, or Mixed
6613
 *  construct is contained in the replacement text for a parameter
6614
 *  entity, both must be contained in the same replacement text. For
6615
 *  interoperability, if a parameter-entity reference appears in a
6616
 *  choice, seq, or Mixed construct, its replacement text should not
6617
 *  be empty, and neither the first nor last non-blank character of
6618
 *  the replacement text should be a connector (| or ,).
6619
 *
6620
 * Returns the tree of xmlElementContentPtr describing the element
6621
 *          hierarchy.
6622
 */
6623
xmlElementContentPtr
6624
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6625
    /* stub left for API/ABI compat */
6626
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6627
0
}
6628
6629
/**
6630
 * xmlParseElementContentDecl:
6631
 * @ctxt:  an XML parser context
6632
 * @name:  the name of the element being defined.
6633
 * @result:  the Element Content pointer will be stored here if any
6634
 *
6635
 * DEPRECATED: Internal function, don't use.
6636
 *
6637
 * parse the declaration for an Element content either Mixed or Children,
6638
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6639
 *
6640
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6641
 *
6642
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6643
 */
6644
6645
int
6646
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6647
342k
                           xmlElementContentPtr *result) {
6648
6649
342k
    xmlElementContentPtr tree = NULL;
6650
342k
    int inputid = ctxt->input->id;
6651
342k
    int res;
6652
6653
342k
    *result = NULL;
6654
6655
342k
    if (RAW != '(') {
6656
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6657
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6658
0
  return(-1);
6659
0
    }
6660
342k
    NEXT;
6661
342k
    GROW;
6662
342k
    if (ctxt->instate == XML_PARSER_EOF)
6663
0
        return(-1);
6664
342k
    SKIP_BLANKS;
6665
342k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6666
168k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6667
168k
  res = XML_ELEMENT_TYPE_MIXED;
6668
173k
    } else {
6669
173k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6670
173k
  res = XML_ELEMENT_TYPE_ELEMENT;
6671
173k
    }
6672
342k
    SKIP_BLANKS;
6673
342k
    *result = tree;
6674
342k
    return(res);
6675
342k
}
6676
6677
/**
6678
 * xmlParseElementDecl:
6679
 * @ctxt:  an XML parser context
6680
 *
6681
 * DEPRECATED: Internal function, don't use.
6682
 *
6683
 * Parse an element declaration. Always consumes '<!'.
6684
 *
6685
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6686
 *
6687
 * [ VC: Unique Element Type Declaration ]
6688
 * No element type may be declared more than once
6689
 *
6690
 * Returns the type of the element, or -1 in case of error
6691
 */
6692
int
6693
394k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6694
394k
    const xmlChar *name;
6695
394k
    int ret = -1;
6696
394k
    xmlElementContentPtr content  = NULL;
6697
6698
394k
    if ((CUR != '<') || (NXT(1) != '!'))
6699
0
        return(ret);
6700
394k
    SKIP(2);
6701
6702
    /* GROW; done in the caller */
6703
394k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6704
393k
  int inputid = ctxt->input->id;
6705
6706
393k
  SKIP(7);
6707
393k
  if (SKIP_BLANKS == 0) {
6708
85
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6709
85
               "Space required after 'ELEMENT'\n");
6710
85
      return(-1);
6711
85
  }
6712
393k
        name = xmlParseName(ctxt);
6713
393k
  if (name == NULL) {
6714
204
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6715
204
         "xmlParseElementDecl: no name for Element\n");
6716
204
      return(-1);
6717
204
  }
6718
393k
  if (SKIP_BLANKS == 0) {
6719
242
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6720
242
         "Space required after the element name\n");
6721
242
  }
6722
393k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6723
49.7k
      SKIP(5);
6724
      /*
6725
       * Element must always be empty.
6726
       */
6727
49.7k
      ret = XML_ELEMENT_TYPE_EMPTY;
6728
343k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6729
343k
             (NXT(2) == 'Y')) {
6730
1.22k
      SKIP(3);
6731
      /*
6732
       * Element is a generic container.
6733
       */
6734
1.22k
      ret = XML_ELEMENT_TYPE_ANY;
6735
342k
  } else if (RAW == '(') {
6736
342k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6737
342k
  } else {
6738
      /*
6739
       * [ WFC: PEs in Internal Subset ] error handling.
6740
       */
6741
365
      if ((RAW == '%') && (ctxt->external == 0) &&
6742
365
          (ctxt->inputNr == 1)) {
6743
12
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6744
12
    "PEReference: forbidden within markup decl in internal subset\n");
6745
353
      } else {
6746
353
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6747
353
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6748
353
            }
6749
365
      return(-1);
6750
365
  }
6751
6752
393k
  SKIP_BLANKS;
6753
6754
393k
  if (RAW != '>') {
6755
1.05k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6756
1.05k
      if (content != NULL) {
6757
81
    xmlFreeDocElementContent(ctxt->myDoc, content);
6758
81
      }
6759
392k
  } else {
6760
392k
      if (inputid != ctxt->input->id) {
6761
19
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6762
19
                               "Element declaration doesn't start and stop in"
6763
19
                               " the same entity\n");
6764
19
      }
6765
6766
392k
      NEXT;
6767
392k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6768
392k
    (ctxt->sax->elementDecl != NULL)) {
6769
358k
    if (content != NULL)
6770
311k
        content->parent = NULL;
6771
358k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6772
358k
                           content);
6773
358k
    if ((content != NULL) && (content->parent == NULL)) {
6774
        /*
6775
         * this is a trick: if xmlAddElementDecl is called,
6776
         * instead of copying the full tree it is plugged directly
6777
         * if called from the parser. Avoid duplicating the
6778
         * interfaces or change the API/ABI
6779
         */
6780
28
        xmlFreeDocElementContent(ctxt->myDoc, content);
6781
28
    }
6782
358k
      } else if (content != NULL) {
6783
29.7k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6784
29.7k
      }
6785
392k
  }
6786
393k
    }
6787
393k
    return(ret);
6788
394k
}
6789
6790
/**
6791
 * xmlParseConditionalSections
6792
 * @ctxt:  an XML parser context
6793
 *
6794
 * Parse a conditional section. Always consumes '<!['.
6795
 *
6796
 * [61] conditionalSect ::= includeSect | ignoreSect
6797
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6798
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6799
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6800
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6801
 */
6802
6803
static void
6804
291
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6805
291
    int *inputIds = NULL;
6806
291
    size_t inputIdsSize = 0;
6807
291
    size_t depth = 0;
6808
6809
2.51k
    while (ctxt->instate != XML_PARSER_EOF) {
6810
2.51k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6811
1.41k
            int id = ctxt->input->id;
6812
6813
1.41k
            SKIP(3);
6814
1.41k
            SKIP_BLANKS;
6815
6816
1.41k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6817
1.35k
                SKIP(7);
6818
1.35k
                SKIP_BLANKS;
6819
1.35k
                if (RAW != '[') {
6820
12
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6821
12
                    xmlHaltParser(ctxt);
6822
12
                    goto error;
6823
12
                }
6824
1.34k
                if (ctxt->input->id != id) {
6825
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6826
0
                                   "All markup of the conditional section is"
6827
0
                                   " not in the same entity\n");
6828
0
                }
6829
1.34k
                NEXT;
6830
6831
1.34k
                if (inputIdsSize <= depth) {
6832
383
                    int *tmp;
6833
6834
383
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6835
383
                    tmp = (int *) xmlRealloc(inputIds,
6836
383
                            inputIdsSize * sizeof(int));
6837
383
                    if (tmp == NULL) {
6838
0
                        xmlErrMemory(ctxt, NULL);
6839
0
                        goto error;
6840
0
                    }
6841
383
                    inputIds = tmp;
6842
383
                }
6843
1.34k
                inputIds[depth] = id;
6844
1.34k
                depth++;
6845
1.34k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6846
15
                size_t ignoreDepth = 0;
6847
6848
15
                SKIP(6);
6849
15
                SKIP_BLANKS;
6850
15
                if (RAW != '[') {
6851
0
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6852
0
                    xmlHaltParser(ctxt);
6853
0
                    goto error;
6854
0
                }
6855
15
                if (ctxt->input->id != id) {
6856
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6857
0
                                   "All markup of the conditional section is"
6858
0
                                   " not in the same entity\n");
6859
0
                }
6860
15
                NEXT;
6861
6862
1.47k
                while (RAW != 0) {
6863
1.46k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6864
27
                        SKIP(3);
6865
27
                        ignoreDepth++;
6866
                        /* Check for integer overflow */
6867
27
                        if (ignoreDepth == 0) {
6868
0
                            xmlErrMemory(ctxt, NULL);
6869
0
                            goto error;
6870
0
                        }
6871
1.44k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6872
1.44k
                               (NXT(2) == '>')) {
6873
33
                        if (ignoreDepth == 0)
6874
12
                            break;
6875
21
                        SKIP(3);
6876
21
                        ignoreDepth--;
6877
1.40k
                    } else {
6878
1.40k
                        NEXT;
6879
1.40k
                    }
6880
1.46k
                }
6881
6882
15
    if (RAW == 0) {
6883
3
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6884
3
                    goto error;
6885
3
    }
6886
12
                if (ctxt->input->id != id) {
6887
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6888
0
                                   "All markup of the conditional section is"
6889
0
                                   " not in the same entity\n");
6890
0
                }
6891
12
                SKIP(3);
6892
50
            } else {
6893
50
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6894
50
                xmlHaltParser(ctxt);
6895
50
                goto error;
6896
50
            }
6897
1.41k
        } else if ((depth > 0) &&
6898
1.09k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6899
846
            depth--;
6900
846
            if (ctxt->input->id != inputIds[depth]) {
6901
82
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6902
82
                               "All markup of the conditional section is not"
6903
82
                               " in the same entity\n");
6904
82
            }
6905
846
            SKIP(3);
6906
846
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6907
72
            xmlParseMarkupDecl(ctxt);
6908
178
        } else {
6909
178
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6910
178
            xmlHaltParser(ctxt);
6911
178
            goto error;
6912
178
        }
6913
6914
2.27k
        if (depth == 0)
6915
45
            break;
6916
6917
2.22k
        SKIP_BLANKS;
6918
2.22k
        GROW;
6919
2.22k
    }
6920
6921
291
error:
6922
291
    xmlFree(inputIds);
6923
291
}
6924
6925
/**
6926
 * xmlParseMarkupDecl:
6927
 * @ctxt:  an XML parser context
6928
 *
6929
 * DEPRECATED: Internal function, don't use.
6930
 *
6931
 * Parse markup declarations. Always consumes '<!' or '<?'.
6932
 *
6933
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6934
 *                     NotationDecl | PI | Comment
6935
 *
6936
 * [ VC: Proper Declaration/PE Nesting ]
6937
 * Parameter-entity replacement text must be properly nested with
6938
 * markup declarations. That is to say, if either the first character
6939
 * or the last character of a markup declaration (markupdecl above) is
6940
 * contained in the replacement text for a parameter-entity reference,
6941
 * both must be contained in the same replacement text.
6942
 *
6943
 * [ WFC: PEs in Internal Subset ]
6944
 * In the internal DTD subset, parameter-entity references can occur
6945
 * only where markup declarations can occur, not within markup declarations.
6946
 * (This does not apply to references that occur in external parameter
6947
 * entities or to the external subset.)
6948
 */
6949
void
6950
2.35M
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6951
2.35M
    GROW;
6952
2.35M
    if (CUR == '<') {
6953
2.35M
        if (NXT(1) == '!') {
6954
2.35M
      switch (NXT(2)) {
6955
674k
          case 'E':
6956
674k
        if (NXT(3) == 'L')
6957
394k
      xmlParseElementDecl(ctxt);
6958
280k
        else if (NXT(3) == 'N')
6959
280k
      xmlParseEntityDecl(ctxt);
6960
92
                    else
6961
92
                        SKIP(2);
6962
674k
        break;
6963
347k
          case 'A':
6964
347k
        xmlParseAttributeListDecl(ctxt);
6965
347k
        break;
6966
256
          case 'N':
6967
256
        xmlParseNotationDecl(ctxt);
6968
256
        break;
6969
1.33M
          case '-':
6970
1.33M
        xmlParseComment(ctxt);
6971
1.33M
        break;
6972
282
    default:
6973
        /* there is an error but it will be detected later */
6974
282
                    SKIP(2);
6975
282
        break;
6976
2.35M
      }
6977
2.35M
  } else if (NXT(1) == '?') {
6978
125
      xmlParsePI(ctxt);
6979
125
  }
6980
2.35M
    }
6981
6982
    /*
6983
     * detect requirement to exit there and act accordingly
6984
     * and avoid having instate overridden later on
6985
     */
6986
2.35M
    if (ctxt->instate == XML_PARSER_EOF)
6987
1.00k
        return;
6988
6989
2.35M
    ctxt->instate = XML_PARSER_DTD;
6990
2.35M
}
6991
6992
/**
6993
 * xmlParseTextDecl:
6994
 * @ctxt:  an XML parser context
6995
 *
6996
 * DEPRECATED: Internal function, don't use.
6997
 *
6998
 * parse an XML declaration header for external entities
6999
 *
7000
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7001
 */
7002
7003
void
7004
422
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7005
422
    xmlChar *version;
7006
422
    const xmlChar *encoding;
7007
422
    int oldstate;
7008
7009
    /*
7010
     * We know that '<?xml' is here.
7011
     */
7012
422
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7013
419
  SKIP(5);
7014
419
    } else {
7015
3
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7016
3
  return;
7017
3
    }
7018
7019
    /* Avoid expansion of parameter entities when skipping blanks. */
7020
419
    oldstate = ctxt->instate;
7021
419
    ctxt->instate = XML_PARSER_START;
7022
7023
419
    if (SKIP_BLANKS == 0) {
7024
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7025
0
           "Space needed after '<?xml'\n");
7026
0
    }
7027
7028
    /*
7029
     * We may have the VersionInfo here.
7030
     */
7031
419
    version = xmlParseVersionInfo(ctxt);
7032
419
    if (version == NULL)
7033
6
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
7034
413
    else {
7035
413
  if (SKIP_BLANKS == 0) {
7036
10
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7037
10
               "Space needed here\n");
7038
10
  }
7039
413
    }
7040
419
    ctxt->input->version = version;
7041
7042
    /*
7043
     * We must have the encoding declaration
7044
     */
7045
419
    encoding = xmlParseEncodingDecl(ctxt);
7046
419
    if (ctxt->instate == XML_PARSER_EOF)
7047
0
        return;
7048
419
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7049
  /*
7050
   * The XML REC instructs us to stop parsing right here
7051
   */
7052
3
        ctxt->instate = oldstate;
7053
3
        return;
7054
3
    }
7055
416
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7056
49
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7057
49
           "Missing encoding in text declaration\n");
7058
49
    }
7059
7060
416
    SKIP_BLANKS;
7061
416
    if ((RAW == '?') && (NXT(1) == '>')) {
7062
380
        SKIP(2);
7063
380
    } else if (RAW == '>') {
7064
        /* Deprecated old WD ... */
7065
3
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7066
3
  NEXT;
7067
33
    } else {
7068
33
        int c;
7069
7070
33
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7071
60.6k
        while ((c = CUR) != 0) {
7072
60.6k
            NEXT;
7073
60.6k
            if (c == '>')
7074
27
                break;
7075
60.6k
        }
7076
33
    }
7077
7078
416
    ctxt->instate = oldstate;
7079
416
}
7080
7081
/**
7082
 * xmlParseExternalSubset:
7083
 * @ctxt:  an XML parser context
7084
 * @ExternalID: the external identifier
7085
 * @SystemID: the system identifier (or URL)
7086
 *
7087
 * parse Markup declarations from an external subset
7088
 *
7089
 * [30] extSubset ::= textDecl? extSubsetDecl
7090
 *
7091
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7092
 */
7093
void
7094
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7095
4.63k
                       const xmlChar *SystemID) {
7096
4.63k
    xmlDetectSAX2(ctxt);
7097
4.63k
    GROW;
7098
7099
4.63k
    if ((ctxt->encoding == NULL) &&
7100
4.63k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
7101
4.62k
        xmlChar start[4];
7102
4.62k
  xmlCharEncoding enc;
7103
7104
4.62k
  start[0] = RAW;
7105
4.62k
  start[1] = NXT(1);
7106
4.62k
  start[2] = NXT(2);
7107
4.62k
  start[3] = NXT(3);
7108
4.62k
  enc = xmlDetectCharEncoding(start, 4);
7109
4.62k
  if (enc != XML_CHAR_ENCODING_NONE)
7110
432
      xmlSwitchEncoding(ctxt, enc);
7111
4.62k
    }
7112
7113
4.63k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7114
416
  xmlParseTextDecl(ctxt);
7115
416
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7116
      /*
7117
       * The XML REC instructs us to stop parsing right here
7118
       */
7119
3
      xmlHaltParser(ctxt);
7120
3
      return;
7121
3
  }
7122
416
    }
7123
4.63k
    if (ctxt->myDoc == NULL) {
7124
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7125
0
  if (ctxt->myDoc == NULL) {
7126
0
      xmlErrMemory(ctxt, "New Doc failed");
7127
0
      return;
7128
0
  }
7129
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
7130
0
    }
7131
4.63k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7132
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7133
7134
4.63k
    ctxt->instate = XML_PARSER_DTD;
7135
4.63k
    ctxt->external = 1;
7136
4.63k
    SKIP_BLANKS;
7137
1.14M
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7138
1.13M
  GROW;
7139
1.13M
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7140
291
            xmlParseConditionalSections(ctxt);
7141
1.13M
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7142
1.13M
            xmlParseMarkupDecl(ctxt);
7143
1.13M
        } else {
7144
1.32k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7145
1.32k
            xmlHaltParser(ctxt);
7146
1.32k
            return;
7147
1.32k
        }
7148
1.13M
        SKIP_BLANKS;
7149
1.13M
    }
7150
7151
3.31k
    if (RAW != 0) {
7152
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7153
0
    }
7154
7155
3.31k
}
7156
7157
/**
7158
 * xmlParseReference:
7159
 * @ctxt:  an XML parser context
7160
 *
7161
 * DEPRECATED: Internal function, don't use.
7162
 *
7163
 * parse and handle entity references in content, depending on the SAX
7164
 * interface, this may end-up in a call to character() if this is a
7165
 * CharRef, a predefined entity, if there is no reference() callback.
7166
 * or if the parser was asked to switch to that mode.
7167
 *
7168
 * Always consumes '&'.
7169
 *
7170
 * [67] Reference ::= EntityRef | CharRef
7171
 */
7172
void
7173
169k
xmlParseReference(xmlParserCtxtPtr ctxt) {
7174
169k
    xmlEntityPtr ent;
7175
169k
    xmlChar *val;
7176
169k
    int was_checked;
7177
169k
    xmlNodePtr list = NULL;
7178
169k
    xmlParserErrors ret = XML_ERR_OK;
7179
7180
7181
169k
    if (RAW != '&')
7182
0
        return;
7183
7184
    /*
7185
     * Simple case of a CharRef
7186
     */
7187
169k
    if (NXT(1) == '#') {
7188
1.28k
  int i = 0;
7189
1.28k
  xmlChar out[16];
7190
1.28k
  int hex = NXT(2);
7191
1.28k
  int value = xmlParseCharRef(ctxt);
7192
7193
1.28k
  if (value == 0)
7194
334
      return;
7195
951
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7196
      /*
7197
       * So we are using non-UTF-8 buffers
7198
       * Check that the char fit on 8bits, if not
7199
       * generate a CharRef.
7200
       */
7201
423
      if (value <= 0xFF) {
7202
414
    out[0] = value;
7203
414
    out[1] = 0;
7204
414
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7205
414
        (!ctxt->disableSAX))
7206
361
        ctxt->sax->characters(ctxt->userData, out, 1);
7207
414
      } else {
7208
9
    if ((hex == 'x') || (hex == 'X'))
7209
0
        snprintf((char *)out, sizeof(out), "#x%X", value);
7210
9
    else
7211
9
        snprintf((char *)out, sizeof(out), "#%d", value);
7212
9
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7213
9
        (!ctxt->disableSAX))
7214
7
        ctxt->sax->reference(ctxt->userData, out);
7215
9
      }
7216
528
  } else {
7217
      /*
7218
       * Just encode the value in UTF-8
7219
       */
7220
528
      COPY_BUF(0 ,out, i, value);
7221
528
      out[i] = 0;
7222
528
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7223
528
    (!ctxt->disableSAX))
7224
485
    ctxt->sax->characters(ctxt->userData, out, i);
7225
528
  }
7226
951
  return;
7227
1.28k
    }
7228
7229
    /*
7230
     * We are seeing an entity reference
7231
     */
7232
167k
    ent = xmlParseEntityRef(ctxt);
7233
167k
    if (ent == NULL) return;
7234
161k
    if (!ctxt->wellFormed)
7235
36.6k
  return;
7236
124k
    was_checked = ent->flags & XML_ENT_PARSED;
7237
7238
    /* special case of predefined entities */
7239
124k
    if ((ent->name == NULL) ||
7240
124k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7241
61.1k
  val = ent->content;
7242
61.1k
  if (val == NULL) return;
7243
  /*
7244
   * inline the entity.
7245
   */
7246
61.1k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7247
61.1k
      (!ctxt->disableSAX))
7248
61.1k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7249
61.1k
  return;
7250
61.1k
    }
7251
7252
    /*
7253
     * The first reference to the entity trigger a parsing phase
7254
     * where the ent->children is filled with the result from
7255
     * the parsing.
7256
     * Note: external parsed entities will not be loaded, it is not
7257
     * required for a non-validating parser, unless the parsing option
7258
     * of validating, or substituting entities were given. Doing so is
7259
     * far more secure as the parser will only process data coming from
7260
     * the document entity by default.
7261
     */
7262
63.6k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
7263
63.6k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7264
6.02k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7265
5.94k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7266
7267
  /*
7268
   * This is a bit hackish but this seems the best
7269
   * way to make sure both SAX and DOM entity support
7270
   * behaves okay.
7271
   */
7272
5.94k
  void *user_data;
7273
5.94k
  if (ctxt->userData == ctxt)
7274
5.94k
      user_data = NULL;
7275
0
  else
7276
0
      user_data = ctxt->userData;
7277
7278
        /* Avoid overflow as much as possible */
7279
5.94k
        ctxt->sizeentcopy = 0;
7280
7281
5.94k
        if (ent->flags & XML_ENT_EXPANDING) {
7282
42
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7283
42
            xmlHaltParser(ctxt);
7284
42
            return;
7285
42
        }
7286
7287
5.89k
        ent->flags |= XML_ENT_EXPANDING;
7288
7289
  /*
7290
   * Check that this entity is well formed
7291
   * 4.3.2: An internal general parsed entity is well-formed
7292
   * if its replacement text matches the production labeled
7293
   * content.
7294
   */
7295
5.89k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7296
5.27k
      ctxt->depth++;
7297
5.27k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7298
5.27k
                                                user_data, &list);
7299
5.27k
      ctxt->depth--;
7300
7301
5.27k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7302
625
      ctxt->depth++;
7303
625
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7304
625
                                     user_data, ctxt->depth, ent->URI,
7305
625
             ent->ExternalID, &list);
7306
625
      ctxt->depth--;
7307
625
  } else {
7308
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
7309
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7310
0
       "invalid entity type found\n", NULL);
7311
0
  }
7312
7313
5.89k
        ent->flags &= ~XML_ENT_EXPANDING;
7314
5.89k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7315
5.89k
        ent->expandedSize = ctxt->sizeentcopy;
7316
5.89k
  if (ret == XML_ERR_ENTITY_LOOP) {
7317
342
            xmlHaltParser(ctxt);
7318
342
      xmlFreeNodeList(list);
7319
342
      return;
7320
342
  }
7321
5.55k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7322
0
      xmlFreeNodeList(list);
7323
0
      return;
7324
0
  }
7325
7326
5.55k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7327
4.96k
            ent->children = list;
7328
            /*
7329
             * Prune it directly in the generated document
7330
             * except for single text nodes.
7331
             */
7332
4.96k
            if ((ctxt->replaceEntities == 0) ||
7333
4.96k
                (ctxt->parseMode == XML_PARSE_READER) ||
7334
4.96k
                ((list->type == XML_TEXT_NODE) &&
7335
4.90k
                 (list->next == NULL))) {
7336
4.90k
                ent->owner = 1;
7337
9.85k
                while (list != NULL) {
7338
4.94k
                    list->parent = (xmlNodePtr) ent;
7339
4.94k
                    if (list->doc != ent->doc)
7340
0
                        xmlSetTreeDoc(list, ent->doc);
7341
4.94k
                    if (list->next == NULL)
7342
4.90k
                        ent->last = list;
7343
4.94k
                    list = list->next;
7344
4.94k
                }
7345
4.90k
                list = NULL;
7346
4.90k
            } else {
7347
56
                ent->owner = 0;
7348
196
                while (list != NULL) {
7349
140
                    list->parent = (xmlNodePtr) ctxt->node;
7350
140
                    list->doc = ctxt->myDoc;
7351
140
                    if (list->next == NULL)
7352
56
                        ent->last = list;
7353
140
                    list = list->next;
7354
140
                }
7355
56
                list = ent->children;
7356
#ifdef LIBXML_LEGACY_ENABLED
7357
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7358
                    xmlAddEntityReference(ent, list, NULL);
7359
#endif /* LIBXML_LEGACY_ENABLED */
7360
56
            }
7361
4.96k
  } else if ((ret != XML_ERR_OK) &&
7362
596
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7363
353
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364
353
         "Entity '%s' failed to parse\n", ent->name);
7365
353
            if (ent->content != NULL)
7366
138
                ent->content[0] = 0;
7367
353
  } else if (list != NULL) {
7368
0
      xmlFreeNodeList(list);
7369
0
      list = NULL;
7370
0
  }
7371
7372
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7373
5.55k
        was_checked = 0;
7374
5.55k
    }
7375
7376
    /*
7377
     * Now that the entity content has been gathered
7378
     * provide it to the application, this can take different forms based
7379
     * on the parsing modes.
7380
     */
7381
63.2k
    if (ent->children == NULL) {
7382
  /*
7383
   * Probably running in SAX mode and the callbacks don't
7384
   * build the entity content. So unless we already went
7385
   * though parsing for first checking go though the entity
7386
   * content to generate callbacks associated to the entity
7387
   */
7388
1.78k
  if (was_checked != 0) {
7389
1.10k
      void *user_data;
7390
      /*
7391
       * This is a bit hackish but this seems the best
7392
       * way to make sure both SAX and DOM entity support
7393
       * behaves okay.
7394
       */
7395
1.10k
      if (ctxt->userData == ctxt)
7396
1.10k
    user_data = NULL;
7397
0
      else
7398
0
    user_data = ctxt->userData;
7399
7400
1.10k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7401
0
    ctxt->depth++;
7402
0
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7403
0
           ent->content, user_data, NULL);
7404
0
    ctxt->depth--;
7405
1.10k
      } else if (ent->etype ==
7406
1.10k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7407
1.10k
          unsigned long oldsizeentities = ctxt->sizeentities;
7408
7409
1.10k
    ctxt->depth++;
7410
1.10k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7411
1.10k
         ctxt->sax, user_data, ctxt->depth,
7412
1.10k
         ent->URI, ent->ExternalID, NULL);
7413
1.10k
    ctxt->depth--;
7414
7415
                /* Undo the change to sizeentities */
7416
1.10k
                ctxt->sizeentities = oldsizeentities;
7417
1.10k
      } else {
7418
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7419
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7420
0
           "invalid entity type found\n", NULL);
7421
0
      }
7422
1.10k
      if (ret == XML_ERR_ENTITY_LOOP) {
7423
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7424
0
    return;
7425
0
      }
7426
1.10k
            if (xmlParserEntityCheck(ctxt, 0))
7427
0
                return;
7428
1.10k
  }
7429
1.78k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7430
1.78k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7431
      /*
7432
       * Entity reference callback comes second, it's somewhat
7433
       * superfluous but a compatibility to historical behaviour
7434
       */
7435
383
      ctxt->sax->reference(ctxt->userData, ent->name);
7436
383
  }
7437
1.78k
  return;
7438
1.78k
    }
7439
7440
    /*
7441
     * We also check for amplification if entities aren't substituted.
7442
     * They might be expanded later.
7443
     */
7444
61.4k
    if ((was_checked != 0) &&
7445
61.4k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7446
24
        return;
7447
7448
    /*
7449
     * If we didn't get any children for the entity being built
7450
     */
7451
61.4k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7452
61.4k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7453
  /*
7454
   * Create a node.
7455
   */
7456
621
  ctxt->sax->reference(ctxt->userData, ent->name);
7457
621
  return;
7458
621
    }
7459
7460
60.8k
    if (ctxt->replaceEntities)  {
7461
  /*
7462
   * There is a problem on the handling of _private for entities
7463
   * (bug 155816): Should we copy the content of the field from
7464
   * the entity (possibly overwriting some value set by the user
7465
   * when a copy is created), should we leave it alone, or should
7466
   * we try to take care of different situations?  The problem
7467
   * is exacerbated by the usage of this field by the xmlReader.
7468
   * To fix this bug, we look at _private on the created node
7469
   * and, if it's NULL, we copy in whatever was in the entity.
7470
   * If it's not NULL we leave it alone.  This is somewhat of a
7471
   * hack - maybe we should have further tests to determine
7472
   * what to do.
7473
   */
7474
60.8k
  if (ctxt->node != NULL) {
7475
      /*
7476
       * Seems we are generating the DOM content, do
7477
       * a simple tree copy for all references except the first
7478
       * In the first occurrence list contains the replacement.
7479
       */
7480
60.8k
      if (((list == NULL) && (ent->owner == 0)) ||
7481
60.8k
    (ctxt->parseMode == XML_PARSE_READER)) {
7482
18.6k
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7483
7484
    /*
7485
     * when operating on a reader, the entities definitions
7486
     * are always owning the entities subtree.
7487
    if (ctxt->parseMode == XML_PARSE_READER)
7488
        ent->owner = 1;
7489
     */
7490
7491
18.6k
    cur = ent->children;
7492
18.7k
    while (cur != NULL) {
7493
18.7k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7494
18.7k
        if (nw != NULL) {
7495
18.7k
      if (nw->_private == NULL)
7496
18.7k
          nw->_private = cur->_private;
7497
18.7k
      if (firstChild == NULL){
7498
18.6k
          firstChild = nw;
7499
18.6k
      }
7500
18.7k
      nw = xmlAddChild(ctxt->node, nw);
7501
18.7k
        }
7502
18.7k
        if (cur == ent->last) {
7503
      /*
7504
       * needed to detect some strange empty
7505
       * node cases in the reader tests
7506
       */
7507
18.6k
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7508
18.6k
          (nw != NULL) &&
7509
18.6k
          (nw->type == XML_ELEMENT_NODE) &&
7510
18.6k
          (nw->children == NULL))
7511
14
          nw->extra = 1;
7512
7513
18.6k
      break;
7514
18.6k
        }
7515
125
        cur = cur->next;
7516
125
    }
7517
#ifdef LIBXML_LEGACY_ENABLED
7518
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7519
      xmlAddEntityReference(ent, firstChild, nw);
7520
#endif /* LIBXML_LEGACY_ENABLED */
7521
42.1k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7522
42.1k
    xmlNodePtr nw = NULL, cur, next, last,
7523
42.1k
         firstChild = NULL;
7524
7525
    /*
7526
     * Copy the entity child list and make it the new
7527
     * entity child list. The goal is to make sure any
7528
     * ID or REF referenced will be the one from the
7529
     * document content and not the entity copy.
7530
     */
7531
42.1k
    cur = ent->children;
7532
42.1k
    ent->children = NULL;
7533
42.1k
    last = ent->last;
7534
42.1k
    ent->last = NULL;
7535
42.3k
    while (cur != NULL) {
7536
42.3k
        next = cur->next;
7537
42.3k
        cur->next = NULL;
7538
42.3k
        cur->parent = NULL;
7539
42.3k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7540
42.3k
        if (nw != NULL) {
7541
42.3k
      if (nw->_private == NULL)
7542
42.3k
          nw->_private = cur->_private;
7543
42.3k
      if (firstChild == NULL){
7544
42.1k
          firstChild = cur;
7545
42.1k
      }
7546
42.3k
      xmlAddChild((xmlNodePtr) ent, nw);
7547
42.3k
        }
7548
42.3k
        xmlAddChild(ctxt->node, cur);
7549
42.3k
        if (cur == last)
7550
42.1k
      break;
7551
250
        cur = next;
7552
250
    }
7553
42.1k
    if (ent->owner == 0)
7554
56
        ent->owner = 1;
7555
#ifdef LIBXML_LEGACY_ENABLED
7556
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7557
      xmlAddEntityReference(ent, firstChild, nw);
7558
#endif /* LIBXML_LEGACY_ENABLED */
7559
42.1k
      } else {
7560
0
    const xmlChar *nbktext;
7561
7562
    /*
7563
     * the name change is to avoid coalescing of the
7564
     * node with a possible previous text one which
7565
     * would make ent->children a dangling pointer
7566
     */
7567
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7568
0
          -1);
7569
0
    if (ent->children->type == XML_TEXT_NODE)
7570
0
        ent->children->name = nbktext;
7571
0
    if ((ent->last != ent->children) &&
7572
0
        (ent->last->type == XML_TEXT_NODE))
7573
0
        ent->last->name = nbktext;
7574
0
    xmlAddChildList(ctxt->node, ent->children);
7575
0
      }
7576
7577
      /*
7578
       * This is to avoid a nasty side effect, see
7579
       * characters() in SAX.c
7580
       */
7581
60.8k
      ctxt->nodemem = 0;
7582
60.8k
      ctxt->nodelen = 0;
7583
60.8k
      return;
7584
60.8k
  }
7585
60.8k
    }
7586
60.8k
}
7587
7588
/**
7589
 * xmlParseEntityRef:
7590
 * @ctxt:  an XML parser context
7591
 *
7592
 * DEPRECATED: Internal function, don't use.
7593
 *
7594
 * Parse an entity reference. Always consumes '&'.
7595
 *
7596
 * [68] EntityRef ::= '&' Name ';'
7597
 *
7598
 * [ WFC: Entity Declared ]
7599
 * In a document without any DTD, a document with only an internal DTD
7600
 * subset which contains no parameter entity references, or a document
7601
 * with "standalone='yes'", the Name given in the entity reference
7602
 * must match that in an entity declaration, except that well-formed
7603
 * documents need not declare any of the following entities: amp, lt,
7604
 * gt, apos, quot.  The declaration of a parameter entity must precede
7605
 * any reference to it.  Similarly, the declaration of a general entity
7606
 * must precede any reference to it which appears in a default value in an
7607
 * attribute-list declaration. Note that if entities are declared in the
7608
 * external subset or in external parameter entities, a non-validating
7609
 * processor is not obligated to read and process their declarations;
7610
 * for such documents, the rule that an entity must be declared is a
7611
 * well-formedness constraint only if standalone='yes'.
7612
 *
7613
 * [ WFC: Parsed Entity ]
7614
 * An entity reference must not contain the name of an unparsed entity
7615
 *
7616
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7617
 */
7618
xmlEntityPtr
7619
223k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7620
223k
    const xmlChar *name;
7621
223k
    xmlEntityPtr ent = NULL;
7622
7623
223k
    GROW;
7624
223k
    if (ctxt->instate == XML_PARSER_EOF)
7625
0
        return(NULL);
7626
7627
223k
    if (RAW != '&')
7628
0
        return(NULL);
7629
223k
    NEXT;
7630
223k
    name = xmlParseName(ctxt);
7631
223k
    if (name == NULL) {
7632
1.50k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7633
1.50k
           "xmlParseEntityRef: no name\n");
7634
1.50k
        return(NULL);
7635
1.50k
    }
7636
221k
    if (RAW != ';') {
7637
1.04k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7638
1.04k
  return(NULL);
7639
1.04k
    }
7640
220k
    NEXT;
7641
7642
    /*
7643
     * Predefined entities override any extra definition
7644
     */
7645
220k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7646
175k
        ent = xmlGetPredefinedEntity(name);
7647
175k
        if (ent != NULL)
7648
46.9k
            return(ent);
7649
175k
    }
7650
7651
    /*
7652
     * Ask first SAX for entity resolution, otherwise try the
7653
     * entities which may have been stored in the parser context.
7654
     */
7655
173k
    if (ctxt->sax != NULL) {
7656
173k
  if (ctxt->sax->getEntity != NULL)
7657
173k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7658
173k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7659
173k
      (ctxt->options & XML_PARSE_OLDSAX))
7660
98
      ent = xmlGetPredefinedEntity(name);
7661
173k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7662
173k
      (ctxt->userData==ctxt)) {
7663
567
      ent = xmlSAX2GetEntity(ctxt, name);
7664
567
  }
7665
173k
    }
7666
173k
    if (ctxt->instate == XML_PARSER_EOF)
7667
0
  return(NULL);
7668
    /*
7669
     * [ WFC: Entity Declared ]
7670
     * In a document without any DTD, a document with only an
7671
     * internal DTD subset which contains no parameter entity
7672
     * references, or a document with "standalone='yes'", the
7673
     * Name given in the entity reference must match that in an
7674
     * entity declaration, except that well-formed documents
7675
     * need not declare any of the following entities: amp, lt,
7676
     * gt, apos, quot.
7677
     * The declaration of a parameter entity must precede any
7678
     * reference to it.
7679
     * Similarly, the declaration of a general entity must
7680
     * precede any reference to it which appears in a default
7681
     * value in an attribute-list declaration. Note that if
7682
     * entities are declared in the external subset or in
7683
     * external parameter entities, a non-validating processor
7684
     * is not obligated to read and process their declarations;
7685
     * for such documents, the rule that an entity must be
7686
     * declared is a well-formedness constraint only if
7687
     * standalone='yes'.
7688
     */
7689
173k
    if (ent == NULL) {
7690
4.89k
  if ((ctxt->standalone == 1) ||
7691
4.89k
      ((ctxt->hasExternalSubset == 0) &&
7692
4.87k
       (ctxt->hasPErefs == 0))) {
7693
4.04k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7694
4.04k
         "Entity '%s' not defined\n", name);
7695
4.04k
  } else {
7696
850
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697
850
         "Entity '%s' not defined\n", name);
7698
850
      if ((ctxt->inSubset == 0) &&
7699
850
    (ctxt->sax != NULL) &&
7700
850
    (ctxt->sax->reference != NULL)) {
7701
801
    ctxt->sax->reference(ctxt->userData, name);
7702
801
      }
7703
850
  }
7704
4.89k
  ctxt->valid = 0;
7705
4.89k
    }
7706
7707
    /*
7708
     * [ WFC: Parsed Entity ]
7709
     * An entity reference must not contain the name of an
7710
     * unparsed entity
7711
     */
7712
169k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7713
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7714
0
     "Entity reference to unparsed entity %s\n", name);
7715
0
    }
7716
7717
    /*
7718
     * [ WFC: No External Entity References ]
7719
     * Attribute values cannot contain direct or indirect
7720
     * entity references to external entities.
7721
     */
7722
169k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7723
169k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7724
16
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7725
16
       "Attribute references external entity '%s'\n", name);
7726
16
    }
7727
    /*
7728
     * [ WFC: No < in Attribute Values ]
7729
     * The replacement text of any entity referred to directly or
7730
     * indirectly in an attribute value (other than "&lt;") must
7731
     * not contain a <.
7732
     */
7733
169k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7734
169k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7735
53.6k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7736
2.06k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7737
24
                ent->flags |= XML_ENT_CONTAINS_LT;
7738
2.06k
            ent->flags |= XML_ENT_CHECKED_LT;
7739
2.06k
        }
7740
53.6k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7741
26
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7742
26
                    "'<' in entity '%s' is not allowed in attributes "
7743
26
                    "values\n", name);
7744
53.6k
    }
7745
7746
    /*
7747
     * Internal check, no parameter entities here ...
7748
     */
7749
115k
    else {
7750
115k
  switch (ent->etype) {
7751
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7752
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7753
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7754
0
       "Attempt to reference the parameter entity '%s'\n",
7755
0
            name);
7756
0
      break;
7757
115k
      default:
7758
115k
      break;
7759
115k
  }
7760
115k
    }
7761
7762
    /*
7763
     * [ WFC: No Recursion ]
7764
     * A parsed entity must not contain a recursive reference
7765
     * to itself, either directly or indirectly.
7766
     * Done somewhere else
7767
     */
7768
173k
    return(ent);
7769
173k
}
7770
7771
/**
7772
 * xmlParseStringEntityRef:
7773
 * @ctxt:  an XML parser context
7774
 * @str:  a pointer to an index in the string
7775
 *
7776
 * parse ENTITY reference declarations, but this version parses it from
7777
 * a string value.
7778
 *
7779
 * [68] EntityRef ::= '&' Name ';'
7780
 *
7781
 * [ WFC: Entity Declared ]
7782
 * In a document without any DTD, a document with only an internal DTD
7783
 * subset which contains no parameter entity references, or a document
7784
 * with "standalone='yes'", the Name given in the entity reference
7785
 * must match that in an entity declaration, except that well-formed
7786
 * documents need not declare any of the following entities: amp, lt,
7787
 * gt, apos, quot.  The declaration of a parameter entity must precede
7788
 * any reference to it.  Similarly, the declaration of a general entity
7789
 * must precede any reference to it which appears in a default value in an
7790
 * attribute-list declaration. Note that if entities are declared in the
7791
 * external subset or in external parameter entities, a non-validating
7792
 * processor is not obligated to read and process their declarations;
7793
 * for such documents, the rule that an entity must be declared is a
7794
 * well-formedness constraint only if standalone='yes'.
7795
 *
7796
 * [ WFC: Parsed Entity ]
7797
 * An entity reference must not contain the name of an unparsed entity
7798
 *
7799
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7800
 * is updated to the current location in the string.
7801
 */
7802
static xmlEntityPtr
7803
13.5M
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7804
13.5M
    xmlChar *name;
7805
13.5M
    const xmlChar *ptr;
7806
13.5M
    xmlChar cur;
7807
13.5M
    xmlEntityPtr ent = NULL;
7808
7809
13.5M
    if ((str == NULL) || (*str == NULL))
7810
0
        return(NULL);
7811
13.5M
    ptr = *str;
7812
13.5M
    cur = *ptr;
7813
13.5M
    if (cur != '&')
7814
0
  return(NULL);
7815
7816
13.5M
    ptr++;
7817
13.5M
    name = xmlParseStringName(ctxt, &ptr);
7818
13.5M
    if (name == NULL) {
7819
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7820
0
           "xmlParseStringEntityRef: no name\n");
7821
0
  *str = ptr;
7822
0
  return(NULL);
7823
0
    }
7824
13.5M
    if (*ptr != ';') {
7825
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7826
0
        xmlFree(name);
7827
0
  *str = ptr;
7828
0
  return(NULL);
7829
0
    }
7830
13.5M
    ptr++;
7831
7832
7833
    /*
7834
     * Predefined entities override any extra definition
7835
     */
7836
13.5M
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7837
13.5M
        ent = xmlGetPredefinedEntity(name);
7838
13.5M
        if (ent != NULL) {
7839
28
            xmlFree(name);
7840
28
            *str = ptr;
7841
28
            return(ent);
7842
28
        }
7843
13.5M
    }
7844
7845
    /*
7846
     * Ask first SAX for entity resolution, otherwise try the
7847
     * entities which may have been stored in the parser context.
7848
     */
7849
13.5M
    if (ctxt->sax != NULL) {
7850
13.5M
  if (ctxt->sax->getEntity != NULL)
7851
13.5M
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7852
13.5M
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7853
143
      ent = xmlGetPredefinedEntity(name);
7854
13.5M
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7855
2.22M
      ent = xmlSAX2GetEntity(ctxt, name);
7856
2.22M
  }
7857
13.5M
    }
7858
13.5M
    if (ctxt->instate == XML_PARSER_EOF) {
7859
0
  xmlFree(name);
7860
0
  return(NULL);
7861
0
    }
7862
7863
    /*
7864
     * [ WFC: Entity Declared ]
7865
     * In a document without any DTD, a document with only an
7866
     * internal DTD subset which contains no parameter entity
7867
     * references, or a document with "standalone='yes'", the
7868
     * Name given in the entity reference must match that in an
7869
     * entity declaration, except that well-formed documents
7870
     * need not declare any of the following entities: amp, lt,
7871
     * gt, apos, quot.
7872
     * The declaration of a parameter entity must precede any
7873
     * reference to it.
7874
     * Similarly, the declaration of a general entity must
7875
     * precede any reference to it which appears in a default
7876
     * value in an attribute-list declaration. Note that if
7877
     * entities are declared in the external subset or in
7878
     * external parameter entities, a non-validating processor
7879
     * is not obligated to read and process their declarations;
7880
     * for such documents, the rule that an entity must be
7881
     * declared is a well-formedness constraint only if
7882
     * standalone='yes'.
7883
     */
7884
13.5M
    if (ent == NULL) {
7885
2.22M
  if ((ctxt->standalone == 1) ||
7886
2.22M
      ((ctxt->hasExternalSubset == 0) &&
7887
2.22M
       (ctxt->hasPErefs == 0))) {
7888
2.22M
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7889
2.22M
         "Entity '%s' not defined\n", name);
7890
2.22M
  } else {
7891
458
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7892
458
        "Entity '%s' not defined\n",
7893
458
        name);
7894
458
  }
7895
  /* TODO ? check regressions ctxt->valid = 0; */
7896
2.22M
    }
7897
7898
    /*
7899
     * [ WFC: Parsed Entity ]
7900
     * An entity reference must not contain the name of an
7901
     * unparsed entity
7902
     */
7903
11.2M
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7904
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7905
0
     "Entity reference to unparsed entity %s\n", name);
7906
0
    }
7907
7908
    /*
7909
     * [ WFC: No External Entity References ]
7910
     * Attribute values cannot contain direct or indirect
7911
     * entity references to external entities.
7912
     */
7913
11.2M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7914
11.2M
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7915
3
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7916
3
   "Attribute references external entity '%s'\n", name);
7917
3
    }
7918
    /*
7919
     * [ WFC: No < in Attribute Values ]
7920
     * The replacement text of any entity referred to directly or
7921
     * indirectly in an attribute value (other than "&lt;") must
7922
     * not contain a <.
7923
     */
7924
11.2M
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7925
11.2M
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7926
11.2M
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7927
1.25k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7928
7
                ent->flags |= XML_ENT_CONTAINS_LT;
7929
1.25k
            ent->flags |= XML_ENT_CHECKED_LT;
7930
1.25k
        }
7931
11.2M
        if (ent->flags & XML_ENT_CONTAINS_LT)
7932
26
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7933
26
                    "'<' in entity '%s' is not allowed in attributes "
7934
26
                    "values\n", name);
7935
11.2M
    }
7936
7937
    /*
7938
     * Internal check, no parameter entities here ...
7939
     */
7940
327
    else {
7941
327
  switch (ent->etype) {
7942
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7943
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7944
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7945
0
       "Attempt to reference the parameter entity '%s'\n",
7946
0
          name);
7947
0
      break;
7948
327
      default:
7949
327
      break;
7950
327
  }
7951
327
    }
7952
7953
    /*
7954
     * [ WFC: No Recursion ]
7955
     * A parsed entity must not contain a recursive reference
7956
     * to itself, either directly or indirectly.
7957
     * Done somewhere else
7958
     */
7959
7960
13.5M
    xmlFree(name);
7961
13.5M
    *str = ptr;
7962
13.5M
    return(ent);
7963
13.5M
}
7964
7965
/**
7966
 * xmlParsePEReference:
7967
 * @ctxt:  an XML parser context
7968
 *
7969
 * DEPRECATED: Internal function, don't use.
7970
 *
7971
 * Parse a parameter entity reference. Always consumes '%'.
7972
 *
7973
 * The entity content is handled directly by pushing its content as
7974
 * a new input stream.
7975
 *
7976
 * [69] PEReference ::= '%' Name ';'
7977
 *
7978
 * [ WFC: No Recursion ]
7979
 * A parsed entity must not contain a recursive
7980
 * reference to itself, either directly or indirectly.
7981
 *
7982
 * [ WFC: Entity Declared ]
7983
 * In a document without any DTD, a document with only an internal DTD
7984
 * subset which contains no parameter entity references, or a document
7985
 * with "standalone='yes'", ...  ... The declaration of a parameter
7986
 * entity must precede any reference to it...
7987
 *
7988
 * [ VC: Entity Declared ]
7989
 * In a document with an external subset or external parameter entities
7990
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7991
 * must precede any reference to it...
7992
 *
7993
 * [ WFC: In DTD ]
7994
 * Parameter-entity references may only appear in the DTD.
7995
 * NOTE: misleading but this is handled.
7996
 */
7997
void
7998
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7999
1.53M
{
8000
1.53M
    const xmlChar *name;
8001
1.53M
    xmlEntityPtr entity = NULL;
8002
1.53M
    xmlParserInputPtr input;
8003
8004
1.53M
    if (RAW != '%')
8005
0
        return;
8006
1.53M
    NEXT;
8007
1.53M
    name = xmlParseName(ctxt);
8008
1.53M
    if (name == NULL) {
8009
343
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8010
343
  return;
8011
343
    }
8012
1.53M
    if (xmlParserDebugEntities)
8013
0
  xmlGenericError(xmlGenericErrorContext,
8014
0
    "PEReference: %s\n", name);
8015
1.53M
    if (RAW != ';') {
8016
184
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8017
184
        return;
8018
184
    }
8019
8020
1.53M
    NEXT;
8021
8022
    /*
8023
     * Request the entity from SAX
8024
     */
8025
1.53M
    if ((ctxt->sax != NULL) &&
8026
1.53M
  (ctxt->sax->getParameterEntity != NULL))
8027
1.53M
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8028
1.53M
    if (ctxt->instate == XML_PARSER_EOF)
8029
0
  return;
8030
1.53M
    if (entity == NULL) {
8031
  /*
8032
   * [ WFC: Entity Declared ]
8033
   * In a document without any DTD, a document with only an
8034
   * internal DTD subset which contains no parameter entity
8035
   * references, or a document with "standalone='yes'", ...
8036
   * ... The declaration of a parameter entity must precede
8037
   * any reference to it...
8038
   */
8039
18.6k
  if ((ctxt->standalone == 1) ||
8040
18.6k
      ((ctxt->hasExternalSubset == 0) &&
8041
18.6k
       (ctxt->hasPErefs == 0))) {
8042
58
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8043
58
            "PEReference: %%%s; not found\n",
8044
58
            name);
8045
18.5k
  } else {
8046
      /*
8047
       * [ VC: Entity Declared ]
8048
       * In a document with an external subset or external
8049
       * parameter entities with "standalone='no'", ...
8050
       * ... The declaration of a parameter entity must
8051
       * precede any reference to it...
8052
       */
8053
18.5k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8054
385
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8055
385
                                 "PEReference: %%%s; not found\n",
8056
385
                                 name, NULL);
8057
385
            } else
8058
18.1k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8059
18.1k
                              "PEReference: %%%s; not found\n",
8060
18.1k
                              name, NULL);
8061
18.5k
            ctxt->valid = 0;
8062
18.5k
  }
8063
1.51M
    } else {
8064
  /*
8065
   * Internal checking in case the entity quest barfed
8066
   */
8067
1.51M
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8068
1.51M
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8069
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8070
0
      "Internal: %%%s; is not a parameter entity\n",
8071
0
        name, NULL);
8072
1.51M
  } else {
8073
1.51M
            xmlChar start[4];
8074
1.51M
            xmlCharEncoding enc;
8075
1.51M
            unsigned long parentConsumed;
8076
1.51M
            xmlEntityPtr oldEnt;
8077
8078
1.51M
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8079
1.51M
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
8080
1.51M
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8081
1.51M
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8082
1.51M
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8083
1.51M
    (ctxt->replaceEntities == 0) &&
8084
1.51M
    (ctxt->validate == 0))
8085
6
    return;
8086
8087
1.51M
            if (entity->flags & XML_ENT_EXPANDING) {
8088
6
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8089
6
                xmlHaltParser(ctxt);
8090
6
                return;
8091
6
            }
8092
8093
            /* Must be computed from old input before pushing new input. */
8094
1.51M
            parentConsumed = ctxt->input->parentConsumed;
8095
1.51M
            oldEnt = ctxt->input->entity;
8096
1.51M
            if ((oldEnt == NULL) ||
8097
1.51M
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8098
1.02M
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8099
559k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8100
559k
                xmlSaturatedAddSizeT(&parentConsumed,
8101
559k
                                     ctxt->input->cur - ctxt->input->base);
8102
559k
            }
8103
8104
1.51M
      input = xmlNewEntityInputStream(ctxt, entity);
8105
1.51M
      if (xmlPushInput(ctxt, input) < 0) {
8106
25
                xmlFreeInputStream(input);
8107
25
    return;
8108
25
            }
8109
8110
1.51M
            entity->flags |= XML_ENT_EXPANDING;
8111
8112
1.51M
            input->parentConsumed = parentConsumed;
8113
8114
1.51M
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8115
                /*
8116
                 * Get the 4 first bytes and decode the charset
8117
                 * if enc != XML_CHAR_ENCODING_NONE
8118
                 * plug some encoding conversion routines.
8119
                 * Note that, since we may have some non-UTF8
8120
                 * encoding (like UTF16, bug 135229), the 'length'
8121
                 * is not known, but we can calculate based upon
8122
                 * the amount of data in the buffer.
8123
                 */
8124
524
                GROW
8125
524
                if (ctxt->instate == XML_PARSER_EOF)
8126
0
                    return;
8127
524
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
8128
524
                    start[0] = RAW;
8129
524
                    start[1] = NXT(1);
8130
524
                    start[2] = NXT(2);
8131
524
                    start[3] = NXT(3);
8132
524
                    enc = xmlDetectCharEncoding(start, 4);
8133
524
                    if (enc != XML_CHAR_ENCODING_NONE) {
8134
0
                        xmlSwitchEncoding(ctxt, enc);
8135
0
                    }
8136
524
                }
8137
8138
524
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8139
524
                    (IS_BLANK_CH(NXT(5)))) {
8140
0
                    xmlParseTextDecl(ctxt);
8141
0
                }
8142
524
            }
8143
1.51M
  }
8144
1.51M
    }
8145
1.53M
    ctxt->hasPErefs = 1;
8146
1.53M
}
8147
8148
/**
8149
 * xmlLoadEntityContent:
8150
 * @ctxt:  an XML parser context
8151
 * @entity: an unloaded system entity
8152
 *
8153
 * Load the original content of the given system entity from the
8154
 * ExternalID/SystemID given. This is to be used for Included in Literal
8155
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
8156
 *
8157
 * Returns 0 in case of success and -1 in case of failure
8158
 */
8159
static int
8160
6
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8161
6
    xmlParserInputPtr input;
8162
6
    xmlBufferPtr buf;
8163
6
    int l, c;
8164
6
    int count = 0;
8165
8166
6
    if ((ctxt == NULL) || (entity == NULL) ||
8167
6
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8168
6
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8169
6
  (entity->content != NULL)) {
8170
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8171
0
              "xmlLoadEntityContent parameter error");
8172
0
        return(-1);
8173
0
    }
8174
8175
6
    if (xmlParserDebugEntities)
8176
0
  xmlGenericError(xmlGenericErrorContext,
8177
0
    "Reading %s entity content input\n", entity->name);
8178
8179
6
    buf = xmlBufferCreate();
8180
6
    if (buf == NULL) {
8181
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8182
0
              "xmlLoadEntityContent parameter error");
8183
0
        return(-1);
8184
0
    }
8185
6
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
8186
8187
6
    input = xmlNewEntityInputStream(ctxt, entity);
8188
6
    if (input == NULL) {
8189
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8190
0
              "xmlLoadEntityContent input error");
8191
0
  xmlBufferFree(buf);
8192
0
        return(-1);
8193
0
    }
8194
8195
    /*
8196
     * Push the entity as the current input, read char by char
8197
     * saving to the buffer until the end of the entity or an error
8198
     */
8199
6
    if (xmlPushInput(ctxt, input) < 0) {
8200
0
        xmlBufferFree(buf);
8201
0
  xmlFreeInputStream(input);
8202
0
  return(-1);
8203
0
    }
8204
8205
6
    GROW;
8206
6
    c = CUR_CHAR(l);
8207
258
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8208
258
           (IS_CHAR(c))) {
8209
252
        xmlBufferAdd(buf, ctxt->input->cur, l);
8210
252
  if (count++ > XML_PARSER_CHUNK_SIZE) {
8211
0
      count = 0;
8212
0
      GROW;
8213
0
            if (ctxt->instate == XML_PARSER_EOF) {
8214
0
                xmlBufferFree(buf);
8215
0
                return(-1);
8216
0
            }
8217
0
  }
8218
252
  NEXTL(l);
8219
252
  c = CUR_CHAR(l);
8220
252
  if (c == 0) {
8221
6
      count = 0;
8222
6
      GROW;
8223
6
            if (ctxt->instate == XML_PARSER_EOF) {
8224
0
                xmlBufferFree(buf);
8225
0
                return(-1);
8226
0
            }
8227
6
      c = CUR_CHAR(l);
8228
6
  }
8229
252
    }
8230
8231
6
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8232
6
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
8233
6
        xmlPopInput(ctxt);
8234
6
    } else if (!IS_CHAR(c)) {
8235
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8236
0
                          "xmlLoadEntityContent: invalid char value %d\n",
8237
0
                    c);
8238
0
  xmlBufferFree(buf);
8239
0
  return(-1);
8240
0
    }
8241
6
    entity->content = buf->content;
8242
6
    entity->length = buf->use;
8243
6
    buf->content = NULL;
8244
6
    xmlBufferFree(buf);
8245
8246
6
    return(0);
8247
6
}
8248
8249
/**
8250
 * xmlParseStringPEReference:
8251
 * @ctxt:  an XML parser context
8252
 * @str:  a pointer to an index in the string
8253
 *
8254
 * parse PEReference declarations
8255
 *
8256
 * [69] PEReference ::= '%' Name ';'
8257
 *
8258
 * [ WFC: No Recursion ]
8259
 * A parsed entity must not contain a recursive
8260
 * reference to itself, either directly or indirectly.
8261
 *
8262
 * [ WFC: Entity Declared ]
8263
 * In a document without any DTD, a document with only an internal DTD
8264
 * subset which contains no parameter entity references, or a document
8265
 * with "standalone='yes'", ...  ... The declaration of a parameter
8266
 * entity must precede any reference to it...
8267
 *
8268
 * [ VC: Entity Declared ]
8269
 * In a document with an external subset or external parameter entities
8270
 * with "standalone='no'", ...  ... The declaration of a parameter entity
8271
 * must precede any reference to it...
8272
 *
8273
 * [ WFC: In DTD ]
8274
 * Parameter-entity references may only appear in the DTD.
8275
 * NOTE: misleading but this is handled.
8276
 *
8277
 * Returns the xmlEntityPtr if found, or NULL otherwise.
8278
 *         str is updated to the current value of the index
8279
 */
8280
static xmlEntityPtr
8281
207k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8282
207k
    const xmlChar *ptr;
8283
207k
    xmlChar cur;
8284
207k
    xmlChar *name;
8285
207k
    xmlEntityPtr entity = NULL;
8286
8287
207k
    if ((str == NULL) || (*str == NULL)) return(NULL);
8288
207k
    ptr = *str;
8289
207k
    cur = *ptr;
8290
207k
    if (cur != '%')
8291
0
        return(NULL);
8292
207k
    ptr++;
8293
207k
    name = xmlParseStringName(ctxt, &ptr);
8294
207k
    if (name == NULL) {
8295
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296
0
           "xmlParseStringPEReference: no name\n");
8297
0
  *str = ptr;
8298
0
  return(NULL);
8299
0
    }
8300
207k
    cur = *ptr;
8301
207k
    if (cur != ';') {
8302
0
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8303
0
  xmlFree(name);
8304
0
  *str = ptr;
8305
0
  return(NULL);
8306
0
    }
8307
207k
    ptr++;
8308
8309
    /*
8310
     * Request the entity from SAX
8311
     */
8312
207k
    if ((ctxt->sax != NULL) &&
8313
207k
  (ctxt->sax->getParameterEntity != NULL))
8314
207k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8315
207k
    if (ctxt->instate == XML_PARSER_EOF) {
8316
0
  xmlFree(name);
8317
0
  *str = ptr;
8318
0
  return(NULL);
8319
0
    }
8320
207k
    if (entity == NULL) {
8321
  /*
8322
   * [ WFC: Entity Declared ]
8323
   * In a document without any DTD, a document with only an
8324
   * internal DTD subset which contains no parameter entity
8325
   * references, or a document with "standalone='yes'", ...
8326
   * ... The declaration of a parameter entity must precede
8327
   * any reference to it...
8328
   */
8329
5.23k
  if ((ctxt->standalone == 1) ||
8330
5.23k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8331
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8332
0
     "PEReference: %%%s; not found\n", name);
8333
5.23k
  } else {
8334
      /*
8335
       * [ VC: Entity Declared ]
8336
       * In a document with an external subset or external
8337
       * parameter entities with "standalone='no'", ...
8338
       * ... The declaration of a parameter entity must
8339
       * precede any reference to it...
8340
       */
8341
5.23k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8342
5.23k
        "PEReference: %%%s; not found\n",
8343
5.23k
        name, NULL);
8344
5.23k
      ctxt->valid = 0;
8345
5.23k
  }
8346
201k
    } else {
8347
  /*
8348
   * Internal checking in case the entity quest barfed
8349
   */
8350
201k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8351
201k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8352
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8353
0
        "%%%s; is not a parameter entity\n",
8354
0
        name, NULL);
8355
0
  }
8356
201k
    }
8357
207k
    ctxt->hasPErefs = 1;
8358
207k
    xmlFree(name);
8359
207k
    *str = ptr;
8360
207k
    return(entity);
8361
207k
}
8362
8363
/**
8364
 * xmlParseDocTypeDecl:
8365
 * @ctxt:  an XML parser context
8366
 *
8367
 * DEPRECATED: Internal function, don't use.
8368
 *
8369
 * parse a DOCTYPE declaration
8370
 *
8371
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8372
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8373
 *
8374
 * [ VC: Root Element Type ]
8375
 * The Name in the document type declaration must match the element
8376
 * type of the root element.
8377
 */
8378
8379
void
8380
21.4k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8381
21.4k
    const xmlChar *name = NULL;
8382
21.4k
    xmlChar *ExternalID = NULL;
8383
21.4k
    xmlChar *URI = NULL;
8384
8385
    /*
8386
     * We know that '<!DOCTYPE' has been detected.
8387
     */
8388
21.4k
    SKIP(9);
8389
8390
21.4k
    SKIP_BLANKS;
8391
8392
    /*
8393
     * Parse the DOCTYPE name.
8394
     */
8395
21.4k
    name = xmlParseName(ctxt);
8396
21.4k
    if (name == NULL) {
8397
95
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8398
95
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8399
95
    }
8400
21.4k
    ctxt->intSubName = name;
8401
8402
21.4k
    SKIP_BLANKS;
8403
8404
    /*
8405
     * Check for SystemID and ExternalID
8406
     */
8407
21.4k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8408
8409
21.4k
    if ((URI != NULL) || (ExternalID != NULL)) {
8410
11.0k
        ctxt->hasExternalSubset = 1;
8411
11.0k
    }
8412
21.4k
    ctxt->extSubURI = URI;
8413
21.4k
    ctxt->extSubSystem = ExternalID;
8414
8415
21.4k
    SKIP_BLANKS;
8416
8417
    /*
8418
     * Create and update the internal subset.
8419
     */
8420
21.4k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8421
21.4k
  (!ctxt->disableSAX))
8422
20.1k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8423
21.4k
    if (ctxt->instate == XML_PARSER_EOF)
8424
0
  return;
8425
8426
    /*
8427
     * Are there any internal subset declarations ?
8428
     * they are handled separately in xmlParseInternalSubset()
8429
     */
8430
21.4k
    if (RAW == '[')
8431
17.5k
  return;
8432
8433
    /*
8434
     * We should be at the end of the DOCTYPE declaration.
8435
     */
8436
3.92k
    if (RAW != '>') {
8437
1.57k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8438
1.57k
    }
8439
3.92k
    NEXT;
8440
3.92k
}
8441
8442
/**
8443
 * xmlParseInternalSubset:
8444
 * @ctxt:  an XML parser context
8445
 *
8446
 * parse the internal subset declaration
8447
 *
8448
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8449
 */
8450
8451
static void
8452
17.7k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8453
    /*
8454
     * Is there any DTD definition ?
8455
     */
8456
17.7k
    if (RAW == '[') {
8457
17.7k
        int baseInputNr = ctxt->inputNr;
8458
17.7k
        ctxt->instate = XML_PARSER_DTD;
8459
17.7k
        NEXT;
8460
  /*
8461
   * Parse the succession of Markup declarations and
8462
   * PEReferences.
8463
   * Subsequence (markupdecl | PEReference | S)*
8464
   */
8465
17.7k
  SKIP_BLANKS;
8466
1.24M
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8467
1.24M
               (ctxt->instate != XML_PARSER_EOF)) {
8468
8469
            /*
8470
             * Conditional sections are allowed from external entities included
8471
             * by PE References in the internal subset.
8472
             */
8473
1.22M
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8474
1.22M
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8475
0
                xmlParseConditionalSections(ctxt);
8476
1.22M
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8477
1.22M
          xmlParseMarkupDecl(ctxt);
8478
1.22M
            } else if (RAW == '%') {
8479
996
          xmlParsePEReference(ctxt);
8480
3.35k
            } else {
8481
3.35k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8482
3.35k
                        "xmlParseInternalSubset: error detected in"
8483
3.35k
                        " Markup declaration\n");
8484
3.35k
                xmlHaltParser(ctxt);
8485
3.35k
                return;
8486
3.35k
            }
8487
1.22M
      SKIP_BLANKS;
8488
1.22M
  }
8489
14.4k
  if (RAW == ']') {
8490
13.6k
      NEXT;
8491
13.6k
      SKIP_BLANKS;
8492
13.6k
  }
8493
14.4k
    }
8494
8495
    /*
8496
     * We should be at the end of the DOCTYPE declaration.
8497
     */
8498
14.4k
    if (RAW != '>') {
8499
871
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8500
871
  return;
8501
871
    }
8502
13.5k
    NEXT;
8503
13.5k
}
8504
8505
#ifdef LIBXML_SAX1_ENABLED
8506
/**
8507
 * xmlParseAttribute:
8508
 * @ctxt:  an XML parser context
8509
 * @value:  a xmlChar ** used to store the value of the attribute
8510
 *
8511
 * DEPRECATED: Internal function, don't use.
8512
 *
8513
 * parse an attribute
8514
 *
8515
 * [41] Attribute ::= Name Eq AttValue
8516
 *
8517
 * [ WFC: No External Entity References ]
8518
 * Attribute values cannot contain direct or indirect entity references
8519
 * to external entities.
8520
 *
8521
 * [ WFC: No < in Attribute Values ]
8522
 * The replacement text of any entity referred to directly or indirectly in
8523
 * an attribute value (other than "&lt;") must not contain a <.
8524
 *
8525
 * [ VC: Attribute Value Type ]
8526
 * The attribute must have been declared; the value must be of the type
8527
 * declared for it.
8528
 *
8529
 * [25] Eq ::= S? '=' S?
8530
 *
8531
 * With namespace:
8532
 *
8533
 * [NS 11] Attribute ::= QName Eq AttValue
8534
 *
8535
 * Also the case QName == xmlns:??? is handled independently as a namespace
8536
 * definition.
8537
 *
8538
 * Returns the attribute name, and the value in *value.
8539
 */
8540
8541
const xmlChar *
8542
215k
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8543
215k
    const xmlChar *name;
8544
215k
    xmlChar *val;
8545
8546
215k
    *value = NULL;
8547
215k
    GROW;
8548
215k
    name = xmlParseName(ctxt);
8549
215k
    if (name == NULL) {
8550
4.87k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8551
4.87k
                 "error parsing attribute name\n");
8552
4.87k
        return(NULL);
8553
4.87k
    }
8554
8555
    /*
8556
     * read the value
8557
     */
8558
210k
    SKIP_BLANKS;
8559
210k
    if (RAW == '=') {
8560
206k
        NEXT;
8561
206k
  SKIP_BLANKS;
8562
206k
  val = xmlParseAttValue(ctxt);
8563
206k
  ctxt->instate = XML_PARSER_CONTENT;
8564
206k
    } else {
8565
4.16k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8566
4.16k
         "Specification mandates value for attribute %s\n", name);
8567
4.16k
  return(name);
8568
4.16k
    }
8569
8570
    /*
8571
     * Check that xml:lang conforms to the specification
8572
     * No more registered as an error, just generate a warning now
8573
     * since this was deprecated in XML second edition
8574
     */
8575
206k
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8576
0
  if (!xmlCheckLanguageID(val)) {
8577
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8578
0
              "Malformed value for xml:lang : %s\n",
8579
0
        val, NULL);
8580
0
  }
8581
0
    }
8582
8583
    /*
8584
     * Check that xml:space conforms to the specification
8585
     */
8586
206k
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8587
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8588
0
      *(ctxt->space) = 0;
8589
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8590
0
      *(ctxt->space) = 1;
8591
0
  else {
8592
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8593
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8594
0
                                 val, NULL);
8595
0
  }
8596
0
    }
8597
8598
206k
    *value = val;
8599
206k
    return(name);
8600
210k
}
8601
8602
/**
8603
 * xmlParseStartTag:
8604
 * @ctxt:  an XML parser context
8605
 *
8606
 * DEPRECATED: Internal function, don't use.
8607
 *
8608
 * Parse a start tag. Always consumes '<'.
8609
 *
8610
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8611
 *
8612
 * [ WFC: Unique Att Spec ]
8613
 * No attribute name may appear more than once in the same start-tag or
8614
 * empty-element tag.
8615
 *
8616
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8617
 *
8618
 * [ WFC: Unique Att Spec ]
8619
 * No attribute name may appear more than once in the same start-tag or
8620
 * empty-element tag.
8621
 *
8622
 * With namespace:
8623
 *
8624
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8625
 *
8626
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8627
 *
8628
 * Returns the element name parsed
8629
 */
8630
8631
const xmlChar *
8632
665k
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8633
665k
    const xmlChar *name;
8634
665k
    const xmlChar *attname;
8635
665k
    xmlChar *attvalue;
8636
665k
    const xmlChar **atts = ctxt->atts;
8637
665k
    int nbatts = 0;
8638
665k
    int maxatts = ctxt->maxatts;
8639
665k
    int i;
8640
8641
665k
    if (RAW != '<') return(NULL);
8642
665k
    NEXT1;
8643
8644
665k
    name = xmlParseName(ctxt);
8645
665k
    if (name == NULL) {
8646
2.43k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
2.43k
       "xmlParseStartTag: invalid element name\n");
8648
2.43k
        return(NULL);
8649
2.43k
    }
8650
8651
    /*
8652
     * Now parse the attributes, it ends up with the ending
8653
     *
8654
     * (S Attribute)* S?
8655
     */
8656
663k
    SKIP_BLANKS;
8657
663k
    GROW;
8658
8659
710k
    while (((RAW != '>') &&
8660
710k
     ((RAW != '/') || (NXT(1) != '>')) &&
8661
710k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8662
215k
  attname = xmlParseAttribute(ctxt, &attvalue);
8663
215k
        if (attname == NULL) {
8664
4.87k
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8665
4.87k
         "xmlParseStartTag: problem parsing attributes\n");
8666
4.87k
      break;
8667
4.87k
  }
8668
210k
        if (attvalue != NULL) {
8669
      /*
8670
       * [ WFC: Unique Att Spec ]
8671
       * No attribute name may appear more than once in the same
8672
       * start-tag or empty-element tag.
8673
       */
8674
250k
      for (i = 0; i < nbatts;i += 2) {
8675
43.6k
          if (xmlStrEqual(atts[i], attname)) {
8676
43
        xmlErrAttributeDup(ctxt, NULL, attname);
8677
43
        xmlFree(attvalue);
8678
43
        goto failed;
8679
43
    }
8680
43.6k
      }
8681
      /*
8682
       * Add the pair to atts
8683
       */
8684
206k
      if (atts == NULL) {
8685
4.94k
          maxatts = 22; /* allow for 10 attrs by default */
8686
4.94k
          atts = (const xmlChar **)
8687
4.94k
           xmlMalloc(maxatts * sizeof(xmlChar *));
8688
4.94k
    if (atts == NULL) {
8689
0
        xmlErrMemory(ctxt, NULL);
8690
0
        if (attvalue != NULL)
8691
0
      xmlFree(attvalue);
8692
0
        goto failed;
8693
0
    }
8694
4.94k
    ctxt->atts = atts;
8695
4.94k
    ctxt->maxatts = maxatts;
8696
201k
      } else if (nbatts + 4 > maxatts) {
8697
36
          const xmlChar **n;
8698
8699
36
          maxatts *= 2;
8700
36
          n = (const xmlChar **) xmlRealloc((void *) atts,
8701
36
               maxatts * sizeof(const xmlChar *));
8702
36
    if (n == NULL) {
8703
0
        xmlErrMemory(ctxt, NULL);
8704
0
        if (attvalue != NULL)
8705
0
      xmlFree(attvalue);
8706
0
        goto failed;
8707
0
    }
8708
36
    atts = n;
8709
36
    ctxt->atts = atts;
8710
36
    ctxt->maxatts = maxatts;
8711
36
      }
8712
206k
      atts[nbatts++] = attname;
8713
206k
      atts[nbatts++] = attvalue;
8714
206k
      atts[nbatts] = NULL;
8715
206k
      atts[nbatts + 1] = NULL;
8716
206k
  } else {
8717
4.47k
      if (attvalue != NULL)
8718
0
    xmlFree(attvalue);
8719
4.47k
  }
8720
8721
210k
failed:
8722
8723
210k
  GROW
8724
210k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8725
164k
      break;
8726
46.7k
  if (SKIP_BLANKS == 0) {
8727
6.17k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8728
6.17k
         "attributes construct error\n");
8729
6.17k
  }
8730
46.7k
  SHRINK;
8731
46.7k
        GROW;
8732
46.7k
    }
8733
8734
    /*
8735
     * SAX: Start of Element !
8736
     */
8737
663k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8738
663k
  (!ctxt->disableSAX)) {
8739
618k
  if (nbatts > 0)
8740
156k
      ctxt->sax->startElement(ctxt->userData, name, atts);
8741
461k
  else
8742
461k
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8743
618k
    }
8744
8745
663k
    if (atts != NULL) {
8746
        /* Free only the content strings */
8747
836k
        for (i = 1;i < nbatts;i+=2)
8748
206k
      if (atts[i] != NULL)
8749
206k
         xmlFree((xmlChar *) atts[i]);
8750
629k
    }
8751
663k
    return(name);
8752
663k
}
8753
8754
/**
8755
 * xmlParseEndTag1:
8756
 * @ctxt:  an XML parser context
8757
 * @line:  line of the start tag
8758
 * @nsNr:  number of namespaces on the start tag
8759
 *
8760
 * Parse an end tag. Always consumes '</'.
8761
 *
8762
 * [42] ETag ::= '</' Name S? '>'
8763
 *
8764
 * With namespace
8765
 *
8766
 * [NS 9] ETag ::= '</' QName S? '>'
8767
 */
8768
8769
static void
8770
637k
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8771
637k
    const xmlChar *name;
8772
8773
637k
    GROW;
8774
637k
    if ((RAW != '<') || (NXT(1) != '/')) {
8775
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8776
0
           "xmlParseEndTag: '</' not found\n");
8777
0
  return;
8778
0
    }
8779
637k
    SKIP(2);
8780
8781
637k
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8782
8783
    /*
8784
     * We should definitely be at the ending "S? '>'" part
8785
     */
8786
637k
    GROW;
8787
637k
    SKIP_BLANKS;
8788
637k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8789
1.07k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8790
1.07k
    } else
8791
636k
  NEXT1;
8792
8793
    /*
8794
     * [ WFC: Element Type Match ]
8795
     * The Name in an element's end-tag must match the element type in the
8796
     * start-tag.
8797
     *
8798
     */
8799
637k
    if (name != (xmlChar*)1) {
8800
4.52k
        if (name == NULL) name = BAD_CAST "unparsable";
8801
4.52k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8802
4.52k
         "Opening and ending tag mismatch: %s line %d and %s\n",
8803
4.52k
                    ctxt->name, line, name);
8804
4.52k
    }
8805
8806
    /*
8807
     * SAX: End of Tag
8808
     */
8809
637k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8810
637k
  (!ctxt->disableSAX))
8811
592k
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8812
8813
637k
    namePop(ctxt);
8814
637k
    spacePop(ctxt);
8815
637k
    return;
8816
637k
}
8817
8818
/**
8819
 * xmlParseEndTag:
8820
 * @ctxt:  an XML parser context
8821
 *
8822
 * DEPRECATED: Internal function, don't use.
8823
 *
8824
 * parse an end of tag
8825
 *
8826
 * [42] ETag ::= '</' Name S? '>'
8827
 *
8828
 * With namespace
8829
 *
8830
 * [NS 9] ETag ::= '</' QName S? '>'
8831
 */
8832
8833
void
8834
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8835
0
    xmlParseEndTag1(ctxt, 0);
8836
0
}
8837
#endif /* LIBXML_SAX1_ENABLED */
8838
8839
/************************************************************************
8840
 *                  *
8841
 *          SAX 2 specific operations       *
8842
 *                  *
8843
 ************************************************************************/
8844
8845
/*
8846
 * xmlGetNamespace:
8847
 * @ctxt:  an XML parser context
8848
 * @prefix:  the prefix to lookup
8849
 *
8850
 * Lookup the namespace name for the @prefix (which ca be NULL)
8851
 * The prefix must come from the @ctxt->dict dictionary
8852
 *
8853
 * Returns the namespace name or NULL if not bound
8854
 */
8855
static const xmlChar *
8856
887k
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8857
887k
    int i;
8858
8859
887k
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8860
787k
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8861
69.4k
        if (ctxt->nsTab[i] == prefix) {
8862
64.5k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8863
13
          return(NULL);
8864
64.5k
      return(ctxt->nsTab[i + 1]);
8865
64.5k
  }
8866
717k
    return(NULL);
8867
782k
}
8868
8869
/**
8870
 * xmlParseQName:
8871
 * @ctxt:  an XML parser context
8872
 * @prefix:  pointer to store the prefix part
8873
 *
8874
 * parse an XML Namespace QName
8875
 *
8876
 * [6]  QName  ::= (Prefix ':')? LocalPart
8877
 * [7]  Prefix  ::= NCName
8878
 * [8]  LocalPart  ::= NCName
8879
 *
8880
 * Returns the Name parsed or NULL
8881
 */
8882
8883
static const xmlChar *
8884
1.11M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8885
1.11M
    const xmlChar *l, *p;
8886
8887
1.11M
    GROW;
8888
8889
1.11M
    l = xmlParseNCName(ctxt);
8890
1.11M
    if (l == NULL) {
8891
8.38k
        if (CUR == ':') {
8892
158
      l = xmlParseName(ctxt);
8893
158
      if (l != NULL) {
8894
158
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8895
158
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8896
158
    *prefix = NULL;
8897
158
    return(l);
8898
158
      }
8899
158
  }
8900
8.22k
        return(NULL);
8901
8.38k
    }
8902
1.11M
    if (CUR == ':') {
8903
84.4k
        NEXT;
8904
84.4k
  p = l;
8905
84.4k
  l = xmlParseNCName(ctxt);
8906
84.4k
  if (l == NULL) {
8907
1.60k
      xmlChar *tmp;
8908
8909
1.60k
            if (ctxt->instate == XML_PARSER_EOF)
8910
0
                return(NULL);
8911
1.60k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8912
1.60k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8913
1.60k
      l = xmlParseNmtoken(ctxt);
8914
1.60k
      if (l == NULL) {
8915
1.43k
                if (ctxt->instate == XML_PARSER_EOF)
8916
0
                    return(NULL);
8917
1.43k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8918
1.43k
            } else {
8919
171
    tmp = xmlBuildQName(l, p, NULL, 0);
8920
171
    xmlFree((char *)l);
8921
171
      }
8922
1.60k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8923
1.60k
      if (tmp != NULL) xmlFree(tmp);
8924
1.60k
      *prefix = NULL;
8925
1.60k
      return(p);
8926
1.60k
  }
8927
82.8k
  if (CUR == ':') {
8928
811
      xmlChar *tmp;
8929
8930
811
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8931
811
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8932
811
      NEXT;
8933
811
      tmp = (xmlChar *) xmlParseName(ctxt);
8934
811
      if (tmp != NULL) {
8935
732
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8936
732
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8937
732
    if (tmp != NULL) xmlFree(tmp);
8938
732
    *prefix = p;
8939
732
    return(l);
8940
732
      }
8941
79
            if (ctxt->instate == XML_PARSER_EOF)
8942
0
                return(NULL);
8943
79
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8944
79
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8945
79
      if (tmp != NULL) xmlFree(tmp);
8946
79
      *prefix = p;
8947
79
      return(l);
8948
79
  }
8949
82.0k
  *prefix = p;
8950
82.0k
    } else
8951
1.02M
        *prefix = NULL;
8952
1.10M
    return(l);
8953
1.11M
}
8954
8955
/**
8956
 * xmlParseQNameAndCompare:
8957
 * @ctxt:  an XML parser context
8958
 * @name:  the localname
8959
 * @prefix:  the prefix, if any.
8960
 *
8961
 * parse an XML name and compares for match
8962
 * (specialized for endtag parsing)
8963
 *
8964
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8965
 * and the name for mismatch
8966
 */
8967
8968
static const xmlChar *
8969
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8970
22.3k
                        xmlChar const *prefix) {
8971
22.3k
    const xmlChar *cmp;
8972
22.3k
    const xmlChar *in;
8973
22.3k
    const xmlChar *ret;
8974
22.3k
    const xmlChar *prefix2;
8975
8976
22.3k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8977
8978
22.3k
    GROW;
8979
22.3k
    in = ctxt->input->cur;
8980
8981
22.3k
    cmp = prefix;
8982
84.3k
    while (*in != 0 && *in == *cmp) {
8983
62.0k
  ++in;
8984
62.0k
  ++cmp;
8985
62.0k
    }
8986
22.3k
    if ((*cmp == 0) && (*in == ':')) {
8987
21.8k
        in++;
8988
21.8k
  cmp = name;
8989
193k
  while (*in != 0 && *in == *cmp) {
8990
171k
      ++in;
8991
171k
      ++cmp;
8992
171k
  }
8993
21.8k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8994
      /* success */
8995
19.2k
            ctxt->input->col += in - ctxt->input->cur;
8996
19.2k
      ctxt->input->cur = in;
8997
19.2k
      return((const xmlChar*) 1);
8998
19.2k
  }
8999
21.8k
    }
9000
    /*
9001
     * all strings coms from the dictionary, equality can be done directly
9002
     */
9003
3.03k
    ret = xmlParseQName (ctxt, &prefix2);
9004
3.03k
    if ((ret == name) && (prefix == prefix2))
9005
37
  return((const xmlChar*) 1);
9006
2.99k
    return ret;
9007
3.03k
}
9008
9009
/**
9010
 * xmlParseAttValueInternal:
9011
 * @ctxt:  an XML parser context
9012
 * @len:  attribute len result
9013
 * @alloc:  whether the attribute was reallocated as a new string
9014
 * @normalize:  if 1 then further non-CDATA normalization must be done
9015
 *
9016
 * parse a value for an attribute.
9017
 * NOTE: if no normalization is needed, the routine will return pointers
9018
 *       directly from the data buffer.
9019
 *
9020
 * 3.3.3 Attribute-Value Normalization:
9021
 * Before the value of an attribute is passed to the application or
9022
 * checked for validity, the XML processor must normalize it as follows:
9023
 * - a character reference is processed by appending the referenced
9024
 *   character to the attribute value
9025
 * - an entity reference is processed by recursively processing the
9026
 *   replacement text of the entity
9027
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9028
 *   appending #x20 to the normalized value, except that only a single
9029
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
9030
 *   parsed entity or the literal entity value of an internal parsed entity
9031
 * - other characters are processed by appending them to the normalized value
9032
 * If the declared value is not CDATA, then the XML processor must further
9033
 * process the normalized attribute value by discarding any leading and
9034
 * trailing space (#x20) characters, and by replacing sequences of space
9035
 * (#x20) characters by a single space (#x20) character.
9036
 * All attributes for which no declaration has been read should be treated
9037
 * by a non-validating parser as if declared CDATA.
9038
 *
9039
 * Returns the AttValue parsed or NULL. The value has to be freed by the
9040
 *     caller if it was copied, this can be detected by val[*len] == 0.
9041
 */
9042
9043
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9044
591
    const xmlChar *oldbase = ctxt->input->base;\
9045
591
    GROW;\
9046
591
    if (ctxt->instate == XML_PARSER_EOF)\
9047
591
        return(NULL);\
9048
591
    if (oldbase != ctxt->input->base) {\
9049
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
9050
0
        start = start + delta;\
9051
0
        in = in + delta;\
9052
0
    }\
9053
591
    end = ctxt->input->end;
9054
9055
static xmlChar *
9056
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9057
                         int normalize)
9058
647k
{
9059
647k
    xmlChar limit = 0;
9060
647k
    const xmlChar *in = NULL, *start, *end, *last;
9061
647k
    xmlChar *ret = NULL;
9062
647k
    int line, col;
9063
647k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9064
18.2k
                    XML_MAX_HUGE_LENGTH :
9065
647k
                    XML_MAX_TEXT_LENGTH;
9066
9067
647k
    GROW;
9068
647k
    in = (xmlChar *) CUR_PTR;
9069
647k
    line = ctxt->input->line;
9070
647k
    col = ctxt->input->col;
9071
647k
    if (*in != '"' && *in != '\'') {
9072
918
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9073
918
        return (NULL);
9074
918
    }
9075
646k
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9076
9077
    /*
9078
     * try to handle in this routine the most common case where no
9079
     * allocation of a new string is required and where content is
9080
     * pure ASCII.
9081
     */
9082
646k
    limit = *in++;
9083
646k
    col++;
9084
646k
    end = ctxt->input->end;
9085
646k
    start = in;
9086
646k
    if (in >= end) {
9087
16
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9088
16
    }
9089
646k
    if (normalize) {
9090
        /*
9091
   * Skip any leading spaces
9092
   */
9093
83.9k
  while ((in < end) && (*in != limit) &&
9094
83.9k
         ((*in == 0x20) || (*in == 0x9) ||
9095
83.9k
          (*in == 0xA) || (*in == 0xD))) {
9096
296
      if (*in == 0xA) {
9097
72
          line++; col = 1;
9098
224
      } else {
9099
224
          col++;
9100
224
      }
9101
296
      in++;
9102
296
      start = in;
9103
296
      if (in >= end) {
9104
0
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9105
0
                if ((in - start) > maxLength) {
9106
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9107
0
                                   "AttValue length too long\n");
9108
0
                    return(NULL);
9109
0
                }
9110
0
      }
9111
296
  }
9112
881k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9113
881k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9114
798k
      col++;
9115
798k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
9116
798k
      if (in >= end) {
9117
4
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9118
4
                if ((in - start) > maxLength) {
9119
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9120
0
                                   "AttValue length too long\n");
9121
0
                    return(NULL);
9122
0
                }
9123
4
      }
9124
798k
  }
9125
83.6k
  last = in;
9126
  /*
9127
   * skip the trailing blanks
9128
   */
9129
83.7k
  while ((last[-1] == 0x20) && (last > start)) last--;
9130
84.2k
  while ((in < end) && (*in != limit) &&
9131
84.2k
         ((*in == 0x20) || (*in == 0x9) ||
9132
936
          (*in == 0xA) || (*in == 0xD))) {
9133
591
      if (*in == 0xA) {
9134
120
          line++, col = 1;
9135
471
      } else {
9136
471
          col++;
9137
471
      }
9138
591
      in++;
9139
591
      if (in >= end) {
9140
13
    const xmlChar *oldbase = ctxt->input->base;
9141
13
    GROW;
9142
13
                if (ctxt->instate == XML_PARSER_EOF)
9143
0
                    return(NULL);
9144
13
    if (oldbase != ctxt->input->base) {
9145
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
9146
0
        start = start + delta;
9147
0
        in = in + delta;
9148
0
        last = last + delta;
9149
0
    }
9150
13
    end = ctxt->input->end;
9151
13
                if ((in - start) > maxLength) {
9152
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9153
0
                                   "AttValue length too long\n");
9154
0
                    return(NULL);
9155
0
                }
9156
13
      }
9157
591
  }
9158
83.6k
        if ((in - start) > maxLength) {
9159
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9160
0
                           "AttValue length too long\n");
9161
0
            return(NULL);
9162
0
        }
9163
83.6k
  if (*in != limit) goto need_complex;
9164
563k
    } else {
9165
9.28M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9166
9.28M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9167
8.72M
      in++;
9168
8.72M
      col++;
9169
8.72M
      if (in >= end) {
9170
571
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9171
571
                if ((in - start) > maxLength) {
9172
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9173
0
                                   "AttValue length too long\n");
9174
0
                    return(NULL);
9175
0
                }
9176
571
      }
9177
8.72M
  }
9178
563k
  last = in;
9179
563k
        if ((in - start) > maxLength) {
9180
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9181
0
                           "AttValue length too long\n");
9182
0
            return(NULL);
9183
0
        }
9184
563k
  if (*in != limit) goto need_complex;
9185
563k
    }
9186
633k
    in++;
9187
633k
    col++;
9188
633k
    if (len != NULL) {
9189
348k
        if (alloc) *alloc = 0;
9190
348k
        *len = last - start;
9191
348k
        ret = (xmlChar *) start;
9192
348k
    } else {
9193
285k
        if (alloc) *alloc = 1;
9194
285k
        ret = xmlStrndup(start, last - start);
9195
285k
    }
9196
633k
    CUR_PTR = in;
9197
633k
    ctxt->input->line = line;
9198
633k
    ctxt->input->col = col;
9199
633k
    return ret;
9200
12.7k
need_complex:
9201
12.7k
    if (alloc) *alloc = 1;
9202
12.7k
    return xmlParseAttValueComplex(ctxt, len, normalize);
9203
646k
}
9204
9205
/**
9206
 * xmlParseAttribute2:
9207
 * @ctxt:  an XML parser context
9208
 * @pref:  the element prefix
9209
 * @elem:  the element name
9210
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
9211
 * @value:  a xmlChar ** used to store the value of the attribute
9212
 * @len:  an int * to save the length of the attribute
9213
 * @alloc:  an int * to indicate if the attribute was allocated
9214
 *
9215
 * parse an attribute in the new SAX2 framework.
9216
 *
9217
 * Returns the attribute name, and the value in *value, .
9218
 */
9219
9220
static const xmlChar *
9221
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9222
                   const xmlChar * pref, const xmlChar * elem,
9223
                   const xmlChar ** prefix, xmlChar ** value,
9224
                   int *len, int *alloc)
9225
362k
{
9226
362k
    const xmlChar *name;
9227
362k
    xmlChar *val, *internal_val = NULL;
9228
362k
    int normalize = 0;
9229
9230
362k
    *value = NULL;
9231
362k
    GROW;
9232
362k
    name = xmlParseQName(ctxt, prefix);
9233
362k
    if (name == NULL) {
9234
4.55k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9235
4.55k
                       "error parsing attribute name\n");
9236
4.55k
        return (NULL);
9237
4.55k
    }
9238
9239
    /*
9240
     * get the type if needed
9241
     */
9242
357k
    if (ctxt->attsSpecial != NULL) {
9243
183k
        int type;
9244
9245
183k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9246
183k
                                                 pref, elem, *prefix, name);
9247
183k
        if (type != 0)
9248
83.6k
            normalize = 1;
9249
183k
    }
9250
9251
    /*
9252
     * read the value
9253
     */
9254
357k
    SKIP_BLANKS;
9255
357k
    if (RAW == '=') {
9256
356k
        NEXT;
9257
356k
        SKIP_BLANKS;
9258
356k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9259
356k
        if (val == NULL)
9260
505
            return (NULL);
9261
355k
  if (normalize) {
9262
      /*
9263
       * Sometimes a second normalisation pass for spaces is needed
9264
       * but that only happens if charrefs or entities references
9265
       * have been used in the attribute value, i.e. the attribute
9266
       * value have been extracted in an allocated string already.
9267
       */
9268
83.6k
      if (*alloc) {
9269
362
          const xmlChar *val2;
9270
9271
362
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9272
362
    if ((val2 != NULL) && (val2 != val)) {
9273
38
        xmlFree(val);
9274
38
        val = (xmlChar *) val2;
9275
38
    }
9276
362
      }
9277
83.6k
  }
9278
355k
        ctxt->instate = XML_PARSER_CONTENT;
9279
355k
    } else {
9280
1.80k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9281
1.80k
                          "Specification mandates value for attribute %s\n",
9282
1.80k
                          name);
9283
1.80k
        return (name);
9284
1.80k
    }
9285
9286
355k
    if (*prefix == ctxt->str_xml) {
9287
        /*
9288
         * Check that xml:lang conforms to the specification
9289
         * No more registered as an error, just generate a warning now
9290
         * since this was deprecated in XML second edition
9291
         */
9292
304
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9293
0
            internal_val = xmlStrndup(val, *len);
9294
0
            if (!xmlCheckLanguageID(internal_val)) {
9295
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9296
0
                              "Malformed value for xml:lang : %s\n",
9297
0
                              internal_val, NULL);
9298
0
            }
9299
0
        }
9300
9301
        /*
9302
         * Check that xml:space conforms to the specification
9303
         */
9304
304
        if (xmlStrEqual(name, BAD_CAST "space")) {
9305
6
            internal_val = xmlStrndup(val, *len);
9306
6
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
9307
0
                *(ctxt->space) = 0;
9308
6
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9309
6
                *(ctxt->space) = 1;
9310
0
            else {
9311
0
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9312
0
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9313
0
                              internal_val, NULL);
9314
0
            }
9315
6
        }
9316
304
        if (internal_val) {
9317
6
            xmlFree(internal_val);
9318
6
        }
9319
304
    }
9320
9321
355k
    *value = val;
9322
355k
    return (name);
9323
357k
}
9324
/**
9325
 * xmlParseStartTag2:
9326
 * @ctxt:  an XML parser context
9327
 *
9328
 * Parse a start tag. Always consumes '<'.
9329
 *
9330
 * This routine is called when running SAX2 parsing
9331
 *
9332
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9333
 *
9334
 * [ WFC: Unique Att Spec ]
9335
 * No attribute name may appear more than once in the same start-tag or
9336
 * empty-element tag.
9337
 *
9338
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9339
 *
9340
 * [ WFC: Unique Att Spec ]
9341
 * No attribute name may appear more than once in the same start-tag or
9342
 * empty-element tag.
9343
 *
9344
 * With namespace:
9345
 *
9346
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9347
 *
9348
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9349
 *
9350
 * Returns the element name parsed
9351
 */
9352
9353
static const xmlChar *
9354
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9355
753k
                  const xmlChar **URI, int *tlen) {
9356
753k
    const xmlChar *localname;
9357
753k
    const xmlChar *prefix;
9358
753k
    const xmlChar *attname;
9359
753k
    const xmlChar *aprefix;
9360
753k
    const xmlChar *nsname;
9361
753k
    xmlChar *attvalue;
9362
753k
    const xmlChar **atts = ctxt->atts;
9363
753k
    int maxatts = ctxt->maxatts;
9364
753k
    int nratts, nbatts, nbdef, inputid;
9365
753k
    int i, j, nbNs, attval;
9366
753k
    unsigned long cur;
9367
753k
    int nsNr = ctxt->nsNr;
9368
9369
753k
    if (RAW != '<') return(NULL);
9370
753k
    NEXT1;
9371
9372
    /*
9373
     * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9374
     *       point since the attribute values may be stored as pointers to
9375
     *       the buffer and calling SHRINK would destroy them !
9376
     *       The Shrinking is only possible once the full set of attribute
9377
     *       callbacks have been done.
9378
     */
9379
753k
    SHRINK;
9380
753k
    cur = ctxt->input->cur - ctxt->input->base;
9381
753k
    inputid = ctxt->input->id;
9382
753k
    nbatts = 0;
9383
753k
    nratts = 0;
9384
753k
    nbdef = 0;
9385
753k
    nbNs = 0;
9386
753k
    attval = 0;
9387
    /* Forget any namespaces added during an earlier parse of this element. */
9388
753k
    ctxt->nsNr = nsNr;
9389
9390
753k
    localname = xmlParseQName(ctxt, &prefix);
9391
753k
    if (localname == NULL) {
9392
3.64k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9393
3.64k
           "StartTag: invalid element name\n");
9394
3.64k
        return(NULL);
9395
3.64k
    }
9396
750k
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9397
9398
    /*
9399
     * Now parse the attributes, it ends up with the ending
9400
     *
9401
     * (S Attribute)* S?
9402
     */
9403
750k
    SKIP_BLANKS;
9404
750k
    GROW;
9405
9406
852k
    while (((RAW != '>') &&
9407
852k
     ((RAW != '/') || (NXT(1) != '>')) &&
9408
852k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9409
362k
  int len = -1, alloc = 0;
9410
9411
362k
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9412
362k
                               &aprefix, &attvalue, &len, &alloc);
9413
362k
        if (attname == NULL) {
9414
5.05k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9415
5.05k
           "xmlParseStartTag: problem parsing attributes\n");
9416
5.05k
      break;
9417
5.05k
  }
9418
357k
        if (attvalue == NULL)
9419
1.80k
            goto next_attr;
9420
355k
  if (len < 0) len = xmlStrlen(attvalue);
9421
9422
355k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9423
818
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9424
818
            xmlURIPtr uri;
9425
9426
818
            if (URL == NULL) {
9427
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9428
0
                if ((attvalue != NULL) && (alloc != 0))
9429
0
                    xmlFree(attvalue);
9430
0
                localname = NULL;
9431
0
                goto done;
9432
0
            }
9433
818
            if (*URL != 0) {
9434
807
                uri = xmlParseURI((const char *) URL);
9435
807
                if (uri == NULL) {
9436
98
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9437
98
                             "xmlns: '%s' is not a valid URI\n",
9438
98
                                       URL, NULL, NULL);
9439
709
                } else {
9440
709
                    if (uri->scheme == NULL) {
9441
44
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9442
44
                                  "xmlns: URI %s is not absolute\n",
9443
44
                                  URL, NULL, NULL);
9444
44
                    }
9445
709
                    xmlFreeURI(uri);
9446
709
                }
9447
807
                if (URL == ctxt->str_xml_ns) {
9448
0
                    if (attname != ctxt->str_xml) {
9449
0
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9450
0
                     "xml namespace URI cannot be the default namespace\n",
9451
0
                                 NULL, NULL, NULL);
9452
0
                    }
9453
0
                    goto next_attr;
9454
0
                }
9455
807
                if ((len == 29) &&
9456
807
                    (xmlStrEqual(URL,
9457
18
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9458
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9459
0
                         "reuse of the xmlns namespace name is forbidden\n",
9460
0
                             NULL, NULL, NULL);
9461
0
                    goto next_attr;
9462
0
                }
9463
807
            }
9464
            /*
9465
             * check that it's not a defined namespace
9466
             */
9467
2.03k
            for (j = 1;j <= nbNs;j++)
9468
1.22k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9469
10
                    break;
9470
818
            if (j <= nbNs)
9471
10
                xmlErrAttributeDup(ctxt, NULL, attname);
9472
808
            else
9473
808
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9474
9475
354k
        } else if (aprefix == ctxt->str_xmlns) {
9476
3.82k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9477
3.82k
            xmlURIPtr uri;
9478
9479
3.82k
            if (attname == ctxt->str_xml) {
9480
14
                if (URL != ctxt->str_xml_ns) {
9481
14
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9482
14
                             "xml namespace prefix mapped to wrong URI\n",
9483
14
                             NULL, NULL, NULL);
9484
14
                }
9485
                /*
9486
                 * Do not keep a namespace definition node
9487
                 */
9488
14
                goto next_attr;
9489
14
            }
9490
3.81k
            if (URL == ctxt->str_xml_ns) {
9491
0
                if (attname != ctxt->str_xml) {
9492
0
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9493
0
                             "xml namespace URI mapped to wrong prefix\n",
9494
0
                             NULL, NULL, NULL);
9495
0
                }
9496
0
                goto next_attr;
9497
0
            }
9498
3.81k
            if (attname == ctxt->str_xmlns) {
9499
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9500
0
                         "redefinition of the xmlns prefix is forbidden\n",
9501
0
                         NULL, NULL, NULL);
9502
0
                goto next_attr;
9503
0
            }
9504
3.81k
            if ((len == 29) &&
9505
3.81k
                (xmlStrEqual(URL,
9506
124
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9507
0
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9508
0
                         "reuse of the xmlns namespace name is forbidden\n",
9509
0
                         NULL, NULL, NULL);
9510
0
                goto next_attr;
9511
0
            }
9512
3.81k
            if ((URL == NULL) || (URL[0] == 0)) {
9513
28
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9514
28
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9515
28
                              attname, NULL, NULL);
9516
28
                goto next_attr;
9517
3.78k
            } else {
9518
3.78k
                uri = xmlParseURI((const char *) URL);
9519
3.78k
                if (uri == NULL) {
9520
443
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9521
443
                         "xmlns:%s: '%s' is not a valid URI\n",
9522
443
                                       attname, URL, NULL);
9523
3.34k
                } else {
9524
3.34k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9525
33
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9526
33
                                  "xmlns:%s: URI %s is not absolute\n",
9527
33
                                  attname, URL, NULL);
9528
33
                    }
9529
3.34k
                    xmlFreeURI(uri);
9530
3.34k
                }
9531
3.78k
            }
9532
9533
            /*
9534
             * check that it's not a defined namespace
9535
             */
9536
5.57k
            for (j = 1;j <= nbNs;j++)
9537
1.81k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9538
24
                    break;
9539
3.78k
            if (j <= nbNs)
9540
24
                xmlErrAttributeDup(ctxt, aprefix, attname);
9541
3.76k
            else
9542
3.76k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9543
9544
351k
        } else {
9545
            /*
9546
             * Add the pair to atts
9547
             */
9548
351k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9549
6.83k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9550
0
                    goto next_attr;
9551
0
                }
9552
6.83k
                maxatts = ctxt->maxatts;
9553
6.83k
                atts = ctxt->atts;
9554
6.83k
            }
9555
351k
            ctxt->attallocs[nratts++] = alloc;
9556
351k
            atts[nbatts++] = attname;
9557
351k
            atts[nbatts++] = aprefix;
9558
            /*
9559
             * The namespace URI field is used temporarily to point at the
9560
             * base of the current input buffer for non-alloced attributes.
9561
             * When the input buffer is reallocated, all the pointers become
9562
             * invalid, but they can be reconstructed later.
9563
             */
9564
351k
            if (alloc)
9565
6.69k
                atts[nbatts++] = NULL;
9566
344k
            else
9567
344k
                atts[nbatts++] = ctxt->input->base;
9568
351k
            atts[nbatts++] = attvalue;
9569
351k
            attvalue += len;
9570
351k
            atts[nbatts++] = attvalue;
9571
            /*
9572
             * tag if some deallocation is needed
9573
             */
9574
351k
            if (alloc != 0) attval = 1;
9575
351k
            attvalue = NULL; /* moved into atts */
9576
351k
        }
9577
9578
357k
next_attr:
9579
357k
        if ((attvalue != NULL) && (alloc != 0)) {
9580
608
            xmlFree(attvalue);
9581
608
            attvalue = NULL;
9582
608
        }
9583
9584
357k
  GROW
9585
357k
        if (ctxt->instate == XML_PARSER_EOF)
9586
0
            break;
9587
357k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9588
251k
      break;
9589
106k
  if (SKIP_BLANKS == 0) {
9590
3.70k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9591
3.70k
         "attributes construct error\n");
9592
3.70k
      break;
9593
3.70k
  }
9594
102k
        GROW;
9595
102k
    }
9596
9597
750k
    if (ctxt->input->id != inputid) {
9598
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9599
0
                    "Unexpected change of input\n");
9600
0
        localname = NULL;
9601
0
        goto done;
9602
0
    }
9603
9604
    /* Reconstruct attribute value pointers. */
9605
1.10M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9606
351k
        if (atts[i+2] != NULL) {
9607
            /*
9608
             * Arithmetic on dangling pointers is technically undefined
9609
             * behavior, but well...
9610
             */
9611
344k
            const xmlChar *old = atts[i+2];
9612
344k
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9613
344k
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9614
344k
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9615
344k
        }
9616
351k
    }
9617
9618
    /*
9619
     * The attributes defaulting
9620
     */
9621
750k
    if (ctxt->attsDefault != NULL) {
9622
523k
        xmlDefAttrsPtr defaults;
9623
9624
523k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9625
523k
  if (defaults != NULL) {
9626
211k
      for (i = 0;i < defaults->nbAttrs;i++) {
9627
153k
          attname = defaults->values[5 * i];
9628
153k
    aprefix = defaults->values[5 * i + 1];
9629
9630
                /*
9631
     * special work for namespaces defaulted defs
9632
     */
9633
153k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9634
        /*
9635
         * check that it's not a defined namespace
9636
         */
9637
54
        for (j = 1;j <= nbNs;j++)
9638
38
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9639
20
          break;
9640
36
              if (j <= nbNs) continue;
9641
9642
16
        nsname = xmlGetNamespace(ctxt, NULL);
9643
16
        if (nsname != defaults->values[5 * i + 2]) {
9644
16
      if (nsPush(ctxt, NULL,
9645
16
                 defaults->values[5 * i + 2]) > 0)
9646
16
          nbNs++;
9647
16
        }
9648
153k
    } else if (aprefix == ctxt->str_xmlns) {
9649
        /*
9650
         * check that it's not a defined namespace
9651
         */
9652
794
        for (j = 1;j <= nbNs;j++)
9653
725
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9654
712
          break;
9655
781
              if (j <= nbNs) continue;
9656
9657
69
        nsname = xmlGetNamespace(ctxt, attname);
9658
69
        if (nsname != defaults->values[5 * i + 2]) {
9659
59
      if (nsPush(ctxt, attname,
9660
59
                 defaults->values[5 * i + 2]) > 0)
9661
59
          nbNs++;
9662
59
        }
9663
152k
    } else {
9664
        /*
9665
         * check that it's not a defined attribute
9666
         */
9667
428k
        for (j = 0;j < nbatts;j+=5) {
9668
276k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9669
91
          break;
9670
276k
        }
9671
152k
        if (j < nbatts) continue;
9672
9673
152k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9674
113
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9675
0
                            localname = NULL;
9676
0
                            goto done;
9677
0
      }
9678
113
      maxatts = ctxt->maxatts;
9679
113
      atts = ctxt->atts;
9680
113
        }
9681
152k
        atts[nbatts++] = attname;
9682
152k
        atts[nbatts++] = aprefix;
9683
152k
        if (aprefix == NULL)
9684
99.3k
      atts[nbatts++] = NULL;
9685
52.8k
        else
9686
52.8k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9687
152k
        atts[nbatts++] = defaults->values[5 * i + 2];
9688
152k
        atts[nbatts++] = defaults->values[5 * i + 3];
9689
152k
        if ((ctxt->standalone == 1) &&
9690
152k
            (defaults->values[5 * i + 4] != NULL)) {
9691
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9692
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9693
0
                                   attname, localname);
9694
0
        }
9695
152k
        nbdef++;
9696
152k
    }
9697
153k
      }
9698
58.0k
  }
9699
523k
    }
9700
9701
    /*
9702
     * The attributes checkings
9703
     */
9704
1.25M
    for (i = 0; i < nbatts;i += 5) {
9705
        /*
9706
  * The default namespace does not apply to attribute names.
9707
  */
9708
503k
  if (atts[i + 1] != NULL) {
9709
84.4k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9710
84.4k
      if (nsname == NULL) {
9711
3.09k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9712
3.09k
        "Namespace prefix %s for %s on %s is not defined\n",
9713
3.09k
        atts[i + 1], atts[i], localname);
9714
3.09k
      }
9715
84.4k
      atts[i + 2] = nsname;
9716
84.4k
  } else
9717
418k
      nsname = NULL;
9718
  /*
9719
   * [ WFC: Unique Att Spec ]
9720
   * No attribute name may appear more than once in the same
9721
   * start-tag or empty-element tag.
9722
   * As extended by the Namespace in XML REC.
9723
   */
9724
892k
        for (j = 0; j < i;j += 5) {
9725
388k
      if (atts[i] == atts[j]) {
9726
60
          if (atts[i+1] == atts[j+1]) {
9727
40
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9728
40
        break;
9729
40
    }
9730
20
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9731
6
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9732
6
           "Namespaced Attribute %s in '%s' redefined\n",
9733
6
           atts[i], nsname, NULL);
9734
6
        break;
9735
6
    }
9736
20
      }
9737
388k
  }
9738
503k
    }
9739
9740
750k
    nsname = xmlGetNamespace(ctxt, prefix);
9741
750k
    if ((prefix != NULL) && (nsname == NULL)) {
9742
9.67k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9743
9.67k
           "Namespace prefix %s on %s is not defined\n",
9744
9.67k
     prefix, localname, NULL);
9745
9.67k
    }
9746
750k
    *pref = prefix;
9747
750k
    *URI = nsname;
9748
9749
    /*
9750
     * SAX: Start of Element !
9751
     */
9752
750k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9753
750k
  (!ctxt->disableSAX)) {
9754
601k
  if (nbNs > 0)
9755
2.36k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9756
2.36k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9757
2.36k
        nbatts / 5, nbdef, atts);
9758
599k
  else
9759
599k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9760
599k
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9761
601k
    }
9762
9763
750k
done:
9764
    /*
9765
     * Free up attribute allocated strings if needed
9766
     */
9767
750k
    if (attval != 0) {
9768
15.7k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9769
9.28k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9770
6.69k
          xmlFree((xmlChar *) atts[i]);
9771
6.41k
    }
9772
9773
750k
    return(localname);
9774
750k
}
9775
9776
/**
9777
 * xmlParseEndTag2:
9778
 * @ctxt:  an XML parser context
9779
 * @line:  line of the start tag
9780
 * @nsNr:  number of namespaces on the start tag
9781
 *
9782
 * Parse an end tag. Always consumes '</'.
9783
 *
9784
 * [42] ETag ::= '</' Name S? '>'
9785
 *
9786
 * With namespace
9787
 *
9788
 * [NS 9] ETag ::= '</' QName S? '>'
9789
 */
9790
9791
static void
9792
638k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9793
638k
    const xmlChar *name;
9794
9795
638k
    GROW;
9796
638k
    if ((RAW != '<') || (NXT(1) != '/')) {
9797
0
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9798
0
  return;
9799
0
    }
9800
638k
    SKIP(2);
9801
9802
638k
    if (tag->prefix == NULL)
9803
616k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9804
22.3k
    else
9805
22.3k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9806
9807
    /*
9808
     * We should definitely be at the ending "S? '>'" part
9809
     */
9810
638k
    GROW;
9811
638k
    if (ctxt->instate == XML_PARSER_EOF)
9812
0
        return;
9813
638k
    SKIP_BLANKS;
9814
638k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9815
1.29k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9816
1.29k
    } else
9817
637k
  NEXT1;
9818
9819
    /*
9820
     * [ WFC: Element Type Match ]
9821
     * The Name in an element's end-tag must match the element type in the
9822
     * start-tag.
9823
     *
9824
     */
9825
638k
    if (name != (xmlChar*)1) {
9826
6.35k
        if (name == NULL) name = BAD_CAST "unparsable";
9827
6.35k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9828
6.35k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9829
6.35k
                    ctxt->name, tag->line, name);
9830
6.35k
    }
9831
9832
    /*
9833
     * SAX: End of Tag
9834
     */
9835
638k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9836
638k
  (!ctxt->disableSAX))
9837
494k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9838
494k
                                tag->URI);
9839
9840
638k
    spacePop(ctxt);
9841
638k
    if (tag->nsNr != 0)
9842
984
  nsPop(ctxt, tag->nsNr);
9843
638k
}
9844
9845
/**
9846
 * xmlParseCDSect:
9847
 * @ctxt:  an XML parser context
9848
 *
9849
 * DEPRECATED: Internal function, don't use.
9850
 *
9851
 * Parse escaped pure raw content. Always consumes '<!['.
9852
 *
9853
 * [18] CDSect ::= CDStart CData CDEnd
9854
 *
9855
 * [19] CDStart ::= '<![CDATA['
9856
 *
9857
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9858
 *
9859
 * [21] CDEnd ::= ']]>'
9860
 */
9861
void
9862
1.57k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9863
1.57k
    xmlChar *buf = NULL;
9864
1.57k
    int len = 0;
9865
1.57k
    int size = XML_PARSER_BUFFER_SIZE;
9866
1.57k
    int r, rl;
9867
1.57k
    int s, sl;
9868
1.57k
    int cur, l;
9869
1.57k
    int count = 0;
9870
1.57k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9871
158
                    XML_MAX_HUGE_LENGTH :
9872
1.57k
                    XML_MAX_TEXT_LENGTH;
9873
9874
1.57k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9875
0
        return;
9876
1.57k
    SKIP(3);
9877
9878
1.57k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9879
0
        return;
9880
1.57k
    SKIP(6);
9881
9882
1.57k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9883
1.57k
    r = CUR_CHAR(rl);
9884
1.57k
    if (!IS_CHAR(r)) {
9885
11
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9886
11
        goto out;
9887
11
    }
9888
1.56k
    NEXTL(rl);
9889
1.56k
    s = CUR_CHAR(sl);
9890
1.56k
    if (!IS_CHAR(s)) {
9891
10
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9892
10
        goto out;
9893
10
    }
9894
1.55k
    NEXTL(sl);
9895
1.55k
    cur = CUR_CHAR(l);
9896
1.55k
    buf = (xmlChar *) xmlMallocAtomic(size);
9897
1.55k
    if (buf == NULL) {
9898
0
  xmlErrMemory(ctxt, NULL);
9899
0
        goto out;
9900
0
    }
9901
869k
    while (IS_CHAR(cur) &&
9902
869k
           ((r != ']') || (s != ']') || (cur != '>'))) {
9903
867k
  if (len + 5 >= size) {
9904
2.95k
      xmlChar *tmp;
9905
9906
2.95k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9907
2.95k
      if (tmp == NULL) {
9908
0
    xmlErrMemory(ctxt, NULL);
9909
0
                goto out;
9910
0
      }
9911
2.95k
      buf = tmp;
9912
2.95k
      size *= 2;
9913
2.95k
  }
9914
867k
  COPY_BUF(rl,buf,len,r);
9915
867k
  r = s;
9916
867k
  rl = sl;
9917
867k
  s = cur;
9918
867k
  sl = l;
9919
867k
  count++;
9920
867k
  if (count > 50) {
9921
16.2k
      SHRINK;
9922
16.2k
      GROW;
9923
16.2k
            if (ctxt->instate == XML_PARSER_EOF) {
9924
0
                goto out;
9925
0
            }
9926
16.2k
      count = 0;
9927
16.2k
  }
9928
867k
  NEXTL(l);
9929
867k
  cur = CUR_CHAR(l);
9930
867k
        if (len > maxLength) {
9931
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9932
0
                           "CData section too big found\n");
9933
0
            goto out;
9934
0
        }
9935
867k
    }
9936
1.55k
    buf[len] = 0;
9937
1.55k
    if (cur != '>') {
9938
327
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9939
327
                       "CData section not finished\n%.50s\n", buf);
9940
327
        goto out;
9941
327
    }
9942
1.22k
    NEXTL(l);
9943
9944
    /*
9945
     * OK the buffer is to be consumed as cdata.
9946
     */
9947
1.22k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9948
713
  if (ctxt->sax->cdataBlock != NULL)
9949
557
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9950
156
  else if (ctxt->sax->characters != NULL)
9951
156
      ctxt->sax->characters(ctxt->userData, buf, len);
9952
713
    }
9953
9954
1.57k
out:
9955
1.57k
    if (ctxt->instate != XML_PARSER_EOF)
9956
1.57k
        ctxt->instate = XML_PARSER_CONTENT;
9957
1.57k
    xmlFree(buf);
9958
1.57k
}
9959
9960
/**
9961
 * xmlParseContentInternal:
9962
 * @ctxt:  an XML parser context
9963
 *
9964
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9965
 * unexpected EOF to the caller.
9966
 */
9967
9968
static void
9969
12.0k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9970
12.0k
    int nameNr = ctxt->nameNr;
9971
9972
12.0k
    GROW;
9973
2.41M
    while ((RAW != 0) &&
9974
2.41M
     (ctxt->instate != XML_PARSER_EOF)) {
9975
2.40M
  const xmlChar *cur = ctxt->input->cur;
9976
9977
  /*
9978
   * First case : a Processing Instruction.
9979
   */
9980
2.40M
  if ((*cur == '<') && (cur[1] == '?')) {
9981
4.54k
      xmlParsePI(ctxt);
9982
4.54k
  }
9983
9984
  /*
9985
   * Second case : a CDSection
9986
   */
9987
  /* 2.6.0 test was *cur not RAW */
9988
2.40M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9989
1.57k
      xmlParseCDSect(ctxt);
9990
1.57k
  }
9991
9992
  /*
9993
   * Third case :  a comment
9994
   */
9995
2.39M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9996
2.39M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9997
28.4k
      xmlParseComment(ctxt);
9998
28.4k
      ctxt->instate = XML_PARSER_CONTENT;
9999
28.4k
  }
10000
10001
  /*
10002
   * Fourth case :  a sub-element.
10003
   */
10004
2.37M
  else if (*cur == '<') {
10005
1.18M
            if (NXT(1) == '/') {
10006
574k
                if (ctxt->nameNr <= nameNr)
10007
3.72k
                    break;
10008
571k
          xmlParseElementEnd(ctxt);
10009
614k
            } else {
10010
614k
          xmlParseElementStart(ctxt);
10011
614k
            }
10012
1.18M
  }
10013
10014
  /*
10015
   * Fifth case : a reference. If if has not been resolved,
10016
   *    parsing returns it's Name, create the node
10017
   */
10018
10019
1.18M
  else if (*cur == '&') {
10020
80.3k
      xmlParseReference(ctxt);
10021
80.3k
  }
10022
10023
  /*
10024
   * Last case, text. Note that References are handled directly.
10025
   */
10026
1.10M
  else {
10027
1.10M
      xmlParseCharData(ctxt, 0);
10028
1.10M
  }
10029
10030
2.40M
  GROW;
10031
2.40M
  SHRINK;
10032
2.40M
    }
10033
12.0k
}
10034
10035
/**
10036
 * xmlParseContent:
10037
 * @ctxt:  an XML parser context
10038
 *
10039
 * Parse a content sequence. Stops at EOF or '</'.
10040
 *
10041
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10042
 */
10043
10044
void
10045
5.67k
xmlParseContent(xmlParserCtxtPtr ctxt) {
10046
5.67k
    int nameNr = ctxt->nameNr;
10047
10048
5.67k
    xmlParseContentInternal(ctxt);
10049
10050
5.67k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
10051
9
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10052
9
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10053
9
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10054
9
                "Premature end of data in tag %s line %d\n",
10055
9
    name, line, NULL);
10056
9
    }
10057
5.67k
}
10058
10059
/**
10060
 * xmlParseElement:
10061
 * @ctxt:  an XML parser context
10062
 *
10063
 * DEPRECATED: Internal function, don't use.
10064
 *
10065
 * parse an XML element
10066
 *
10067
 * [39] element ::= EmptyElemTag | STag content ETag
10068
 *
10069
 * [ WFC: Element Type Match ]
10070
 * The Name in an element's end-tag must match the element type in the
10071
 * start-tag.
10072
 *
10073
 */
10074
10075
void
10076
10.4k
xmlParseElement(xmlParserCtxtPtr ctxt) {
10077
10.4k
    if (xmlParseElementStart(ctxt) != 0)
10078
4.10k
        return;
10079
10080
6.37k
    xmlParseContentInternal(ctxt);
10081
6.37k
    if (ctxt->instate == XML_PARSER_EOF)
10082
23
  return;
10083
10084
6.35k
    if (CUR == 0) {
10085
2.65k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10086
2.65k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10087
2.65k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10088
2.65k
                "Premature end of data in tag %s line %d\n",
10089
2.65k
    name, line, NULL);
10090
2.65k
        return;
10091
2.65k
    }
10092
10093
3.69k
    xmlParseElementEnd(ctxt);
10094
3.69k
}
10095
10096
/**
10097
 * xmlParseElementStart:
10098
 * @ctxt:  an XML parser context
10099
 *
10100
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
10101
 * opening tag was parsed, 1 if an empty element was parsed.
10102
 *
10103
 * Always consumes '<'.
10104
 */
10105
static int
10106
625k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10107
625k
    const xmlChar *name;
10108
625k
    const xmlChar *prefix = NULL;
10109
625k
    const xmlChar *URI = NULL;
10110
625k
    xmlParserNodeInfo node_info;
10111
625k
    int line, tlen = 0;
10112
625k
    xmlNodePtr ret;
10113
625k
    int nsNr = ctxt->nsNr;
10114
10115
625k
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10116
625k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10117
0
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10118
0
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10119
0
        xmlParserMaxDepth);
10120
0
  xmlHaltParser(ctxt);
10121
0
  return(-1);
10122
0
    }
10123
10124
    /* Capture start position */
10125
625k
    if (ctxt->record_info) {
10126
0
        node_info.begin_pos = ctxt->input->consumed +
10127
0
                          (CUR_PTR - ctxt->input->base);
10128
0
  node_info.begin_line = ctxt->input->line;
10129
0
    }
10130
10131
625k
    if (ctxt->spaceNr == 0)
10132
0
  spacePush(ctxt, -1);
10133
625k
    else if (*ctxt->space == -2)
10134
84.0k
  spacePush(ctxt, -1);
10135
541k
    else
10136
541k
  spacePush(ctxt, *ctxt->space);
10137
10138
625k
    line = ctxt->input->line;
10139
625k
#ifdef LIBXML_SAX1_ENABLED
10140
625k
    if (ctxt->sax2)
10141
348k
#endif /* LIBXML_SAX1_ENABLED */
10142
348k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
10143
276k
#ifdef LIBXML_SAX1_ENABLED
10144
276k
    else
10145
276k
  name = xmlParseStartTag(ctxt);
10146
625k
#endif /* LIBXML_SAX1_ENABLED */
10147
625k
    if (ctxt->instate == XML_PARSER_EOF)
10148
49
  return(-1);
10149
625k
    if (name == NULL) {
10150
4.50k
  spacePop(ctxt);
10151
4.50k
        return(-1);
10152
4.50k
    }
10153
620k
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
10154
620k
    ret = ctxt->node;
10155
10156
620k
#ifdef LIBXML_VALID_ENABLED
10157
    /*
10158
     * [ VC: Root Element Type ]
10159
     * The Name in the document type declaration must match the element
10160
     * type of the root element.
10161
     */
10162
620k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10163
620k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
10164
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10165
620k
#endif /* LIBXML_VALID_ENABLED */
10166
10167
    /*
10168
     * Check for an Empty Element.
10169
     */
10170
620k
    if ((RAW == '/') && (NXT(1) == '>')) {
10171
34.9k
        SKIP(2);
10172
34.9k
  if (ctxt->sax2) {
10173
30.8k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10174
30.8k
    (!ctxt->disableSAX))
10175
26.6k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10176
30.8k
#ifdef LIBXML_SAX1_ENABLED
10177
30.8k
  } else {
10178
4.09k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10179
4.09k
    (!ctxt->disableSAX))
10180
3.47k
    ctxt->sax->endElement(ctxt->userData, name);
10181
4.09k
#endif /* LIBXML_SAX1_ENABLED */
10182
4.09k
  }
10183
34.9k
  namePop(ctxt);
10184
34.9k
  spacePop(ctxt);
10185
34.9k
  if (nsNr != ctxt->nsNr)
10186
121
      nsPop(ctxt, ctxt->nsNr - nsNr);
10187
34.9k
  if ( ret != NULL && ctxt->record_info ) {
10188
0
     node_info.end_pos = ctxt->input->consumed +
10189
0
            (CUR_PTR - ctxt->input->base);
10190
0
     node_info.end_line = ctxt->input->line;
10191
0
     node_info.node = ret;
10192
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10193
0
  }
10194
34.9k
  return(1);
10195
34.9k
    }
10196
585k
    if (RAW == '>') {
10197
580k
        NEXT1;
10198
580k
    } else {
10199
5.22k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10200
5.22k
         "Couldn't find end of Start Tag %s line %d\n",
10201
5.22k
                    name, line, NULL);
10202
10203
  /*
10204
   * end of parsing of this node.
10205
   */
10206
5.22k
  nodePop(ctxt);
10207
5.22k
  namePop(ctxt);
10208
5.22k
  spacePop(ctxt);
10209
5.22k
  if (nsNr != ctxt->nsNr)
10210
230
      nsPop(ctxt, ctxt->nsNr - nsNr);
10211
10212
  /*
10213
   * Capture end position and add node
10214
   */
10215
5.22k
  if ( ret != NULL && ctxt->record_info ) {
10216
0
     node_info.end_pos = ctxt->input->consumed +
10217
0
            (CUR_PTR - ctxt->input->base);
10218
0
     node_info.end_line = ctxt->input->line;
10219
0
     node_info.node = ret;
10220
0
     xmlParserAddNodeInfo(ctxt, &node_info);
10221
0
  }
10222
5.22k
  return(-1);
10223
5.22k
    }
10224
10225
580k
    return(0);
10226
585k
}
10227
10228
/**
10229
 * xmlParseElementEnd:
10230
 * @ctxt:  an XML parser context
10231
 *
10232
 * Parse the end of an XML element. Always consumes '</'.
10233
 */
10234
static void
10235
574k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10236
574k
    xmlParserNodeInfo node_info;
10237
574k
    xmlNodePtr ret = ctxt->node;
10238
10239
574k
    if (ctxt->nameNr <= 0) {
10240
0
        if ((RAW == '<') && (NXT(1) == '/'))
10241
0
            SKIP(2);
10242
0
        return;
10243
0
    }
10244
10245
    /*
10246
     * parse the end of tag: '</' should be here.
10247
     */
10248
574k
    if (ctxt->sax2) {
10249
308k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10250
308k
  namePop(ctxt);
10251
308k
    }
10252
266k
#ifdef LIBXML_SAX1_ENABLED
10253
266k
    else
10254
266k
  xmlParseEndTag1(ctxt, 0);
10255
574k
#endif /* LIBXML_SAX1_ENABLED */
10256
10257
    /*
10258
     * Capture end position and add node
10259
     */
10260
574k
    if ( ret != NULL && ctxt->record_info ) {
10261
0
       node_info.end_pos = ctxt->input->consumed +
10262
0
                          (CUR_PTR - ctxt->input->base);
10263
0
       node_info.end_line = ctxt->input->line;
10264
0
       node_info.node = ret;
10265
0
       xmlParserAddNodeInfo(ctxt, &node_info);
10266
0
    }
10267
574k
}
10268
10269
/**
10270
 * xmlParseVersionNum:
10271
 * @ctxt:  an XML parser context
10272
 *
10273
 * DEPRECATED: Internal function, don't use.
10274
 *
10275
 * parse the XML version value.
10276
 *
10277
 * [26] VersionNum ::= '1.' [0-9]+
10278
 *
10279
 * In practice allow [0-9].[0-9]+ at that level
10280
 *
10281
 * Returns the string giving the XML version number, or NULL
10282
 */
10283
xmlChar *
10284
21.9k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10285
21.9k
    xmlChar *buf = NULL;
10286
21.9k
    int len = 0;
10287
21.9k
    int size = 10;
10288
21.9k
    xmlChar cur;
10289
10290
21.9k
    buf = (xmlChar *) xmlMallocAtomic(size);
10291
21.9k
    if (buf == NULL) {
10292
0
  xmlErrMemory(ctxt, NULL);
10293
0
  return(NULL);
10294
0
    }
10295
21.9k
    cur = CUR;
10296
21.9k
    if (!((cur >= '0') && (cur <= '9'))) {
10297
240
  xmlFree(buf);
10298
240
  return(NULL);
10299
240
    }
10300
21.6k
    buf[len++] = cur;
10301
21.6k
    NEXT;
10302
21.6k
    cur=CUR;
10303
21.6k
    if (cur != '.') {
10304
241
  xmlFree(buf);
10305
241
  return(NULL);
10306
241
    }
10307
21.4k
    buf[len++] = cur;
10308
21.4k
    NEXT;
10309
21.4k
    cur=CUR;
10310
43.0k
    while ((cur >= '0') && (cur <= '9')) {
10311
21.6k
  if (len + 1 >= size) {
10312
24
      xmlChar *tmp;
10313
10314
24
      size *= 2;
10315
24
      tmp = (xmlChar *) xmlRealloc(buf, size);
10316
24
      if (tmp == NULL) {
10317
0
          xmlFree(buf);
10318
0
    xmlErrMemory(ctxt, NULL);
10319
0
    return(NULL);
10320
0
      }
10321
24
      buf = tmp;
10322
24
  }
10323
21.6k
  buf[len++] = cur;
10324
21.6k
  NEXT;
10325
21.6k
  cur=CUR;
10326
21.6k
    }
10327
21.4k
    buf[len] = 0;
10328
21.4k
    return(buf);
10329
21.4k
}
10330
10331
/**
10332
 * xmlParseVersionInfo:
10333
 * @ctxt:  an XML parser context
10334
 *
10335
 * DEPRECATED: Internal function, don't use.
10336
 *
10337
 * parse the XML version.
10338
 *
10339
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10340
 *
10341
 * [25] Eq ::= S? '=' S?
10342
 *
10343
 * Returns the version string, e.g. "1.0"
10344
 */
10345
10346
xmlChar *
10347
25.0k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10348
25.0k
    xmlChar *version = NULL;
10349
10350
25.0k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10351
22.4k
  SKIP(7);
10352
22.4k
  SKIP_BLANKS;
10353
22.4k
  if (RAW != '=') {
10354
349
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10355
349
      return(NULL);
10356
349
        }
10357
22.1k
  NEXT;
10358
22.1k
  SKIP_BLANKS;
10359
22.1k
  if (RAW == '"') {
10360
19.4k
      NEXT;
10361
19.4k
      version = xmlParseVersionNum(ctxt);
10362
19.4k
      if (RAW != '"') {
10363
844
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10364
844
      } else
10365
18.6k
          NEXT;
10366
19.4k
  } else if (RAW == '\''){
10367
2.42k
      NEXT;
10368
2.42k
      version = xmlParseVersionNum(ctxt);
10369
2.42k
      if (RAW != '\'') {
10370
69
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10371
69
      } else
10372
2.35k
          NEXT;
10373
2.42k
  } else {
10374
220
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10375
220
  }
10376
22.1k
    }
10377
24.6k
    return(version);
10378
25.0k
}
10379
10380
/**
10381
 * xmlParseEncName:
10382
 * @ctxt:  an XML parser context
10383
 *
10384
 * DEPRECATED: Internal function, don't use.
10385
 *
10386
 * parse the XML encoding name
10387
 *
10388
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10389
 *
10390
 * Returns the encoding name value or NULL
10391
 */
10392
xmlChar *
10393
12.2k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10394
12.2k
    xmlChar *buf = NULL;
10395
12.2k
    int len = 0;
10396
12.2k
    int size = 10;
10397
12.2k
    xmlChar cur;
10398
10399
12.2k
    cur = CUR;
10400
12.2k
    if (((cur >= 'a') && (cur <= 'z')) ||
10401
12.2k
        ((cur >= 'A') && (cur <= 'Z'))) {
10402
12.2k
  buf = (xmlChar *) xmlMallocAtomic(size);
10403
12.2k
  if (buf == NULL) {
10404
0
      xmlErrMemory(ctxt, NULL);
10405
0
      return(NULL);
10406
0
  }
10407
10408
12.2k
  buf[len++] = cur;
10409
12.2k
  NEXT;
10410
12.2k
  cur = CUR;
10411
77.8k
  while (((cur >= 'a') && (cur <= 'z')) ||
10412
77.8k
         ((cur >= 'A') && (cur <= 'Z')) ||
10413
77.8k
         ((cur >= '0') && (cur <= '9')) ||
10414
77.8k
         (cur == '.') || (cur == '_') ||
10415
77.8k
         (cur == '-')) {
10416
65.6k
      if (len + 1 >= size) {
10417
3.17k
          xmlChar *tmp;
10418
10419
3.17k
    size *= 2;
10420
3.17k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10421
3.17k
    if (tmp == NULL) {
10422
0
        xmlErrMemory(ctxt, NULL);
10423
0
        xmlFree(buf);
10424
0
        return(NULL);
10425
0
    }
10426
3.17k
    buf = tmp;
10427
3.17k
      }
10428
65.6k
      buf[len++] = cur;
10429
65.6k
      NEXT;
10430
65.6k
      cur = CUR;
10431
65.6k
      if (cur == 0) {
10432
21
          SHRINK;
10433
21
    GROW;
10434
21
    cur = CUR;
10435
21
      }
10436
65.6k
        }
10437
12.2k
  buf[len] = 0;
10438
12.2k
    } else {
10439
45
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10440
45
    }
10441
12.2k
    return(buf);
10442
12.2k
}
10443
10444
/**
10445
 * xmlParseEncodingDecl:
10446
 * @ctxt:  an XML parser context
10447
 *
10448
 * DEPRECATED: Internal function, don't use.
10449
 *
10450
 * parse the XML encoding declaration
10451
 *
10452
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10453
 *
10454
 * this setups the conversion filters.
10455
 *
10456
 * Returns the encoding value or NULL
10457
 */
10458
10459
const xmlChar *
10460
21.3k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10461
21.3k
    xmlChar *encoding = NULL;
10462
10463
21.3k
    SKIP_BLANKS;
10464
21.3k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10465
12.3k
  SKIP(8);
10466
12.3k
  SKIP_BLANKS;
10467
12.3k
  if (RAW != '=') {
10468
81
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10469
81
      return(NULL);
10470
81
        }
10471
12.3k
  NEXT;
10472
12.3k
  SKIP_BLANKS;
10473
12.3k
  if (RAW == '"') {
10474
10.1k
      NEXT;
10475
10.1k
      encoding = xmlParseEncName(ctxt);
10476
10.1k
      if (RAW != '"') {
10477
195
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10478
195
    xmlFree((xmlChar *) encoding);
10479
195
    return(NULL);
10480
195
      } else
10481
9.94k
          NEXT;
10482
10.1k
  } else if (RAW == '\''){
10483
2.14k
      NEXT;
10484
2.14k
      encoding = xmlParseEncName(ctxt);
10485
2.14k
      if (RAW != '\'') {
10486
27
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10487
27
    xmlFree((xmlChar *) encoding);
10488
27
    return(NULL);
10489
27
      } else
10490
2.11k
          NEXT;
10491
2.14k
  } else {
10492
36
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10493
36
  }
10494
10495
        /*
10496
         * Non standard parsing, allowing the user to ignore encoding
10497
         */
10498
12.0k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10499
2.89k
      xmlFree((xmlChar *) encoding);
10500
2.89k
            return(NULL);
10501
2.89k
  }
10502
10503
  /*
10504
   * UTF-16 encoding switch has already taken place at this stage,
10505
   * more over the little-endian/big-endian selection is already done
10506
   */
10507
9.19k
        if ((encoding != NULL) &&
10508
9.19k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10509
9.15k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10510
      /*
10511
       * If no encoding was passed to the parser, that we are
10512
       * using UTF-16 and no decoder is present i.e. the
10513
       * document is apparently UTF-8 compatible, then raise an
10514
       * encoding mismatch fatal error
10515
       */
10516
3
      if ((ctxt->encoding == NULL) &&
10517
3
          (ctxt->input->buf != NULL) &&
10518
3
          (ctxt->input->buf->encoder == NULL)) {
10519
3
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10520
3
      "Document labelled UTF-16 but has UTF-8 content\n");
10521
3
      }
10522
3
      if (ctxt->encoding != NULL)
10523
0
    xmlFree((xmlChar *) ctxt->encoding);
10524
3
      ctxt->encoding = encoding;
10525
3
  }
10526
  /*
10527
   * UTF-8 encoding is handled natively
10528
   */
10529
9.19k
        else if ((encoding != NULL) &&
10530
9.19k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10531
9.15k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10532
6.78k
      if (ctxt->encoding != NULL)
10533
0
    xmlFree((xmlChar *) ctxt->encoding);
10534
6.78k
      ctxt->encoding = encoding;
10535
6.78k
  }
10536
2.41k
  else if (encoding != NULL) {
10537
2.37k
      xmlCharEncodingHandlerPtr handler;
10538
10539
2.37k
      if (ctxt->input->encoding != NULL)
10540
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10541
2.37k
      ctxt->input->encoding = encoding;
10542
10543
2.37k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10544
2.37k
      if (handler != NULL) {
10545
2.31k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10546
        /* failed to convert */
10547
0
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10548
0
        return(NULL);
10549
0
    }
10550
2.31k
      } else {
10551
60
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10552
60
      "Unsupported encoding %s\n", encoding);
10553
60
    return(NULL);
10554
60
      }
10555
2.37k
  }
10556
9.19k
    }
10557
18.0k
    return(encoding);
10558
21.3k
}
10559
10560
/**
10561
 * xmlParseSDDecl:
10562
 * @ctxt:  an XML parser context
10563
 *
10564
 * DEPRECATED: Internal function, don't use.
10565
 *
10566
 * parse the XML standalone declaration
10567
 *
10568
 * [32] SDDecl ::= S 'standalone' Eq
10569
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10570
 *
10571
 * [ VC: Standalone Document Declaration ]
10572
 * TODO The standalone document declaration must have the value "no"
10573
 * if any external markup declarations contain declarations of:
10574
 *  - attributes with default values, if elements to which these
10575
 *    attributes apply appear in the document without specifications
10576
 *    of values for these attributes, or
10577
 *  - entities (other than amp, lt, gt, apos, quot), if references
10578
 *    to those entities appear in the document, or
10579
 *  - attributes with values subject to normalization, where the
10580
 *    attribute appears in the document with a value which will change
10581
 *    as a result of normalization, or
10582
 *  - element types with element content, if white space occurs directly
10583
 *    within any instance of those types.
10584
 *
10585
 * Returns:
10586
 *   1 if standalone="yes"
10587
 *   0 if standalone="no"
10588
 *  -2 if standalone attribute is missing or invalid
10589
 *    (A standalone value of -2 means that the XML declaration was found,
10590
 *     but no value was specified for the standalone attribute).
10591
 */
10592
10593
int
10594
19.8k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10595
19.8k
    int standalone = -2;
10596
10597
19.8k
    SKIP_BLANKS;
10598
19.8k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10599
5.13k
  SKIP(10);
10600
5.13k
        SKIP_BLANKS;
10601
5.13k
  if (RAW != '=') {
10602
48
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10603
48
      return(standalone);
10604
48
        }
10605
5.09k
  NEXT;
10606
5.09k
  SKIP_BLANKS;
10607
5.09k
        if (RAW == '\''){
10608
2.09k
      NEXT;
10609
2.09k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10610
2.07k
          standalone = 0;
10611
2.07k
                SKIP(2);
10612
2.07k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10613
18
                 (NXT(2) == 's')) {
10614
9
          standalone = 1;
10615
9
    SKIP(3);
10616
9
            } else {
10617
9
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10618
9
      }
10619
2.09k
      if (RAW != '\'') {
10620
18
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10621
18
      } else
10622
2.07k
          NEXT;
10623
2.99k
  } else if (RAW == '"'){
10624
2.89k
      NEXT;
10625
2.89k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10626
630
          standalone = 0;
10627
630
    SKIP(2);
10628
2.26k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10629
2.26k
                 (NXT(2) == 's')) {
10630
2.02k
          standalone = 1;
10631
2.02k
                SKIP(3);
10632
2.02k
            } else {
10633
234
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10634
234
      }
10635
2.89k
      if (RAW != '"') {
10636
276
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10637
276
      } else
10638
2.61k
          NEXT;
10639
2.89k
  } else {
10640
102
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10641
102
        }
10642
5.09k
    }
10643
19.7k
    return(standalone);
10644
19.8k
}
10645
10646
/**
10647
 * xmlParseXMLDecl:
10648
 * @ctxt:  an XML parser context
10649
 *
10650
 * DEPRECATED: Internal function, don't use.
10651
 *
10652
 * parse an XML declaration header
10653
 *
10654
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10655
 */
10656
10657
void
10658
24.5k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10659
24.5k
    xmlChar *version;
10660
10661
    /*
10662
     * This value for standalone indicates that the document has an
10663
     * XML declaration but it does not have a standalone attribute.
10664
     * It will be overwritten later if a standalone attribute is found.
10665
     */
10666
24.5k
    ctxt->input->standalone = -2;
10667
10668
    /*
10669
     * We know that '<?xml' is here.
10670
     */
10671
24.5k
    SKIP(5);
10672
10673
24.5k
    if (!IS_BLANK_CH(RAW)) {
10674
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10675
0
                 "Blank needed after '<?xml'\n");
10676
0
    }
10677
24.5k
    SKIP_BLANKS;
10678
10679
    /*
10680
     * We must have the VersionInfo here.
10681
     */
10682
24.5k
    version = xmlParseVersionInfo(ctxt);
10683
24.5k
    if (version == NULL) {
10684
3.58k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10685
21.0k
    } else {
10686
21.0k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10687
      /*
10688
       * Changed here for XML-1.0 5th edition
10689
       */
10690
279
      if (ctxt->options & XML_PARSE_OLD10) {
10691
63
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10692
63
                "Unsupported version '%s'\n",
10693
63
                version);
10694
216
      } else {
10695
216
          if ((version[0] == '1') && ((version[1] == '.'))) {
10696
195
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10697
195
                      "Unsupported version '%s'\n",
10698
195
          version, NULL);
10699
195
    } else {
10700
21
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10701
21
              "Unsupported version '%s'\n",
10702
21
              version);
10703
21
    }
10704
216
      }
10705
279
  }
10706
21.0k
  if (ctxt->version != NULL)
10707
0
      xmlFree((void *) ctxt->version);
10708
21.0k
  ctxt->version = version;
10709
21.0k
    }
10710
10711
    /*
10712
     * We may have the encoding declaration
10713
     */
10714
24.5k
    if (!IS_BLANK_CH(RAW)) {
10715
7.90k
        if ((RAW == '?') && (NXT(1) == '>')) {
10716
3.67k
      SKIP(2);
10717
3.67k
      return;
10718
3.67k
  }
10719
4.22k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10720
4.22k
    }
10721
20.9k
    xmlParseEncodingDecl(ctxt);
10722
20.9k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10723
20.9k
         (ctxt->instate == XML_PARSER_EOF)) {
10724
  /*
10725
   * The XML REC instructs us to stop parsing right here
10726
   */
10727
57
        return;
10728
57
    }
10729
10730
    /*
10731
     * We may have the standalone status.
10732
     */
10733
20.8k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10734
1.05k
        if ((RAW == '?') && (NXT(1) == '>')) {
10735
1.02k
      SKIP(2);
10736
1.02k
      return;
10737
1.02k
  }
10738
21
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10739
21
    }
10740
10741
    /*
10742
     * We can grow the input buffer freely at that point
10743
     */
10744
19.8k
    GROW;
10745
10746
19.8k
    SKIP_BLANKS;
10747
19.8k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10748
10749
19.8k
    SKIP_BLANKS;
10750
19.8k
    if ((RAW == '?') && (NXT(1) == '>')) {
10751
13.4k
        SKIP(2);
10752
13.4k
    } else if (RAW == '>') {
10753
        /* Deprecated old WD ... */
10754
48
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10755
48
  NEXT;
10756
6.31k
    } else {
10757
6.31k
        int c;
10758
10759
6.31k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10760
207k
        while ((c = CUR) != 0) {
10761
207k
            NEXT;
10762
207k
            if (c == '>')
10763
5.73k
                break;
10764
207k
        }
10765
6.31k
    }
10766
19.8k
}
10767
10768
/**
10769
 * xmlParseMisc:
10770
 * @ctxt:  an XML parser context
10771
 *
10772
 * DEPRECATED: Internal function, don't use.
10773
 *
10774
 * parse an XML Misc* optional field.
10775
 *
10776
 * [27] Misc ::= Comment | PI |  S
10777
 */
10778
10779
void
10780
30.4k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10781
38.5k
    while (ctxt->instate != XML_PARSER_EOF) {
10782
38.5k
        SKIP_BLANKS;
10783
38.5k
        GROW;
10784
38.5k
        if ((RAW == '<') && (NXT(1) == '?')) {
10785
5.79k
      xmlParsePI(ctxt);
10786
32.7k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10787
2.31k
      xmlParseComment(ctxt);
10788
30.4k
        } else {
10789
30.4k
            break;
10790
30.4k
        }
10791
38.5k
    }
10792
30.4k
}
10793
10794
/**
10795
 * xmlParseDocument:
10796
 * @ctxt:  an XML parser context
10797
 *
10798
 * parse an XML document (and build a tree if using the standard SAX
10799
 * interface).
10800
 *
10801
 * [1] document ::= prolog element Misc*
10802
 *
10803
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10804
 *
10805
 * Returns 0, -1 in case of error. the parser context is augmented
10806
 *                as a result of the parsing.
10807
 */
10808
10809
int
10810
14.6k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10811
14.6k
    xmlChar start[4];
10812
14.6k
    xmlCharEncoding enc;
10813
10814
14.6k
    xmlInitParser();
10815
10816
14.6k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10817
0
        return(-1);
10818
10819
14.6k
    GROW;
10820
10821
    /*
10822
     * SAX: detecting the level.
10823
     */
10824
14.6k
    xmlDetectSAX2(ctxt);
10825
10826
    /*
10827
     * SAX: beginning of the document processing.
10828
     */
10829
14.6k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10830
14.6k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10831
14.6k
    if (ctxt->instate == XML_PARSER_EOF)
10832
0
  return(-1);
10833
10834
14.6k
    if ((ctxt->encoding == NULL) &&
10835
14.6k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10836
  /*
10837
   * Get the 4 first bytes and decode the charset
10838
   * if enc != XML_CHAR_ENCODING_NONE
10839
   * plug some encoding conversion routines.
10840
   */
10841
14.4k
  start[0] = RAW;
10842
14.4k
  start[1] = NXT(1);
10843
14.4k
  start[2] = NXT(2);
10844
14.4k
  start[3] = NXT(3);
10845
14.4k
  enc = xmlDetectCharEncoding(&start[0], 4);
10846
14.4k
  if (enc != XML_CHAR_ENCODING_NONE) {
10847
8.92k
      xmlSwitchEncoding(ctxt, enc);
10848
8.92k
  }
10849
14.4k
    }
10850
10851
10852
14.6k
    if (CUR == 0) {
10853
73
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10854
73
  return(-1);
10855
73
    }
10856
10857
    /*
10858
     * Check for the XMLDecl in the Prolog.
10859
     * do not GROW here to avoid the detected encoder to decode more
10860
     * than just the first line, unless the amount of data is really
10861
     * too small to hold "<?xml version="1.0" encoding="foo"
10862
     */
10863
14.6k
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
10864
1.19k
       GROW;
10865
1.19k
    }
10866
14.6k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10867
10868
  /*
10869
   * Note that we will switch encoding on the fly.
10870
   */
10871
8.21k
  xmlParseXMLDecl(ctxt);
10872
8.21k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10873
8.21k
      (ctxt->instate == XML_PARSER_EOF)) {
10874
      /*
10875
       * The XML REC instructs us to stop parsing right here
10876
       */
10877
19
      return(-1);
10878
19
  }
10879
8.19k
  ctxt->standalone = ctxt->input->standalone;
10880
8.19k
  SKIP_BLANKS;
10881
8.19k
    } else {
10882
6.40k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10883
6.40k
    }
10884
14.6k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10885
13.1k
        ctxt->sax->startDocument(ctxt->userData);
10886
14.6k
    if (ctxt->instate == XML_PARSER_EOF)
10887
0
  return(-1);
10888
14.6k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10889
14.6k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10890
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10891
0
    }
10892
10893
    /*
10894
     * The Misc part of the Prolog
10895
     */
10896
14.6k
    xmlParseMisc(ctxt);
10897
10898
    /*
10899
     * Then possibly doc type declaration(s) and more Misc
10900
     * (doctypedecl Misc*)?
10901
     */
10902
14.6k
    GROW;
10903
14.6k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10904
10905
7.76k
  ctxt->inSubset = 1;
10906
7.76k
  xmlParseDocTypeDecl(ctxt);
10907
7.76k
  if (RAW == '[') {
10908
6.59k
      ctxt->instate = XML_PARSER_DTD;
10909
6.59k
      xmlParseInternalSubset(ctxt);
10910
6.59k
      if (ctxt->instate == XML_PARSER_EOF)
10911
1.80k
    return(-1);
10912
6.59k
  }
10913
10914
  /*
10915
   * Create and update the external subset.
10916
   */
10917
5.96k
  ctxt->inSubset = 2;
10918
5.96k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10919
5.96k
      (!ctxt->disableSAX))
10920
4.92k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10921
4.92k
                                ctxt->extSubSystem, ctxt->extSubURI);
10922
5.96k
  if (ctxt->instate == XML_PARSER_EOF)
10923
579
      return(-1);
10924
5.38k
  ctxt->inSubset = 0;
10925
10926
5.38k
        xmlCleanSpecialAttr(ctxt);
10927
10928
5.38k
  ctxt->instate = XML_PARSER_PROLOG;
10929
5.38k
  xmlParseMisc(ctxt);
10930
5.38k
    }
10931
10932
    /*
10933
     * Time to start parsing the tree itself
10934
     */
10935
12.2k
    GROW;
10936
12.2k
    if (RAW != '<') {
10937
1.74k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10938
1.74k
           "Start tag expected, '<' not found\n");
10939
10.4k
    } else {
10940
10.4k
  ctxt->instate = XML_PARSER_CONTENT;
10941
10.4k
  xmlParseElement(ctxt);
10942
10.4k
  ctxt->instate = XML_PARSER_EPILOG;
10943
10944
10945
  /*
10946
   * The Misc part at the end
10947
   */
10948
10.4k
  xmlParseMisc(ctxt);
10949
10950
10.4k
  if (RAW != 0) {
10951
3.47k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10952
3.47k
  }
10953
10.4k
  ctxt->instate = XML_PARSER_EOF;
10954
10.4k
    }
10955
10956
    /*
10957
     * SAX: end of the document processing.
10958
     */
10959
12.2k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10960
12.2k
        ctxt->sax->endDocument(ctxt->userData);
10961
10962
    /*
10963
     * Remove locally kept entity definitions if the tree was not built
10964
     */
10965
12.2k
    if ((ctxt->myDoc != NULL) &&
10966
12.2k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10967
122
  xmlFreeDoc(ctxt->myDoc);
10968
122
  ctxt->myDoc = NULL;
10969
122
    }
10970
10971
12.2k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10972
2.00k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10973
2.00k
  if (ctxt->valid)
10974
1.41k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10975
2.00k
  if (ctxt->nsWellFormed)
10976
1.86k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10977
2.00k
  if (ctxt->options & XML_PARSE_OLD10)
10978
129
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10979
2.00k
    }
10980
12.2k
    if (! ctxt->wellFormed) {
10981
10.2k
  ctxt->valid = 0;
10982
10.2k
  return(-1);
10983
10.2k
    }
10984
2.00k
    return(0);
10985
12.2k
}
10986
10987
/**
10988
 * xmlParseExtParsedEnt:
10989
 * @ctxt:  an XML parser context
10990
 *
10991
 * parse a general parsed entity
10992
 * An external general parsed entity is well-formed if it matches the
10993
 * production labeled extParsedEnt.
10994
 *
10995
 * [78] extParsedEnt ::= TextDecl? content
10996
 *
10997
 * Returns 0, -1 in case of error. the parser context is augmented
10998
 *                as a result of the parsing.
10999
 */
11000
11001
int
11002
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11003
0
    xmlChar start[4];
11004
0
    xmlCharEncoding enc;
11005
11006
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
11007
0
        return(-1);
11008
11009
0
    xmlDetectSAX2(ctxt);
11010
11011
0
    GROW;
11012
11013
    /*
11014
     * SAX: beginning of the document processing.
11015
     */
11016
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11017
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11018
11019
    /*
11020
     * Get the 4 first bytes and decode the charset
11021
     * if enc != XML_CHAR_ENCODING_NONE
11022
     * plug some encoding conversion routines.
11023
     */
11024
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11025
0
  start[0] = RAW;
11026
0
  start[1] = NXT(1);
11027
0
  start[2] = NXT(2);
11028
0
  start[3] = NXT(3);
11029
0
  enc = xmlDetectCharEncoding(start, 4);
11030
0
  if (enc != XML_CHAR_ENCODING_NONE) {
11031
0
      xmlSwitchEncoding(ctxt, enc);
11032
0
  }
11033
0
    }
11034
11035
11036
0
    if (CUR == 0) {
11037
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11038
0
    }
11039
11040
    /*
11041
     * Check for the XMLDecl in the Prolog.
11042
     */
11043
0
    GROW;
11044
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11045
11046
  /*
11047
   * Note that we will switch encoding on the fly.
11048
   */
11049
0
  xmlParseXMLDecl(ctxt);
11050
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11051
      /*
11052
       * The XML REC instructs us to stop parsing right here
11053
       */
11054
0
      return(-1);
11055
0
  }
11056
0
  SKIP_BLANKS;
11057
0
    } else {
11058
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11059
0
    }
11060
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11061
0
        ctxt->sax->startDocument(ctxt->userData);
11062
0
    if (ctxt->instate == XML_PARSER_EOF)
11063
0
  return(-1);
11064
11065
    /*
11066
     * Doing validity checking on chunk doesn't make sense
11067
     */
11068
0
    ctxt->instate = XML_PARSER_CONTENT;
11069
0
    ctxt->validate = 0;
11070
0
    ctxt->loadsubset = 0;
11071
0
    ctxt->depth = 0;
11072
11073
0
    xmlParseContent(ctxt);
11074
0
    if (ctxt->instate == XML_PARSER_EOF)
11075
0
  return(-1);
11076
11077
0
    if ((RAW == '<') && (NXT(1) == '/')) {
11078
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11079
0
    } else if (RAW != 0) {
11080
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11081
0
    }
11082
11083
    /*
11084
     * SAX: end of the document processing.
11085
     */
11086
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11087
0
        ctxt->sax->endDocument(ctxt->userData);
11088
11089
0
    if (! ctxt->wellFormed) return(-1);
11090
0
    return(0);
11091
0
}
11092
11093
#ifdef LIBXML_PUSH_ENABLED
11094
/************************************************************************
11095
 *                  *
11096
 *    Progressive parsing interfaces        *
11097
 *                  *
11098
 ************************************************************************/
11099
11100
/**
11101
 * xmlParseLookupChar:
11102
 * @ctxt:  an XML parser context
11103
 * @c:  character
11104
 *
11105
 * Check whether the input buffer contains a character.
11106
 */
11107
static int
11108
795k
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11109
795k
    const xmlChar *cur;
11110
11111
795k
    if (ctxt->checkIndex == 0) {
11112
778k
        cur = ctxt->input->cur + 1;
11113
778k
    } else {
11114
17.1k
        cur = ctxt->input->cur + ctxt->checkIndex;
11115
17.1k
    }
11116
11117
795k
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11118
18.0k
        ctxt->checkIndex = ctxt->input->end - ctxt->input->cur;
11119
18.0k
        return(0);
11120
777k
    } else {
11121
777k
        ctxt->checkIndex = 0;
11122
777k
        return(1);
11123
777k
    }
11124
795k
}
11125
11126
/**
11127
 * xmlParseLookupString:
11128
 * @ctxt:  an XML parser context
11129
 * @startDelta: delta to apply at the start
11130
 * @str:  string
11131
 * @strLen:  length of string
11132
 *
11133
 * Check whether the input buffer contains a string.
11134
 */
11135
static const xmlChar *
11136
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11137
112k
                     const char *str, size_t strLen) {
11138
112k
    const xmlChar *cur, *term;
11139
11140
112k
    if (ctxt->checkIndex == 0) {
11141
79.2k
        cur = ctxt->input->cur + startDelta;
11142
79.2k
    } else {
11143
33.2k
        cur = ctxt->input->cur + ctxt->checkIndex;
11144
33.2k
    }
11145
11146
112k
    term = BAD_CAST strstr((const char *) cur, str);
11147
112k
    if (term == NULL) {
11148
39.9k
        const xmlChar *end = ctxt->input->end;
11149
11150
        /* Rescan (strLen - 1) characters. */
11151
39.9k
        if ((size_t) (end - cur) < strLen)
11152
1.00k
            end = cur;
11153
38.9k
        else
11154
38.9k
            end -= strLen - 1;
11155
39.9k
        ctxt->checkIndex = end - ctxt->input->cur;
11156
72.5k
    } else {
11157
72.5k
        ctxt->checkIndex = 0;
11158
72.5k
    }
11159
11160
112k
    return(term);
11161
112k
}
11162
11163
/**
11164
 * xmlParseLookupCharData:
11165
 * @ctxt:  an XML parser context
11166
 *
11167
 * Check whether the input buffer contains terminated char data.
11168
 */
11169
static int
11170
1.31M
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11171
1.31M
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11172
1.31M
    const xmlChar *end = ctxt->input->end;
11173
11174
31.4M
    while (cur < end) {
11175
31.2M
        if ((*cur == '<') || (*cur == '&')) {
11176
1.10M
            ctxt->checkIndex = 0;
11177
1.10M
            return(1);
11178
1.10M
        }
11179
30.1M
        cur++;
11180
30.1M
    }
11181
11182
205k
    ctxt->checkIndex = cur - ctxt->input->cur;
11183
205k
    return(0);
11184
1.31M
}
11185
11186
/**
11187
 * xmlParseLookupGt:
11188
 * @ctxt:  an XML parser context
11189
 *
11190
 * Check whether there's enough data in the input buffer to finish parsing
11191
 * a start tag. This has to take quotes into account.
 *
 * The scan offset and the quote state are kept in ctxt->checkIndex and
 * ctxt->endCheckState so that a later call can resume where this one
 * stopped.
 *
 * Returns 1 if a '>' outside of a quoted section was found (both saved
 * fields are reset to 0), 0 if the end of the buffer was reached first.
11192
 */
11193
static int
11194
891k
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11195
891k
    const xmlChar *cur;
11196
891k
    const xmlChar *end = ctxt->input->end;
11197
891k
    /* 0 outside quotes, otherwise the quote character that is open. */
    int state = ctxt->endCheckState;
11198
11199
891k
    /*
     * On a fresh scan the first byte is skipped; the callers visible in
     * this file have already checked that it is '<'.
     */
    if (ctxt->checkIndex == 0)
11200
793k
        cur = ctxt->input->cur + 1;
11201
98.6k
    else
11202
98.6k
        cur = ctxt->input->cur + ctxt->checkIndex;
11203
11204
19.3M
    while (cur < end) {
11205
19.2M
        if (state) {
11206
9.16M
            /* Inside quotes: only the matching quote character matters. */
            if (*cur == state)
11207
362k
                state = 0;
11208
10.1M
        } else if (*cur == '\'' || *cur == '"') {
11209
365k
            state = *cur;
11210
9.74M
        } else if (*cur == '>') {
11211
788k
            ctxt->checkIndex = 0;
11212
788k
            ctxt->endCheckState = 0;
11213
788k
            return(1);
11214
788k
        }
11215
18.4M
        cur++;
11216
18.4M
    }
11217
11218
103k
    /* Buffer exhausted: save offset and quote state for the next call. */
    ctxt->checkIndex = cur - ctxt->input->cur;
11219
103k
    ctxt->endCheckState = state;
11220
103k
    return(0);
11221
891k
}
11222
11223
/**
11224
 * xmlParseLookupInternalSubset:
11225
 * @ctxt:  an XML parser context
11226
 *
11227
 * Check whether there's enough data in the input buffer to finish parsing
11228
 * the internal subset.
 *
 * The scan offset and state are kept in ctxt->checkIndex and
 * ctxt->endCheckState so that a later call can resume.
 *
 * Returns 1 if the end of the internal subset (']' followed by optional
 * blanks and '>') was found, 0 if the end of the buffer was reached first.
11229
 */
11230
static int
11231
40.8k
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11232
    /*
11233
     * Sorry, but progressive parsing of the internal subset is not
11234
     * supported. We first check that the full content of the internal
11235
     * subset is available and parsing is launched only at that point.
11236
     * Internal subset ends with "']' S? '>'" in an unescaped section and
11237
     * not in a ']]>' sequence which are conditional sections.
11238
     */
11239
40.8k
    /*
     * start is the earliest position the rescan at the end of this
     * function may back up to; it is moved forward whenever a comment
     * or quoted section boundary has been consumed.
     */
    const xmlChar *cur, *start;
11240
40.8k
    const xmlChar *end = ctxt->input->end;
11241
40.8k
    /*
     * Scanner state:
     *   0          - plain declaration text
     *   '-'        - inside a comment opened by "<!--"
     *   '"', '\''  - inside a literal opened by that quote character
     *   ']'        - the previous character(s) were ']'
     *   ' '        - blanks following a ']'
     */
    int state = ctxt->endCheckState;
11242
11243
40.8k
    if (ctxt->checkIndex == 0) {
11244
11.3k
        cur = ctxt->input->cur + 1;
11245
29.5k
    } else {
11246
29.5k
        cur = ctxt->input->cur + ctxt->checkIndex;
11247
29.5k
    }
11248
40.8k
    start = cur;
11249
11250
7.04M
    while (cur < end) {
11251
7.01M
        if (state == '-') {
11252
1.01M
            /* Only "-->" ends a comment. */
            if ((*cur == '-') &&
11253
1.01M
                (cur[1] == '-') &&
11254
1.01M
                (cur[2] == '>')) {
11255
20.3k
                state = 0;
11256
20.3k
                cur += 3;
11257
20.3k
                start = cur;
11258
20.3k
                continue;
11259
20.3k
            }
11260
1.01M
        }
11261
6.00M
        else if (state == ']') {
11262
10.3k
            if (*cur == '>') {
11263
9.88k
                ctxt->checkIndex = 0;
11264
9.88k
                ctxt->endCheckState = 0;
11265
9.88k
                return(1);
11266
9.88k
            }
11267
457
            /*
             * A further ']' keeps the state unchanged. Any other
             * non-blank character drops back to state 0 and is examined
             * again there (note the continue without advancing cur).
             */
            if (IS_BLANK_CH(*cur)) {
11268
113
                state = ' ';
11269
344
            } else if (*cur != ']') {
11270
147
                state = 0;
11271
147
                start = cur;
11272
147
                continue;
11273
147
            }
11274
457
        }
11275
5.99M
        else if (state == ' ') {
11276
372
            if (*cur == '>') {
11277
18
                ctxt->checkIndex = 0;
11278
18
                ctxt->endCheckState = 0;
11279
18
                return(1);
11280
18
            }
11281
354
            if (!IS_BLANK_CH(*cur)) {
11282
94
                state = 0;
11283
94
                start = cur;
11284
94
                continue;
11285
94
            }
11286
354
        }
11287
5.99M
        else if (state != 0) {
11288
2.34M
            /* Inside a quoted literal: wait for the matching quote. */
            if (*cur == state) {
11289
65.1k
                state = 0;
11290
65.1k
                start = cur + 1;
11291
65.1k
            }
11292
2.34M
        }
11293
3.64M
        else if (*cur == '<') {
11294
121k
            if ((cur[1] == '!') &&
11295
121k
                (cur[2] == '-') &&
11296
121k
                (cur[3] == '-')) {
11297
20.4k
                state = '-';
11298
20.4k
                cur += 4;
11299
                /* Don't treat <!--> as comment */
11300
20.4k
                start = cur;
11301
20.4k
                continue;
11302
20.4k
            }
11303
121k
        }
11304
3.52M
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11305
75.8k
            state = *cur;
11306
75.8k
        }
11307
11308
6.96M
        cur++;
11309
6.96M
    }
11310
11311
    /*
11312
     * Rescan the three last characters to detect "<!--" and "-->"
11313
     * split across chunks.
11314
     */
11315
30.9k
    if ((state == 0) || (state == '-')) {
11316
20.0k
        if (cur - start < 3)
11317
2.38k
            cur = start;
11318
17.6k
        else
11319
17.6k
            cur -= 3;
11320
20.0k
    }
11321
30.9k
    ctxt->checkIndex = cur - ctxt->input->cur;
11322
30.9k
    ctxt->endCheckState = state;
11323
30.9k
    return(0);
11324
40.8k
}
11325
11326
/**
11327
 * xmlCheckCdataPush:
11328
 * @utf: pointer to the block of characters
11329
 * @len: length of the block in bytes
11330
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11331
 *
11332
 * Check that the block of characters is okay as CData content [20]
 *
 * The block is decoded as UTF-8, one sequence at a time. A sequence is
 * rejected if a continuation byte is malformed, if the lead byte is not a
 * valid 1- to 4-byte lead, or if the decoded code point fails xmlIsCharQ.
 * Single bytes below 0x20 other than 0x9, 0xA and 0xD are rejected too.
 * When a multi-byte sequence is cut off by @len, a partial block
 * (@complete == 0) returns the number of bytes before that sequence,
 * while a complete block treats it as an error.
 *
 * NOTE(review): no minimum-value check is made per sequence length, so
 * overlong encodings seem to be accepted unless xmlIsCharQ refuses the
 * decoded value - confirm whether that is intended.
11333
 *
11334
 * Returns the number of bytes to pass if okay, a negative index where an
11335
 *         UTF-8 error occurred otherwise
 *         Returns 0 if @utf is NULL or @len <= 0. An error at offset 0
 *         also yields 0 (-0 == 0); the partial-block caller visible in
 *         this file treats any result <= 0 as an error.
11336
 */
11337
static int
11338
6.96k
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11339
6.96k
    int ix;
11340
6.96k
    unsigned char c;
11341
6.96k
    int codepoint;
11342
11343
6.96k
    if ((utf == NULL) || (len <= 0))
11344
14
        return(0);
11345
11346
691k
    for (ix = 0; ix < len;) {      /* ix advances in the body, one sequence at a time */
11347
687k
        c = utf[ix];
11348
687k
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 0 */
11349
493k
      if (c >= 0x20)
11350
479k
    ix++;
11351
13.7k
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11352
13.2k
          ix++;
11353
534
      else
11354
534
          return(-ix);
11355
493k
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11356
7.37k
      /* sequence cut off by len: error only if the block is complete */
      if (ix + 2 > len) return(complete ? -ix : ix);
11357
7.34k
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11358
214
          return(-ix);
11359
7.13k
      codepoint = (utf[ix] & 0x1f) << 6;
11360
7.13k
      codepoint |= utf[ix+1] & 0x3f;
11361
7.13k
      if (!xmlIsCharQ(codepoint))
11362
4
          return(-ix);
11363
7.12k
      ix += 2;
11364
186k
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11365
6.09k
      if (ix + 3 > len) return(complete ? -ix : ix);
11366
6.05k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11367
6.05k
          ((utf[ix+2] & 0xc0) != 0x80))
11368
103
        return(-ix);
11369
5.95k
      codepoint = (utf[ix] & 0xf) << 12;
11370
5.95k
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11371
5.95k
      codepoint |= utf[ix+2] & 0x3f;
11372
5.95k
      if (!xmlIsCharQ(codepoint))
11373
0
          return(-ix);
11374
5.95k
      ix += 3;
11375
180k
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11376
180k
      if (ix + 4 > len) return(complete ? -ix : ix);
11377
179k
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11378
179k
          ((utf[ix+2] & 0xc0) != 0x80) ||
11379
179k
    ((utf[ix+3] & 0xc0) != 0x80))
11380
822
        return(-ix);
11381
179k
      codepoint = (utf[ix] & 0x7) << 18;
11382
179k
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11383
179k
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11384
179k
      codepoint |= utf[ix+3] & 0x3f;
11385
179k
      if (!xmlIsCharQ(codepoint))
11386
105
          return(-ix);
11387
178k
      ix += 4;
11388
178k
  } else       /* unknown encoding */
11389
471
      return(-ix);
11390
687k
      }
11391
4.19k
      return(ix);
11392
6.95k
}
11393
11394
/**
11395
 * xmlParseTryOrFinish:
11396
 * @ctxt:  an XML parser context
11397
 * @terminate:  last chunk indicator
11398
 *
11399
 * Try to progress on parsing
11400
 *
11401
 * Returns zero if no parsing was possible
11402
 */
11403
static int
11404
467k
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11405
467k
    int ret = 0;
11406
467k
    int avail, tlen;
11407
467k
    xmlChar cur, next;
11408
11409
467k
    if (ctxt->input == NULL)
11410
0
        return(0);
11411
11412
#ifdef DEBUG_PUSH
11413
    switch (ctxt->instate) {
11414
  case XML_PARSER_EOF:
11415
      xmlGenericError(xmlGenericErrorContext,
11416
        "PP: try EOF\n"); break;
11417
  case XML_PARSER_START:
11418
      xmlGenericError(xmlGenericErrorContext,
11419
        "PP: try START\n"); break;
11420
  case XML_PARSER_MISC:
11421
      xmlGenericError(xmlGenericErrorContext,
11422
        "PP: try MISC\n");break;
11423
  case XML_PARSER_COMMENT:
11424
      xmlGenericError(xmlGenericErrorContext,
11425
        "PP: try COMMENT\n");break;
11426
  case XML_PARSER_PROLOG:
11427
      xmlGenericError(xmlGenericErrorContext,
11428
        "PP: try PROLOG\n");break;
11429
  case XML_PARSER_START_TAG:
11430
      xmlGenericError(xmlGenericErrorContext,
11431
        "PP: try START_TAG\n");break;
11432
  case XML_PARSER_CONTENT:
11433
      xmlGenericError(xmlGenericErrorContext,
11434
        "PP: try CONTENT\n");break;
11435
  case XML_PARSER_CDATA_SECTION:
11436
      xmlGenericError(xmlGenericErrorContext,
11437
        "PP: try CDATA_SECTION\n");break;
11438
  case XML_PARSER_END_TAG:
11439
      xmlGenericError(xmlGenericErrorContext,
11440
        "PP: try END_TAG\n");break;
11441
  case XML_PARSER_ENTITY_DECL:
11442
      xmlGenericError(xmlGenericErrorContext,
11443
        "PP: try ENTITY_DECL\n");break;
11444
  case XML_PARSER_ENTITY_VALUE:
11445
      xmlGenericError(xmlGenericErrorContext,
11446
        "PP: try ENTITY_VALUE\n");break;
11447
  case XML_PARSER_ATTRIBUTE_VALUE:
11448
      xmlGenericError(xmlGenericErrorContext,
11449
        "PP: try ATTRIBUTE_VALUE\n");break;
11450
  case XML_PARSER_DTD:
11451
      xmlGenericError(xmlGenericErrorContext,
11452
        "PP: try DTD\n");break;
11453
  case XML_PARSER_EPILOG:
11454
      xmlGenericError(xmlGenericErrorContext,
11455
        "PP: try EPILOG\n");break;
11456
  case XML_PARSER_PI:
11457
      xmlGenericError(xmlGenericErrorContext,
11458
        "PP: try PI\n");break;
11459
        case XML_PARSER_IGNORE:
11460
            xmlGenericError(xmlGenericErrorContext,
11461
        "PP: try IGNORE\n");break;
11462
    }
11463
#endif
11464
11465
467k
    if ((ctxt->input != NULL) &&
11466
467k
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11467
13.5k
        xmlParserInputShrink(ctxt->input);
11468
13.5k
    }
11469
11470
5.10M
    while (ctxt->instate != XML_PARSER_EOF) {
11471
5.10M
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11472
10.9k
      return(0);
11473
11474
5.09M
  if (ctxt->input == NULL) break;
11475
5.09M
  if (ctxt->input->buf == NULL)
11476
0
      avail = ctxt->input->length -
11477
0
              (ctxt->input->cur - ctxt->input->base);
11478
5.09M
  else {
11479
      /*
11480
       * If we are operating on converted input, try to flush
11481
       * remaining chars to avoid them stalling in the non-converted
11482
       * buffer. But do not do this in document start where
11483
       * encoding="..." may not have been read and we work on a
11484
       * guessed encoding.
11485
       */
11486
5.09M
      if ((ctxt->instate != XML_PARSER_START) &&
11487
5.09M
          (ctxt->input->buf->raw != NULL) &&
11488
5.09M
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11489
2.49k
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11490
2.49k
                                                 ctxt->input);
11491
2.49k
    size_t current = ctxt->input->cur - ctxt->input->base;
11492
11493
2.49k
    xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11494
2.49k
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11495
2.49k
                                      base, current);
11496
2.49k
      }
11497
5.09M
      avail = xmlBufUse(ctxt->input->buf->buffer) -
11498
5.09M
        (ctxt->input->cur - ctxt->input->base);
11499
5.09M
  }
11500
5.09M
        if (avail < 1)
11501
29.5k
      goto done;
11502
5.06M
        switch (ctxt->instate) {
11503
0
            case XML_PARSER_EOF:
11504
          /*
11505
     * Document parsing is done !
11506
     */
11507
0
          goto done;
11508
55.0k
            case XML_PARSER_START:
11509
55.0k
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11510
21.1k
        xmlChar start[4];
11511
21.1k
        xmlCharEncoding enc;
11512
11513
        /*
11514
         * Very first chars read from the document flow.
11515
         */
11516
21.1k
        if (avail < 4)
11517
1.09k
      goto done;
11518
11519
        /*
11520
         * Get the 4 first bytes and decode the charset
11521
         * if enc != XML_CHAR_ENCODING_NONE
11522
         * plug some encoding conversion routines,
11523
         * else xmlSwitchEncoding will set to (default)
11524
         * UTF8.
11525
         */
11526
20.0k
        start[0] = RAW;
11527
20.0k
        start[1] = NXT(1);
11528
20.0k
        start[2] = NXT(2);
11529
20.0k
        start[3] = NXT(3);
11530
20.0k
        enc = xmlDetectCharEncoding(start, 4);
11531
20.0k
        xmlSwitchEncoding(ctxt, enc);
11532
20.0k
        break;
11533
21.1k
    }
11534
11535
33.9k
    if (avail < 2)
11536
11
        goto done;
11537
33.9k
    cur = ctxt->input->cur[0];
11538
33.9k
    next = ctxt->input->cur[1];
11539
33.9k
    if (cur == 0) {
11540
112
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11541
112
      ctxt->sax->setDocumentLocator(ctxt->userData,
11542
112
                  &xmlDefaultSAXLocator);
11543
112
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11544
112
        xmlHaltParser(ctxt);
11545
#ifdef DEBUG_PUSH
11546
        xmlGenericError(xmlGenericErrorContext,
11547
          "PP: entering EOF\n");
11548
#endif
11549
112
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11550
112
      ctxt->sax->endDocument(ctxt->userData);
11551
112
        goto done;
11552
112
    }
11553
33.8k
          if ((cur == '<') && (next == '?')) {
11554
        /* PI or XML decl */
11555
22.8k
        if (avail < 5) goto done;
11556
22.7k
        if ((!terminate) &&
11557
22.7k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11558
5.01k
      goto done;
11559
17.7k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11560
17.7k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11561
17.7k
                  &xmlDefaultSAXLocator);
11562
17.7k
        if ((ctxt->input->cur[2] == 'x') &&
11563
17.7k
      (ctxt->input->cur[3] == 'm') &&
11564
17.7k
      (ctxt->input->cur[4] == 'l') &&
11565
17.7k
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11566
16.3k
      ret += 5;
11567
#ifdef DEBUG_PUSH
11568
      xmlGenericError(xmlGenericErrorContext,
11569
        "PP: Parsing XML Decl\n");
11570
#endif
11571
16.3k
      xmlParseXMLDecl(ctxt);
11572
16.3k
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11573
          /*
11574
           * The XML REC instructs us to stop parsing right
11575
           * here
11576
           */
11577
38
          xmlHaltParser(ctxt);
11578
38
          return(0);
11579
38
      }
11580
16.3k
      ctxt->standalone = ctxt->input->standalone;
11581
16.3k
      if ((ctxt->encoding == NULL) &&
11582
16.3k
          (ctxt->input->encoding != NULL))
11583
1.54k
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11584
16.3k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11585
16.3k
          (!ctxt->disableSAX))
11586
13.5k
          ctxt->sax->startDocument(ctxt->userData);
11587
16.3k
      ctxt->instate = XML_PARSER_MISC;
11588
#ifdef DEBUG_PUSH
11589
      xmlGenericError(xmlGenericErrorContext,
11590
        "PP: entering MISC\n");
11591
#endif
11592
16.3k
        } else {
11593
1.32k
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11594
1.32k
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11595
1.32k
          (!ctxt->disableSAX))
11596
1.32k
          ctxt->sax->startDocument(ctxt->userData);
11597
1.32k
      ctxt->instate = XML_PARSER_MISC;
11598
#ifdef DEBUG_PUSH
11599
      xmlGenericError(xmlGenericErrorContext,
11600
        "PP: entering MISC\n");
11601
#endif
11602
1.32k
        }
11603
17.7k
    } else {
11604
10.9k
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11605
10.9k
      ctxt->sax->setDocumentLocator(ctxt->userData,
11606
10.9k
                  &xmlDefaultSAXLocator);
11607
10.9k
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11608
10.9k
        if (ctxt->version == NULL) {
11609
0
            xmlErrMemory(ctxt, NULL);
11610
0
      break;
11611
0
        }
11612
10.9k
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11613
10.9k
            (!ctxt->disableSAX))
11614
10.9k
      ctxt->sax->startDocument(ctxt->userData);
11615
10.9k
        ctxt->instate = XML_PARSER_MISC;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: entering MISC\n");
11619
#endif
11620
10.9k
    }
11621
28.6k
    break;
11622
893k
            case XML_PARSER_START_TAG: {
11623
893k
          const xmlChar *name;
11624
893k
    const xmlChar *prefix = NULL;
11625
893k
    const xmlChar *URI = NULL;
11626
893k
                int line = ctxt->input->line;
11627
893k
    int nsNr = ctxt->nsNr;
11628
11629
893k
    if ((avail < 2) && (ctxt->inputNr == 1))
11630
0
        goto done;
11631
893k
    cur = ctxt->input->cur[0];
11632
893k
          if (cur != '<') {
11633
1.24k
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11634
1.24k
        xmlHaltParser(ctxt);
11635
1.24k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11636
1.24k
      ctxt->sax->endDocument(ctxt->userData);
11637
1.24k
        goto done;
11638
1.24k
    }
11639
891k
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11640
97.0k
                    goto done;
11641
794k
    if (ctxt->spaceNr == 0)
11642
2.79k
        spacePush(ctxt, -1);
11643
792k
    else if (*ctxt->space == -2)
11644
78.6k
        spacePush(ctxt, -1);
11645
713k
    else
11646
713k
        spacePush(ctxt, *ctxt->space);
11647
794k
#ifdef LIBXML_SAX1_ENABLED
11648
794k
    if (ctxt->sax2)
11649
405k
#endif /* LIBXML_SAX1_ENABLED */
11650
405k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11651
389k
#ifdef LIBXML_SAX1_ENABLED
11652
389k
    else
11653
389k
        name = xmlParseStartTag(ctxt);
11654
794k
#endif /* LIBXML_SAX1_ENABLED */
11655
794k
    if (ctxt->instate == XML_PARSER_EOF)
11656
94
        goto done;
11657
794k
    if (name == NULL) {
11658
1.57k
        spacePop(ctxt);
11659
1.57k
        xmlHaltParser(ctxt);
11660
1.57k
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11661
1.57k
      ctxt->sax->endDocument(ctxt->userData);
11662
1.57k
        goto done;
11663
1.57k
    }
11664
793k
#ifdef LIBXML_VALID_ENABLED
11665
    /*
11666
     * [ VC: Root Element Type ]
11667
     * The Name in the document type declaration must match
11668
     * the element type of the root element.
11669
     */
11670
793k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11671
793k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11672
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11673
793k
#endif /* LIBXML_VALID_ENABLED */
11674
11675
    /*
11676
     * Check for an Empty Element.
11677
     */
11678
793k
    if ((RAW == '/') && (NXT(1) == '>')) {
11679
64.5k
        SKIP(2);
11680
11681
64.5k
        if (ctxt->sax2) {
11682
57.7k
      if ((ctxt->sax != NULL) &&
11683
57.7k
          (ctxt->sax->endElementNs != NULL) &&
11684
57.7k
          (!ctxt->disableSAX))
11685
57.6k
          ctxt->sax->endElementNs(ctxt->userData, name,
11686
57.6k
                                  prefix, URI);
11687
57.7k
      if (ctxt->nsNr - nsNr > 0)
11688
234
          nsPop(ctxt, ctxt->nsNr - nsNr);
11689
57.7k
#ifdef LIBXML_SAX1_ENABLED
11690
57.7k
        } else {
11691
6.80k
      if ((ctxt->sax != NULL) &&
11692
6.80k
          (ctxt->sax->endElement != NULL) &&
11693
6.80k
          (!ctxt->disableSAX))
11694
6.79k
          ctxt->sax->endElement(ctxt->userData, name);
11695
6.80k
#endif /* LIBXML_SAX1_ENABLED */
11696
6.80k
        }
11697
64.5k
        if (ctxt->instate == XML_PARSER_EOF)
11698
0
      goto done;
11699
64.5k
        spacePop(ctxt);
11700
64.5k
        if (ctxt->nameNr == 0) {
11701
1.27k
      ctxt->instate = XML_PARSER_EPILOG;
11702
63.2k
        } else {
11703
63.2k
      ctxt->instate = XML_PARSER_CONTENT;
11704
63.2k
        }
11705
64.5k
        break;
11706
64.5k
    }
11707
728k
    if (RAW == '>') {
11708
716k
        NEXT;
11709
716k
    } else {
11710
11.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11711
11.9k
           "Couldn't find end of Start Tag %s\n",
11712
11.9k
           name);
11713
11.9k
        nodePop(ctxt);
11714
11.9k
        spacePop(ctxt);
11715
11.9k
    }
11716
728k
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11717
11718
728k
    ctxt->instate = XML_PARSER_CONTENT;
11719
728k
                break;
11720
793k
      }
11721
3.27M
            case XML_PARSER_CONTENT: {
11722
3.27M
    if ((avail < 2) && (ctxt->inputNr == 1))
11723
18.0k
        goto done;
11724
3.25M
    cur = ctxt->input->cur[0];
11725
3.25M
    next = ctxt->input->cur[1];
11726
11727
3.25M
    if ((cur == '<') && (next == '/')) {
11728
701k
        ctxt->instate = XML_PARSER_END_TAG;
11729
701k
        break;
11730
2.55M
          } else if ((cur == '<') && (next == '?')) {
11731
8.41k
        if ((!terminate) &&
11732
8.41k
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11733
1.99k
      goto done;
11734
6.42k
        xmlParsePI(ctxt);
11735
6.42k
        ctxt->instate = XML_PARSER_CONTENT;
11736
2.54M
    } else if ((cur == '<') && (next != '!')) {
11737
777k
        ctxt->instate = XML_PARSER_START_TAG;
11738
777k
        break;
11739
1.77M
    } else if ((cur == '<') && (next == '!') &&
11740
1.77M
               (ctxt->input->cur[2] == '-') &&
11741
1.77M
         (ctxt->input->cur[3] == '-')) {
11742
50.7k
        if ((!terminate) &&
11743
50.7k
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11744
14.8k
      goto done;
11745
35.8k
        xmlParseComment(ctxt);
11746
35.8k
        ctxt->instate = XML_PARSER_CONTENT;
11747
1.71M
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11748
1.71M
        (ctxt->input->cur[2] == '[') &&
11749
1.71M
        (ctxt->input->cur[3] == 'C') &&
11750
1.71M
        (ctxt->input->cur[4] == 'D') &&
11751
1.71M
        (ctxt->input->cur[5] == 'A') &&
11752
1.71M
        (ctxt->input->cur[6] == 'T') &&
11753
1.71M
        (ctxt->input->cur[7] == 'A') &&
11754
1.71M
        (ctxt->input->cur[8] == '[')) {
11755
2.20k
        SKIP(9);
11756
2.20k
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11757
2.20k
        break;
11758
1.71M
    } else if ((cur == '<') && (next == '!') &&
11759
1.71M
               (avail < 9)) {
11760
972
        goto done;
11761
1.71M
    } else if (cur == '<') {
11762
4.68k
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11763
4.68k
                    "detected an error in element content\n");
11764
4.68k
                    SKIP(1);
11765
1.71M
    } else if (cur == '&') {
11766
91.2k
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11767
2.41k
      goto done;
11768
88.8k
        xmlParseReference(ctxt);
11769
1.62M
    } else {
11770
        /* TODO Avoid the extra copy, handle directly !!! */
11771
        /*
11772
         * Goal of the following test is:
11773
         *  - minimize calls to the SAX 'character' callback
11774
         *    when they are mergeable
11775
         *  - handle a problem for isBlank when we only parse
11776
         *    a sequence of blank chars and the next one is
11777
         *    not available to check against '<' presence.
11778
         *  - tries to homogenize the differences in SAX
11779
         *    callbacks between the push and pull versions
11780
         *    of the parser.
11781
         */
11782
1.62M
        if ((ctxt->inputNr == 1) &&
11783
1.62M
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11784
1.33M
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11785
205k
          goto done;
11786
1.33M
                    }
11787
1.41M
                    ctxt->checkIndex = 0;
11788
1.41M
        xmlParseCharData(ctxt, 0);
11789
1.41M
    }
11790
1.55M
    break;
11791
3.25M
      }
11792
1.55M
            case XML_PARSER_END_TAG:
11793
717k
    if (avail < 2)
11794
0
        goto done;
11795
717k
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11796
15.5k
        goto done;
11797
701k
    if (ctxt->sax2) {
11798
330k
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11799
330k
        nameNsPop(ctxt);
11800
330k
    }
11801
371k
#ifdef LIBXML_SAX1_ENABLED
11802
371k
      else
11803
371k
        xmlParseEndTag1(ctxt, 0);
11804
701k
#endif /* LIBXML_SAX1_ENABLED */
11805
701k
    if (ctxt->instate == XML_PARSER_EOF) {
11806
        /* Nothing */
11807
701k
    } else if (ctxt->nameNr == 0) {
11808
4.00k
        ctxt->instate = XML_PARSER_EPILOG;
11809
697k
    } else {
11810
697k
        ctxt->instate = XML_PARSER_CONTENT;
11811
697k
    }
11812
701k
    break;
11813
13.6k
            case XML_PARSER_CDATA_SECTION: {
11814
          /*
11815
     * The Push mode need to have the SAX callback for
11816
     * cdataBlock merge back contiguous callbacks.
11817
     */
11818
13.6k
    const xmlChar *term;
11819
11820
13.6k
                if (terminate) {
11821
                    /*
11822
                     * Don't call xmlParseLookupString. If 'terminate'
11823
                     * is set, checkIndex is invalid.
11824
                     */
11825
472
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11826
472
                                           "]]>");
11827
13.1k
                } else {
11828
13.1k
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11829
13.1k
                }
11830
11831
13.6k
    if (term == NULL) {
11832
11.4k
        int tmp, size;
11833
11834
11.4k
                    if (terminate) {
11835
                        /* Unfinished CDATA section */
11836
369
                        size = ctxt->input->end - ctxt->input->cur;
11837
11.1k
                    } else {
11838
11.1k
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11839
6.64k
                            goto done;
11840
4.46k
                        ctxt->checkIndex = 0;
11841
                        /* XXX: Why don't we pass the full buffer? */
11842
4.46k
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11843
4.46k
                    }
11844
4.83k
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11845
4.83k
                    if (tmp <= 0) {
11846
1.57k
                        tmp = -tmp;
11847
1.57k
                        ctxt->input->cur += tmp;
11848
1.57k
                        goto encoding_error;
11849
1.57k
                    }
11850
3.25k
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11851
3.25k
                        if (ctxt->sax->cdataBlock != NULL)
11852
2.69k
                            ctxt->sax->cdataBlock(ctxt->userData,
11853
2.69k
                                                  ctxt->input->cur, tmp);
11854
564
                        else if (ctxt->sax->characters != NULL)
11855
564
                            ctxt->sax->characters(ctxt->userData,
11856
564
                                                  ctxt->input->cur, tmp);
11857
3.25k
                    }
11858
3.25k
                    if (ctxt->instate == XML_PARSER_EOF)
11859
0
                        goto done;
11860
3.25k
                    SKIPL(tmp);
11861
3.25k
    } else {
11862
2.12k
                    int base = term - CUR_PTR;
11863
2.12k
        int tmp;
11864
11865
2.12k
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11866
2.12k
        if ((tmp < 0) || (tmp != base)) {
11867
763
      tmp = -tmp;
11868
763
      ctxt->input->cur += tmp;
11869
763
      goto encoding_error;
11870
763
        }
11871
1.36k
        if ((ctxt->sax != NULL) && (base == 0) &&
11872
1.36k
            (ctxt->sax->cdataBlock != NULL) &&
11873
1.36k
            (!ctxt->disableSAX)) {
11874
      /*
11875
       * Special case to provide identical behaviour
11876
       * between pull and push parsers on empty CDATA
11877
       * sections
11878
       */
11879
11
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11880
11
           (!strncmp((const char *)&ctxt->input->cur[-9],
11881
11
                     "<![CDATA[", 9)))
11882
4
           ctxt->sax->cdataBlock(ctxt->userData,
11883
4
                                 BAD_CAST "", 0);
11884
1.35k
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11885
1.35k
      (!ctxt->disableSAX)) {
11886
1.35k
      if (ctxt->sax->cdataBlock != NULL)
11887
1.01k
          ctxt->sax->cdataBlock(ctxt->userData,
11888
1.01k
              ctxt->input->cur, base);
11889
336
      else if (ctxt->sax->characters != NULL)
11890
336
          ctxt->sax->characters(ctxt->userData,
11891
336
              ctxt->input->cur, base);
11892
1.35k
        }
11893
1.36k
        if (ctxt->instate == XML_PARSER_EOF)
11894
0
      goto done;
11895
1.36k
        SKIPL(base + 3);
11896
1.36k
        ctxt->instate = XML_PARSER_CONTENT;
11897
#ifdef DEBUG_PUSH
11898
        xmlGenericError(xmlGenericErrorContext,
11899
          "PP: entering CONTENT\n");
11900
#endif
11901
1.36k
    }
11902
4.62k
    break;
11903
13.6k
      }
11904
37.6k
            case XML_PARSER_MISC:
11905
57.4k
            case XML_PARSER_PROLOG:
11906
64.0k
            case XML_PARSER_EPILOG:
11907
64.0k
    SKIP_BLANKS;
11908
64.0k
    if (ctxt->input->buf == NULL)
11909
0
        avail = ctxt->input->length -
11910
0
                (ctxt->input->cur - ctxt->input->base);
11911
64.0k
    else
11912
64.0k
        avail = xmlBufUse(ctxt->input->buf->buffer) -
11913
64.0k
                (ctxt->input->cur - ctxt->input->base);
11914
64.0k
    if (avail < 2)
11915
4.35k
        goto done;
11916
59.7k
    cur = ctxt->input->cur[0];
11917
59.7k
    next = ctxt->input->cur[1];
11918
59.7k
          if ((cur == '<') && (next == '?')) {
11919
11.8k
        if ((!terminate) &&
11920
11.8k
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11921
2.67k
      goto done;
11922
#ifdef DEBUG_PUSH
11923
        xmlGenericError(xmlGenericErrorContext,
11924
          "PP: Parsing PI\n");
11925
#endif
11926
9.21k
        xmlParsePI(ctxt);
11927
9.21k
        if (ctxt->instate == XML_PARSER_EOF)
11928
0
      goto done;
11929
47.8k
    } else if ((cur == '<') && (next == '!') &&
11930
47.8k
        (ctxt->input->cur[2] == '-') &&
11931
47.8k
        (ctxt->input->cur[3] == '-')) {
11932
7.99k
        if ((!terminate) &&
11933
7.99k
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11934
4.28k
      goto done;
11935
#ifdef DEBUG_PUSH
11936
        xmlGenericError(xmlGenericErrorContext,
11937
          "PP: Parsing Comment\n");
11938
#endif
11939
3.70k
        xmlParseComment(ctxt);
11940
3.70k
        if (ctxt->instate == XML_PARSER_EOF)
11941
0
      goto done;
11942
39.8k
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11943
39.8k
                    (cur == '<') && (next == '!') &&
11944
39.8k
        (ctxt->input->cur[2] == 'D') &&
11945
39.8k
        (ctxt->input->cur[3] == 'O') &&
11946
39.8k
        (ctxt->input->cur[4] == 'C') &&
11947
39.8k
        (ctxt->input->cur[5] == 'T') &&
11948
39.8k
        (ctxt->input->cur[6] == 'Y') &&
11949
39.8k
        (ctxt->input->cur[7] == 'P') &&
11950
39.8k
        (ctxt->input->cur[8] == 'E')) {
11951
20.1k
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11952
6.45k
                        goto done;
11953
#ifdef DEBUG_PUSH
11954
        xmlGenericError(xmlGenericErrorContext,
11955
          "PP: Parsing internal subset\n");
11956
#endif
11957
13.6k
        ctxt->inSubset = 1;
11958
13.6k
        xmlParseDocTypeDecl(ctxt);
11959
13.6k
        if (ctxt->instate == XML_PARSER_EOF)
11960
0
      goto done;
11961
13.6k
        if (RAW == '[') {
11962
11.7k
      ctxt->instate = XML_PARSER_DTD;
11963
#ifdef DEBUG_PUSH
11964
      xmlGenericError(xmlGenericErrorContext,
11965
        "PP: entering DTD\n");
11966
#endif
11967
11.7k
        } else {
11968
      /*
11969
       * Create and update the external subset.
11970
       */
11971
1.97k
      ctxt->inSubset = 2;
11972
1.97k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11973
1.97k
          (ctxt->sax->externalSubset != NULL))
11974
1.61k
          ctxt->sax->externalSubset(ctxt->userData,
11975
1.61k
            ctxt->intSubName, ctxt->extSubSystem,
11976
1.61k
            ctxt->extSubURI);
11977
1.97k
      ctxt->inSubset = 0;
11978
1.97k
      xmlCleanSpecialAttr(ctxt);
11979
1.97k
      ctxt->instate = XML_PARSER_PROLOG;
11980
#ifdef DEBUG_PUSH
11981
      xmlGenericError(xmlGenericErrorContext,
11982
        "PP: entering PROLOG\n");
11983
#endif
11984
1.97k
        }
11985
19.6k
    } else if ((cur == '<') && (next == '!') &&
11986
19.6k
               (avail <
11987
427
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11988
129
        goto done;
11989
19.5k
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11990
526
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11991
526
        xmlHaltParser(ctxt);
11992
#ifdef DEBUG_PUSH
11993
        xmlGenericError(xmlGenericErrorContext,
11994
          "PP: entering EOF\n");
11995
#endif
11996
526
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11997
526
      ctxt->sax->endDocument(ctxt->userData);
11998
526
        goto done;
11999
19.0k
                } else {
12000
19.0k
        ctxt->instate = XML_PARSER_START_TAG;
12001
#ifdef DEBUG_PUSH
12002
        xmlGenericError(xmlGenericErrorContext,
12003
          "PP: entering START_TAG\n");
12004
#endif
12005
19.0k
    }
12006
45.6k
    break;
12007
45.6k
            case XML_PARSER_DTD: {
12008
42.1k
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12009
30.9k
                    goto done;
12010
11.1k
    xmlParseInternalSubset(ctxt);
12011
11.1k
    if (ctxt->instate == XML_PARSER_EOF)
12012
2.28k
        goto done;
12013
8.88k
    ctxt->inSubset = 2;
12014
8.88k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12015
8.88k
        (ctxt->sax->externalSubset != NULL))
12016
8.13k
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12017
8.13k
          ctxt->extSubSystem, ctxt->extSubURI);
12018
8.88k
    ctxt->inSubset = 0;
12019
8.88k
    xmlCleanSpecialAttr(ctxt);
12020
8.88k
    if (ctxt->instate == XML_PARSER_EOF)
12021
859
        goto done;
12022
8.02k
    ctxt->instate = XML_PARSER_PROLOG;
12023
#ifdef DEBUG_PUSH
12024
    xmlGenericError(xmlGenericErrorContext,
12025
      "PP: entering PROLOG\n");
12026
#endif
12027
8.02k
                break;
12028
8.88k
      }
12029
0
            case XML_PARSER_COMMENT:
12030
0
    xmlGenericError(xmlGenericErrorContext,
12031
0
      "PP: internal error, state == COMMENT\n");
12032
0
    ctxt->instate = XML_PARSER_CONTENT;
12033
#ifdef DEBUG_PUSH
12034
    xmlGenericError(xmlGenericErrorContext,
12035
      "PP: entering CONTENT\n");
12036
#endif
12037
0
    break;
12038
0
            case XML_PARSER_IGNORE:
12039
0
    xmlGenericError(xmlGenericErrorContext,
12040
0
      "PP: internal error, state == IGNORE");
12041
0
          ctxt->instate = XML_PARSER_DTD;
12042
#ifdef DEBUG_PUSH
12043
    xmlGenericError(xmlGenericErrorContext,
12044
      "PP: entering DTD\n");
12045
#endif
12046
0
          break;
12047
0
            case XML_PARSER_PI:
12048
0
    xmlGenericError(xmlGenericErrorContext,
12049
0
      "PP: internal error, state == PI\n");
12050
0
    ctxt->instate = XML_PARSER_CONTENT;
12051
#ifdef DEBUG_PUSH
12052
    xmlGenericError(xmlGenericErrorContext,
12053
      "PP: entering CONTENT\n");
12054
#endif
12055
0
    break;
12056
0
            case XML_PARSER_ENTITY_DECL:
12057
0
    xmlGenericError(xmlGenericErrorContext,
12058
0
      "PP: internal error, state == ENTITY_DECL\n");
12059
0
    ctxt->instate = XML_PARSER_DTD;
12060
#ifdef DEBUG_PUSH
12061
    xmlGenericError(xmlGenericErrorContext,
12062
      "PP: entering DTD\n");
12063
#endif
12064
0
    break;
12065
0
            case XML_PARSER_ENTITY_VALUE:
12066
0
    xmlGenericError(xmlGenericErrorContext,
12067
0
      "PP: internal error, state == ENTITY_VALUE\n");
12068
0
    ctxt->instate = XML_PARSER_CONTENT;
12069
#ifdef DEBUG_PUSH
12070
    xmlGenericError(xmlGenericErrorContext,
12071
      "PP: entering DTD\n");
12072
#endif
12073
0
    break;
12074
0
            case XML_PARSER_ATTRIBUTE_VALUE:
12075
0
    xmlGenericError(xmlGenericErrorContext,
12076
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
12077
0
    ctxt->instate = XML_PARSER_START_TAG;
12078
#ifdef DEBUG_PUSH
12079
    xmlGenericError(xmlGenericErrorContext,
12080
      "PP: entering START_TAG\n");
12081
#endif
12082
0
    break;
12083
0
            case XML_PARSER_SYSTEM_LITERAL:
12084
0
    xmlGenericError(xmlGenericErrorContext,
12085
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
12086
0
    ctxt->instate = XML_PARSER_START_TAG;
12087
#ifdef DEBUG_PUSH
12088
    xmlGenericError(xmlGenericErrorContext,
12089
      "PP: entering START_TAG\n");
12090
#endif
12091
0
    break;
12092
0
            case XML_PARSER_PUBLIC_LITERAL:
12093
0
    xmlGenericError(xmlGenericErrorContext,
12094
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
12095
0
    ctxt->instate = XML_PARSER_START_TAG;
12096
#ifdef DEBUG_PUSH
12097
    xmlGenericError(xmlGenericErrorContext,
12098
      "PP: entering START_TAG\n");
12099
#endif
12100
0
    break;
12101
5.06M
  }
12102
5.06M
    }
12103
454k
done:
12104
#ifdef DEBUG_PUSH
12105
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12106
#endif
12107
454k
    return(ret);
12108
2.34k
encoding_error:
12109
2.34k
    {
12110
2.34k
        char buffer[150];
12111
12112
2.34k
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12113
2.34k
      ctxt->input->cur[0], ctxt->input->cur[1],
12114
2.34k
      ctxt->input->cur[2], ctxt->input->cur[3]);
12115
2.34k
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12116
2.34k
         "Input is not proper UTF-8, indicate encoding !\n%s",
12117
2.34k
         BAD_CAST buffer, NULL);
12118
2.34k
    }
12119
2.34k
    return(0);
12120
467k
}
12121
12122
/**
12123
 * xmlParseChunk:
12124
 * @ctxt:  an XML parser context
12125
 * @chunk:  an char array
12126
 * @size:  the size in byte of the chunk
12127
 * @terminate:  last chunk indicator
12128
 *
12129
 * Parse a Chunk of memory
12130
 *
12131
 * Returns zero if no error, the xmlParserErrors otherwise.
12132
 */
12133
int
12134
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12135
779k
              int terminate) {
12136
779k
    int end_in_lf = 0;
12137
779k
    int remain = 0;
12138
12139
779k
    if (ctxt == NULL)
12140
0
        return(XML_ERR_INTERNAL_ERROR);
12141
779k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12142
311k
        return(ctxt->errNo);
12143
467k
    if (ctxt->instate == XML_PARSER_EOF)
12144
41
        return(-1);
12145
467k
    if (ctxt->input == NULL)
12146
0
        return(-1);
12147
12148
467k
    ctxt->progressive = 1;
12149
467k
    if (ctxt->instate == XML_PARSER_START)
12150
35.3k
        xmlDetectSAX2(ctxt);
12151
467k
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
12152
467k
        (chunk[size - 1] == '\r')) {
12153
271
  end_in_lf = 1;
12154
271
  size--;
12155
271
    }
12156
12157
467k
xmldecl_done:
12158
12159
467k
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12160
467k
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
12161
448k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12162
448k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12163
448k
  int res;
12164
12165
        /*
12166
         * Specific handling if we autodetected an encoding, we should not
12167
         * push more than the first line ... which depend on the encoding
12168
         * And only push the rest once the final encoding was detected
12169
         */
12170
448k
        if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12171
448k
            (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
12172
384
            unsigned int len = 45;
12173
12174
384
            if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12175
384
                               BAD_CAST "UTF-16")) ||
12176
384
                (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12177
85
                               BAD_CAST "UTF16")))
12178
299
                len = 90;
12179
85
            else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12180
85
                                    BAD_CAST "UCS-4")) ||
12181
85
                     (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12182
56
                                    BAD_CAST "UCS4")))
12183
29
                len = 180;
12184
12185
384
            if (ctxt->input->buf->rawconsumed < len)
12186
384
                len -= ctxt->input->buf->rawconsumed;
12187
12188
            /*
12189
             * Change size for reading the initial declaration only
12190
             * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12191
             * will blindly copy extra bytes from memory.
12192
             */
12193
384
            if ((unsigned int) size > len) {
12194
154
                remain = size - len;
12195
154
                size = len;
12196
230
            } else {
12197
230
                remain = 0;
12198
230
            }
12199
384
        }
12200
448k
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12201
448k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12202
448k
  if (res < 0) {
12203
56
      ctxt->errNo = XML_PARSER_EOF;
12204
56
      xmlHaltParser(ctxt);
12205
56
      return (XML_PARSER_EOF);
12206
56
  }
12207
#ifdef DEBUG_PUSH
12208
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12209
#endif
12210
12211
448k
    } else if (ctxt->instate != XML_PARSER_EOF) {
12212
18.9k
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12213
18.9k
      xmlParserInputBufferPtr in = ctxt->input->buf;
12214
18.9k
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
12215
18.9k
        (in->raw != NULL)) {
12216
1.22k
    int nbchars;
12217
1.22k
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12218
1.22k
    size_t current = ctxt->input->cur - ctxt->input->base;
12219
12220
1.22k
    nbchars = xmlCharEncInput(in, terminate);
12221
1.22k
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
12222
1.22k
    if (nbchars < 0) {
12223
        /* TODO 2.6.0 */
12224
98
        xmlGenericError(xmlGenericErrorContext,
12225
98
            "xmlParseChunk: encoder error\n");
12226
98
                    xmlHaltParser(ctxt);
12227
98
        return(XML_ERR_INVALID_ENCODING);
12228
98
    }
12229
1.22k
      }
12230
18.9k
  }
12231
18.9k
    }
12232
12233
467k
    if (remain != 0) {
12234
134
        xmlParseTryOrFinish(ctxt, 0);
12235
467k
    } else {
12236
467k
        xmlParseTryOrFinish(ctxt, terminate);
12237
467k
    }
12238
467k
    if (ctxt->instate == XML_PARSER_EOF)
12239
6.79k
        return(ctxt->errNo);
12240
12241
460k
    if ((ctxt->input != NULL) &&
12242
460k
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12243
460k
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12244
460k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12245
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12246
0
        xmlHaltParser(ctxt);
12247
0
    }
12248
460k
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12249
11.2k
        return(ctxt->errNo);
12250
12251
449k
    if (remain != 0) {
12252
59
        chunk += size;
12253
59
        size = remain;
12254
59
        remain = 0;
12255
59
        goto xmldecl_done;
12256
59
    }
12257
449k
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12258
449k
        (ctxt->input->buf != NULL)) {
12259
252
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12260
252
           ctxt->input);
12261
252
  size_t current = ctxt->input->cur - ctxt->input->base;
12262
12263
252
  xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12264
12265
252
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12266
252
            base, current);
12267
252
    }
12268
449k
    if (terminate) {
12269
  /*
12270
   * Check for termination
12271
   */
12272
8.70k
  int cur_avail = 0;
12273
12274
8.70k
  if (ctxt->input != NULL) {
12275
8.70k
      if (ctxt->input->buf == NULL)
12276
0
    cur_avail = ctxt->input->length -
12277
0
          (ctxt->input->cur - ctxt->input->base);
12278
8.70k
      else
12279
8.70k
    cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12280
8.70k
                    (ctxt->input->cur - ctxt->input->base);
12281
8.70k
  }
12282
12283
8.70k
  if ((ctxt->instate != XML_PARSER_EOF) &&
12284
8.70k
      (ctxt->instate != XML_PARSER_EPILOG)) {
12285
4.43k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12286
4.43k
  }
12287
8.70k
  if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
12288
59
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12289
59
  }
12290
8.70k
  if (ctxt->instate != XML_PARSER_EOF) {
12291
8.70k
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12292
8.70k
    ctxt->sax->endDocument(ctxt->userData);
12293
8.70k
  }
12294
8.70k
  ctxt->instate = XML_PARSER_EOF;
12295
8.70k
    }
12296
449k
    if (ctxt->wellFormed == 0)
12297
62.1k
  return((xmlParserErrors) ctxt->errNo);
12298
387k
    else
12299
387k
        return(0);
12300
449k
}
12301
12302
/************************************************************************
12303
 *                  *
12304
 *    I/O front end functions to the parser     *
12305
 *                  *
12306
 ************************************************************************/
12307
12308
/**
12309
 * xmlCreatePushParserCtxt:
12310
 * @sax:  a SAX handler
12311
 * @user_data:  The user data returned on SAX callbacks
12312
 * @chunk:  a pointer to an array of chars
12313
 * @size:  number of chars in the array
12314
 * @filename:  an optional file name or URI
12315
 *
12316
 * Create a parser context for using the XML parser in push mode.
12317
 * If @buffer and @size are non-NULL, the data is used to detect
12318
 * the encoding.  The remaining characters will be parsed so they
12319
 * don't need to be fed in again through xmlParseChunk.
12320
 * To allow content encoding detection, @size should be >= 4
12321
 * The value of @filename is used for fetching external entities
12322
 * and error/warning reports.
12323
 *
12324
 * Returns the new parser context or NULL
12325
 */
12326
12327
xmlParserCtxtPtr
12328
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12329
29.5k
                        const char *chunk, int size, const char *filename) {
12330
29.5k
    xmlParserCtxtPtr ctxt;
12331
29.5k
    xmlParserInputPtr inputStream;
12332
29.5k
    xmlParserInputBufferPtr buf;
12333
29.5k
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12334
12335
    /*
12336
     * plug some encoding conversion routines
12337
     */
12338
29.5k
    if ((chunk != NULL) && (size >= 4))
12339
14.4k
  enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12340
12341
29.5k
    buf = xmlAllocParserInputBuffer(enc);
12342
29.5k
    if (buf == NULL) return(NULL);
12343
12344
29.5k
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12345
29.5k
    if (ctxt == NULL) {
12346
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
12347
0
  xmlFreeParserInputBuffer(buf);
12348
0
  return(NULL);
12349
0
    }
12350
29.5k
    ctxt->dictNames = 1;
12351
29.5k
    if (filename == NULL) {
12352
14.7k
  ctxt->directory = NULL;
12353
14.7k
    } else {
12354
14.7k
        ctxt->directory = xmlParserGetDirectory(filename);
12355
14.7k
    }
12356
12357
29.5k
    inputStream = xmlNewInputStream(ctxt);
12358
29.5k
    if (inputStream == NULL) {
12359
0
  xmlFreeParserCtxt(ctxt);
12360
0
  xmlFreeParserInputBuffer(buf);
12361
0
  return(NULL);
12362
0
    }
12363
12364
29.5k
    if (filename == NULL)
12365
14.7k
  inputStream->filename = NULL;
12366
14.7k
    else {
12367
14.7k
  inputStream->filename = (char *)
12368
14.7k
      xmlCanonicPath((const xmlChar *) filename);
12369
14.7k
  if (inputStream->filename == NULL) {
12370
0
            xmlFreeInputStream(inputStream);
12371
0
      xmlFreeParserCtxt(ctxt);
12372
0
      xmlFreeParserInputBuffer(buf);
12373
0
      return(NULL);
12374
0
  }
12375
14.7k
    }
12376
29.5k
    inputStream->buf = buf;
12377
29.5k
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12378
29.5k
    inputPush(ctxt, inputStream);
12379
12380
    /*
12381
     * If the caller didn't provide an initial 'chunk' for determining
12382
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12383
     * that it can be automatically determined later
12384
     */
12385
29.5k
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12386
12387
29.5k
    if ((size != 0) && (chunk != NULL) &&
12388
29.5k
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12389
14.4k
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12390
14.4k
  size_t cur = ctxt->input->cur - ctxt->input->base;
12391
12392
14.4k
  xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12393
12394
14.4k
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12395
#ifdef DEBUG_PUSH
12396
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12397
#endif
12398
14.4k
    }
12399
12400
29.5k
    if (enc != XML_CHAR_ENCODING_NONE) {
12401
8.92k
        xmlSwitchEncoding(ctxt, enc);
12402
8.92k
    }
12403
12404
29.5k
    return(ctxt);
12405
29.5k
}
12406
#endif /* LIBXML_PUSH_ENABLED */
12407
12408
/**
12409
 * xmlHaltParser:
12410
 * @ctxt:  an XML parser context
12411
 *
12412
 * Blocks further parser processing don't override error
12413
 * for internal use
12414
 */
12415
static void
12416
25.0k
xmlHaltParser(xmlParserCtxtPtr ctxt) {
12417
25.0k
    if (ctxt == NULL)
12418
0
        return;
12419
25.0k
    ctxt->instate = XML_PARSER_EOF;
12420
25.0k
    ctxt->disableSAX = 1;
12421
25.6k
    while (ctxt->inputNr > 1)
12422
571
        xmlFreeInputStream(inputPop(ctxt));
12423
25.0k
    if (ctxt->input != NULL) {
12424
        /*
12425
   * in case there was a specific allocation deallocate before
12426
   * overriding base
12427
   */
12428
25.0k
        if (ctxt->input->free != NULL) {
12429
0
      ctxt->input->free((xmlChar *) ctxt->input->base);
12430
0
      ctxt->input->free = NULL;
12431
0
  }
12432
25.0k
        if (ctxt->input->buf != NULL) {
12433
22.3k
            xmlFreeParserInputBuffer(ctxt->input->buf);
12434
22.3k
            ctxt->input->buf = NULL;
12435
22.3k
        }
12436
25.0k
  ctxt->input->cur = BAD_CAST"";
12437
25.0k
        ctxt->input->length = 0;
12438
25.0k
  ctxt->input->base = ctxt->input->cur;
12439
25.0k
        ctxt->input->end = ctxt->input->cur;
12440
25.0k
    }
12441
25.0k
}
12442
12443
/**
12444
 * xmlStopParser:
12445
 * @ctxt:  an XML parser context
12446
 *
12447
 * Blocks further parser processing
12448
 */
12449
void
12450
14.8k
xmlStopParser(xmlParserCtxtPtr ctxt) {
12451
14.8k
    if (ctxt == NULL)
12452
0
        return;
12453
14.8k
    xmlHaltParser(ctxt);
12454
14.8k
    ctxt->errNo = XML_ERR_USER_STOP;
12455
14.8k
}
12456
12457
/**
12458
 * xmlCreateIOParserCtxt:
12459
 * @sax:  a SAX handler
12460
 * @user_data:  The user data returned on SAX callbacks
12461
 * @ioread:  an I/O read function
12462
 * @ioclose:  an I/O close function
12463
 * @ioctx:  an I/O handler
12464
 * @enc:  the charset encoding if known
12465
 *
12466
 * Create a parser context for using the XML parser with an existing
12467
 * I/O stream
12468
 *
12469
 * Returns the new parser context or NULL
12470
 */
12471
xmlParserCtxtPtr
12472
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12473
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12474
0
  void *ioctx, xmlCharEncoding enc) {
12475
0
    xmlParserCtxtPtr ctxt;
12476
0
    xmlParserInputPtr inputStream;
12477
0
    xmlParserInputBufferPtr buf;
12478
12479
0
    if (ioread == NULL) return(NULL);
12480
12481
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12482
0
    if (buf == NULL) {
12483
0
        if (ioclose != NULL)
12484
0
            ioclose(ioctx);
12485
0
        return (NULL);
12486
0
    }
12487
12488
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12489
0
    if (ctxt == NULL) {
12490
0
  xmlFreeParserInputBuffer(buf);
12491
0
  return(NULL);
12492
0
    }
12493
12494
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12495
0
    if (inputStream == NULL) {
12496
0
  xmlFreeParserCtxt(ctxt);
12497
0
  return(NULL);
12498
0
    }
12499
0
    inputPush(ctxt, inputStream);
12500
12501
0
    return(ctxt);
12502
0
}
12503
12504
#ifdef LIBXML_VALID_ENABLED
12505
/************************************************************************
12506
 *                  *
12507
 *    Front ends when parsing a DTD       *
12508
 *                  *
12509
 ************************************************************************/
12510
12511
/**
12512
 * xmlIOParseDTD:
12513
 * @sax:  the SAX handler block or NULL
12514
 * @input:  an Input Buffer
12515
 * @enc:  the charset encoding if known
12516
 *
12517
 * Load and parse a DTD
12518
 *
12519
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12520
 * @input will be freed by the function in any case.
12521
 */
12522
12523
xmlDtdPtr
12524
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12525
0
        xmlCharEncoding enc) {
12526
0
    xmlDtdPtr ret = NULL;
12527
0
    xmlParserCtxtPtr ctxt;
12528
0
    xmlParserInputPtr pinput = NULL;
12529
0
    xmlChar start[4];
12530
12531
0
    if (input == NULL)
12532
0
  return(NULL);
12533
12534
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12535
0
    if (ctxt == NULL) {
12536
0
        xmlFreeParserInputBuffer(input);
12537
0
  return(NULL);
12538
0
    }
12539
12540
    /* We are loading a DTD */
12541
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12542
12543
0
    xmlDetectSAX2(ctxt);
12544
12545
    /*
12546
     * generate a parser input from the I/O handler
12547
     */
12548
12549
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12550
0
    if (pinput == NULL) {
12551
0
        xmlFreeParserInputBuffer(input);
12552
0
  xmlFreeParserCtxt(ctxt);
12553
0
  return(NULL);
12554
0
    }
12555
12556
    /*
12557
     * plug some encoding conversion routines here.
12558
     */
12559
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12560
0
  xmlFreeParserCtxt(ctxt);
12561
0
  return(NULL);
12562
0
    }
12563
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12564
0
        xmlSwitchEncoding(ctxt, enc);
12565
0
    }
12566
12567
0
    pinput->filename = NULL;
12568
0
    pinput->line = 1;
12569
0
    pinput->col = 1;
12570
0
    pinput->base = ctxt->input->cur;
12571
0
    pinput->cur = ctxt->input->cur;
12572
0
    pinput->free = NULL;
12573
12574
    /*
12575
     * let's parse that entity knowing it's an external subset.
12576
     */
12577
0
    ctxt->inSubset = 2;
12578
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12579
0
    if (ctxt->myDoc == NULL) {
12580
0
  xmlErrMemory(ctxt, "New Doc failed");
12581
0
  return(NULL);
12582
0
    }
12583
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12584
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12585
0
                                 BAD_CAST "none", BAD_CAST "none");
12586
12587
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12588
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12589
  /*
12590
   * Get the 4 first bytes and decode the charset
12591
   * if enc != XML_CHAR_ENCODING_NONE
12592
   * plug some encoding conversion routines.
12593
   */
12594
0
  start[0] = RAW;
12595
0
  start[1] = NXT(1);
12596
0
  start[2] = NXT(2);
12597
0
  start[3] = NXT(3);
12598
0
  enc = xmlDetectCharEncoding(start, 4);
12599
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12600
0
      xmlSwitchEncoding(ctxt, enc);
12601
0
  }
12602
0
    }
12603
12604
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12605
12606
0
    if (ctxt->myDoc != NULL) {
12607
0
  if (ctxt->wellFormed) {
12608
0
      ret = ctxt->myDoc->extSubset;
12609
0
      ctxt->myDoc->extSubset = NULL;
12610
0
      if (ret != NULL) {
12611
0
    xmlNodePtr tmp;
12612
12613
0
    ret->doc = NULL;
12614
0
    tmp = ret->children;
12615
0
    while (tmp != NULL) {
12616
0
        tmp->doc = NULL;
12617
0
        tmp = tmp->next;
12618
0
    }
12619
0
      }
12620
0
  } else {
12621
0
      ret = NULL;
12622
0
  }
12623
0
        xmlFreeDoc(ctxt->myDoc);
12624
0
        ctxt->myDoc = NULL;
12625
0
    }
12626
0
    xmlFreeParserCtxt(ctxt);
12627
12628
0
    return(ret);
12629
0
}
12630
12631
/**
12632
 * xmlSAXParseDTD:
12633
 * @sax:  the SAX handler block
12634
 * @ExternalID:  a NAME* containing the External ID of the DTD
12635
 * @SystemID:  a NAME* containing the URL to the DTD
12636
 *
12637
 * DEPRECATED: Don't use.
12638
 *
12639
 * Load and parse an external subset.
12640
 *
12641
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12642
 */
12643
12644
xmlDtdPtr
12645
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12646
0
                          const xmlChar *SystemID) {
12647
0
    xmlDtdPtr ret = NULL;
12648
0
    xmlParserCtxtPtr ctxt;
12649
0
    xmlParserInputPtr input = NULL;
12650
0
    xmlCharEncoding enc;
12651
0
    xmlChar* systemIdCanonic;
12652
12653
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12654
12655
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12656
0
    if (ctxt == NULL) {
12657
0
  return(NULL);
12658
0
    }
12659
12660
    /* We are loading a DTD */
12661
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12662
12663
    /*
12664
     * Canonicalise the system ID
12665
     */
12666
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12667
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12668
0
  xmlFreeParserCtxt(ctxt);
12669
0
  return(NULL);
12670
0
    }
12671
12672
    /*
12673
     * Ask the Entity resolver to load the damn thing
12674
     */
12675
12676
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12677
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12678
0
                                   systemIdCanonic);
12679
0
    if (input == NULL) {
12680
0
  xmlFreeParserCtxt(ctxt);
12681
0
  if (systemIdCanonic != NULL)
12682
0
      xmlFree(systemIdCanonic);
12683
0
  return(NULL);
12684
0
    }
12685
12686
    /*
12687
     * plug some encoding conversion routines here.
12688
     */
12689
0
    if (xmlPushInput(ctxt, input) < 0) {
12690
0
  xmlFreeParserCtxt(ctxt);
12691
0
  if (systemIdCanonic != NULL)
12692
0
      xmlFree(systemIdCanonic);
12693
0
  return(NULL);
12694
0
    }
12695
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12696
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12697
0
  xmlSwitchEncoding(ctxt, enc);
12698
0
    }
12699
12700
0
    if (input->filename == NULL)
12701
0
  input->filename = (char *) systemIdCanonic;
12702
0
    else
12703
0
  xmlFree(systemIdCanonic);
12704
0
    input->line = 1;
12705
0
    input->col = 1;
12706
0
    input->base = ctxt->input->cur;
12707
0
    input->cur = ctxt->input->cur;
12708
0
    input->free = NULL;
12709
12710
    /*
12711
     * let's parse that entity knowing it's an external subset.
12712
     */
12713
0
    ctxt->inSubset = 2;
12714
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12715
0
    if (ctxt->myDoc == NULL) {
12716
0
  xmlErrMemory(ctxt, "New Doc failed");
12717
0
  xmlFreeParserCtxt(ctxt);
12718
0
  return(NULL);
12719
0
    }
12720
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12721
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12722
0
                                 ExternalID, SystemID);
12723
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12724
12725
0
    if (ctxt->myDoc != NULL) {
12726
0
  if (ctxt->wellFormed) {
12727
0
      ret = ctxt->myDoc->extSubset;
12728
0
      ctxt->myDoc->extSubset = NULL;
12729
0
      if (ret != NULL) {
12730
0
    xmlNodePtr tmp;
12731
12732
0
    ret->doc = NULL;
12733
0
    tmp = ret->children;
12734
0
    while (tmp != NULL) {
12735
0
        tmp->doc = NULL;
12736
0
        tmp = tmp->next;
12737
0
    }
12738
0
      }
12739
0
  } else {
12740
0
      ret = NULL;
12741
0
  }
12742
0
        xmlFreeDoc(ctxt->myDoc);
12743
0
        ctxt->myDoc = NULL;
12744
0
    }
12745
0
    xmlFreeParserCtxt(ctxt);
12746
12747
0
    return(ret);
12748
0
}
12749
12750
12751
/**
12752
 * xmlParseDTD:
12753
 * @ExternalID:  a NAME* containing the External ID of the DTD
12754
 * @SystemID:  a NAME* containing the URL to the DTD
12755
 *
12756
 * Load and parse an external subset.
12757
 *
12758
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12759
 */
12760
12761
xmlDtdPtr
12762
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12763
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12764
0
}
12765
#endif /* LIBXML_VALID_ENABLED */
12766
12767
/************************************************************************
12768
 *                  *
12769
 *    Front ends when parsing an Entity     *
12770
 *                  *
12771
 ************************************************************************/
12772
12773
/**
12774
 * xmlParseCtxtExternalEntity:
12775
 * @ctx:  the existing parsing context
12776
 * @URL:  the URL for the entity to load
12777
 * @ID:  the System ID for the entity to load
12778
 * @lst:  the return value for the set of parsed nodes
12779
 *
12780
 * Parse an external general entity within an existing parsing context
12781
 * An external general parsed entity is well-formed if it matches the
12782
 * production labeled extParsedEnt.
12783
 *
12784
 * [78] extParsedEnt ::= TextDecl? content
12785
 *
12786
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12787
 *    the parser error code otherwise
12788
 */
12789
12790
int
12791
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12792
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12793
0
    void *userData;
12794
12795
0
    if (ctx == NULL) return(-1);
12796
    /*
12797
     * If the user provided their own SAX callbacks, then reuse the
12798
     * userData callback field, otherwise the expected setup in a
12799
     * DOM builder is to have userData == ctxt
12800
     */
12801
0
    if (ctx->userData == ctx)
12802
0
        userData = NULL;
12803
0
    else
12804
0
        userData = ctx->userData;
12805
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12806
0
                                         userData, ctx->depth + 1,
12807
0
                                         URL, ID, lst);
12808
0
}
12809
12810
/**
12811
 * xmlParseExternalEntityPrivate:
12812
 * @doc:  the document the chunk pertains to
12813
 * @oldctxt:  the previous parser context if available
12814
 * @sax:  the SAX handler block (possibly NULL)
12815
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12816
 * @depth:  Used for loop detection, use 0
12817
 * @URL:  the URL for the entity to load
12818
 * @ID:  the System ID for the entity to load
12819
 * @list:  the return value for the set of parsed nodes
12820
 *
12821
 * Private version of xmlParseExternalEntity()
12822
 *
12823
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12824
 *    the parser error code otherwise
12825
 */
12826
12827
static xmlParserErrors
12828
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12829
                xmlSAXHandlerPtr sax,
12830
          void *user_data, int depth, const xmlChar *URL,
12831
1.72k
          const xmlChar *ID, xmlNodePtr *list) {
12832
1.72k
    xmlParserCtxtPtr ctxt;
12833
1.72k
    xmlDocPtr newDoc;
12834
1.72k
    xmlNodePtr newRoot;
12835
1.72k
    xmlParserErrors ret = XML_ERR_OK;
12836
1.72k
    xmlChar start[4];
12837
1.72k
    xmlCharEncoding enc;
12838
12839
1.72k
    if (((depth > 40) &&
12840
1.72k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12841
1.72k
  (depth > 100)) {
12842
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12843
0
                       "Maximum entity nesting depth exceeded");
12844
0
        return(XML_ERR_ENTITY_LOOP);
12845
0
    }
12846
12847
1.72k
    if (list != NULL)
12848
625
        *list = NULL;
12849
1.72k
    if ((URL == NULL) && (ID == NULL))
12850
7
  return(XML_ERR_INTERNAL_ERROR);
12851
1.72k
    if (doc == NULL)
12852
0
  return(XML_ERR_INTERNAL_ERROR);
12853
12854
1.72k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12855
1.72k
                                             oldctxt);
12856
1.72k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12857
412
    if (oldctxt != NULL) {
12858
412
        ctxt->nbErrors = oldctxt->nbErrors;
12859
412
        ctxt->nbWarnings = oldctxt->nbWarnings;
12860
412
    }
12861
412
    xmlDetectSAX2(ctxt);
12862
12863
412
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12864
412
    if (newDoc == NULL) {
12865
0
  xmlFreeParserCtxt(ctxt);
12866
0
  return(XML_ERR_INTERNAL_ERROR);
12867
0
    }
12868
412
    newDoc->properties = XML_DOC_INTERNAL;
12869
412
    if (doc) {
12870
412
        newDoc->intSubset = doc->intSubset;
12871
412
        newDoc->extSubset = doc->extSubset;
12872
412
        if (doc->dict) {
12873
324
            newDoc->dict = doc->dict;
12874
324
            xmlDictReference(newDoc->dict);
12875
324
        }
12876
412
        if (doc->URL != NULL) {
12877
265
            newDoc->URL = xmlStrdup(doc->URL);
12878
265
        }
12879
412
    }
12880
412
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12881
412
    if (newRoot == NULL) {
12882
0
  if (sax != NULL)
12883
0
  xmlFreeParserCtxt(ctxt);
12884
0
  newDoc->intSubset = NULL;
12885
0
  newDoc->extSubset = NULL;
12886
0
        xmlFreeDoc(newDoc);
12887
0
  return(XML_ERR_INTERNAL_ERROR);
12888
0
    }
12889
412
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12890
412
    nodePush(ctxt, newDoc->children);
12891
412
    if (doc == NULL) {
12892
0
        ctxt->myDoc = newDoc;
12893
412
    } else {
12894
412
        ctxt->myDoc = doc;
12895
412
        newRoot->doc = doc;
12896
412
    }
12897
12898
    /*
12899
     * Get the 4 first bytes and decode the charset
12900
     * if enc != XML_CHAR_ENCODING_NONE
12901
     * plug some encoding conversion routines.
12902
     */
12903
412
    GROW;
12904
412
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12905
338
  start[0] = RAW;
12906
338
  start[1] = NXT(1);
12907
338
  start[2] = NXT(2);
12908
338
  start[3] = NXT(3);
12909
338
  enc = xmlDetectCharEncoding(start, 4);
12910
338
  if (enc != XML_CHAR_ENCODING_NONE) {
12911
20
      xmlSwitchEncoding(ctxt, enc);
12912
20
  }
12913
338
    }
12914
12915
    /*
12916
     * Parse a possible text declaration first
12917
     */
12918
412
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12919
6
  xmlParseTextDecl(ctxt);
12920
        /*
12921
         * An XML-1.0 document can't reference an entity not XML-1.0
12922
         */
12923
6
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12924
6
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12925
0
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12926
0
                           "Version mismatch between document and entity\n");
12927
0
        }
12928
6
    }
12929
12930
412
    ctxt->instate = XML_PARSER_CONTENT;
12931
412
    ctxt->depth = depth;
12932
412
    if (oldctxt != NULL) {
12933
412
  ctxt->_private = oldctxt->_private;
12934
412
  ctxt->loadsubset = oldctxt->loadsubset;
12935
412
  ctxt->validate = oldctxt->validate;
12936
412
  ctxt->valid = oldctxt->valid;
12937
412
  ctxt->replaceEntities = oldctxt->replaceEntities;
12938
412
        if (oldctxt->validate) {
12939
90
            ctxt->vctxt.error = oldctxt->vctxt.error;
12940
90
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12941
90
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12942
90
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12943
90
        }
12944
412
  ctxt->external = oldctxt->external;
12945
412
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12946
412
        ctxt->dict = oldctxt->dict;
12947
412
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12948
412
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12949
412
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12950
412
        ctxt->dictNames = oldctxt->dictNames;
12951
412
        ctxt->attsDefault = oldctxt->attsDefault;
12952
412
        ctxt->attsSpecial = oldctxt->attsSpecial;
12953
412
        ctxt->linenumbers = oldctxt->linenumbers;
12954
412
  ctxt->record_info = oldctxt->record_info;
12955
412
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12956
412
  ctxt->node_seq.length = oldctxt->node_seq.length;
12957
412
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12958
412
    } else {
12959
  /*
12960
   * Doing validity checking on chunk without context
12961
   * doesn't make sense
12962
   */
12963
0
  ctxt->_private = NULL;
12964
0
  ctxt->validate = 0;
12965
0
  ctxt->external = 2;
12966
0
  ctxt->loadsubset = 0;
12967
0
    }
12968
12969
412
    xmlParseContent(ctxt);
12970
12971
412
    if ((RAW == '<') && (NXT(1) == '/')) {
12972
24
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12973
388
    } else if (RAW != 0) {
12974
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12975
0
    }
12976
412
    if (ctxt->node != newDoc->children) {
12977
57
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12978
57
    }
12979
12980
412
    if (!ctxt->wellFormed) {
12981
262
  ret = (xmlParserErrors)ctxt->errNo;
12982
262
        if (oldctxt != NULL) {
12983
262
            oldctxt->errNo = ctxt->errNo;
12984
262
            oldctxt->wellFormed = 0;
12985
262
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12986
262
        }
12987
262
    } else {
12988
150
  if (list != NULL) {
12989
129
      xmlNodePtr cur;
12990
12991
      /*
12992
       * Return the newly created nodeset after unlinking it from
12993
       * they pseudo parent.
12994
       */
12995
129
      cur = newDoc->children->children;
12996
129
      *list = cur;
12997
272
      while (cur != NULL) {
12998
143
    cur->parent = NULL;
12999
143
    cur = cur->next;
13000
143
      }
13001
129
            newDoc->children->children = NULL;
13002
129
  }
13003
150
  ret = XML_ERR_OK;
13004
150
    }
13005
13006
    /*
13007
     * Also record the size of the entity parsed
13008
     */
13009
412
    if (ctxt->input != NULL && oldctxt != NULL) {
13010
412
        unsigned long consumed = ctxt->input->consumed;
13011
13012
412
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13013
13014
412
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
13015
412
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
13016
13017
412
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13018
412
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13019
412
    }
13020
13021
412
    if (oldctxt != NULL) {
13022
412
        ctxt->dict = NULL;
13023
412
        ctxt->attsDefault = NULL;
13024
412
        ctxt->attsSpecial = NULL;
13025
412
        oldctxt->nbErrors = ctxt->nbErrors;
13026
412
        oldctxt->nbWarnings = ctxt->nbWarnings;
13027
412
        oldctxt->validate = ctxt->validate;
13028
412
        oldctxt->valid = ctxt->valid;
13029
412
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13030
412
        oldctxt->node_seq.length = ctxt->node_seq.length;
13031
412
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
13032
412
    }
13033
412
    ctxt->node_seq.maximum = 0;
13034
412
    ctxt->node_seq.length = 0;
13035
412
    ctxt->node_seq.buffer = NULL;
13036
412
    xmlFreeParserCtxt(ctxt);
13037
412
    newDoc->intSubset = NULL;
13038
412
    newDoc->extSubset = NULL;
13039
412
    xmlFreeDoc(newDoc);
13040
13041
412
    return(ret);
13042
412
}
13043
13044
#ifdef LIBXML_SAX1_ENABLED
13045
/**
13046
 * xmlParseExternalEntity:
13047
 * @doc:  the document the chunk pertains to
13048
 * @sax:  the SAX handler block (possibly NULL)
13049
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13050
 * @depth:  Used for loop detection, use 0
13051
 * @URL:  the URL for the entity to load
13052
 * @ID:  the System ID for the entity to load
13053
 * @lst:  the return value for the set of parsed nodes
13054
 *
13055
 * Parse an external general entity
13056
 * An external general parsed entity is well-formed if it matches the
13057
 * production labeled extParsedEnt.
13058
 *
13059
 * [78] extParsedEnt ::= TextDecl? content
13060
 *
13061
 * Returns 0 if the entity is well formed, -1 in case of args problem and
13062
 *    the parser error code otherwise
13063
 */
13064
13065
int
13066
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
13067
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
13068
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
13069
0
                           ID, lst));
13070
0
}
13071
13072
/**
13073
 * xmlParseBalancedChunkMemory:
13074
 * @doc:  the document the chunk pertains to (must not be NULL)
13075
 * @sax:  the SAX handler block (possibly NULL)
13076
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13077
 * @depth:  Used for loop detection, use 0
13078
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13079
 * @lst:  the return value for the set of parsed nodes
13080
 *
13081
 * Parse a well-balanced chunk of an XML document
13082
 * called by the parser
13083
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13084
 * the content production in the XML grammar:
13085
 *
13086
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13087
 *
13088
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13089
 *    the parser error code otherwise
13090
 */
13091
13092
int
13093
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13094
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
13095
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13096
0
                                                depth, string, lst, 0 );
13097
0
}
13098
#endif /* LIBXML_SAX1_ENABLED */
13099
13100
/**
13101
 * xmlParseBalancedChunkMemoryInternal:
13102
 * @oldctxt:  the existing parsing context
13103
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13104
 * @user_data:  the user data field for the parser context
13105
 * @lst:  the return value for the set of parsed nodes
13106
 *
13107
 *
13108
 * Parse a well-balanced chunk of an XML document
13109
 * called by the parser
13110
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13111
 * the content production in the XML grammar:
13112
 *
13113
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13114
 *
13115
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13116
 * error code otherwise
13117
 *
13118
 * In case recover is set to 1, the nodelist will not be empty even if
13119
 * the parsed chunk is not well balanced.
13120
 */
13121
static xmlParserErrors
13122
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13123
5.27k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13124
5.27k
    xmlParserCtxtPtr ctxt;
13125
5.27k
    xmlDocPtr newDoc = NULL;
13126
5.27k
    xmlNodePtr newRoot;
13127
5.27k
    xmlSAXHandlerPtr oldsax = NULL;
13128
5.27k
    xmlNodePtr content = NULL;
13129
5.27k
    xmlNodePtr last = NULL;
13130
5.27k
    int size;
13131
5.27k
    xmlParserErrors ret = XML_ERR_OK;
13132
5.27k
#ifdef SAX2
13133
5.27k
    int i;
13134
5.27k
#endif
13135
13136
5.27k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13137
5.27k
        (oldctxt->depth >  100)) {
13138
6
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13139
6
                       "Maximum entity nesting depth exceeded");
13140
6
  return(XML_ERR_ENTITY_LOOP);
13141
6
    }
13142
13143
13144
5.26k
    if (lst != NULL)
13145
5.26k
        *lst = NULL;
13146
5.26k
    if (string == NULL)
13147
0
        return(XML_ERR_INTERNAL_ERROR);
13148
13149
5.26k
    size = xmlStrlen(string);
13150
13151
5.26k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13152
5.26k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13153
5.26k
    ctxt->nbErrors = oldctxt->nbErrors;
13154
5.26k
    ctxt->nbWarnings = oldctxt->nbWarnings;
13155
5.26k
    if (user_data != NULL)
13156
0
  ctxt->userData = user_data;
13157
5.26k
    else
13158
5.26k
  ctxt->userData = ctxt;
13159
5.26k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13160
5.26k
    ctxt->dict = oldctxt->dict;
13161
5.26k
    ctxt->input_id = oldctxt->input_id;
13162
5.26k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13163
5.26k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13164
5.26k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13165
13166
5.26k
#ifdef SAX2
13167
    /* propagate namespaces down the entity */
13168
5.26k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
13169
0
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13170
0
    }
13171
5.26k
#endif
13172
13173
5.26k
    oldsax = ctxt->sax;
13174
5.26k
    ctxt->sax = oldctxt->sax;
13175
5.26k
    xmlDetectSAX2(ctxt);
13176
5.26k
    ctxt->replaceEntities = oldctxt->replaceEntities;
13177
5.26k
    ctxt->options = oldctxt->options;
13178
13179
5.26k
    ctxt->_private = oldctxt->_private;
13180
5.26k
    if (oldctxt->myDoc == NULL) {
13181
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
13182
0
  if (newDoc == NULL) {
13183
0
      ctxt->sax = oldsax;
13184
0
      ctxt->dict = NULL;
13185
0
      xmlFreeParserCtxt(ctxt);
13186
0
      return(XML_ERR_INTERNAL_ERROR);
13187
0
  }
13188
0
  newDoc->properties = XML_DOC_INTERNAL;
13189
0
  newDoc->dict = ctxt->dict;
13190
0
  xmlDictReference(newDoc->dict);
13191
0
  ctxt->myDoc = newDoc;
13192
5.26k
    } else {
13193
5.26k
  ctxt->myDoc = oldctxt->myDoc;
13194
5.26k
        content = ctxt->myDoc->children;
13195
5.26k
  last = ctxt->myDoc->last;
13196
5.26k
    }
13197
5.26k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13198
5.26k
    if (newRoot == NULL) {
13199
0
  ctxt->sax = oldsax;
13200
0
  ctxt->dict = NULL;
13201
0
  xmlFreeParserCtxt(ctxt);
13202
0
  if (newDoc != NULL) {
13203
0
      xmlFreeDoc(newDoc);
13204
0
  }
13205
0
  return(XML_ERR_INTERNAL_ERROR);
13206
0
    }
13207
5.26k
    ctxt->myDoc->children = NULL;
13208
5.26k
    ctxt->myDoc->last = NULL;
13209
5.26k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13210
5.26k
    nodePush(ctxt, ctxt->myDoc->children);
13211
5.26k
    ctxt->instate = XML_PARSER_CONTENT;
13212
5.26k
    ctxt->depth = oldctxt->depth;
13213
13214
5.26k
    ctxt->validate = 0;
13215
5.26k
    ctxt->loadsubset = oldctxt->loadsubset;
13216
5.26k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13217
  /*
13218
   * ID/IDREF registration will be done in xmlValidateElement below
13219
   */
13220
5.22k
  ctxt->loadsubset |= XML_SKIP_IDS;
13221
5.22k
    }
13222
5.26k
    ctxt->dictNames = oldctxt->dictNames;
13223
5.26k
    ctxt->attsDefault = oldctxt->attsDefault;
13224
5.26k
    ctxt->attsSpecial = oldctxt->attsSpecial;
13225
13226
5.26k
    xmlParseContent(ctxt);
13227
5.26k
    if ((RAW == '<') && (NXT(1) == '/')) {
13228
3
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13229
5.25k
    } else if (RAW != 0) {
13230
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13231
0
    }
13232
5.26k
    if (ctxt->node != ctxt->myDoc->children) {
13233
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13234
0
    }
13235
13236
5.26k
    if (!ctxt->wellFormed) {
13237
420
  ret = (xmlParserErrors)ctxt->errNo;
13238
420
        oldctxt->errNo = ctxt->errNo;
13239
420
        oldctxt->wellFormed = 0;
13240
420
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13241
4.84k
    } else {
13242
4.84k
        ret = XML_ERR_OK;
13243
4.84k
    }
13244
13245
5.26k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
13246
4.84k
  xmlNodePtr cur;
13247
13248
  /*
13249
   * Return the newly created nodeset after unlinking it from
13250
   * they pseudo parent.
13251
   */
13252
4.84k
  cur = ctxt->myDoc->children->children;
13253
4.84k
  *lst = cur;
13254
9.78k
  while (cur != NULL) {
13255
4.94k
#ifdef LIBXML_VALID_ENABLED
13256
4.94k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13257
4.94k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13258
4.94k
    (cur->type == XML_ELEMENT_NODE)) {
13259
0
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13260
0
      oldctxt->myDoc, cur);
13261
0
      }
13262
4.94k
#endif /* LIBXML_VALID_ENABLED */
13263
4.94k
      cur->parent = NULL;
13264
4.94k
      cur = cur->next;
13265
4.94k
  }
13266
4.84k
  ctxt->myDoc->children->children = NULL;
13267
4.84k
    }
13268
5.26k
    if (ctxt->myDoc != NULL) {
13269
5.26k
  xmlFreeNode(ctxt->myDoc->children);
13270
5.26k
        ctxt->myDoc->children = content;
13271
5.26k
        ctxt->myDoc->last = last;
13272
5.26k
    }
13273
13274
    /*
13275
     * Also record the size of the entity parsed
13276
     */
13277
5.26k
    if (ctxt->input != NULL && oldctxt != NULL) {
13278
5.26k
        unsigned long consumed = ctxt->input->consumed;
13279
13280
5.26k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13281
13282
5.26k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13283
5.26k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13284
5.26k
    }
13285
13286
5.26k
    oldctxt->nbErrors = ctxt->nbErrors;
13287
5.26k
    oldctxt->nbWarnings = ctxt->nbWarnings;
13288
5.26k
    ctxt->sax = oldsax;
13289
5.26k
    ctxt->dict = NULL;
13290
5.26k
    ctxt->attsDefault = NULL;
13291
5.26k
    ctxt->attsSpecial = NULL;
13292
5.26k
    xmlFreeParserCtxt(ctxt);
13293
5.26k
    if (newDoc != NULL) {
13294
0
  xmlFreeDoc(newDoc);
13295
0
    }
13296
13297
5.26k
    return(ret);
13298
5.26k
}
13299
13300
/**
13301
 * xmlParseInNodeContext:
13302
 * @node:  the context node
13303
 * @data:  the input string
13304
 * @datalen:  the input string length in bytes
13305
 * @options:  a combination of xmlParserOption
13306
 * @lst:  the return value for the set of parsed nodes
13307
 *
13308
 * Parse a well-balanced chunk of an XML document
13309
 * within the context (DTD, namespaces, etc ...) of the given node.
13310
 *
13311
 * The allowed sequence for the data is a Well Balanced Chunk defined by
13312
 * the content production in the XML grammar:
13313
 *
13314
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13315
 *
13316
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13317
 * error code otherwise
13318
 */
13319
xmlParserErrors
13320
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13321
0
                      int options, xmlNodePtr *lst) {
13322
0
#ifdef SAX2
13323
0
    xmlParserCtxtPtr ctxt;
13324
0
    xmlDocPtr doc = NULL;
13325
0
    xmlNodePtr fake, cur;
13326
0
    int nsnr = 0;
13327
13328
0
    xmlParserErrors ret = XML_ERR_OK;
13329
13330
    /*
13331
     * check all input parameters, grab the document
13332
     */
13333
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13334
0
        return(XML_ERR_INTERNAL_ERROR);
13335
0
    switch (node->type) {
13336
0
        case XML_ELEMENT_NODE:
13337
0
        case XML_ATTRIBUTE_NODE:
13338
0
        case XML_TEXT_NODE:
13339
0
        case XML_CDATA_SECTION_NODE:
13340
0
        case XML_ENTITY_REF_NODE:
13341
0
        case XML_PI_NODE:
13342
0
        case XML_COMMENT_NODE:
13343
0
        case XML_DOCUMENT_NODE:
13344
0
        case XML_HTML_DOCUMENT_NODE:
13345
0
      break;
13346
0
  default:
13347
0
      return(XML_ERR_INTERNAL_ERROR);
13348
13349
0
    }
13350
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13351
0
           (node->type != XML_DOCUMENT_NODE) &&
13352
0
     (node->type != XML_HTML_DOCUMENT_NODE))
13353
0
  node = node->parent;
13354
0
    if (node == NULL)
13355
0
  return(XML_ERR_INTERNAL_ERROR);
13356
0
    if (node->type == XML_ELEMENT_NODE)
13357
0
  doc = node->doc;
13358
0
    else
13359
0
        doc = (xmlDocPtr) node;
13360
0
    if (doc == NULL)
13361
0
  return(XML_ERR_INTERNAL_ERROR);
13362
13363
    /*
13364
     * allocate a context and set-up everything not related to the
13365
     * node position in the tree
13366
     */
13367
0
    if (doc->type == XML_DOCUMENT_NODE)
13368
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13369
0
#ifdef LIBXML_HTML_ENABLED
13370
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13371
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13372
        /*
13373
         * When parsing in context, it makes no sense to add implied
13374
         * elements like html/body/etc...
13375
         */
13376
0
        options |= HTML_PARSE_NOIMPLIED;
13377
0
    }
13378
0
#endif
13379
0
    else
13380
0
        return(XML_ERR_INTERNAL_ERROR);
13381
13382
0
    if (ctxt == NULL)
13383
0
        return(XML_ERR_NO_MEMORY);
13384
13385
    /*
13386
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13387
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13388
     * we must wait until the last moment to free the original one.
13389
     */
13390
0
    if (doc->dict != NULL) {
13391
0
        if (ctxt->dict != NULL)
13392
0
      xmlDictFree(ctxt->dict);
13393
0
  ctxt->dict = doc->dict;
13394
0
    } else
13395
0
        options |= XML_PARSE_NODICT;
13396
13397
0
    if (doc->encoding != NULL) {
13398
0
        xmlCharEncodingHandlerPtr hdlr;
13399
13400
0
        if (ctxt->encoding != NULL)
13401
0
      xmlFree((xmlChar *) ctxt->encoding);
13402
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13403
13404
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13405
0
        if (hdlr != NULL) {
13406
0
            xmlSwitchToEncoding(ctxt, hdlr);
13407
0
  } else {
13408
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
13409
0
        }
13410
0
    }
13411
13412
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13413
0
    xmlDetectSAX2(ctxt);
13414
0
    ctxt->myDoc = doc;
13415
    /* parsing in context, i.e. as within existing content */
13416
0
    ctxt->input_id = 2;
13417
0
    ctxt->instate = XML_PARSER_CONTENT;
13418
13419
0
    fake = xmlNewDocComment(node->doc, NULL);
13420
0
    if (fake == NULL) {
13421
0
        xmlFreeParserCtxt(ctxt);
13422
0
  return(XML_ERR_NO_MEMORY);
13423
0
    }
13424
0
    xmlAddChild(node, fake);
13425
13426
0
    if (node->type == XML_ELEMENT_NODE) {
13427
0
  nodePush(ctxt, node);
13428
  /*
13429
   * initialize the SAX2 namespaces stack
13430
   */
13431
0
  cur = node;
13432
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13433
0
      xmlNsPtr ns = cur->nsDef;
13434
0
      const xmlChar *iprefix, *ihref;
13435
13436
0
      while (ns != NULL) {
13437
0
    if (ctxt->dict) {
13438
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13439
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13440
0
    } else {
13441
0
        iprefix = ns->prefix;
13442
0
        ihref = ns->href;
13443
0
    }
13444
13445
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13446
0
        nsPush(ctxt, iprefix, ihref);
13447
0
        nsnr++;
13448
0
    }
13449
0
    ns = ns->next;
13450
0
      }
13451
0
      cur = cur->parent;
13452
0
  }
13453
0
    }
13454
13455
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13456
  /*
13457
   * ID/IDREF registration will be done in xmlValidateElement below
13458
   */
13459
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13460
0
    }
13461
13462
0
#ifdef LIBXML_HTML_ENABLED
13463
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13464
0
        __htmlParseContent(ctxt);
13465
0
    else
13466
0
#endif
13467
0
  xmlParseContent(ctxt);
13468
13469
0
    nsPop(ctxt, nsnr);
13470
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13471
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13472
0
    } else if (RAW != 0) {
13473
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13474
0
    }
13475
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13476
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13477
0
  ctxt->wellFormed = 0;
13478
0
    }
13479
13480
0
    if (!ctxt->wellFormed) {
13481
0
        if (ctxt->errNo == 0)
13482
0
      ret = XML_ERR_INTERNAL_ERROR;
13483
0
  else
13484
0
      ret = (xmlParserErrors)ctxt->errNo;
13485
0
    } else {
13486
0
        ret = XML_ERR_OK;
13487
0
    }
13488
13489
    /*
13490
     * Return the newly created nodeset after unlinking it from
13491
     * the pseudo sibling.
13492
     */
13493
13494
0
    cur = fake->next;
13495
0
    fake->next = NULL;
13496
0
    node->last = fake;
13497
13498
0
    if (cur != NULL) {
13499
0
  cur->prev = NULL;
13500
0
    }
13501
13502
0
    *lst = cur;
13503
13504
0
    while (cur != NULL) {
13505
0
  cur->parent = NULL;
13506
0
  cur = cur->next;
13507
0
    }
13508
13509
0
    xmlUnlinkNode(fake);
13510
0
    xmlFreeNode(fake);
13511
13512
13513
0
    if (ret != XML_ERR_OK) {
13514
0
        xmlFreeNodeList(*lst);
13515
0
  *lst = NULL;
13516
0
    }
13517
13518
0
    if (doc->dict != NULL)
13519
0
        ctxt->dict = NULL;
13520
0
    xmlFreeParserCtxt(ctxt);
13521
13522
0
    return(ret);
13523
#else /* !SAX2 */
13524
    return(XML_ERR_INTERNAL_ERROR);
13525
#endif
13526
0
}
13527
13528
#ifdef LIBXML_SAX1_ENABLED
13529
/**
13530
 * xmlParseBalancedChunkMemoryRecover:
13531
 * @doc:  the document the chunk pertains to (must not be NULL)
13532
 * @sax:  the SAX handler block (possibly NULL)
13533
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13534
 * @depth:  Used for loop detection, use 0
13535
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13536
 * @lst:  the return value for the set of parsed nodes
13537
 * @recover: return nodes even if the data is broken (use 0)
13538
 *
13539
 *
13540
 * Parse a well-balanced chunk of an XML document
13541
 * called by the parser
13542
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13543
 * the content production in the XML grammar:
13544
 *
13545
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13546
 *
13547
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13548
 *    the parser error code otherwise
13549
 *
13550
 * In case recover is set to 1, the nodelist will not be empty even if
13551
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13552
 * some extent.
13553
 */
13554
int
13555
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13556
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13557
0
     int recover) {
13558
0
    xmlParserCtxtPtr ctxt;
13559
0
    xmlDocPtr newDoc;
13560
0
    xmlSAXHandlerPtr oldsax = NULL;
13561
0
    xmlNodePtr content, newRoot;
13562
0
    int size;
13563
0
    int ret = 0;
13564
13565
0
    if (depth > 40) {
13566
0
  return(XML_ERR_ENTITY_LOOP);
13567
0
    }
13568
13569
13570
0
    if (lst != NULL)
13571
0
        *lst = NULL;
13572
0
    if (string == NULL)
13573
0
        return(-1);
13574
13575
0
    size = xmlStrlen(string);
13576
13577
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13578
0
    if (ctxt == NULL) return(-1);
13579
0
    ctxt->userData = ctxt;
13580
0
    if (sax != NULL) {
13581
0
  oldsax = ctxt->sax;
13582
0
        ctxt->sax = sax;
13583
0
  if (user_data != NULL)
13584
0
      ctxt->userData = user_data;
13585
0
    }
13586
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13587
0
    if (newDoc == NULL) {
13588
0
  xmlFreeParserCtxt(ctxt);
13589
0
  return(-1);
13590
0
    }
13591
0
    newDoc->properties = XML_DOC_INTERNAL;
13592
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13593
0
        xmlDictFree(ctxt->dict);
13594
0
  ctxt->dict = doc->dict;
13595
0
  xmlDictReference(ctxt->dict);
13596
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13597
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13598
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13599
0
  ctxt->dictNames = 1;
13600
0
    } else {
13601
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13602
0
    }
13603
    /* doc == NULL is only supported for historic reasons */
13604
0
    if (doc != NULL) {
13605
0
  newDoc->intSubset = doc->intSubset;
13606
0
  newDoc->extSubset = doc->extSubset;
13607
0
    }
13608
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13609
0
    if (newRoot == NULL) {
13610
0
  if (sax != NULL)
13611
0
      ctxt->sax = oldsax;
13612
0
  xmlFreeParserCtxt(ctxt);
13613
0
  newDoc->intSubset = NULL;
13614
0
  newDoc->extSubset = NULL;
13615
0
        xmlFreeDoc(newDoc);
13616
0
  return(-1);
13617
0
    }
13618
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13619
0
    nodePush(ctxt, newRoot);
13620
    /* doc == NULL is only supported for historic reasons */
13621
0
    if (doc == NULL) {
13622
0
  ctxt->myDoc = newDoc;
13623
0
    } else {
13624
0
  ctxt->myDoc = newDoc;
13625
0
  newDoc->children->doc = doc;
13626
  /* Ensure that doc has XML spec namespace */
13627
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13628
0
  newDoc->oldNs = doc->oldNs;
13629
0
    }
13630
0
    ctxt->instate = XML_PARSER_CONTENT;
13631
0
    ctxt->input_id = 2;
13632
0
    ctxt->depth = depth;
13633
13634
    /*
13635
     * Doing validity checking on chunk doesn't make sense
13636
     */
13637
0
    ctxt->validate = 0;
13638
0
    ctxt->loadsubset = 0;
13639
0
    xmlDetectSAX2(ctxt);
13640
13641
0
    if ( doc != NULL ){
13642
0
        content = doc->children;
13643
0
        doc->children = NULL;
13644
0
        xmlParseContent(ctxt);
13645
0
        doc->children = content;
13646
0
    }
13647
0
    else {
13648
0
        xmlParseContent(ctxt);
13649
0
    }
13650
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13651
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13652
0
    } else if (RAW != 0) {
13653
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13654
0
    }
13655
0
    if (ctxt->node != newDoc->children) {
13656
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13657
0
    }
13658
13659
0
    if (!ctxt->wellFormed) {
13660
0
        if (ctxt->errNo == 0)
13661
0
      ret = 1;
13662
0
  else
13663
0
      ret = ctxt->errNo;
13664
0
    } else {
13665
0
      ret = 0;
13666
0
    }
13667
13668
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13669
0
  xmlNodePtr cur;
13670
13671
  /*
13672
   * Return the newly created nodeset after unlinking it from
13673
   * they pseudo parent.
13674
   */
13675
0
  cur = newDoc->children->children;
13676
0
  *lst = cur;
13677
0
  while (cur != NULL) {
13678
0
      xmlSetTreeDoc(cur, doc);
13679
0
      cur->parent = NULL;
13680
0
      cur = cur->next;
13681
0
  }
13682
0
  newDoc->children->children = NULL;
13683
0
    }
13684
13685
0
    if (sax != NULL)
13686
0
  ctxt->sax = oldsax;
13687
0
    xmlFreeParserCtxt(ctxt);
13688
0
    newDoc->intSubset = NULL;
13689
0
    newDoc->extSubset = NULL;
13690
    /* This leaks the namespace list if doc == NULL */
13691
0
    newDoc->oldNs = NULL;
13692
0
    xmlFreeDoc(newDoc);
13693
13694
0
    return(ret);
13695
0
}
13696
13697
/**
13698
 * xmlSAXParseEntity:
13699
 * @sax:  the SAX handler block
13700
 * @filename:  the filename
13701
 *
13702
 * DEPRECATED: Don't use.
13703
 *
13704
 * parse an XML external entity out of context and build a tree.
13705
 * It use the given SAX function block to handle the parsing callback.
13706
 * If sax is NULL, fallback to the default DOM tree building routines.
13707
 *
13708
 * [78] extParsedEnt ::= TextDecl? content
13709
 *
13710
 * This correspond to a "Well Balanced" chunk
13711
 *
13712
 * Returns the resulting document tree
13713
 */
13714
13715
xmlDocPtr
13716
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13717
0
    xmlDocPtr ret;
13718
0
    xmlParserCtxtPtr ctxt;
13719
13720
0
    ctxt = xmlCreateFileParserCtxt(filename);
13721
0
    if (ctxt == NULL) {
13722
0
  return(NULL);
13723
0
    }
13724
0
    if (sax != NULL) {
13725
0
  if (ctxt->sax != NULL)
13726
0
      xmlFree(ctxt->sax);
13727
0
        ctxt->sax = sax;
13728
0
        ctxt->userData = NULL;
13729
0
    }
13730
13731
0
    xmlParseExtParsedEnt(ctxt);
13732
13733
0
    if (ctxt->wellFormed)
13734
0
  ret = ctxt->myDoc;
13735
0
    else {
13736
0
        ret = NULL;
13737
0
        xmlFreeDoc(ctxt->myDoc);
13738
0
        ctxt->myDoc = NULL;
13739
0
    }
13740
0
    if (sax != NULL)
13741
0
        ctxt->sax = NULL;
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return(ret);
13745
0
}
13746
13747
/**
13748
 * xmlParseEntity:
13749
 * @filename:  the filename
13750
 *
13751
 * parse an XML external entity out of context and build a tree.
13752
 *
13753
 * [78] extParsedEnt ::= TextDecl? content
13754
 *
13755
 * This correspond to a "Well Balanced" chunk
13756
 *
13757
 * Returns the resulting document tree
13758
 */
13759
13760
xmlDocPtr
13761
0
xmlParseEntity(const char *filename) {
13762
0
    return(xmlSAXParseEntity(NULL, filename));
13763
0
}
13764
#endif /* LIBXML_SAX1_ENABLED */
13765
13766
/**
13767
 * xmlCreateEntityParserCtxtInternal:
13768
 * @URL:  the entity URL
13769
 * @ID:  the entity PUBLIC ID
13770
 * @base:  a possible base for the target URI
13771
 * @pctx:  parser context used to set options on new context
13772
 *
13773
 * Create a parser context for an external entity
13774
 * Automatic support for ZLIB/Compress compressed document is provided
13775
 * by default if found at compile-time.
13776
 *
13777
 * Returns the new parser context or NULL
13778
 */
13779
static xmlParserCtxtPtr
13780
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13781
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13782
1.72k
        xmlParserCtxtPtr pctx) {
13783
1.72k
    xmlParserCtxtPtr ctxt;
13784
1.72k
    xmlParserInputPtr inputStream;
13785
1.72k
    char *directory = NULL;
13786
1.72k
    xmlChar *uri;
13787
13788
1.72k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13789
1.72k
    if (ctxt == NULL) {
13790
0
  return(NULL);
13791
0
    }
13792
13793
1.72k
    if (pctx != NULL) {
13794
1.72k
        ctxt->options = pctx->options;
13795
1.72k
        ctxt->_private = pctx->_private;
13796
1.72k
  ctxt->input_id = pctx->input_id;
13797
1.72k
    }
13798
13799
    /* Don't read from stdin. */
13800
1.72k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13801
0
        URL = BAD_CAST "./-";
13802
13803
1.72k
    uri = xmlBuildURI(URL, base);
13804
13805
1.72k
    if (uri == NULL) {
13806
25
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13807
25
  if (inputStream == NULL) {
13808
25
      xmlFreeParserCtxt(ctxt);
13809
25
      return(NULL);
13810
25
  }
13811
13812
0
  inputPush(ctxt, inputStream);
13813
13814
0
  if ((ctxt->directory == NULL) && (directory == NULL))
13815
0
      directory = xmlParserGetDirectory((char *)URL);
13816
0
  if ((ctxt->directory == NULL) && (directory != NULL))
13817
0
      ctxt->directory = directory;
13818
1.69k
    } else {
13819
1.69k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13820
1.69k
  if (inputStream == NULL) {
13821
1.28k
      xmlFree(uri);
13822
1.28k
      xmlFreeParserCtxt(ctxt);
13823
1.28k
      return(NULL);
13824
1.28k
  }
13825
13826
412
  inputPush(ctxt, inputStream);
13827
13828
412
  if ((ctxt->directory == NULL) && (directory == NULL))
13829
412
      directory = xmlParserGetDirectory((char *)uri);
13830
412
  if ((ctxt->directory == NULL) && (directory != NULL))
13831
412
      ctxt->directory = directory;
13832
412
  xmlFree(uri);
13833
412
    }
13834
412
    return(ctxt);
13835
1.72k
}
13836
13837
/**
13838
 * xmlCreateEntityParserCtxt:
13839
 * @URL:  the entity URL
13840
 * @ID:  the entity PUBLIC ID
13841
 * @base:  a possible base for the target URI
13842
 *
13843
 * Create a parser context for an external entity
13844
 * Automatic support for ZLIB/Compress compressed document is provided
13845
 * by default if found at compile-time.
13846
 *
13847
 * Returns the new parser context or NULL
13848
 */
13849
xmlParserCtxtPtr
13850
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13851
0
                    const xmlChar *base) {
13852
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13853
13854
0
}
13855
13856
/************************************************************************
13857
 *                  *
13858
 *    Front ends when parsing from a file     *
13859
 *                  *
13860
 ************************************************************************/
13861
13862
/**
13863
 * xmlCreateURLParserCtxt:
13864
 * @filename:  the filename or URL
13865
 * @options:  a combination of xmlParserOption
13866
 *
13867
 * Create a parser context for a file or URL content.
13868
 * Automatic support for ZLIB/Compress compressed document is provided
13869
 * by default if found at compile-time and for file accesses
13870
 *
13871
 * Returns the new parser context or NULL
13872
 */
13873
xmlParserCtxtPtr
13874
xmlCreateURLParserCtxt(const char *filename, int options)
13875
0
{
13876
0
    xmlParserCtxtPtr ctxt;
13877
0
    xmlParserInputPtr inputStream;
13878
0
    char *directory = NULL;
13879
13880
0
    ctxt = xmlNewParserCtxt();
13881
0
    if (ctxt == NULL) {
13882
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13883
0
  return(NULL);
13884
0
    }
13885
13886
0
    if (options)
13887
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13888
0
    ctxt->linenumbers = 1;
13889
13890
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13891
0
    if (inputStream == NULL) {
13892
0
  xmlFreeParserCtxt(ctxt);
13893
0
  return(NULL);
13894
0
    }
13895
13896
0
    inputPush(ctxt, inputStream);
13897
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13898
0
        directory = xmlParserGetDirectory(filename);
13899
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13900
0
        ctxt->directory = directory;
13901
13902
0
    return(ctxt);
13903
0
}
13904
13905
/**
13906
 * xmlCreateFileParserCtxt:
13907
 * @filename:  the filename
13908
 *
13909
 * Create a parser context for a file content.
13910
 * Automatic support for ZLIB/Compress compressed document is provided
13911
 * by default if found at compile-time.
13912
 *
13913
 * Returns the new parser context or NULL
13914
 */
13915
xmlParserCtxtPtr
13916
xmlCreateFileParserCtxt(const char *filename)
13917
0
{
13918
0
    return(xmlCreateURLParserCtxt(filename, 0));
13919
0
}
13920
13921
#ifdef LIBXML_SAX1_ENABLED
13922
/**
13923
 * xmlSAXParseFileWithData:
13924
 * @sax:  the SAX handler block
13925
 * @filename:  the filename
13926
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13927
 *             documents
13928
 * @data:  the userdata
13929
 *
13930
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13931
 *
13932
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13933
 * compressed document is provided by default if found at compile-time.
13934
 * It use the given SAX function block to handle the parsing callback.
13935
 * If sax is NULL, fallback to the default DOM tree building routines.
13936
 *
13937
 * User data (void *) is stored within the parser context in the
13938
 * context's _private member, so it is available nearly everywhere in libxml
13939
 *
13940
 * Returns the resulting document tree
13941
 */
13942
13943
xmlDocPtr
13944
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13945
0
                        int recovery, void *data) {
13946
0
    xmlDocPtr ret;
13947
0
    xmlParserCtxtPtr ctxt;
13948
13949
0
    xmlInitParser();
13950
13951
0
    ctxt = xmlCreateFileParserCtxt(filename);
13952
0
    if (ctxt == NULL) {
13953
0
  return(NULL);
13954
0
    }
13955
0
    if (sax != NULL) {
13956
0
  if (ctxt->sax != NULL)
13957
0
      xmlFree(ctxt->sax);
13958
0
        ctxt->sax = sax;
13959
0
    }
13960
0
    xmlDetectSAX2(ctxt);
13961
0
    if (data!=NULL) {
13962
0
  ctxt->_private = data;
13963
0
    }
13964
13965
0
    if (ctxt->directory == NULL)
13966
0
        ctxt->directory = xmlParserGetDirectory(filename);
13967
13968
0
    ctxt->recovery = recovery;
13969
13970
0
    xmlParseDocument(ctxt);
13971
13972
0
    if ((ctxt->wellFormed) || recovery) {
13973
0
        ret = ctxt->myDoc;
13974
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13975
0
      if (ctxt->input->buf->compressed > 0)
13976
0
    ret->compression = 9;
13977
0
      else
13978
0
    ret->compression = ctxt->input->buf->compressed;
13979
0
  }
13980
0
    }
13981
0
    else {
13982
0
       ret = NULL;
13983
0
       xmlFreeDoc(ctxt->myDoc);
13984
0
       ctxt->myDoc = NULL;
13985
0
    }
13986
0
    if (sax != NULL)
13987
0
        ctxt->sax = NULL;
13988
0
    xmlFreeParserCtxt(ctxt);
13989
13990
0
    return(ret);
13991
0
}
13992
13993
/**
13994
 * xmlSAXParseFile:
13995
 * @sax:  the SAX handler block
13996
 * @filename:  the filename
13997
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13998
 *             documents
13999
 *
14000
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14001
 *
14002
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14003
 * compressed document is provided by default if found at compile-time.
14004
 * It use the given SAX function block to handle the parsing callback.
14005
 * If sax is NULL, fallback to the default DOM tree building routines.
14006
 *
14007
 * Returns the resulting document tree
14008
 */
14009
14010
xmlDocPtr
14011
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14012
0
                          int recovery) {
14013
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14014
0
}
14015
14016
/**
14017
 * xmlRecoverDoc:
14018
 * @cur:  a pointer to an array of xmlChar
14019
 *
14020
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
14021
 *
14022
 * parse an XML in-memory document and build a tree.
14023
 * In the case the document is not Well Formed, a attempt to build a
14024
 * tree is tried anyway
14025
 *
14026
 * Returns the resulting document tree or NULL in case of failure
14027
 */
14028
14029
xmlDocPtr
14030
0
xmlRecoverDoc(const xmlChar *cur) {
14031
0
    return(xmlSAXParseDoc(NULL, cur, 1));
14032
0
}
14033
14034
/**
14035
 * xmlParseFile:
14036
 * @filename:  the filename
14037
 *
14038
 * DEPRECATED: Use xmlReadFile.
14039
 *
14040
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14041
 * compressed document is provided by default if found at compile-time.
14042
 *
14043
 * Returns the resulting document tree if the file was wellformed,
14044
 * NULL otherwise.
14045
 */
14046
14047
xmlDocPtr
14048
0
xmlParseFile(const char *filename) {
14049
0
    return(xmlSAXParseFile(NULL, filename, 0));
14050
0
}
14051
14052
/**
14053
 * xmlRecoverFile:
14054
 * @filename:  the filename
14055
 *
14056
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
14057
 *
14058
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14059
 * compressed document is provided by default if found at compile-time.
14060
 * In the case the document is not Well Formed, it attempts to build
14061
 * a tree anyway
14062
 *
14063
 * Returns the resulting document tree or NULL in case of failure
14064
 */
14065
14066
xmlDocPtr
14067
0
xmlRecoverFile(const char *filename) {
14068
0
    return(xmlSAXParseFile(NULL, filename, 1));
14069
0
}
14070
14071
14072
/**
14073
 * xmlSetupParserForBuffer:
14074
 * @ctxt:  an XML parser context
14075
 * @buffer:  a xmlChar * buffer
14076
 * @filename:  a file name
14077
 *
14078
 * DEPRECATED: Don't use.
14079
 *
14080
 * Setup the parser context to parse a new buffer; Clears any prior
14081
 * contents from the parser context. The buffer parameter must not be
14082
 * NULL, but the filename parameter can be
14083
 */
14084
void
14085
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14086
                             const char* filename)
14087
0
{
14088
0
    xmlParserInputPtr input;
14089
14090
0
    if ((ctxt == NULL) || (buffer == NULL))
14091
0
        return;
14092
14093
0
    input = xmlNewInputStream(ctxt);
14094
0
    if (input == NULL) {
14095
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
14096
0
        xmlClearParserCtxt(ctxt);
14097
0
        return;
14098
0
    }
14099
14100
0
    xmlClearParserCtxt(ctxt);
14101
0
    if (filename != NULL)
14102
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14103
0
    input->base = buffer;
14104
0
    input->cur = buffer;
14105
0
    input->end = &buffer[xmlStrlen(buffer)];
14106
0
    inputPush(ctxt, input);
14107
0
}
14108
14109
/**
14110
 * xmlSAXUserParseFile:
14111
 * @sax:  a SAX handler
14112
 * @user_data:  The user data returned on SAX callbacks
14113
 * @filename:  a file name
14114
 *
14115
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14116
 *
14117
 * parse an XML file and call the given SAX handler routines.
14118
 * Automatic support for ZLIB/Compress compressed document is provided
14119
 *
14120
 * Returns 0 in case of success or a error number otherwise
14121
 */
14122
int
14123
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14124
0
                    const char *filename) {
14125
0
    int ret = 0;
14126
0
    xmlParserCtxtPtr ctxt;
14127
14128
0
    ctxt = xmlCreateFileParserCtxt(filename);
14129
0
    if (ctxt == NULL) return -1;
14130
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14131
0
  xmlFree(ctxt->sax);
14132
0
    ctxt->sax = sax;
14133
0
    xmlDetectSAX2(ctxt);
14134
14135
0
    if (user_data != NULL)
14136
0
  ctxt->userData = user_data;
14137
14138
0
    xmlParseDocument(ctxt);
14139
14140
0
    if (ctxt->wellFormed)
14141
0
  ret = 0;
14142
0
    else {
14143
0
        if (ctxt->errNo != 0)
14144
0
      ret = ctxt->errNo;
14145
0
  else
14146
0
      ret = -1;
14147
0
    }
14148
0
    if (sax != NULL)
14149
0
  ctxt->sax = NULL;
14150
0
    if (ctxt->myDoc != NULL) {
14151
0
        xmlFreeDoc(ctxt->myDoc);
14152
0
  ctxt->myDoc = NULL;
14153
0
    }
14154
0
    xmlFreeParserCtxt(ctxt);
14155
14156
0
    return ret;
14157
0
}
14158
#endif /* LIBXML_SAX1_ENABLED */
14159
14160
/************************************************************************
14161
 *                  *
14162
 *    Front ends when parsing from memory     *
14163
 *                  *
14164
 ************************************************************************/
14165
14166
/**
14167
 * xmlCreateMemoryParserCtxt:
14168
 * @buffer:  a pointer to a char array
14169
 * @size:  the size of the array
14170
 *
14171
 * Create a parser context for an XML in-memory document.
14172
 *
14173
 * Returns the new parser context or NULL
14174
 */
14175
xmlParserCtxtPtr
14176
20.0k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14177
20.0k
    xmlParserCtxtPtr ctxt;
14178
20.0k
    xmlParserInputPtr input;
14179
20.0k
    xmlParserInputBufferPtr buf;
14180
14181
20.0k
    if (buffer == NULL)
14182
0
  return(NULL);
14183
20.0k
    if (size <= 0)
14184
84
  return(NULL);
14185
14186
19.9k
    ctxt = xmlNewParserCtxt();
14187
19.9k
    if (ctxt == NULL)
14188
0
  return(NULL);
14189
14190
19.9k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14191
19.9k
    if (buf == NULL) {
14192
0
  xmlFreeParserCtxt(ctxt);
14193
0
  return(NULL);
14194
0
    }
14195
14196
19.9k
    input = xmlNewInputStream(ctxt);
14197
19.9k
    if (input == NULL) {
14198
0
  xmlFreeParserInputBuffer(buf);
14199
0
  xmlFreeParserCtxt(ctxt);
14200
0
  return(NULL);
14201
0
    }
14202
14203
19.9k
    input->filename = NULL;
14204
19.9k
    input->buf = buf;
14205
19.9k
    xmlBufResetInput(input->buf->buffer, input);
14206
14207
19.9k
    inputPush(ctxt, input);
14208
19.9k
    return(ctxt);
14209
19.9k
}
14210
14211
#ifdef LIBXML_SAX1_ENABLED
14212
/**
14213
 * xmlSAXParseMemoryWithData:
14214
 * @sax:  the SAX handler block
14215
 * @buffer:  an pointer to a char array
14216
 * @size:  the size of the array
14217
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14218
 *             documents
14219
 * @data:  the userdata
14220
 *
14221
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14222
 *
14223
 * parse an XML in-memory block and use the given SAX function block
14224
 * to handle the parsing callback. If sax is NULL, fallback to the default
14225
 * DOM tree building routines.
14226
 *
14227
 * User data (void *) is stored within the parser context in the
14228
 * context's _private member, so it is available nearly everywhere in libxml
14229
 *
14230
 * Returns the resulting document tree
14231
 */
14232
14233
xmlDocPtr
14234
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14235
0
            int size, int recovery, void *data) {
14236
0
    xmlDocPtr ret;
14237
0
    xmlParserCtxtPtr ctxt;
14238
14239
0
    xmlInitParser();
14240
14241
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14242
0
    if (ctxt == NULL) return(NULL);
14243
0
    if (sax != NULL) {
14244
0
  if (ctxt->sax != NULL)
14245
0
      xmlFree(ctxt->sax);
14246
0
        ctxt->sax = sax;
14247
0
    }
14248
0
    xmlDetectSAX2(ctxt);
14249
0
    if (data!=NULL) {
14250
0
  ctxt->_private=data;
14251
0
    }
14252
14253
0
    ctxt->recovery = recovery;
14254
14255
0
    xmlParseDocument(ctxt);
14256
14257
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14258
0
    else {
14259
0
       ret = NULL;
14260
0
       xmlFreeDoc(ctxt->myDoc);
14261
0
       ctxt->myDoc = NULL;
14262
0
    }
14263
0
    if (sax != NULL)
14264
0
  ctxt->sax = NULL;
14265
0
    xmlFreeParserCtxt(ctxt);
14266
14267
0
    return(ret);
14268
0
}
14269
14270
/**
14271
 * xmlSAXParseMemory:
14272
 * @sax:  the SAX handler block
14273
 * @buffer:  an pointer to a char array
14274
 * @size:  the size of the array
14275
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
14276
 *             documents
14277
 *
14278
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14279
 *
14280
 * parse an XML in-memory block and use the given SAX function block
14281
 * to handle the parsing callback. If sax is NULL, fallback to the default
14282
 * DOM tree building routines.
14283
 *
14284
 * Returns the resulting document tree
14285
 */
14286
xmlDocPtr
14287
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14288
0
            int size, int recovery) {
14289
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14290
0
}
14291
14292
/**
14293
 * xmlParseMemory:
14294
 * @buffer:  an pointer to a char array
14295
 * @size:  the size of the array
14296
 *
14297
 * DEPRECATED: Use xmlReadMemory.
14298
 *
14299
 * parse an XML in-memory block and build a tree.
14300
 *
14301
 * Returns the resulting document tree
14302
 */
14303
14304
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14305
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
14306
0
}
14307
14308
/**
14309
 * xmlRecoverMemory:
14310
 * @buffer:  an pointer to a char array
14311
 * @size:  the size of the array
14312
 *
14313
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14314
 *
14315
 * parse an XML in-memory block and build a tree.
14316
 * In the case the document is not Well Formed, an attempt to
14317
 * build a tree is tried anyway
14318
 *
14319
 * Returns the resulting document tree or NULL in case of error
14320
 */
14321
14322
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14323
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
14324
0
}
14325
14326
/**
14327
 * xmlSAXUserParseMemory:
14328
 * @sax:  a SAX handler
14329
 * @user_data:  The user data returned on SAX callbacks
14330
 * @buffer:  an in-memory XML document input
14331
 * @size:  the length of the XML document in bytes
14332
 *
14333
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14334
 *
14335
 * parse an XML in-memory buffer and call the given SAX handler routines.
14336
 *
14337
 * Returns 0 in case of success or a error number otherwise
14338
 */
14339
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14340
0
        const char *buffer, int size) {
14341
0
    int ret = 0;
14342
0
    xmlParserCtxtPtr ctxt;
14343
14344
0
    xmlInitParser();
14345
14346
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14347
0
    if (ctxt == NULL) return -1;
14348
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14349
0
        xmlFree(ctxt->sax);
14350
0
    ctxt->sax = sax;
14351
0
    xmlDetectSAX2(ctxt);
14352
14353
0
    if (user_data != NULL)
14354
0
  ctxt->userData = user_data;
14355
14356
0
    xmlParseDocument(ctxt);
14357
14358
0
    if (ctxt->wellFormed)
14359
0
  ret = 0;
14360
0
    else {
14361
0
        if (ctxt->errNo != 0)
14362
0
      ret = ctxt->errNo;
14363
0
  else
14364
0
      ret = -1;
14365
0
    }
14366
0
    if (sax != NULL)
14367
0
        ctxt->sax = NULL;
14368
0
    if (ctxt->myDoc != NULL) {
14369
0
        xmlFreeDoc(ctxt->myDoc);
14370
0
  ctxt->myDoc = NULL;
14371
0
    }
14372
0
    xmlFreeParserCtxt(ctxt);
14373
14374
0
    return ret;
14375
0
}
14376
#endif /* LIBXML_SAX1_ENABLED */
14377
14378
/**
14379
 * xmlCreateDocParserCtxt:
14380
 * @cur:  a pointer to an array of xmlChar
14381
 *
14382
 * Creates a parser context for an XML in-memory document.
14383
 *
14384
 * Returns the new parser context or NULL
14385
 */
14386
xmlParserCtxtPtr
14387
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
14388
0
    int len;
14389
14390
0
    if (cur == NULL)
14391
0
  return(NULL);
14392
0
    len = xmlStrlen(cur);
14393
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
14394
0
}
14395
14396
#ifdef LIBXML_SAX1_ENABLED
14397
/**
14398
 * xmlSAXParseDoc:
14399
 * @sax:  the SAX handler block
14400
 * @cur:  a pointer to an array of xmlChar
14401
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
14402
 *             documents
14403
 *
14404
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14405
 *
14406
 * parse an XML in-memory document and build a tree.
14407
 * It use the given SAX function block to handle the parsing callback.
14408
 * If sax is NULL, fallback to the default DOM tree building routines.
14409
 *
14410
 * Returns the resulting document tree
14411
 */
14412
14413
xmlDocPtr
14414
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14415
0
    xmlDocPtr ret;
14416
0
    xmlParserCtxtPtr ctxt;
14417
0
    xmlSAXHandlerPtr oldsax = NULL;
14418
14419
0
    if (cur == NULL) return(NULL);
14420
14421
14422
0
    ctxt = xmlCreateDocParserCtxt(cur);
14423
0
    if (ctxt == NULL) return(NULL);
14424
0
    if (sax != NULL) {
14425
0
        oldsax = ctxt->sax;
14426
0
        ctxt->sax = sax;
14427
0
        ctxt->userData = NULL;
14428
0
    }
14429
0
    xmlDetectSAX2(ctxt);
14430
14431
0
    xmlParseDocument(ctxt);
14432
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14433
0
    else {
14434
0
       ret = NULL;
14435
0
       xmlFreeDoc(ctxt->myDoc);
14436
0
       ctxt->myDoc = NULL;
14437
0
    }
14438
0
    if (sax != NULL)
14439
0
  ctxt->sax = oldsax;
14440
0
    xmlFreeParserCtxt(ctxt);
14441
14442
0
    return(ret);
14443
0
}
14444
14445
/**
14446
 * xmlParseDoc:
14447
 * @cur:  a pointer to an array of xmlChar
14448
 *
14449
 * DEPRECATED: Use xmlReadDoc.
14450
 *
14451
 * parse an XML in-memory document and build a tree.
14452
 *
14453
 * Returns the resulting document tree
14454
 */
14455
14456
xmlDocPtr
14457
0
xmlParseDoc(const xmlChar *cur) {
14458
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14459
0
}
14460
#endif /* LIBXML_SAX1_ENABLED */
14461
14462
#ifdef LIBXML_LEGACY_ENABLED
14463
/************************************************************************
14464
 *                  *
14465
 *  Specific function to keep track of entities references    *
14466
 *  and used by the XSLT debugger         *
14467
 *                  *
14468
 ************************************************************************/
14469
14470
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14471
14472
/**
14473
 * xmlAddEntityReference:
14474
 * @ent : A valid entity
14475
 * @firstNode : A valid first node for children of entity
14476
 * @lastNode : A valid last node of children entity
14477
 *
14478
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14479
 */
14480
static void
14481
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14482
                      xmlNodePtr lastNode)
14483
{
14484
    if (xmlEntityRefFunc != NULL) {
14485
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14486
    }
14487
}
14488
14489
14490
/**
14491
 * xmlSetEntityReferenceFunc:
14492
 * @func: A valid function
14493
 *
14494
 * Set the function to call call back when a xml reference has been made
14495
 */
14496
void
14497
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14498
{
14499
    xmlEntityRefFunc = func;
14500
}
14501
#endif /* LIBXML_LEGACY_ENABLED */
14502
14503
/************************************************************************
14504
 *                  *
14505
 *        Miscellaneous       *
14506
 *                  *
14507
 ************************************************************************/
14508
14509
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls after the first successful initialization return immediately.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Test the flag again now that the global init mutex is held. */
    if (!xmlParserInitialized) {
#endif
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
        /* Register the cleanup at exit only with the default allocator. */
        if (xmlFree == free)
            atexit(xmlCleanupParser);
#endif

        xmlInitThreadsInternal();
        xmlInitGlobalsInternal();
        xmlInitMemoryInternal();
        __xmlInitializeDict();
        xmlInitEncodingInternal();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
        xmlInitXPathInternal();
#endif
        /* Set the flag last, once every subsystem has been set up. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
14554
14555
/**
14556
 * xmlCleanupParser:
14557
 *
14558
 * This function name is somewhat misleading. It does not clean up
14559
 * parser state, it cleans up memory allocated by the library itself.
14560
 * It is a cleanup function for the XML library. It tries to reclaim all
14561
 * related global memory allocated for the library processing.
14562
 * It doesn't deallocate any document related memory. One should
14563
 * call xmlCleanupParser() only when the process has finished using
14564
 * the library and all XML/HTML documents built with it.
14565
 * See also xmlInitParser() which has the opposite function of preparing
14566
 * the library for operations.
14567
 *
14568
 * WARNING: if your application is multithreaded or has plugin support
14569
 *          calling this may crash the application if another thread or
14570
 *          a plugin is still using libxml2. It's sometimes very hard to
14571
 *          guess if libxml2 is in use in the application, some libraries
14572
 *          or plugins may use it without notice. In case of doubt abstain
14573
 *          from calling this function or do it just before calling exit()
14574
 *          to avoid leak reports from valgrind !
14575
 */
14576
14577
void
14578
0
xmlCleanupParser(void) {
14579
0
    if (!xmlParserInitialized)
14580
0
  return;
14581
14582
0
    xmlCleanupCharEncodingHandlers();
14583
0
#ifdef LIBXML_CATALOG_ENABLED
14584
0
    xmlCatalogCleanup();
14585
0
#endif
14586
0
    xmlCleanupDictInternal();
14587
0
    xmlCleanupInputCallbacks();
14588
0
#ifdef LIBXML_OUTPUT_ENABLED
14589
0
    xmlCleanupOutputCallbacks();
14590
0
#endif
14591
0
#ifdef LIBXML_SCHEMAS_ENABLED
14592
0
    xmlSchemaCleanupTypes();
14593
0
    xmlRelaxNGCleanupTypes();
14594
0
#endif
14595
0
    xmlCleanupGlobalsInternal();
14596
0
    xmlCleanupThreadsInternal();
14597
0
    xmlCleanupMemoryInternal();
14598
0
    xmlParserInitialized = 0;
14599
0
}
14600
14601
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14602
    !defined(_WIN32)
14603
static void
14604
ATTRIBUTE_DESTRUCTOR
14605
0
xmlDestructor(void) {
14606
    /*
14607
     * Calling custom deallocation functions in a destructor can cause
14608
     * problems, for example with Nokogiri.
14609
     */
14610
0
    if (xmlFree == free)
14611
0
        xmlCleanupParser();
14612
0
}
14613
#endif
14614
14615
/************************************************************************
14616
 *                  *
14617
 *  New set (2.6.0) of simpler and more flexible APIs   *
14618
 *                  *
14619
 ************************************************************************/
14620
14621
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed. @str is expanded more than once, so it must not have
 * side effects.
 *
 * The body is wrapped in do { } while (0) so that "DICT_FREE(x);" is
 * exactly one statement and remains safe inside an unbraced if/else.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14632
14633
/**
14634
 * xmlCtxtReset:
14635
 * @ctxt: an XML parser context
14636
 *
14637
 * Reset a parser context
14638
 */
14639
void
14640
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14641
0
{
14642
0
    xmlParserInputPtr input;
14643
0
    xmlDictPtr dict;
14644
14645
0
    if (ctxt == NULL)
14646
0
        return;
14647
14648
0
    dict = ctxt->dict;
14649
14650
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14651
0
        xmlFreeInputStream(input);
14652
0
    }
14653
0
    ctxt->inputNr = 0;
14654
0
    ctxt->input = NULL;
14655
14656
0
    ctxt->spaceNr = 0;
14657
0
    if (ctxt->spaceTab != NULL) {
14658
0
  ctxt->spaceTab[0] = -1;
14659
0
  ctxt->space = &ctxt->spaceTab[0];
14660
0
    } else {
14661
0
        ctxt->space = NULL;
14662
0
    }
14663
14664
14665
0
    ctxt->nodeNr = 0;
14666
0
    ctxt->node = NULL;
14667
14668
0
    ctxt->nameNr = 0;
14669
0
    ctxt->name = NULL;
14670
14671
0
    ctxt->nsNr = 0;
14672
14673
0
    DICT_FREE(ctxt->version);
14674
0
    ctxt->version = NULL;
14675
0
    DICT_FREE(ctxt->encoding);
14676
0
    ctxt->encoding = NULL;
14677
0
    DICT_FREE(ctxt->directory);
14678
0
    ctxt->directory = NULL;
14679
0
    DICT_FREE(ctxt->extSubURI);
14680
0
    ctxt->extSubURI = NULL;
14681
0
    DICT_FREE(ctxt->extSubSystem);
14682
0
    ctxt->extSubSystem = NULL;
14683
0
    if (ctxt->myDoc != NULL)
14684
0
        xmlFreeDoc(ctxt->myDoc);
14685
0
    ctxt->myDoc = NULL;
14686
14687
0
    ctxt->standalone = -1;
14688
0
    ctxt->hasExternalSubset = 0;
14689
0
    ctxt->hasPErefs = 0;
14690
0
    ctxt->html = 0;
14691
0
    ctxt->external = 0;
14692
0
    ctxt->instate = XML_PARSER_START;
14693
0
    ctxt->token = 0;
14694
14695
0
    ctxt->wellFormed = 1;
14696
0
    ctxt->nsWellFormed = 1;
14697
0
    ctxt->disableSAX = 0;
14698
0
    ctxt->valid = 1;
14699
#if 0
14700
    ctxt->vctxt.userData = ctxt;
14701
    ctxt->vctxt.error = xmlParserValidityError;
14702
    ctxt->vctxt.warning = xmlParserValidityWarning;
14703
#endif
14704
0
    ctxt->record_info = 0;
14705
0
    ctxt->checkIndex = 0;
14706
0
    ctxt->endCheckState = 0;
14707
0
    ctxt->inSubset = 0;
14708
0
    ctxt->errNo = XML_ERR_OK;
14709
0
    ctxt->depth = 0;
14710
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14711
0
    ctxt->catalogs = NULL;
14712
0
    ctxt->sizeentities = 0;
14713
0
    ctxt->sizeentcopy = 0;
14714
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14715
14716
0
    if (ctxt->attsDefault != NULL) {
14717
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14718
0
        ctxt->attsDefault = NULL;
14719
0
    }
14720
0
    if (ctxt->attsSpecial != NULL) {
14721
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14722
0
        ctxt->attsSpecial = NULL;
14723
0
    }
14724
14725
0
#ifdef LIBXML_CATALOG_ENABLED
14726
0
    if (ctxt->catalogs != NULL)
14727
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14728
0
#endif
14729
0
    ctxt->nbErrors = 0;
14730
0
    ctxt->nbWarnings = 0;
14731
0
    if (ctxt->lastError.code != XML_ERR_OK)
14732
0
        xmlResetError(&ctxt->lastError);
14733
0
}
14734
14735
/**
14736
 * xmlCtxtResetPush:
14737
 * @ctxt: an XML parser context
14738
 * @chunk:  a pointer to an array of chars
14739
 * @size:  number of chars in the array
14740
 * @filename:  an optional file name or URI
14741
 * @encoding:  the document encoding, or NULL
14742
 *
14743
 * Reset a push parser context
14744
 *
14745
 * Returns 0 in case of success and 1 in case of error
14746
 */
14747
int
14748
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14749
                 int size, const char *filename, const char *encoding)
14750
0
{
14751
0
    xmlParserInputPtr inputStream;
14752
0
    xmlParserInputBufferPtr buf;
14753
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14754
14755
0
    if (ctxt == NULL)
14756
0
        return(1);
14757
14758
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14759
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14760
14761
0
    buf = xmlAllocParserInputBuffer(enc);
14762
0
    if (buf == NULL)
14763
0
        return(1);
14764
14765
0
    if (ctxt == NULL) {
14766
0
        xmlFreeParserInputBuffer(buf);
14767
0
        return(1);
14768
0
    }
14769
14770
0
    xmlCtxtReset(ctxt);
14771
14772
0
    if (filename == NULL) {
14773
0
        ctxt->directory = NULL;
14774
0
    } else {
14775
0
        ctxt->directory = xmlParserGetDirectory(filename);
14776
0
    }
14777
14778
0
    inputStream = xmlNewInputStream(ctxt);
14779
0
    if (inputStream == NULL) {
14780
0
        xmlFreeParserInputBuffer(buf);
14781
0
        return(1);
14782
0
    }
14783
14784
0
    if (filename == NULL)
14785
0
        inputStream->filename = NULL;
14786
0
    else
14787
0
        inputStream->filename = (char *)
14788
0
            xmlCanonicPath((const xmlChar *) filename);
14789
0
    inputStream->buf = buf;
14790
0
    xmlBufResetInput(buf->buffer, inputStream);
14791
14792
0
    inputPush(ctxt, inputStream);
14793
14794
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14795
0
        (ctxt->input->buf != NULL)) {
14796
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14797
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14798
14799
0
        xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14800
14801
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14802
#ifdef DEBUG_PUSH
14803
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14804
#endif
14805
0
    }
14806
14807
0
    if (encoding != NULL) {
14808
0
        xmlCharEncodingHandlerPtr hdlr;
14809
14810
0
        if (ctxt->encoding != NULL)
14811
0
      xmlFree((xmlChar *) ctxt->encoding);
14812
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14813
14814
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14815
0
        if (hdlr != NULL) {
14816
0
            xmlSwitchToEncoding(ctxt, hdlr);
14817
0
  } else {
14818
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14819
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14820
0
        }
14821
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14822
0
        xmlSwitchEncoding(ctxt, enc);
14823
0
    }
14824
14825
0
    return(0);
14826
0
}
14827
14828
14829
/**
14830
 * xmlCtxtUseOptionsInternal:
14831
 * @ctxt: an XML parser context
14832
 * @options:  a combination of xmlParserOption
14833
 * @encoding:  the user provided encoding to use
14834
 *
14835
 * Applies the options to the parser context
14836
 *
14837
 * Returns 0 in case of success, the set of unknown or unimplemented options
14838
 *         in case of error.
14839
 */
14840
static int
14841
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14842
44.2k
{
14843
44.2k
    if (ctxt == NULL)
14844
0
        return(-1);
14845
44.2k
    if (encoding != NULL) {
14846
0
        if (ctxt->encoding != NULL)
14847
0
      xmlFree((xmlChar *) ctxt->encoding);
14848
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14849
0
    }
14850
44.2k
    if (options & XML_PARSE_RECOVER) {
14851
14.7k
        ctxt->recovery = 1;
14852
14.7k
        options -= XML_PARSE_RECOVER;
14853
14.7k
  ctxt->options |= XML_PARSE_RECOVER;
14854
14.7k
    } else
14855
29.5k
        ctxt->recovery = 0;
14856
44.2k
    if (options & XML_PARSE_DTDLOAD) {
14857
35.0k
        ctxt->loadsubset = XML_DETECT_IDS;
14858
35.0k
        options -= XML_PARSE_DTDLOAD;
14859
35.0k
  ctxt->options |= XML_PARSE_DTDLOAD;
14860
35.0k
    } else
14861
9.23k
        ctxt->loadsubset = 0;
14862
44.2k
    if (options & XML_PARSE_DTDATTR) {
14863
11.8k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14864
11.8k
        options -= XML_PARSE_DTDATTR;
14865
11.8k
  ctxt->options |= XML_PARSE_DTDATTR;
14866
11.8k
    }
14867
44.2k
    if (options & XML_PARSE_NOENT) {
14868
31.9k
        ctxt->replaceEntities = 1;
14869
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14870
31.9k
        options -= XML_PARSE_NOENT;
14871
31.9k
  ctxt->options |= XML_PARSE_NOENT;
14872
31.9k
    } else
14873
12.2k
        ctxt->replaceEntities = 0;
14874
44.2k
    if (options & XML_PARSE_PEDANTIC) {
14875
7.10k
        ctxt->pedantic = 1;
14876
7.10k
        options -= XML_PARSE_PEDANTIC;
14877
7.10k
  ctxt->options |= XML_PARSE_PEDANTIC;
14878
7.10k
    } else
14879
37.1k
        ctxt->pedantic = 0;
14880
44.2k
    if (options & XML_PARSE_NOBLANKS) {
14881
15.1k
        ctxt->keepBlanks = 0;
14882
15.1k
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14883
15.1k
        options -= XML_PARSE_NOBLANKS;
14884
15.1k
  ctxt->options |= XML_PARSE_NOBLANKS;
14885
15.1k
    } else
14886
29.0k
        ctxt->keepBlanks = 1;
14887
44.2k
    if (options & XML_PARSE_DTDVALID) {
14888
13.9k
        ctxt->validate = 1;
14889
13.9k
        if (options & XML_PARSE_NOWARNING)
14890
11.8k
            ctxt->vctxt.warning = NULL;
14891
13.9k
        if (options & XML_PARSE_NOERROR)
14892
7.90k
            ctxt->vctxt.error = NULL;
14893
13.9k
        options -= XML_PARSE_DTDVALID;
14894
13.9k
  ctxt->options |= XML_PARSE_DTDVALID;
14895
13.9k
    } else
14896
30.3k
        ctxt->validate = 0;
14897
44.2k
    if (options & XML_PARSE_NOWARNING) {
14898
13.3k
        ctxt->sax->warning = NULL;
14899
13.3k
        options -= XML_PARSE_NOWARNING;
14900
13.3k
    }
14901
44.2k
    if (options & XML_PARSE_NOERROR) {
14902
9.12k
        ctxt->sax->error = NULL;
14903
9.12k
        ctxt->sax->fatalError = NULL;
14904
9.12k
        options -= XML_PARSE_NOERROR;
14905
9.12k
    }
14906
44.2k
#ifdef LIBXML_SAX1_ENABLED
14907
44.2k
    if (options & XML_PARSE_SAX1) {
14908
14.3k
        ctxt->sax->startElement = xmlSAX2StartElement;
14909
14.3k
        ctxt->sax->endElement = xmlSAX2EndElement;
14910
14.3k
        ctxt->sax->startElementNs = NULL;
14911
14.3k
        ctxt->sax->endElementNs = NULL;
14912
14.3k
        ctxt->sax->initialized = 1;
14913
14.3k
        options -= XML_PARSE_SAX1;
14914
14.3k
  ctxt->options |= XML_PARSE_SAX1;
14915
14.3k
    }
14916
44.2k
#endif /* LIBXML_SAX1_ENABLED */
14917
44.2k
    if (options & XML_PARSE_NODICT) {
14918
14.4k
        ctxt->dictNames = 0;
14919
14.4k
        options -= XML_PARSE_NODICT;
14920
14.4k
  ctxt->options |= XML_PARSE_NODICT;
14921
29.7k
    } else {
14922
29.7k
        ctxt->dictNames = 1;
14923
29.7k
    }
14924
44.2k
    if (options & XML_PARSE_NOCDATA) {
14925
13.9k
        ctxt->sax->cdataBlock = NULL;
14926
13.9k
        options -= XML_PARSE_NOCDATA;
14927
13.9k
  ctxt->options |= XML_PARSE_NOCDATA;
14928
13.9k
    }
14929
44.2k
    if (options & XML_PARSE_NSCLEAN) {
14930
13.0k
  ctxt->options |= XML_PARSE_NSCLEAN;
14931
13.0k
        options -= XML_PARSE_NSCLEAN;
14932
13.0k
    }
14933
44.2k
    if (options & XML_PARSE_NONET) {
14934
11.9k
  ctxt->options |= XML_PARSE_NONET;
14935
11.9k
        options -= XML_PARSE_NONET;
14936
11.9k
    }
14937
44.2k
    if (options & XML_PARSE_COMPACT) {
14938
20.4k
  ctxt->options |= XML_PARSE_COMPACT;
14939
20.4k
        options -= XML_PARSE_COMPACT;
14940
20.4k
    }
14941
44.2k
    if (options & XML_PARSE_OLD10) {
14942
10.9k
  ctxt->options |= XML_PARSE_OLD10;
14943
10.9k
        options -= XML_PARSE_OLD10;
14944
10.9k
    }
14945
44.2k
    if (options & XML_PARSE_NOBASEFIX) {
14946
13.3k
  ctxt->options |= XML_PARSE_NOBASEFIX;
14947
13.3k
        options -= XML_PARSE_NOBASEFIX;
14948
13.3k
    }
14949
44.2k
    if (options & XML_PARSE_HUGE) {
14950
8.25k
  ctxt->options |= XML_PARSE_HUGE;
14951
8.25k
        options -= XML_PARSE_HUGE;
14952
8.25k
        if (ctxt->dict != NULL)
14953
8.25k
            xmlDictSetLimit(ctxt->dict, 0);
14954
8.25k
    }
14955
44.2k
    if (options & XML_PARSE_OLDSAX) {
14956
9.92k
  ctxt->options |= XML_PARSE_OLDSAX;
14957
9.92k
        options -= XML_PARSE_OLDSAX;
14958
9.92k
    }
14959
44.2k
    if (options & XML_PARSE_IGNORE_ENC) {
14960
11.4k
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14961
11.4k
        options -= XML_PARSE_IGNORE_ENC;
14962
11.4k
    }
14963
44.2k
    if (options & XML_PARSE_BIG_LINES) {
14964
11.5k
  ctxt->options |= XML_PARSE_BIG_LINES;
14965
11.5k
        options -= XML_PARSE_BIG_LINES;
14966
11.5k
    }
14967
44.2k
    ctxt->linenumbers = 1;
14968
44.2k
    return (options);
14969
44.2k
}
14970
14971
/**
14972
 * xmlCtxtUseOptions:
14973
 * @ctxt: an XML parser context
14974
 * @options:  a combination of xmlParserOption
14975
 *
14976
 * Applies the options to the parser context
14977
 *
14978
 * Returns 0 in case of success, the set of unknown or unimplemented options
14979
 *         in case of error.
14980
 */
14981
int
14982
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14983
29.5k
{
14984
29.5k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14985
29.5k
}
14986
14987
/**
14988
 * xmlDoRead:
14989
 * @ctxt:  an XML parser context
14990
 * @URL:  the base URL to use for the document
14991
 * @encoding:  the document encoding, or NULL
14992
 * @options:  a combination of xmlParserOption
14993
 * @reuse:  keep the context for reuse
14994
 *
14995
 * Common front-end for the xmlRead functions
14996
 *
14997
 * Returns the resulting document tree or NULL
14998
 */
14999
static xmlDocPtr
15000
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15001
          int options, int reuse)
15002
14.6k
{
15003
14.6k
    xmlDocPtr ret;
15004
15005
14.6k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
15006
14.6k
    if (encoding != NULL) {
15007
0
        xmlCharEncodingHandlerPtr hdlr;
15008
15009
0
  hdlr = xmlFindCharEncodingHandler(encoding);
15010
0
  if (hdlr != NULL)
15011
0
      xmlSwitchToEncoding(ctxt, hdlr);
15012
0
    }
15013
14.6k
    if ((URL != NULL) && (ctxt->input != NULL) &&
15014
14.6k
        (ctxt->input->filename == NULL))
15015
14.6k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
15016
14.6k
    xmlParseDocument(ctxt);
15017
14.6k
    if ((ctxt->wellFormed) || ctxt->recovery)
15018
6.78k
        ret = ctxt->myDoc;
15019
7.90k
    else {
15020
7.90k
        ret = NULL;
15021
7.90k
  if (ctxt->myDoc != NULL) {
15022
6.49k
      xmlFreeDoc(ctxt->myDoc);
15023
6.49k
  }
15024
7.90k
    }
15025
14.6k
    ctxt->myDoc = NULL;
15026
14.6k
    if (!reuse) {
15027
14.6k
  xmlFreeParserCtxt(ctxt);
15028
14.6k
    }
15029
15030
14.6k
    return (ret);
15031
14.6k
}
15032
15033
/**
15034
 * xmlReadDoc:
15035
 * @cur:  a pointer to a zero terminated string
15036
 * @URL:  the base URL to use for the document
15037
 * @encoding:  the document encoding, or NULL
15038
 * @options:  a combination of xmlParserOption
15039
 *
15040
 * parse an XML in-memory document and build a tree.
15041
 *
15042
 * Returns the resulting document tree
15043
 */
15044
xmlDocPtr
15045
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
15046
0
{
15047
0
    xmlParserCtxtPtr ctxt;
15048
15049
0
    if (cur == NULL)
15050
0
        return (NULL);
15051
0
    xmlInitParser();
15052
15053
0
    ctxt = xmlCreateDocParserCtxt(cur);
15054
0
    if (ctxt == NULL)
15055
0
        return (NULL);
15056
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15057
0
}
15058
15059
/**
15060
 * xmlReadFile:
15061
 * @filename:  a file or URL
15062
 * @encoding:  the document encoding, or NULL
15063
 * @options:  a combination of xmlParserOption
15064
 *
15065
 * parse an XML file from the filesystem or the network.
15066
 *
15067
 * Returns the resulting document tree
15068
 */
15069
xmlDocPtr
15070
xmlReadFile(const char *filename, const char *encoding, int options)
15071
0
{
15072
0
    xmlParserCtxtPtr ctxt;
15073
15074
0
    xmlInitParser();
15075
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
15076
0
    if (ctxt == NULL)
15077
0
        return (NULL);
15078
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
15079
0
}
15080
15081
/**
15082
 * xmlReadMemory:
15083
 * @buffer:  a pointer to a char array
15084
 * @size:  the size of the array
15085
 * @URL:  the base URL to use for the document
15086
 * @encoding:  the document encoding, or NULL
15087
 * @options:  a combination of xmlParserOption
15088
 *
15089
 * parse an XML in-memory document and build a tree.
15090
 *
15091
 * Returns the resulting document tree
15092
 */
15093
xmlDocPtr
15094
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
15095
14.7k
{
15096
14.7k
    xmlParserCtxtPtr ctxt;
15097
15098
14.7k
    xmlInitParser();
15099
14.7k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15100
14.7k
    if (ctxt == NULL)
15101
78
        return (NULL);
15102
14.6k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15103
14.7k
}
15104
15105
/**
15106
 * xmlReadFd:
15107
 * @fd:  an open file descriptor
15108
 * @URL:  the base URL to use for the document
15109
 * @encoding:  the document encoding, or NULL
15110
 * @options:  a combination of xmlParserOption
15111
 *
15112
 * parse an XML from a file descriptor and build a tree.
15113
 * NOTE that the file descriptor will not be closed when the
15114
 *      reader is closed or reset.
15115
 *
15116
 * Returns the resulting document tree
15117
 */
15118
xmlDocPtr
15119
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
15120
0
{
15121
0
    xmlParserCtxtPtr ctxt;
15122
0
    xmlParserInputBufferPtr input;
15123
0
    xmlParserInputPtr stream;
15124
15125
0
    if (fd < 0)
15126
0
        return (NULL);
15127
0
    xmlInitParser();
15128
15129
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15130
0
    if (input == NULL)
15131
0
        return (NULL);
15132
0
    input->closecallback = NULL;
15133
0
    ctxt = xmlNewParserCtxt();
15134
0
    if (ctxt == NULL) {
15135
0
        xmlFreeParserInputBuffer(input);
15136
0
        return (NULL);
15137
0
    }
15138
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15139
0
    if (stream == NULL) {
15140
0
        xmlFreeParserInputBuffer(input);
15141
0
  xmlFreeParserCtxt(ctxt);
15142
0
        return (NULL);
15143
0
    }
15144
0
    inputPush(ctxt, stream);
15145
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15146
0
}
15147
15148
/**
15149
 * xmlReadIO:
15150
 * @ioread:  an I/O read function
15151
 * @ioclose:  an I/O close function
15152
 * @ioctx:  an I/O handler
15153
 * @URL:  the base URL to use for the document
15154
 * @encoding:  the document encoding, or NULL
15155
 * @options:  a combination of xmlParserOption
15156
 *
15157
 * parse an XML document from I/O functions and source and build a tree.
15158
 *
15159
 * Returns the resulting document tree
15160
 */
15161
xmlDocPtr
15162
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
15163
          void *ioctx, const char *URL, const char *encoding, int options)
15164
0
{
15165
0
    xmlParserCtxtPtr ctxt;
15166
0
    xmlParserInputBufferPtr input;
15167
0
    xmlParserInputPtr stream;
15168
15169
0
    if (ioread == NULL)
15170
0
        return (NULL);
15171
0
    xmlInitParser();
15172
15173
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15174
0
                                         XML_CHAR_ENCODING_NONE);
15175
0
    if (input == NULL) {
15176
0
        if (ioclose != NULL)
15177
0
            ioclose(ioctx);
15178
0
        return (NULL);
15179
0
    }
15180
0
    ctxt = xmlNewParserCtxt();
15181
0
    if (ctxt == NULL) {
15182
0
        xmlFreeParserInputBuffer(input);
15183
0
        return (NULL);
15184
0
    }
15185
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15186
0
    if (stream == NULL) {
15187
0
        xmlFreeParserInputBuffer(input);
15188
0
  xmlFreeParserCtxt(ctxt);
15189
0
        return (NULL);
15190
0
    }
15191
0
    inputPush(ctxt, stream);
15192
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
15193
0
}
15194
15195
/**
15196
 * xmlCtxtReadDoc:
15197
 * @ctxt:  an XML parser context
15198
 * @cur:  a pointer to a zero terminated string
15199
 * @URL:  the base URL to use for the document
15200
 * @encoding:  the document encoding, or NULL
15201
 * @options:  a combination of xmlParserOption
15202
 *
15203
 * parse an XML in-memory document and build a tree.
15204
 * This reuses the existing @ctxt parser context
15205
 *
15206
 * Returns the resulting document tree
15207
 */
15208
xmlDocPtr
15209
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
15210
               const char *URL, const char *encoding, int options)
15211
0
{
15212
0
    if (cur == NULL)
15213
0
        return (NULL);
15214
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
15215
0
                              encoding, options));
15216
0
}
15217
15218
/**
15219
 * xmlCtxtReadFile:
15220
 * @ctxt:  an XML parser context
15221
 * @filename:  a file or URL
15222
 * @encoding:  the document encoding, or NULL
15223
 * @options:  a combination of xmlParserOption
15224
 *
15225
 * parse an XML file from the filesystem or the network.
15226
 * This reuses the existing @ctxt parser context
15227
 *
15228
 * Returns the resulting document tree
15229
 */
15230
xmlDocPtr
15231
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15232
                const char *encoding, int options)
15233
0
{
15234
0
    xmlParserInputPtr stream;
15235
15236
0
    if (filename == NULL)
15237
0
        return (NULL);
15238
0
    if (ctxt == NULL)
15239
0
        return (NULL);
15240
0
    xmlInitParser();
15241
15242
0
    xmlCtxtReset(ctxt);
15243
15244
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15245
0
    if (stream == NULL) {
15246
0
        return (NULL);
15247
0
    }
15248
0
    inputPush(ctxt, stream);
15249
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15250
0
}
15251
15252
/**
15253
 * xmlCtxtReadMemory:
15254
 * @ctxt:  an XML parser context
15255
 * @buffer:  a pointer to a char array
15256
 * @size:  the size of the array
15257
 * @URL:  the base URL to use for the document
15258
 * @encoding:  the document encoding, or NULL
15259
 * @options:  a combination of xmlParserOption
15260
 *
15261
 * parse an XML in-memory document and build a tree.
15262
 * This reuses the existing @ctxt parser context
15263
 *
15264
 * Returns the resulting document tree
15265
 */
15266
xmlDocPtr
15267
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15268
                  const char *URL, const char *encoding, int options)
15269
0
{
15270
0
    xmlParserInputBufferPtr input;
15271
0
    xmlParserInputPtr stream;
15272
15273
0
    if (ctxt == NULL)
15274
0
        return (NULL);
15275
0
    if (buffer == NULL)
15276
0
        return (NULL);
15277
0
    xmlInitParser();
15278
15279
0
    xmlCtxtReset(ctxt);
15280
15281
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15282
0
    if (input == NULL) {
15283
0
  return(NULL);
15284
0
    }
15285
15286
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15287
0
    if (stream == NULL) {
15288
0
  xmlFreeParserInputBuffer(input);
15289
0
  return(NULL);
15290
0
    }
15291
15292
0
    inputPush(ctxt, stream);
15293
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15294
0
}
15295
15296
/**
15297
 * xmlCtxtReadFd:
15298
 * @ctxt:  an XML parser context
15299
 * @fd:  an open file descriptor
15300
 * @URL:  the base URL to use for the document
15301
 * @encoding:  the document encoding, or NULL
15302
 * @options:  a combination of xmlParserOption
15303
 *
15304
 * parse an XML from a file descriptor and build a tree.
15305
 * This reuses the existing @ctxt parser context
15306
 * NOTE that the file descriptor will not be closed when the
15307
 *      reader is closed or reset.
15308
 *
15309
 * Returns the resulting document tree
15310
 */
15311
xmlDocPtr
15312
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15313
              const char *URL, const char *encoding, int options)
15314
0
{
15315
0
    xmlParserInputBufferPtr input;
15316
0
    xmlParserInputPtr stream;
15317
15318
0
    if (fd < 0)
15319
0
        return (NULL);
15320
0
    if (ctxt == NULL)
15321
0
        return (NULL);
15322
0
    xmlInitParser();
15323
15324
0
    xmlCtxtReset(ctxt);
15325
15326
15327
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15328
0
    if (input == NULL)
15329
0
        return (NULL);
15330
0
    input->closecallback = NULL;
15331
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15332
0
    if (stream == NULL) {
15333
0
        xmlFreeParserInputBuffer(input);
15334
0
        return (NULL);
15335
0
    }
15336
0
    inputPush(ctxt, stream);
15337
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15338
0
}
15339
15340
/**
15341
 * xmlCtxtReadIO:
15342
 * @ctxt:  an XML parser context
15343
 * @ioread:  an I/O read function
15344
 * @ioclose:  an I/O close function
15345
 * @ioctx:  an I/O handler
15346
 * @URL:  the base URL to use for the document
15347
 * @encoding:  the document encoding, or NULL
15348
 * @options:  a combination of xmlParserOption
15349
 *
15350
 * parse an XML document from I/O functions and source and build a tree.
15351
 * This reuses the existing @ctxt parser context
15352
 *
15353
 * Returns the resulting document tree
15354
 */
15355
xmlDocPtr
15356
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15357
              xmlInputCloseCallback ioclose, void *ioctx,
15358
        const char *URL,
15359
              const char *encoding, int options)
15360
0
{
15361
0
    xmlParserInputBufferPtr input;
15362
0
    xmlParserInputPtr stream;
15363
15364
0
    if (ioread == NULL)
15365
0
        return (NULL);
15366
0
    if (ctxt == NULL)
15367
0
        return (NULL);
15368
0
    xmlInitParser();
15369
15370
0
    xmlCtxtReset(ctxt);
15371
15372
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15373
0
                                         XML_CHAR_ENCODING_NONE);
15374
0
    if (input == NULL) {
15375
0
        if (ioclose != NULL)
15376
0
            ioclose(ioctx);
15377
0
        return (NULL);
15378
0
    }
15379
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15380
0
    if (stream == NULL) {
15381
0
        xmlFreeParserInputBuffer(input);
15382
0
        return (NULL);
15383
0
    }
15384
0
    inputPush(ctxt, stream);
15385
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
15386
0
}
15387