Coverage Report

Created: 2023-06-07 06:14

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
/*
 * Record describing one start tag.
 * NOTE(review): the per-field notes below are inferred from the field
 * names only; confirm against the code that fills this structure.
 */
struct _xmlStartTag {
92
    const xmlChar *prefix;  /* presumably the element's namespace prefix */
93
    const xmlChar *URI;     /* presumably the element's namespace URI */
94
    int line;               /* presumably the input line of the start tag */
95
    int nsNr;               /* presumably a namespace count - TODO confirm */
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
#define XML_PARSER_BIG_ENTITY 1000
116
#define XML_PARSER_LOT_ENTITY 5000
117
118
/*
119
 * Constants for protection against abusive entity expansion
120
 * ("billion laughs").
121
 */
122
123
/*
124
 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
125
 * of serialized output after entity expansion.
126
 */
127
4.44k
#define XML_PARSER_NON_LINEAR 5
128
129
/*
130
 * A certain amount is always allowed.
131
 */
132
100k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
133
134
/*
135
 * Fixed cost for each entity reference. This crudely models processing time
136
 * as well to protect, for example, against exponential expansion of empty
137
 * or very short entities.
138
 */
139
100k
#define XML_ENT_FIXED_COST 20
140
141
/**
142
 * xmlParserMaxDepth:
143
 *
144
 * arbitrary depth limit for the XML documents that we allow to
145
 * process. This is not a limitation of the parser but a safety
146
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
147
 * parser option.
148
 */
149
unsigned int xmlParserMaxDepth = 256;
150
151
152
153
#define SAX2 1
154
354M
#define XML_PARSER_BIG_BUFFER_SIZE 300
155
703M
#define XML_PARSER_BUFFER_SIZE 100
156
297k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
157
158
/**
159
 * XML_PARSER_CHUNK_SIZE
160
 *
161
 * When calling GROW, this is the minimal amount of data
162
 * the parser expects to have received. It is not a hard
163
 * limit but an optimization when reading strings like Names.
164
 * It is not strictly needed as long as the input's available characters
165
 * are followed by 0, which should be provided by the I/O level.
166
 */
167
#define XML_PARSER_CHUNK_SIZE 100
168
169
/*
170
 * List of XML prefixed PI allowed by W3C specs
171
 */
172
173
/* Table of PI target names; the last entry is NULL. */
static const char* const xmlW3CPIs[] = {
174
    "xml-stylesheet",
175
    "xml-model",
176
    NULL
177
};
178
179
180
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
182
                                              const xmlChar **str);
183
184
static xmlParserErrors
185
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
186
                xmlSAXHandlerPtr sax,
187
          void *user_data, int depth, const xmlChar *URL,
188
          const xmlChar *ID, xmlNodePtr *list);
189
190
static int
191
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
192
                          const char *encoding);
193
#ifdef LIBXML_LEGACY_ENABLED
194
static void
195
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
196
                      xmlNodePtr lastNode);
197
#endif /* LIBXML_LEGACY_ENABLED */
198
199
static xmlParserErrors
200
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
201
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
202
203
static int
204
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
205
206
/************************************************************************
207
 *                  *
208
 *    Some factorized error routines        *
209
 *                  *
210
 ************************************************************************/
211
212
/**
213
 * xmlErrAttributeDup:
214
 * @ctxt:  an XML parser context
215
 * @prefix:  the attribute prefix
216
 * @localname:  the attribute localname
217
 *
218
 * Handle a redefinition of attribute error
219
 */
220
static void
221
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
222
                   const xmlChar * localname)
223
6.73k
{
224
6.73k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
225
6.73k
        (ctxt->instate == XML_PARSER_EOF))
226
390
  return;
227
6.34k
    if (ctxt != NULL)
228
6.34k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
229
230
6.34k
    if (prefix == NULL)
231
3.20k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
232
3.20k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
233
3.20k
                        (const char *) localname, NULL, NULL, 0, 0,
234
3.20k
                        "Attribute %s redefined\n", localname);
235
3.14k
    else
236
3.14k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
237
3.14k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
238
3.14k
                        (const char *) prefix, (const char *) localname,
239
3.14k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
240
3.14k
                        localname);
241
6.34k
    if (ctxt != NULL) {
242
6.34k
  ctxt->wellFormed = 0;
243
6.34k
  if (ctxt->recovery == 0)
244
6.34k
      ctxt->disableSAX = 1;
245
6.34k
    }
246
6.34k
}
247
248
/**
249
 * xmlFatalErrMsg:
250
 * @ctxt:  an XML parser context
251
 * @error:  the error number
252
 * @msg:  the error message
253
 *
254
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
355k
{
260
355k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
261
355k
        (ctxt->instate == XML_PARSER_EOF))
262
4.52k
  return;
263
351k
    if (ctxt != NULL)
264
351k
  ctxt->errNo = error;
265
351k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
266
351k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
267
351k
    if (ctxt != NULL) {
268
351k
  ctxt->wellFormed = 0;
269
351k
  if (ctxt->recovery == 0)
270
351k
      ctxt->disableSAX = 1;
271
351k
    }
272
351k
}
273
274
/**
275
 * xmlWarningMsg:
276
 * @ctxt:  an XML parser context
277
 * @error:  the error number
278
 * @msg:  the error message
279
 * @str1:  extra data
280
 * @str2:  extra data
281
 *
282
 * Handle a warning.
283
 */
284
static void LIBXML_ATTR_FORMAT(3,0)
285
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
286
              const char *msg, const xmlChar *str1, const xmlChar *str2)
287
163k
{
288
163k
    xmlStructuredErrorFunc schannel = NULL;
289
290
163k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
291
163k
        (ctxt->instate == XML_PARSER_EOF))
292
0
  return;
293
163k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
294
163k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
295
163k
        schannel = ctxt->sax->serror;
296
163k
    if (ctxt != NULL) {
297
163k
        __xmlRaiseError(schannel,
298
163k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
299
163k
                    ctxt->userData,
300
163k
                    ctxt, NULL, XML_FROM_PARSER, error,
301
163k
                    XML_ERR_WARNING, NULL, 0,
302
163k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
303
163k
        msg, (const char *) str1, (const char *) str2);
304
163k
    } else {
305
0
        __xmlRaiseError(schannel, NULL, NULL,
306
0
                    ctxt, NULL, XML_FROM_PARSER, error,
307
0
                    XML_ERR_WARNING, NULL, 0,
308
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
309
0
        msg, (const char *) str1, (const char *) str2);
310
0
    }
311
163k
}
312
313
/**
314
 * xmlValidityError:
315
 * @ctxt:  an XML parser context
316
 * @error:  the error number
317
 * @msg:  the error message
318
 * @str1:  extra data
319
 *
320
 * Handle a validity error.
321
 */
322
static void LIBXML_ATTR_FORMAT(3,0)
323
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
324
              const char *msg, const xmlChar *str1, const xmlChar *str2)
325
2.07k
{
326
2.07k
    xmlStructuredErrorFunc schannel = NULL;
327
328
2.07k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
329
2.07k
        (ctxt->instate == XML_PARSER_EOF))
330
0
  return;
331
2.07k
    if (ctxt != NULL) {
332
2.07k
  ctxt->errNo = error;
333
2.07k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
334
2.07k
      schannel = ctxt->sax->serror;
335
2.07k
    }
336
2.07k
    if (ctxt != NULL) {
337
2.07k
        __xmlRaiseError(schannel,
338
2.07k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
339
2.07k
                    ctxt, NULL, XML_FROM_DTD, error,
340
2.07k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
341
2.07k
        (const char *) str2, NULL, 0, 0,
342
2.07k
        msg, (const char *) str1, (const char *) str2);
343
2.07k
  ctxt->valid = 0;
344
2.07k
    } else {
345
0
        __xmlRaiseError(schannel, NULL, NULL,
346
0
                    ctxt, NULL, XML_FROM_DTD, error,
347
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
348
0
        (const char *) str2, NULL, 0, 0,
349
0
        msg, (const char *) str1, (const char *) str2);
350
0
    }
351
2.07k
}
352
353
/**
354
 * xmlFatalErrMsgInt:
355
 * @ctxt:  an XML parser context
356
 * @error:  the error number
357
 * @msg:  the error message
358
 * @val:  an integer value
359
 *
360
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
361
 */
362
static void LIBXML_ATTR_FORMAT(3,0)
363
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
364
                  const char *msg, int val)
365
153k
{
366
153k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
367
153k
        (ctxt->instate == XML_PARSER_EOF))
368
256
  return;
369
153k
    if (ctxt != NULL)
370
153k
  ctxt->errNo = error;
371
153k
    __xmlRaiseError(NULL, NULL, NULL,
372
153k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
373
153k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
374
153k
    if (ctxt != NULL) {
375
153k
  ctxt->wellFormed = 0;
376
153k
  if (ctxt->recovery == 0)
377
153k
      ctxt->disableSAX = 1;
378
153k
    }
379
153k
}
380
381
/**
382
 * xmlFatalErrMsgStrIntStr:
383
 * @ctxt:  an XML parser context
384
 * @error:  the error number
385
 * @msg:  the error message
386
 * @str1:  an string info
387
 * @val:  an integer value
388
 * @str2:  an string info
389
 *
390
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
391
 */
392
static void LIBXML_ATTR_FORMAT(3,0)
393
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
394
                  const char *msg, const xmlChar *str1, int val,
395
      const xmlChar *str2)
396
143k
{
397
143k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
398
143k
        (ctxt->instate == XML_PARSER_EOF))
399
249
  return;
400
143k
    if (ctxt != NULL)
401
143k
  ctxt->errNo = error;
402
143k
    __xmlRaiseError(NULL, NULL, NULL,
403
143k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
404
143k
                    NULL, 0, (const char *) str1, (const char *) str2,
405
143k
        NULL, val, 0, msg, str1, val, str2);
406
143k
    if (ctxt != NULL) {
407
143k
  ctxt->wellFormed = 0;
408
143k
  if (ctxt->recovery == 0)
409
143k
      ctxt->disableSAX = 1;
410
143k
    }
411
143k
}
412
413
/**
414
 * xmlFatalErrMsgStr:
415
 * @ctxt:  an XML parser context
416
 * @error:  the error number
417
 * @msg:  the error message
418
 * @val:  a string value
419
 *
420
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
421
 */
422
static void LIBXML_ATTR_FORMAT(3,0)
423
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
424
                  const char *msg, const xmlChar * val)
425
163k
{
426
163k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
427
163k
        (ctxt->instate == XML_PARSER_EOF))
428
651
  return;
429
163k
    if (ctxt != NULL)
430
163k
  ctxt->errNo = error;
431
163k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
432
163k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
433
163k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
434
163k
                    val);
435
163k
    if (ctxt != NULL) {
436
163k
  ctxt->wellFormed = 0;
437
163k
  if (ctxt->recovery == 0)
438
163k
      ctxt->disableSAX = 1;
439
163k
    }
440
163k
}
441
442
/**
443
 * xmlErrMsgStr:
444
 * @ctxt:  an XML parser context
445
 * @error:  the error number
446
 * @msg:  the error message
447
 * @val:  a string value
448
 *
449
 * Handle a non fatal parser error
450
 */
451
static void LIBXML_ATTR_FORMAT(3,0)
452
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
453
                  const char *msg, const xmlChar * val)
454
28.4k
{
455
28.4k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
456
28.4k
        (ctxt->instate == XML_PARSER_EOF))
457
0
  return;
458
28.4k
    if (ctxt != NULL)
459
28.4k
  ctxt->errNo = error;
460
28.4k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
461
28.4k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
462
28.4k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
463
28.4k
                    val);
464
28.4k
}
465
466
/**
467
 * xmlNsErr:
468
 * @ctxt:  an XML parser context
469
 * @error:  the error number
470
 * @msg:  the message
471
 * @info1:  extra information string
472
 * @info2:  extra information string
473
 *
474
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
475
 */
476
static void LIBXML_ATTR_FORMAT(3,0)
477
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
478
         const char *msg,
479
         const xmlChar * info1, const xmlChar * info2,
480
         const xmlChar * info3)
481
377k
{
482
377k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
483
377k
        (ctxt->instate == XML_PARSER_EOF))
484
1.71k
  return;
485
376k
    if (ctxt != NULL)
486
376k
  ctxt->errNo = error;
487
376k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
488
376k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
489
376k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
490
376k
                    info1, info2, info3);
491
376k
    if (ctxt != NULL)
492
376k
  ctxt->nsWellFormed = 0;
493
376k
}
494
495
/**
496
 * xmlNsWarn
497
 * @ctxt:  an XML parser context
498
 * @error:  the error number
499
 * @msg:  the message
500
 * @info1:  extra information string
501
 * @info2:  extra information string
502
 *
503
 * Handle a namespace warning error
504
 */
505
static void LIBXML_ATTR_FORMAT(3,0)
506
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
507
         const char *msg,
508
         const xmlChar * info1, const xmlChar * info2,
509
         const xmlChar * info3)
510
6.52k
{
511
6.52k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
512
6.52k
        (ctxt->instate == XML_PARSER_EOF))
513
0
  return;
514
6.52k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
515
6.52k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
516
6.52k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
517
6.52k
                    info1, info2, info3);
518
6.52k
}
519
520
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of letting
 * the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    const unsigned long headroom = ULONG_MAX - *dst;

    *dst = (val > headroom) ? ULONG_MAX : (*dst + val);
}
527
528
/*
 * Add the size_t quantity @val to *@dst, clamping the result to ULONG_MAX.
 *
 * @val is taken as size_t so that the range check runs on the full value.
 * With an unsigned long parameter, a size wider than unsigned long
 * (64-bit size_t with 32-bit long) would be truncated at the call,
 * before the check could see it.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;    /* fits: checked just above */
}
535
536
/**
537
 * xmlParserEntityCheck:
538
 * @ctxt:  parser context
539
 * @extra:  sum of unexpanded entity sizes
540
 *
541
 * Check for non-linear entity expansion behaviour.
542
 *
543
 * In some cases like xmlStringDecodeEntities, this function is called
544
 * for each, possibly nested entity and its unexpanded content length.
545
 *
546
 * In other cases like xmlParseReference, it's only called for each
547
 * top-level entity with its unexpanded content length plus the sum of
548
 * the unexpanded content lengths (plus fixed cost) of all nested
549
 * entities.
550
 *
551
 * Summing the unexpanded lengths also adds the length of the reference.
552
 * This is by design. Taking the length of the entity name into account
553
 * discourages attacks that try to waste CPU time with abusively long
554
 * entity names. See test/recurse/lol6.xml for example. Each call also
555
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
556
 * short entities.
557
 *
558
 * Returns 1 on error, 0 on success.
559
 */
560
static int
561
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
562
100k
{
563
100k
    unsigned long consumed;
564
100k
    xmlParserInputPtr input = ctxt->input;
565
100k
    xmlEntityPtr entity = input->entity;
566
567
    /*
568
     * Compute total consumed bytes so far, including input streams of
569
     * external entities.
570
     */
571
100k
    consumed = input->parentConsumed;
572
100k
    if ((entity == NULL) ||
573
100k
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
574
69.1k
         ((entity->flags & XML_ENT_PARSED) == 0))) {
575
69.1k
        xmlSaturatedAdd(&consumed, input->consumed);
576
69.1k
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
577
69.1k
    }
578
100k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
579
580
    /*
581
     * Add extra cost and some fixed cost.
582
     */
583
100k
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
584
100k
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
585
586
    /*
587
     * It's important to always use saturation arithmetic when tracking
588
     * entity sizes to make the size checks reliable. If "sizeentcopy"
589
     * overflows, we have to abort.
590
     */
591
100k
    if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
592
100k
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
593
4.44k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
594
249
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
595
249
                       "Maximum entity amplification factor exceeded");
596
249
        xmlHaltParser(ctxt);
597
249
        return(1);
598
249
    }
599
600
100k
    return(0);
601
100k
}
602
603
/************************************************************************
604
 *                  *
605
 *    Library wide options          *
606
 *                  *
607
 ************************************************************************/
608
609
/**
610
  * xmlHasFeature:
611
  * @feature: the feature to be examined
612
  *
613
  * Examines if the library has been compiled with a given feature.
614
  *
615
  * Returns a non-zero value if the feature exist, otherwise zero.
616
  * Returns zero (0) if the feature does not exist or an unknown
617
  * unknown feature is requested, non-zero otherwise.
618
  */
619
int
620
xmlHasFeature(xmlFeature feature)
621
0
{
622
0
    switch (feature) {
623
0
  case XML_WITH_THREAD:
624
0
#ifdef LIBXML_THREAD_ENABLED
625
0
      return(1);
626
#else
627
      return(0);
628
#endif
629
0
        case XML_WITH_TREE:
630
0
#ifdef LIBXML_TREE_ENABLED
631
0
            return(1);
632
#else
633
            return(0);
634
#endif
635
0
        case XML_WITH_OUTPUT:
636
0
#ifdef LIBXML_OUTPUT_ENABLED
637
0
            return(1);
638
#else
639
            return(0);
640
#endif
641
0
        case XML_WITH_PUSH:
642
#ifdef LIBXML_PUSH_ENABLED
643
            return(1);
644
#else
645
0
            return(0);
646
0
#endif
647
0
        case XML_WITH_READER:
648
#ifdef LIBXML_READER_ENABLED
649
            return(1);
650
#else
651
0
            return(0);
652
0
#endif
653
0
        case XML_WITH_PATTERN:
654
0
#ifdef LIBXML_PATTERN_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_WRITER:
660
#ifdef LIBXML_WRITER_ENABLED
661
            return(1);
662
#else
663
0
            return(0);
664
0
#endif
665
0
        case XML_WITH_SAX1:
666
#ifdef LIBXML_SAX1_ENABLED
667
            return(1);
668
#else
669
0
            return(0);
670
0
#endif
671
0
        case XML_WITH_FTP:
672
#ifdef LIBXML_FTP_ENABLED
673
            return(1);
674
#else
675
0
            return(0);
676
0
#endif
677
0
        case XML_WITH_HTTP:
678
0
#ifdef LIBXML_HTTP_ENABLED
679
0
            return(1);
680
#else
681
            return(0);
682
#endif
683
0
        case XML_WITH_VALID:
684
#ifdef LIBXML_VALID_ENABLED
685
            return(1);
686
#else
687
0
            return(0);
688
0
#endif
689
0
        case XML_WITH_HTML:
690
0
#ifdef LIBXML_HTML_ENABLED
691
0
            return(1);
692
#else
693
            return(0);
694
#endif
695
0
        case XML_WITH_LEGACY:
696
#ifdef LIBXML_LEGACY_ENABLED
697
            return(1);
698
#else
699
0
            return(0);
700
0
#endif
701
0
        case XML_WITH_C14N:
702
#ifdef LIBXML_C14N_ENABLED
703
            return(1);
704
#else
705
0
            return(0);
706
0
#endif
707
0
        case XML_WITH_CATALOG:
708
0
#ifdef LIBXML_CATALOG_ENABLED
709
0
            return(1);
710
#else
711
            return(0);
712
#endif
713
0
        case XML_WITH_XPATH:
714
0
#ifdef LIBXML_XPATH_ENABLED
715
0
            return(1);
716
#else
717
            return(0);
718
#endif
719
0
        case XML_WITH_XPTR:
720
0
#ifdef LIBXML_XPTR_ENABLED
721
0
            return(1);
722
#else
723
            return(0);
724
#endif
725
0
        case XML_WITH_XINCLUDE:
726
0
#ifdef LIBXML_XINCLUDE_ENABLED
727
0
            return(1);
728
#else
729
            return(0);
730
#endif
731
0
        case XML_WITH_ICONV:
732
0
#ifdef LIBXML_ICONV_ENABLED
733
0
            return(1);
734
#else
735
            return(0);
736
#endif
737
0
        case XML_WITH_ISO8859X:
738
0
#ifdef LIBXML_ISO8859X_ENABLED
739
0
            return(1);
740
#else
741
            return(0);
742
#endif
743
0
        case XML_WITH_UNICODE:
744
#ifdef LIBXML_UNICODE_ENABLED
745
            return(1);
746
#else
747
0
            return(0);
748
0
#endif
749
0
        case XML_WITH_REGEXP:
750
#ifdef LIBXML_REGEXP_ENABLED
751
            return(1);
752
#else
753
0
            return(0);
754
0
#endif
755
0
        case XML_WITH_AUTOMATA:
756
#ifdef LIBXML_AUTOMATA_ENABLED
757
            return(1);
758
#else
759
0
            return(0);
760
0
#endif
761
0
        case XML_WITH_EXPR:
762
#ifdef LIBXML_EXPR_ENABLED
763
            return(1);
764
#else
765
0
            return(0);
766
0
#endif
767
0
        case XML_WITH_SCHEMAS:
768
#ifdef LIBXML_SCHEMAS_ENABLED
769
            return(1);
770
#else
771
0
            return(0);
772
0
#endif
773
0
        case XML_WITH_SCHEMATRON:
774
#ifdef LIBXML_SCHEMATRON_ENABLED
775
            return(1);
776
#else
777
0
            return(0);
778
0
#endif
779
0
        case XML_WITH_MODULES:
780
0
#ifdef LIBXML_MODULES_ENABLED
781
0
            return(1);
782
#else
783
            return(0);
784
#endif
785
0
        case XML_WITH_DEBUG:
786
0
#ifdef LIBXML_DEBUG_ENABLED
787
0
            return(1);
788
#else
789
            return(0);
790
#endif
791
0
        case XML_WITH_DEBUG_MEM:
792
#ifdef DEBUG_MEMORY_LOCATION
793
            return(1);
794
#else
795
0
            return(0);
796
0
#endif
797
0
        case XML_WITH_DEBUG_RUN:
798
0
            return(0);
799
0
        case XML_WITH_ZLIB:
800
#ifdef LIBXML_ZLIB_ENABLED
801
            return(1);
802
#else
803
0
            return(0);
804
0
#endif
805
0
        case XML_WITH_LZMA:
806
#ifdef LIBXML_LZMA_ENABLED
807
            return(1);
808
#else
809
0
            return(0);
810
0
#endif
811
0
        case XML_WITH_ICU:
812
#ifdef LIBXML_ICU_ENABLED
813
            return(1);
814
#else
815
0
            return(0);
816
0
#endif
817
0
        default:
818
0
      break;
819
0
     }
820
0
     return(0);
821
0
}
822
823
/************************************************************************
824
 *                  *
825
 *    SAX2 defaulted attributes handling      *
826
 *                  *
827
 ************************************************************************/
828
829
/**
830
 * xmlDetectSAX2:
831
 * @ctxt:  an XML parser context
832
 *
833
 * Do the SAX2 detection and specific initialization
834
 */
835
static void
836
348k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
837
348k
    xmlSAXHandlerPtr sax;
838
839
    /* Avoid unused variable warning if features are disabled. */
840
348k
    (void) sax;
841
842
348k
    if (ctxt == NULL) return;
843
348k
    sax = ctxt->sax;
844
#ifdef LIBXML_SAX1_ENABLED
845
    if ((sax) && (sax->initialized == XML_SAX2_MAGIC))
846
        ctxt->sax2 = 1;
847
#else
848
348k
    ctxt->sax2 = 1;
849
348k
#endif /* LIBXML_SAX1_ENABLED */
850
851
348k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
852
348k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
853
348k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
854
348k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
855
348k
    (ctxt->str_xml_ns == NULL)) {
856
4
        xmlErrMemory(ctxt, NULL);
857
4
    }
858
348k
}
859
860
/*
 * Header plus trailing storage for the defaulted attributes of one
 * element. xmlAddDefAttrs allocates it with room for 5 pointers per
 * attribute and an initial capacity of 4 attributes.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
861
typedef xmlDefAttrs *xmlDefAttrsPtr;
862
struct _xmlDefAttrs {
863
    int nbAttrs;  /* number of defaulted attributes on that element */
864
    int maxAttrs;       /* the size of the array, counted in attributes */
865
#if __STDC_VERSION__ >= 199901L
866
    /* Using a C99 flexible array member avoids UBSan errors. */
867
    const xmlChar *values[]; /* array of localname/prefix/values/external */
868
#else
869
    /* Pre-C99 fallback: fixed-size declaration for the same trailing array. */
    const xmlChar *values[5];
870
#endif
871
};
872
873
/**
874
 * xmlAttrNormalizeSpace:
875
 * @src: the source string
876
 * @dst: the target string
877
 *
878
 * Normalize the space in non CDATA attribute values:
879
 * If the attribute type is not CDATA, then the XML processor MUST further
880
 * process the normalized attribute value by discarding any leading and
881
 * trailing space (#x20) characters, and by replacing sequences of space
882
 * (#x20) characters by a single space (#x20) character.
883
 * Note that the size of dst need to be at least src, and if one doesn't need
884
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
885
 * passing src as dst is just fine.
886
 *
887
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
888
 *         is needed.
889
 */
890
static xmlChar *
891
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
892
56.4k
{
893
56.4k
    if ((src == NULL) || (dst == NULL))
894
0
        return(NULL);
895
896
72.4k
    while (*src == 0x20) src++;
897
318k
    while (*src != 0) {
898
261k
  if (*src == 0x20) {
899
37.1k
      while (*src == 0x20) src++;
900
15.8k
      if (*src != 0)
901
14.8k
    *dst++ = 0x20;
902
245k
  } else {
903
245k
      *dst++ = *src++;
904
245k
  }
905
261k
    }
906
56.4k
    *dst = 0;
907
56.4k
    if (dst == src)
908
46.7k
       return(NULL);
909
9.74k
    return(dst);
910
56.4k
}
911
912
/**
913
 * xmlAttrNormalizeSpace2:
914
 * @src: the source string
915
 *
916
 * Normalize the space in non CDATA attribute values, a slightly more complex
917
 * front end to avoid allocation problems when running on attribute values
918
 * coming from the input.
919
 *
920
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
921
 *         is needed.
922
 */
923
static const xmlChar *
924
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
925
5.73k
{
926
5.73k
    int i;
927
5.73k
    int remove_head = 0;
928
5.73k
    int need_realloc = 0;
929
5.73k
    const xmlChar *cur;
930
931
5.73k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
932
0
        return(NULL);
933
5.73k
    i = *len;
934
5.73k
    if (i <= 0)
935
2.25k
        return(NULL);
936
937
3.48k
    cur = src;
938
4.61k
    while (*cur == 0x20) {
939
1.13k
        cur++;
940
1.13k
  remove_head++;
941
1.13k
    }
942
110k
    while (*cur != 0) {
943
107k
  if (*cur == 0x20) {
944
2.74k
      cur++;
945
2.74k
      if ((*cur == 0x20) || (*cur == 0)) {
946
319
          need_realloc = 1;
947
319
    break;
948
319
      }
949
2.74k
  } else
950
104k
      cur++;
951
107k
    }
952
3.48k
    if (need_realloc) {
953
319
        xmlChar *ret;
954
955
319
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
956
319
  if (ret == NULL) {
957
2
      xmlErrMemory(ctxt, NULL);
958
2
      return(NULL);
959
2
  }
960
317
  xmlAttrNormalizeSpace(ret, ret);
961
317
  *len = strlen((const char *)ret);
962
317
        return(ret);
963
3.16k
    } else if (remove_head) {
964
814
        *len -= remove_head;
965
814
        memmove(src, src + remove_head, 1 + *len);
966
814
  return(src);
967
814
    }
968
2.34k
    return(NULL);
969
3.48k
}
970
971
/**
972
 * xmlAddDefAttrs:
973
 * @ctxt:  an XML parser context
974
 * @fullname:  the element fullname
975
 * @fullattr:  the attribute fullname
976
 * @value:  the attribute value
977
 *
978
 * Add a defaulted attribute for an element
979
 */
980
static void
981
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
982
               const xmlChar *fullname,
983
               const xmlChar *fullattr,
984
76.4k
               const xmlChar *value) {
985
76.4k
    xmlDefAttrsPtr defaults;
986
76.4k
    int len;
987
76.4k
    const xmlChar *name;
988
76.4k
    const xmlChar *prefix;
989
990
    /*
991
     * Allows to detect attribute redefinitions
992
     */
993
76.4k
    if (ctxt->attsSpecial != NULL) {
994
64.2k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
995
10.0k
      return;
996
64.2k
    }
997
998
66.4k
    if (ctxt->attsDefault == NULL) {
999
12.2k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1000
12.2k
  if (ctxt->attsDefault == NULL)
1001
17
      goto mem_error;
1002
12.2k
    }
1003
1004
    /*
1005
     * split the element name into prefix:localname , the string found
1006
     * are within the DTD and then not associated to namespace names.
1007
     */
1008
66.4k
    name = xmlSplitQName3(fullname, &len);
1009
66.4k
    if (name == NULL) {
1010
46.4k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1011
46.4k
  prefix = NULL;
1012
46.4k
    } else {
1013
19.9k
        name = xmlDictLookup(ctxt->dict, name, -1);
1014
19.9k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1015
19.9k
    }
1016
1017
    /*
1018
     * make sure there is some storage
1019
     */
1020
66.4k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1021
66.4k
    if (defaults == NULL) {
1022
25.2k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1023
25.2k
                     (4 * 5) * sizeof(const xmlChar *));
1024
25.2k
  if (defaults == NULL)
1025
9
      goto mem_error;
1026
25.2k
  defaults->nbAttrs = 0;
1027
25.2k
  defaults->maxAttrs = 4;
1028
25.2k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1029
25.2k
                          defaults, NULL) < 0) {
1030
1
      xmlFree(defaults);
1031
1
      goto mem_error;
1032
1
  }
1033
41.2k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1034
6.70k
        xmlDefAttrsPtr temp;
1035
1036
6.70k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1037
6.70k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1038
6.70k
  if (temp == NULL)
1039
3
      goto mem_error;
1040
6.70k
  defaults = temp;
1041
6.70k
  defaults->maxAttrs *= 2;
1042
6.70k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1043
6.70k
                          defaults, NULL) < 0) {
1044
0
      xmlFree(defaults);
1045
0
      goto mem_error;
1046
0
  }
1047
6.70k
    }
1048
1049
    /*
1050
     * Split the element name into prefix:localname , the string found
1051
     * are within the DTD and hen not associated to namespace names.
1052
     */
1053
66.4k
    name = xmlSplitQName3(fullattr, &len);
1054
66.4k
    if (name == NULL) {
1055
44.5k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1056
44.5k
  prefix = NULL;
1057
44.5k
    } else {
1058
21.8k
        name = xmlDictLookup(ctxt->dict, name, -1);
1059
21.8k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1060
21.8k
    }
1061
1062
66.4k
    defaults->values[5 * defaults->nbAttrs] = name;
1063
66.4k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1064
    /* intern the string and precompute the end */
1065
66.4k
    len = xmlStrlen(value);
1066
66.4k
    value = xmlDictLookup(ctxt->dict, value, len);
1067
66.4k
    if (value == NULL)
1068
1
        goto mem_error;
1069
66.4k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1070
66.4k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1071
66.4k
    if (ctxt->external)
1072
267
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1073
66.1k
    else
1074
66.1k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1075
66.4k
    defaults->nbAttrs++;
1076
1077
66.4k
    return;
1078
1079
31
mem_error:
1080
31
    xmlErrMemory(ctxt, NULL);
1081
31
    return;
1082
66.4k
}
1083
1084
/**
1085
 * xmlAddSpecialAttr:
1086
 * @ctxt:  an XML parser context
1087
 * @fullname:  the element fullname
1088
 * @fullattr:  the attribute fullname
1089
 * @type:  the attribute type
1090
 *
1091
 * Register this attribute type
1092
 */
1093
static void
1094
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1095
      const xmlChar *fullname,
1096
      const xmlChar *fullattr,
1097
      int type)
1098
77.4k
{
1099
77.4k
    if (ctxt->attsSpecial == NULL) {
1100
12.9k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1101
12.9k
  if (ctxt->attsSpecial == NULL)
1102
36
      goto mem_error;
1103
12.9k
    }
1104
1105
77.4k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1106
10.1k
        return;
1107
1108
67.3k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1109
67.3k
                     (void *) (ptrdiff_t) type);
1110
67.3k
    return;
1111
1112
36
mem_error:
1113
36
    xmlErrMemory(ctxt, NULL);
1114
36
    return;
1115
77.4k
}
1116
1117
/**
1118
 * xmlCleanSpecialAttrCallback:
1119
 *
1120
 * Removes CDATA attributes from the special attribute table
1121
 */
1122
static void
1123
xmlCleanSpecialAttrCallback(void *payload, void *data,
1124
                            const xmlChar *fullname, const xmlChar *fullattr,
1125
20.0k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1126
20.0k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1127
1128
20.0k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1129
6.73k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1130
6.73k
    }
1131
20.0k
}
1132
1133
/**
1134
 * xmlCleanSpecialAttr:
1135
 * @ctxt:  an XML parser context
1136
 *
1137
 * Trim the list of attributes defined to remove all those of type
1138
 * CDATA as they are not special. This call should be done when finishing
1139
 * to parse the DTD and before starting to parse the document root.
1140
 */
1141
static void
1142
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1143
23.1k
{
1144
23.1k
    if (ctxt->attsSpecial == NULL)
1145
18.5k
        return;
1146
1147
4.54k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1148
1149
4.54k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1150
279
        xmlHashFree(ctxt->attsSpecial, NULL);
1151
279
        ctxt->attsSpecial = NULL;
1152
279
    }
1153
4.54k
    return;
1154
23.1k
}
1155
1156
/**
1157
 * xmlCheckLanguageID:
1158
 * @lang:  pointer to the string value
1159
 *
1160
 * DEPRECATED: Internal function, do not use.
1161
 *
1162
 * Checks that the value conforms to the LanguageID production:
1163
 *
1164
 * NOTE: this is somewhat deprecated, those productions were removed from
1165
 *       the XML Second edition.
1166
 *
1167
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1168
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1169
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1170
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1171
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1172
 * [38] Subcode ::= ([a-z] | [A-Z])+
1173
 *
1174
 * The current REC reference the successors of RFC 1766, currently 5646
1175
 *
1176
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1177
 * langtag       = language
1178
 *                 ["-" script]
1179
 *                 ["-" region]
1180
 *                 *("-" variant)
1181
 *                 *("-" extension)
1182
 *                 ["-" privateuse]
1183
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1184
 *                 ["-" extlang]       ; sometimes followed by
1185
 *                                     ; extended language subtags
1186
 *               / 4ALPHA              ; or reserved for future use
1187
 *               / 5*8ALPHA            ; or registered language subtag
1188
 *
1189
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1190
 *                 *2("-" 3ALPHA)      ; permanently reserved
1191
 *
1192
 * script        = 4ALPHA              ; ISO 15924 code
1193
 *
1194
 * region        = 2ALPHA              ; ISO 3166-1 code
1195
 *               / 3DIGIT              ; UN M.49 code
1196
 *
1197
 * variant       = 5*8alphanum         ; registered variants
1198
 *               / (DIGIT 3alphanum)
1199
 *
1200
 * extension     = singleton 1*("-" (2*8alphanum))
1201
 *
1202
 *                                     ; Single alphanumerics
1203
 *                                     ; "x" reserved for private use
1204
 * singleton     = DIGIT               ; 0 - 9
1205
 *               / %x41-57             ; A - W
1206
 *               / %x59-5A             ; Y - Z
1207
 *               / %x61-77             ; a - w
1208
 *               / %x79-7A             ; y - z
1209
 *
1210
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1211
 * The parser below doesn't try to cope with extension or privateuse
1212
 * that could be added but that's not interoperable anyway
1213
 *
1214
 * Returns 1 if correct 0 otherwise
1215
 **/
1216
int
1217
xmlCheckLanguageID(const xmlChar * lang)
1218
0
{
1219
0
    const xmlChar *cur = lang, *nxt;
1220
1221
0
    if (cur == NULL)
1222
0
        return (0);
1223
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1224
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1225
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1226
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1227
        /*
1228
         * Still allow IANA code and user code which were coming
1229
         * from the previous version of the XML-1.0 specification
1230
         * it's deprecated but we should not fail
1231
         */
1232
0
        cur += 2;
1233
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1234
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1235
0
            cur++;
1236
0
        return(cur[0] == 0);
1237
0
    }
1238
0
    nxt = cur;
1239
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1240
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1241
0
           nxt++;
1242
0
    if (nxt - cur >= 4) {
1243
        /*
1244
         * Reserved
1245
         */
1246
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1247
0
            return(0);
1248
0
        return(1);
1249
0
    }
1250
0
    if (nxt - cur < 2)
1251
0
        return(0);
1252
    /* we got an ISO 639 code */
1253
0
    if (nxt[0] == 0)
1254
0
        return(1);
1255
0
    if (nxt[0] != '-')
1256
0
        return(0);
1257
1258
0
    nxt++;
1259
0
    cur = nxt;
1260
    /* now we can have extlang or script or region or variant */
1261
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1262
0
        goto region_m49;
1263
1264
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1265
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1266
0
           nxt++;
1267
0
    if (nxt - cur == 4)
1268
0
        goto script;
1269
0
    if (nxt - cur == 2)
1270
0
        goto region;
1271
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1272
0
        goto variant;
1273
0
    if (nxt - cur != 3)
1274
0
        return(0);
1275
    /* we parsed an extlang */
1276
0
    if (nxt[0] == 0)
1277
0
        return(1);
1278
0
    if (nxt[0] != '-')
1279
0
        return(0);
1280
1281
0
    nxt++;
1282
0
    cur = nxt;
1283
    /* now we can have script or region or variant */
1284
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1285
0
        goto region_m49;
1286
1287
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1288
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1289
0
           nxt++;
1290
0
    if (nxt - cur == 2)
1291
0
        goto region;
1292
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1293
0
        goto variant;
1294
0
    if (nxt - cur != 4)
1295
0
        return(0);
1296
    /* we parsed a script */
1297
0
script:
1298
0
    if (nxt[0] == 0)
1299
0
        return(1);
1300
0
    if (nxt[0] != '-')
1301
0
        return(0);
1302
1303
0
    nxt++;
1304
0
    cur = nxt;
1305
    /* now we can have region or variant */
1306
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1307
0
        goto region_m49;
1308
1309
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1310
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1311
0
           nxt++;
1312
1313
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1314
0
        goto variant;
1315
0
    if (nxt - cur != 2)
1316
0
        return(0);
1317
    /* we parsed a region */
1318
0
region:
1319
0
    if (nxt[0] == 0)
1320
0
        return(1);
1321
0
    if (nxt[0] != '-')
1322
0
        return(0);
1323
1324
0
    nxt++;
1325
0
    cur = nxt;
1326
    /* now we can just have a variant */
1327
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1328
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1329
0
           nxt++;
1330
1331
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1332
0
        return(0);
1333
1334
    /* we parsed a variant */
1335
0
variant:
1336
0
    if (nxt[0] == 0)
1337
0
        return(1);
1338
0
    if (nxt[0] != '-')
1339
0
        return(0);
1340
    /* extensions and private use subtags not checked */
1341
0
    return (1);
1342
1343
0
region_m49:
1344
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1345
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1346
0
        nxt += 3;
1347
0
        goto region;
1348
0
    }
1349
0
    return(0);
1350
0
}
1351
1352
/************************************************************************
1353
 *                  *
1354
 *    Parser stacks related functions and macros    *
1355
 *                  *
1356
 ************************************************************************/
1357
1358
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1359
                                            const xmlChar ** str);
1360
1361
#ifdef SAX2
1362
/**
1363
 * nsPush:
1364
 * @ctxt:  an XML parser context
1365
 * @prefix:  the namespace prefix or NULL
1366
 * @URL:  the namespace name
1367
 *
1368
 * Pushes a new parser namespace on top of the ns stack
1369
 *
1370
 * Returns -1 in case of error, -2 if the namespace should be discarded
1371
 *     and the index in the stack otherwise.
1372
 */
1373
static int
1374
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1375
365k
{
1376
365k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1377
0
        int i;
1378
0
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1379
0
      if (ctxt->nsTab[i] == prefix) {
1380
    /* in scope */
1381
0
          if (ctxt->nsTab[i + 1] == URL)
1382
0
        return(-2);
1383
    /* out of scope keep it */
1384
0
    break;
1385
0
      }
1386
0
  }
1387
0
    }
1388
365k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1389
193k
  ctxt->nsMax = 10;
1390
193k
  ctxt->nsNr = 0;
1391
193k
  ctxt->nsTab = (const xmlChar **)
1392
193k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1393
193k
  if (ctxt->nsTab == NULL) {
1394
101
      xmlErrMemory(ctxt, NULL);
1395
101
      ctxt->nsMax = 0;
1396
101
            return (-1);
1397
101
  }
1398
193k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1399
2.06k
        const xmlChar ** tmp;
1400
2.06k
        ctxt->nsMax *= 2;
1401
2.06k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1402
2.06k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1403
2.06k
        if (tmp == NULL) {
1404
15
            xmlErrMemory(ctxt, NULL);
1405
15
      ctxt->nsMax /= 2;
1406
15
            return (-1);
1407
15
        }
1408
2.05k
  ctxt->nsTab = tmp;
1409
2.05k
    }
1410
365k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1411
365k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1412
365k
    return (ctxt->nsNr);
1413
365k
}
1414
/**
1415
 * nsPop:
1416
 * @ctxt: an XML parser context
1417
 * @nr:  the number to pop
1418
 *
1419
 * Pops the top @nr parser prefix/namespace from the ns stack
1420
 *
1421
 * Returns the number of namespaces removed
1422
 */
1423
static int
1424
nsPop(xmlParserCtxtPtr ctxt, int nr)
1425
276k
{
1426
276k
    int i;
1427
1428
276k
    if (ctxt->nsTab == NULL) return(0);
1429
276k
    if (ctxt->nsNr < nr) {
1430
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1431
0
        nr = ctxt->nsNr;
1432
0
    }
1433
276k
    if (ctxt->nsNr <= 0)
1434
0
        return (0);
1435
1436
939k
    for (i = 0;i < nr;i++) {
1437
662k
         ctxt->nsNr--;
1438
662k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1439
662k
    }
1440
276k
    return(nr);
1441
276k
}
1442
#endif
1443
1444
static int
1445
208k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1446
208k
    const xmlChar **atts;
1447
208k
    int *attallocs;
1448
208k
    int maxatts;
1449
1450
208k
    if (nr + 5 > ctxt->maxatts) {
1451
208k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1452
208k
  atts = (const xmlChar **) xmlMalloc(
1453
208k
             maxatts * sizeof(const xmlChar *));
1454
208k
  if (atts == NULL) goto mem_error;
1455
208k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1456
208k
                               (maxatts / 5) * sizeof(int));
1457
208k
  if (attallocs == NULL) {
1458
13
            xmlFree(atts);
1459
13
            goto mem_error;
1460
13
        }
1461
208k
        if (ctxt->maxatts > 0)
1462
493
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1463
208k
        xmlFree(ctxt->atts);
1464
208k
  ctxt->atts = atts;
1465
208k
  ctxt->attallocs = attallocs;
1466
208k
  ctxt->maxatts = maxatts;
1467
208k
    }
1468
208k
    return(ctxt->maxatts);
1469
55
mem_error:
1470
55
    xmlErrMemory(ctxt, NULL);
1471
55
    return(-1);
1472
208k
}
1473
1474
/**
1475
 * inputPush:
1476
 * @ctxt:  an XML parser context
1477
 * @value:  the parser input
1478
 *
1479
 * Pushes a new parser input on top of the input stack
1480
 *
1481
 * Returns -1 in case of error, the index in the stack otherwise
1482
 */
1483
int
1484
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1485
383k
{
1486
383k
    if ((ctxt == NULL) || (value == NULL))
1487
0
        return(-1);
1488
383k
    if (ctxt->inputNr >= ctxt->inputMax) {
1489
0
        size_t newSize = ctxt->inputMax * 2;
1490
0
        xmlParserInputPtr *tmp;
1491
1492
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1493
0
                                               newSize * sizeof(*tmp));
1494
0
        if (tmp == NULL) {
1495
0
            xmlErrMemory(ctxt, NULL);
1496
0
            return (-1);
1497
0
        }
1498
0
        ctxt->inputTab = tmp;
1499
0
        ctxt->inputMax = newSize;
1500
0
    }
1501
383k
    ctxt->inputTab[ctxt->inputNr] = value;
1502
383k
    ctxt->input = value;
1503
383k
    return (ctxt->inputNr++);
1504
383k
}
1505
/**
1506
 * inputPop:
1507
 * @ctxt: an XML parser context
1508
 *
1509
 * Pops the top parser input from the input stack
1510
 *
1511
 * Returns the input just removed
1512
 */
1513
xmlParserInputPtr
1514
inputPop(xmlParserCtxtPtr ctxt)
1515
1.21M
{
1516
1.21M
    xmlParserInputPtr ret;
1517
1518
1.21M
    if (ctxt == NULL)
1519
0
        return(NULL);
1520
1.21M
    if (ctxt->inputNr <= 0)
1521
841k
        return (NULL);
1522
376k
    ctxt->inputNr--;
1523
376k
    if (ctxt->inputNr > 0)
1524
34.9k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1525
341k
    else
1526
341k
        ctxt->input = NULL;
1527
376k
    ret = ctxt->inputTab[ctxt->inputNr];
1528
376k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1529
376k
    return (ret);
1530
1.21M
}
1531
/**
1532
 * nodePush:
1533
 * @ctxt:  an XML parser context
1534
 * @value:  the element node
1535
 *
1536
 * DEPRECATED: Internal function, do not use.
1537
 *
1538
 * Pushes a new element node on top of the node stack
1539
 *
1540
 * Returns -1 in case of error, the index in the stack otherwise
1541
 */
1542
int
1543
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1544
1.69M
{
1545
1.69M
    if (ctxt == NULL) return(0);
1546
1.69M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1547
3.90k
        xmlNodePtr *tmp;
1548
1549
3.90k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1550
3.90k
                                      ctxt->nodeMax * 2 *
1551
3.90k
                                      sizeof(ctxt->nodeTab[0]));
1552
3.90k
        if (tmp == NULL) {
1553
2
            xmlErrMemory(ctxt, NULL);
1554
2
            return (-1);
1555
2
        }
1556
3.90k
        ctxt->nodeTab = tmp;
1557
3.90k
  ctxt->nodeMax *= 2;
1558
3.90k
    }
1559
1.69M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1560
1.69M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1561
1
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1562
1
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1563
1
        xmlParserMaxDepth);
1564
1
  xmlHaltParser(ctxt);
1565
1
  return(-1);
1566
1
    }
1567
1.69M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1568
1.69M
    ctxt->node = value;
1569
1.69M
    return (ctxt->nodeNr++);
1570
1.69M
}
1571
1572
/**
1573
 * nodePop:
1574
 * @ctxt: an XML parser context
1575
 *
1576
 * DEPRECATED: Internal function, do not use.
1577
 *
1578
 * Pops the top element node from the node stack
1579
 *
1580
 * Returns the node just removed
1581
 */
1582
xmlNodePtr
1583
nodePop(xmlParserCtxtPtr ctxt)
1584
1.64M
{
1585
1.64M
    xmlNodePtr ret;
1586
1587
1.64M
    if (ctxt == NULL) return(NULL);
1588
1.64M
    if (ctxt->nodeNr <= 0)
1589
77.2k
        return (NULL);
1590
1.57M
    ctxt->nodeNr--;
1591
1.57M
    if (ctxt->nodeNr > 0)
1592
1.36M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1593
206k
    else
1594
206k
        ctxt->node = NULL;
1595
1.57M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1596
1.57M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1597
1.57M
    return (ret);
1598
1.64M
}
1599
1600
/**
1601
 * nameNsPush:
1602
 * @ctxt:  an XML parser context
1603
 * @value:  the element name
1604
 * @prefix:  the element prefix
1605
 * @URI:  the element namespace name
1606
 * @line:  the current line number for error messages
1607
 * @nsNr:  the number of namespaces pushed on the namespace table
1608
 *
1609
 * Pushes a new element name/prefix/URL on top of the name stack
1610
 *
1611
 * Returns -1 in case of error, the index in the stack otherwise
1612
 */
1613
static int
1614
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1615
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1616
2.07M
{
1617
2.07M
    xmlStartTag *tag;
1618
1619
2.07M
    if (ctxt->nameNr >= ctxt->nameMax) {
1620
8.02k
        const xmlChar * *tmp;
1621
8.02k
        xmlStartTag *tmp2;
1622
8.02k
        ctxt->nameMax *= 2;
1623
8.02k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1624
8.02k
                                    ctxt->nameMax *
1625
8.02k
                                    sizeof(ctxt->nameTab[0]));
1626
8.02k
        if (tmp == NULL) {
1627
5
      ctxt->nameMax /= 2;
1628
5
      goto mem_error;
1629
5
        }
1630
8.01k
  ctxt->nameTab = tmp;
1631
8.01k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1632
8.01k
                                    ctxt->nameMax *
1633
8.01k
                                    sizeof(ctxt->pushTab[0]));
1634
8.01k
        if (tmp2 == NULL) {
1635
2
      ctxt->nameMax /= 2;
1636
2
      goto mem_error;
1637
2
        }
1638
8.01k
  ctxt->pushTab = tmp2;
1639
2.06M
    } else if (ctxt->pushTab == NULL) {
1640
248k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1641
248k
                                            sizeof(ctxt->pushTab[0]));
1642
248k
        if (ctxt->pushTab == NULL)
1643
71
            goto mem_error;
1644
248k
    }
1645
2.07M
    ctxt->nameTab[ctxt->nameNr] = value;
1646
2.07M
    ctxt->name = value;
1647
2.07M
    tag = &ctxt->pushTab[ctxt->nameNr];
1648
2.07M
    tag->prefix = prefix;
1649
2.07M
    tag->URI = URI;
1650
2.07M
    tag->line = line;
1651
2.07M
    tag->nsNr = nsNr;
1652
2.07M
    return (ctxt->nameNr++);
1653
78
mem_error:
1654
78
    xmlErrMemory(ctxt, NULL);
1655
78
    return (-1);
1656
2.07M
}
1657
#ifdef LIBXML_PUSH_ENABLED
1658
/**
1659
 * nameNsPop:
1660
 * @ctxt: an XML parser context
1661
 *
1662
 * Pops the top element/prefix/URI name from the name stack
1663
 *
1664
 * Returns the name just removed
1665
 */
1666
static const xmlChar *
1667
nameNsPop(xmlParserCtxtPtr ctxt)
1668
{
1669
    const xmlChar *ret;
1670
1671
    if (ctxt->nameNr <= 0)
1672
        return (NULL);
1673
    ctxt->nameNr--;
1674
    if (ctxt->nameNr > 0)
1675
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1676
    else
1677
        ctxt->name = NULL;
1678
    ret = ctxt->nameTab[ctxt->nameNr];
1679
    ctxt->nameTab[ctxt->nameNr] = NULL;
1680
    return (ret);
1681
}
1682
#endif /* LIBXML_PUSH_ENABLED */
1683
1684
/**
1685
 * namePush:
1686
 * @ctxt:  an XML parser context
1687
 * @value:  the element name
1688
 *
1689
 * DEPRECATED: Internal function, do not use.
1690
 *
1691
 * Pushes a new element name on top of the name stack
1692
 *
1693
 * Returns -1 in case of error, the index in the stack otherwise
1694
 */
1695
int
1696
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1697
0
{
1698
0
    if (ctxt == NULL) return (-1);
1699
1700
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1701
0
        const xmlChar * *tmp;
1702
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1703
0
                                    ctxt->nameMax * 2 *
1704
0
                                    sizeof(ctxt->nameTab[0]));
1705
0
        if (tmp == NULL) {
1706
0
      goto mem_error;
1707
0
        }
1708
0
  ctxt->nameTab = tmp;
1709
0
        ctxt->nameMax *= 2;
1710
0
    }
1711
0
    ctxt->nameTab[ctxt->nameNr] = value;
1712
0
    ctxt->name = value;
1713
0
    return (ctxt->nameNr++);
1714
0
mem_error:
1715
0
    xmlErrMemory(ctxt, NULL);
1716
0
    return (-1);
1717
0
}
1718
1719
/**
1720
 * namePop:
1721
 * @ctxt: an XML parser context
1722
 *
1723
 * DEPRECATED: Internal function, do not use.
1724
 *
1725
 * Pops the top element name from the name stack
1726
 *
1727
 * Returns the name just removed
1728
 */
1729
const xmlChar *
1730
namePop(xmlParserCtxtPtr ctxt)
1731
1.82M
{
1732
1.82M
    const xmlChar *ret;
1733
1734
1.82M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1735
26
        return (NULL);
1736
1.82M
    ctxt->nameNr--;
1737
1.82M
    if (ctxt->nameNr > 0)
1738
1.55M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1739
267k
    else
1740
267k
        ctxt->name = NULL;
1741
1.82M
    ret = ctxt->nameTab[ctxt->nameNr];
1742
1.82M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1743
1.82M
    return (ret);
1744
1.82M
}
1745
1746
2.13M
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new entry. The stack is doubled when it is full.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int idx;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* The size is doubled up front and rolled back on failure. */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    idx = ctxt->spaceNr;
    ctxt->spaceTab[idx] = val;
    ctxt->space = &ctxt->spaceTab[idx];
    ctxt->spaceNr = idx + 1;
    return(idx);
}
1764
1765
1.88M
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack, overwriting its slot with -1.
 * ctxt->space is moved to the entry below, or to the first slot of the
 * table when the stack becomes empty.
 *
 * Returns the value just removed, or 0 if the stack is empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1777
1778
/*
1779
 * Macros for accessing the content. Those should be used only by the parser,
1780
 * and not exported.
1781
 *
1782
 * Dirty macros, i.e. one often needs to make assumptions about the context to
1783
 * use them
1784
 *
1785
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1786
 *           To be used with extreme caution since operations consuming
1787
 *           characters may move the input buffer to a different location !
1788
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1789
 *           This should be used internally by the parser
1790
 *           only to compare to ASCII values otherwise it would break when
1791
 *           running with UTF-8 encoding.
1792
 *   RAW     same as CUR but in the input buffer, bypass any token
1793
 *           extraction that may have been done
1794
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1795
 *           to compare on ASCII based substring.
1796
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1797
 *           strings without newlines within the parser.
1798
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1799
 *           defined char within the parser.
1800
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1801
 *
1802
 *   NEXT    Skip to the next character, this does the proper decoding
1803
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
1804
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
1805
 *   CUR_CHAR(l) returns the current unicode character (int), set l
1806
 *           to the number of xmlChars used for the encoding [0-5].
1807
 *   CUR_SCHAR  same but operate on a string instead of the context
1808
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
1809
 *            the index
1810
 *   GROW, SHRINK  handling of input buffers
1811
 */
1812
1813
35.9M
/* RAW and CUR both expand to the byte at the current input position. */
#define RAW (*ctxt->input->cur)
1814
51.6M
#define CUR (*ctxt->input->cur)
1815
122M
/* NXT(val): the byte found val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
1816
5.28M
/* CUR_PTR and BASE_PTR: the cur and base pointers of the current input. */
#define CUR_PTR ctxt->input->cur
1817
540k
#define BASE_PTR ctxt->input->base
1818
1819
/*
 * CMP4 .. CMP10: compare the first 4 to 10 bytes of s, read as unsigned
 * char, with the given constants. Each macro builds on the previous one.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
1820
14.8M
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1821
7.72M
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1822
#define CMP5( s, c1, c2, c3, c4, c5 ) \
1823
13.6M
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1824
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1825
12.5M
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1826
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1827
11.7M
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1828
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1829
11.1M
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1830
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1831
5.40M
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1832
5.40M
    ((unsigned char *) s)[ 8 ] == c9 )
1833
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1834
31.1k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
1835
31.1k
    ((unsigned char *) s)[ 9 ] == c10 )
1836
1837
3.96M
/*
 * SKIP(val): advance cur and col by val, then call xmlParserGrow if the
 * new current byte is 0. The line counter is not updated.
 */
#define SKIP(val) do {             \
1838
3.96M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
1839
3.96M
    if (*ctxt->input->cur == 0)           \
1840
3.96M
        xmlParserGrow(ctxt);           \
1841
3.96M
  } while (0)
1842
1843
/*
 * SKIPL(val): advance cur by val bytes one at a time, updating line and
 * col on each '\n', then call xmlParserGrow if the new current byte is 0.
 */
#define SKIPL(val) do {             \
1844
    int skipl;                \
1845
    for(skipl=0; skipl<val; skipl++) {          \
1846
  if (*(ctxt->input->cur) == '\n') {        \
1847
  ctxt->input->line++; ctxt->input->col = 1;      \
1848
  } else ctxt->input->col++;          \
1849
  ctxt->input->cur++;           \
1850
    }                 \
1851
    if (*ctxt->input->cur == 0)           \
1852
        xmlParserGrow(ctxt);            \
1853
  } while (0)
1854
1855
8.75M
/*
 * SHRINK: when ctxt->progressive is 0, more than 2 * INPUT_CHUNK bytes lie
 * between base and cur and fewer than 2 * INPUT_CHUNK bytes remain before
 * end, call xmlParserShrink. Expands to a bare if statement with its own
 * trailing semicolon.
 */
#define SHRINK if ((ctxt->progressive == 0) &&       \
1856
8.75M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1857
8.75M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1858
8.75M
  xmlParserShrink(ctxt);
1859
1860
395M
/*
 * GROW: when ctxt->progressive is 0 and fewer than INPUT_CHUNK bytes
 * remain before end, call xmlParserGrow. Expands to a bare if statement
 * with its own trailing semicolon.
 */
#define GROW if ((ctxt->progressive == 0) &&       \
1861
395M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))  \
1862
395M
  xmlParserGrow(ctxt);
1863
1864
13.3M
/* SKIP_BLANKS and NEXT: shorthands for xmlSkipBlankChars and xmlNextChar. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1865
1866
49.1M
#define NEXT xmlNextChar(ctxt)
1867
1868
3.58M
/*
 * NEXT1: advance col and cur by one, then call xmlParserGrow if the new
 * current byte is 0. Expands to a brace block, not a do/while statement.
 */
#define NEXT1 {               \
1869
3.58M
  ctxt->input->col++;           \
1870
3.58M
  ctxt->input->cur++;           \
1871
3.58M
  if (*ctxt->input->cur == 0)         \
1872
3.58M
      xmlParserGrow(ctxt);           \
1873
3.58M
    }
1874
1875
935M
/*
 * NEXTL(l): update line and col for the current byte ('\n' starts a new
 * line), then advance cur by l bytes. The input is not grown.
 */
#define NEXTL(l) do {             \
1876
935M
    if (*(ctxt->input->cur) == '\n') {         \
1877
80.4M
  ctxt->input->line++; ctxt->input->col = 1;      \
1878
854M
    } else ctxt->input->col++;           \
1879
935M
    ctxt->input->cur += l;        \
1880
935M
  } while (0)
1881
1882
937M
/* CUR_CHAR and CUR_SCHAR: shorthands passing the address of l as length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1883
563M
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1884
1885
/*
 * COPY_BUF(l,b,i,v): store v in b at index i and advance i, as a single
 * byte when l is 1 and through xmlCopyCharMultiByte otherwise. Expands to
 * an unbraced if/else without trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)           \
1886
1.43G
    if (l == 1) b[i++] = v;           \
1887
1.43G
    else i += xmlCopyCharMultiByte(&b[i],v)
1888
1889
/**
1890
 * xmlSkipBlankChars:
1891
 * @ctxt:  the XML parser context
1892
 *
1893
 * DEPRECATED: Internal function, do not use.
1894
 *
1895
 * skip all blanks character found at that point in the input streams.
1896
 * It pops up finished entities in the process if allowable at that point.
1897
 *
1898
 * Returns the number of space chars skipped
1899
 */
1900
1901
int
1902
13.3M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1903
13.3M
    int res = 0;
1904
1905
    /*
1906
     * It's Okay to use CUR/NEXT here since all the blanks are on
1907
     * the ASCII range.
1908
     */
1909
13.3M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
1910
13.3M
        (ctxt->instate == XML_PARSER_START)) {
1911
11.9M
  const xmlChar *cur;
1912
  /*
1913
   * if we are in the document content, go really fast
1914
   */
1915
11.9M
  cur = ctxt->input->cur;
1916
11.9M
  while (IS_BLANK_CH(*cur)) {
1917
5.68M
      if (*cur == '\n') {
1918
1.33M
    ctxt->input->line++; ctxt->input->col = 1;
1919
4.35M
      } else {
1920
4.35M
    ctxt->input->col++;
1921
4.35M
      }
1922
5.68M
      cur++;
1923
5.68M
      if (res < INT_MAX)
1924
5.68M
    res++;
1925
5.68M
      if (*cur == 0) {
1926
193k
    ctxt->input->cur = cur;
1927
193k
    xmlParserGrow(ctxt);
1928
193k
    cur = ctxt->input->cur;
1929
193k
      }
1930
5.68M
  }
1931
11.9M
  ctxt->input->cur = cur;
1932
11.9M
    } else {
1933
1.42M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
1934
1935
2.45M
  while (ctxt->instate != XML_PARSER_EOF) {
1936
2.45M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
1937
924k
    NEXT;
1938
1.53M
      } else if (CUR == '%') {
1939
                /*
1940
                 * Need to handle support of entities branching here
1941
                 */
1942
145k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
1943
69.9k
                    break;
1944
75.0k
          xmlParsePEReference(ctxt);
1945
1.38M
            } else if (CUR == 0) {
1946
41.6k
                unsigned long consumed;
1947
41.6k
                xmlEntityPtr ent;
1948
1949
41.6k
                if (ctxt->inputNr <= 1)
1950
10.5k
                    break;
1951
1952
31.1k
                consumed = ctxt->input->consumed;
1953
31.1k
                xmlSaturatedAddSizeT(&consumed,
1954
31.1k
                                     ctxt->input->cur - ctxt->input->base);
1955
1956
                /*
1957
                 * Add to sizeentities when parsing an external entity
1958
                 * for the first time.
1959
                 */
1960
31.1k
                ent = ctxt->input->entity;
1961
31.1k
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1962
31.1k
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
1963
991
                    ent->flags |= XML_ENT_PARSED;
1964
1965
991
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
1966
991
                }
1967
1968
31.1k
                xmlParserEntityCheck(ctxt, consumed);
1969
1970
31.1k
                xmlPopInput(ctxt);
1971
1.34M
            } else {
1972
1.34M
                break;
1973
1.34M
            }
1974
1975
            /*
1976
             * Also increase the counter when entering or exiting a PERef.
1977
             * The spec says: "When a parameter-entity reference is recognized
1978
             * in the DTD and included, its replacement text MUST be enlarged
1979
             * by the attachment of one leading and one following space (#x20)
1980
             * character."
1981
             */
1982
1.03M
      if (res < INT_MAX)
1983
1.03M
    res++;
1984
1.03M
        }
1985
1.42M
    }
1986
13.3M
    return(res);
1987
13.3M
}
1988
1989
/************************************************************************
1990
 *                  *
1991
 *    Commodity functions to handle entities      *
1992
 *                  *
1993
 ************************************************************************/
1994
1995
/**
1996
 * xmlPopInput:
1997
 * @ctxt:  an XML parser context
1998
 *
1999
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2000
 *          pop it and return the next char.
2001
 *
2002
 * Returns the current xmlChar in the parser context
2003
 */
2004
xmlChar
2005
31.1k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2006
31.1k
    xmlParserInputPtr input;
2007
2008
31.1k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2009
31.1k
    if (xmlParserDebugEntities)
2010
0
  xmlGenericError(xmlGenericErrorContext,
2011
0
    "Popping input %d\n", ctxt->inputNr);
2012
31.1k
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2013
31.1k
        (ctxt->instate != XML_PARSER_EOF))
2014
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2015
0
                    "Unfinished entity outside the DTD");
2016
31.1k
    input = inputPop(ctxt);
2017
31.1k
    if (input->entity != NULL)
2018
31.1k
        input->entity->flags &= ~XML_ENT_EXPANDING;
2019
31.1k
    xmlFreeInputStream(input);
2020
31.1k
    if (*ctxt->input->cur == 0)
2021
382
        xmlParserGrow(ctxt);
2022
31.1k
    return(CUR);
2023
31.1k
}
2024
2025
/**
2026
 * xmlPushInput:
2027
 * @ctxt:  an XML parser context
2028
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2029
 *
2030
 * xmlPushInput: switch to a new input stream which is stacked on top
2031
 *               of the previous one(s).
2032
 * Returns -1 in case of error or the index in the input stack
2033
 */
2034
int
2035
48.8k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2036
48.8k
    int ret;
2037
48.8k
    if (input == NULL) return(-1);
2038
2039
41.7k
    if (xmlParserDebugEntities) {
2040
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2041
0
      xmlGenericError(xmlGenericErrorContext,
2042
0
        "%s(%d): ", ctxt->input->filename,
2043
0
        ctxt->input->line);
2044
0
  xmlGenericError(xmlGenericErrorContext,
2045
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2046
0
    }
2047
41.7k
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2048
41.7k
        (ctxt->inputNr > 100)) {
2049
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2050
0
        while (ctxt->inputNr > 1)
2051
0
            xmlFreeInputStream(inputPop(ctxt));
2052
0
  return(-1);
2053
0
    }
2054
41.7k
    ret = inputPush(ctxt, input);
2055
41.7k
    if (ctxt->instate == XML_PARSER_EOF)
2056
0
        return(-1);
2057
41.7k
    GROW;
2058
41.7k
    return(ret);
2059
41.7k
}
2060
2061
/**
2062
 * xmlParseCharRef:
2063
 * @ctxt:  an XML parser context
2064
 *
2065
 * DEPRECATED: Internal function, don't use.
2066
 *
2067
 * Parse a numeric character reference. Always consumes '&'.
2068
 *
2069
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2070
 *                  '&#x' [0-9a-fA-F]+ ';'
2071
 *
2072
 * [ WFC: Legal Character ]
2073
 * Characters referred to using character references must match the
2074
 * production for Char.
2075
 *
2076
 * Returns the value parsed (as an int), 0 in case of error
2077
 */
2078
int
2079
254k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2080
254k
    int val = 0;
2081
254k
    int count = 0;
2082
2083
    /*
2084
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2085
     */
2086
254k
    if ((RAW == '&') && (NXT(1) == '#') &&
2087
254k
        (NXT(2) == 'x')) {
2088
81.1k
  SKIP(3);
2089
81.1k
  GROW;
2090
284k
  while (RAW != ';') { /* loop blocked by count */
2091
211k
      if (count++ > 20) {
2092
4.76k
    count = 0;
2093
4.76k
    GROW;
2094
4.76k
                if (ctxt->instate == XML_PARSER_EOF)
2095
220
                    return(0);
2096
4.76k
      }
2097
211k
      if ((RAW >= '0') && (RAW <= '9'))
2098
80.7k
          val = val * 16 + (CUR - '0');
2099
130k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2100
57.1k
          val = val * 16 + (CUR - 'a') + 10;
2101
73.2k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2102
65.1k
          val = val * 16 + (CUR - 'A') + 10;
2103
8.13k
      else {
2104
8.13k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2105
8.13k
    val = 0;
2106
8.13k
    break;
2107
8.13k
      }
2108
203k
      if (val > 0x110000)
2109
51.5k
          val = 0x110000;
2110
2111
203k
      NEXT;
2112
203k
      count++;
2113
203k
  }
2114
80.9k
  if (RAW == ';') {
2115
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2116
72.7k
      ctxt->input->col++;
2117
72.7k
      ctxt->input->cur++;
2118
72.7k
  }
2119
172k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2120
172k
  SKIP(2);
2121
172k
  GROW;
2122
495k
  while (RAW != ';') { /* loop blocked by count */
2123
330k
      if (count++ > 20) {
2124
2.13k
    count = 0;
2125
2.13k
    GROW;
2126
2.13k
                if (ctxt->instate == XML_PARSER_EOF)
2127
221
                    return(0);
2128
2.13k
      }
2129
330k
      if ((RAW >= '0') && (RAW <= '9'))
2130
322k
          val = val * 10 + (CUR - '0');
2131
8.05k
      else {
2132
8.05k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2133
8.05k
    val = 0;
2134
8.05k
    break;
2135
8.05k
      }
2136
322k
      if (val > 0x110000)
2137
12.5k
          val = 0x110000;
2138
2139
322k
      NEXT;
2140
322k
      count++;
2141
322k
  }
2142
172k
  if (RAW == ';') {
2143
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2144
164k
      ctxt->input->col++;
2145
164k
      ctxt->input->cur++;
2146
164k
  }
2147
172k
    } else {
2148
0
        if (RAW == '&')
2149
0
            SKIP(1);
2150
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2151
0
    }
2152
2153
    /*
2154
     * [ WFC: Legal Character ]
2155
     * Characters referred to using character references must match the
2156
     * production for Char.
2157
     */
2158
253k
    if (val >= 0x110000) {
2159
1.70k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2160
1.70k
                "xmlParseCharRef: character reference out of bounds\n",
2161
1.70k
          val);
2162
251k
    } else if (IS_CHAR(val)) {
2163
228k
        return(val);
2164
228k
    } else {
2165
23.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2166
23.7k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2167
23.7k
                    val);
2168
23.7k
    }
2169
25.4k
    return(0);
2170
253k
}
2171
2172
/**
2173
 * xmlParseStringCharRef:
2174
 * @ctxt:  an XML parser context
2175
 * @str:  a pointer to an index in the string
2176
 *
2177
 * parse Reference declarations, variant parsing from a string rather
2178
 * than an an input flow.
2179
 *
2180
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2181
 *                  '&#x' [0-9a-fA-F]+ ';'
2182
 *
2183
 * [ WFC: Legal Character ]
2184
 * Characters referred to using character references must match the
2185
 * production for Char.
2186
 *
2187
 * Returns the value parsed (as an int), 0 in case of error, str will be
2188
 *         updated to the current value of the index
2189
 */
2190
static int
2191
17.5k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2192
17.5k
    const xmlChar *ptr;
2193
17.5k
    xmlChar cur;
2194
17.5k
    int val = 0;
2195
2196
17.5k
    if ((str == NULL) || (*str == NULL)) return(0);
2197
17.5k
    ptr = *str;
2198
17.5k
    cur = *ptr;
2199
17.5k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2200
4.64k
  ptr += 3;
2201
4.64k
  cur = *ptr;
2202
19.8k
  while (cur != ';') { /* Non input consuming loop */
2203
15.7k
      if ((cur >= '0') && (cur <= '9'))
2204
3.57k
          val = val * 16 + (cur - '0');
2205
12.1k
      else if ((cur >= 'a') && (cur <= 'f'))
2206
668
          val = val * 16 + (cur - 'a') + 10;
2207
11.5k
      else if ((cur >= 'A') && (cur <= 'F'))
2208
10.9k
          val = val * 16 + (cur - 'A') + 10;
2209
525
      else {
2210
525
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2211
525
    val = 0;
2212
525
    break;
2213
525
      }
2214
15.2k
      if (val > 0x110000)
2215
593
          val = 0x110000;
2216
2217
15.2k
      ptr++;
2218
15.2k
      cur = *ptr;
2219
15.2k
  }
2220
4.64k
  if (cur == ';')
2221
4.12k
      ptr++;
2222
12.9k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2223
12.9k
  ptr += 2;
2224
12.9k
  cur = *ptr;
2225
39.1k
  while (cur != ';') { /* Non input consuming loops */
2226
28.7k
      if ((cur >= '0') && (cur <= '9'))
2227
26.2k
          val = val * 10 + (cur - '0');
2228
2.46k
      else {
2229
2.46k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2230
2.46k
    val = 0;
2231
2.46k
    break;
2232
2.46k
      }
2233
26.2k
      if (val > 0x110000)
2234
650
          val = 0x110000;
2235
2236
26.2k
      ptr++;
2237
26.2k
      cur = *ptr;
2238
26.2k
  }
2239
12.9k
  if (cur == ';')
2240
10.4k
      ptr++;
2241
12.9k
    } else {
2242
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2243
0
  return(0);
2244
0
    }
2245
17.5k
    *str = ptr;
2246
2247
    /*
2248
     * [ WFC: Legal Character ]
2249
     * Characters referred to using character references must match the
2250
     * production for Char.
2251
     */
2252
17.5k
    if (val >= 0x110000) {
2253
440
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2254
440
                "xmlParseStringCharRef: character reference out of bounds\n",
2255
440
                val);
2256
17.1k
    } else if (IS_CHAR(val)) {
2257
12.0k
        return(val);
2258
12.0k
    } else {
2259
5.01k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2260
5.01k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2261
5.01k
        val);
2262
5.01k
    }
2263
5.45k
    return(0);
2264
17.5k
}
2265
2266
/**
2267
 * xmlParserHandlePEReference:
2268
 * @ctxt:  the parser context
2269
 *
2270
 * DEPRECATED: Internal function, do not use.
2271
 *
2272
 * [69] PEReference ::= '%' Name ';'
2273
 *
2274
 * [ WFC: No Recursion ]
2275
 * A parsed entity must not contain a recursive
2276
 * reference to itself, either directly or indirectly.
2277
 *
2278
 * [ WFC: Entity Declared ]
2279
 * In a document without any DTD, a document with only an internal DTD
2280
 * subset which contains no parameter entity references, or a document
2281
 * with "standalone='yes'", ...  ... The declaration of a parameter
2282
 * entity must precede any reference to it...
2283
 *
2284
 * [ VC: Entity Declared ]
2285
 * In a document with an external subset or external parameter entities
2286
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2287
 * must precede any reference to it...
2288
 *
2289
 * [ WFC: In DTD ]
2290
 * Parameter-entity references may only appear in the DTD.
2291
 * NOTE: misleading but this is handled.
2292
 *
2293
 * A PEReference may have been detected in the current input stream
2294
 * the handling is done accordingly to
2295
 *      http://www.w3.org/TR/REC-xml#entproc
2296
 * i.e.
2297
 *   - Included in literal in entity values
2298
 *   - Included as Parameter Entity reference within DTDs
2299
 */
2300
void
2301
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2302
0
    switch(ctxt->instate) {
2303
0
  case XML_PARSER_CDATA_SECTION:
2304
0
      return;
2305
0
        case XML_PARSER_COMMENT:
2306
0
      return;
2307
0
  case XML_PARSER_START_TAG:
2308
0
      return;
2309
0
  case XML_PARSER_END_TAG:
2310
0
      return;
2311
0
        case XML_PARSER_EOF:
2312
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2313
0
      return;
2314
0
        case XML_PARSER_PROLOG:
2315
0
  case XML_PARSER_START:
2316
0
  case XML_PARSER_MISC:
2317
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2318
0
      return;
2319
0
  case XML_PARSER_ENTITY_DECL:
2320
0
        case XML_PARSER_CONTENT:
2321
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2322
0
        case XML_PARSER_PI:
2323
0
  case XML_PARSER_SYSTEM_LITERAL:
2324
0
  case XML_PARSER_PUBLIC_LITERAL:
2325
      /* we just ignore it there */
2326
0
      return;
2327
0
        case XML_PARSER_EPILOG:
2328
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2329
0
      return;
2330
0
  case XML_PARSER_ENTITY_VALUE:
2331
      /*
2332
       * NOTE: in the case of entity values, we don't do the
2333
       *       substitution here since we need the literal
2334
       *       entity value to be able to save the internal
2335
       *       subset of the document.
2336
       *       This will be handled by xmlStringDecodeEntities
2337
       */
2338
0
      return;
2339
0
        case XML_PARSER_DTD:
2340
      /*
2341
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2342
       * In the internal DTD subset, parameter-entity references
2343
       * can occur only where markup declarations can occur, not
2344
       * within markup declarations.
2345
       * In that case this is handled in xmlParseMarkupDecl
2346
       */
2347
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2348
0
    return;
2349
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2350
0
    return;
2351
0
            break;
2352
0
        case XML_PARSER_IGNORE:
2353
0
            return;
2354
0
    }
2355
2356
0
    xmlParsePEReference(ctxt);
2357
0
}
2358
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus @n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 *            also the target when the new size wraps around. On failure
 *            @buffer and buffer##_size are left unchanged.
 * @n is parenthesized so that an expression argument is added as a whole.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;                                           \
}
2373
2374
/**
2375
 * xmlStringDecodeEntitiesInt:
2376
 * @ctxt:  the parser context
2377
 * @str:  the input string
2378
 * @len: the string length
2379
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2380
 * @end:  an end marker xmlChar, 0 if none
2381
 * @end2:  an end marker xmlChar, 0 if none
2382
 * @end3:  an end marker xmlChar, 0 if none
2383
 * @check:  whether to perform entity checks
2384
 */
2385
static xmlChar *
2386
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2387
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2388
85.0k
                           int check) {
2389
85.0k
    xmlChar *buffer = NULL;
2390
85.0k
    size_t buffer_size = 0;
2391
85.0k
    size_t nbchars = 0;
2392
2393
85.0k
    xmlChar *current = NULL;
2394
85.0k
    xmlChar *rep = NULL;
2395
85.0k
    const xmlChar *last;
2396
85.0k
    xmlEntityPtr ent;
2397
85.0k
    int c,l;
2398
2399
85.0k
    if (str == NULL)
2400
4.15k
        return(NULL);
2401
80.8k
    last = str + len;
2402
2403
80.8k
    if (((ctxt->depth > 40) &&
2404
80.8k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2405
80.8k
  (ctxt->depth > 100)) {
2406
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2407
0
                       "Maximum entity nesting depth exceeded");
2408
0
  return(NULL);
2409
0
    }
2410
2411
    /*
2412
     * allocate a translation buffer.
2413
     */
2414
80.8k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2415
80.8k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2416
80.8k
    if (buffer == NULL) goto mem_error;
2417
2418
    /*
2419
     * OK loop until we reach one of the ending char or a size limit.
2420
     * we are operating on already parsed values.
2421
     */
2422
80.8k
    if (str < last)
2423
76.8k
  c = CUR_SCHAR(str, l);
2424
3.98k
    else
2425
3.98k
        c = 0;
2426
560M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2427
560M
           (c != end2) && (c != end3) &&
2428
560M
           (ctxt->instate != XML_PARSER_EOF)) {
2429
2430
560M
  if (c == 0) break;
2431
560M
        if ((c == '&') && (str[1] == '#')) {
2432
17.5k
      int val = xmlParseStringCharRef(ctxt, &str);
2433
17.5k
      if (val == 0)
2434
5.45k
                goto int_error;
2435
12.0k
      COPY_BUF(0,buffer,nbchars,val);
2436
12.0k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2437
599
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2438
599
      }
2439
560M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2440
79.2k
      if (xmlParserDebugEntities)
2441
0
    xmlGenericError(xmlGenericErrorContext,
2442
0
      "String decoding Entity Reference: %.30s\n",
2443
0
      str);
2444
79.2k
      ent = xmlParseStringEntityRef(ctxt, &str);
2445
79.2k
      if ((ent != NULL) &&
2446
79.2k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2447
34.6k
    if (ent->content != NULL) {
2448
34.6k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2449
34.6k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2450
970
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2451
970
        }
2452
34.6k
    } else {
2453
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2454
0
          "predefined entity has no content\n");
2455
0
                    goto int_error;
2456
0
    }
2457
44.6k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2458
17.6k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2459
100
                    goto int_error;
2460
2461
17.5k
                if (ent->flags & XML_ENT_EXPANDING) {
2462
230
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2463
230
                    xmlHaltParser(ctxt);
2464
230
                    ent->content[0] = 0;
2465
230
                    goto int_error;
2466
230
                }
2467
2468
17.3k
                ent->flags |= XML_ENT_EXPANDING;
2469
17.3k
    ctxt->depth++;
2470
17.3k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2471
17.3k
                        ent->length, what, 0, 0, 0, check);
2472
17.3k
    ctxt->depth--;
2473
17.3k
                ent->flags &= ~XML_ENT_EXPANDING;
2474
2475
17.3k
    if (rep == NULL) {
2476
407
                    ent->content[0] = 0;
2477
407
                    goto int_error;
2478
407
                }
2479
2480
16.9k
                current = rep;
2481
140M
                while (*current != 0) { /* non input consuming loop */
2482
140M
                    buffer[nbchars++] = *current++;
2483
140M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2484
16.4k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2485
16.4k
                    }
2486
140M
                }
2487
16.9k
                xmlFree(rep);
2488
16.9k
                rep = NULL;
2489
26.9k
      } else if (ent != NULL) {
2490
3.14k
    int i = xmlStrlen(ent->name);
2491
3.14k
    const xmlChar *cur = ent->name;
2492
2493
3.14k
    buffer[nbchars++] = '&';
2494
3.14k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2495
569
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2496
569
    }
2497
7.21k
    for (;i > 0;i--)
2498
4.07k
        buffer[nbchars++] = *cur++;
2499
3.14k
    buffer[nbchars++] = ';';
2500
3.14k
      }
2501
560M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2502
4.09k
      if (xmlParserDebugEntities)
2503
0
    xmlGenericError(xmlGenericErrorContext,
2504
0
      "String decoding PE Reference: %.30s\n", str);
2505
4.09k
      ent = xmlParseStringPEReference(ctxt, &str);
2506
4.09k
      if (ent != NULL) {
2507
2.02k
                if (ent->content == NULL) {
2508
        /*
2509
         * Note: external parsed entities will not be loaded,
2510
         * it is not required for a non-validating parser to
2511
         * complete external PEReferences coming from the
2512
         * internal subset
2513
         */
2514
82
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2515
82
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2516
82
      (ctxt->validate != 0)) {
2517
82
      xmlLoadEntityContent(ctxt, ent);
2518
82
        } else {
2519
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2520
0
      "not validating will not read content for PE entity %s\n",
2521
0
                          ent->name, NULL);
2522
0
        }
2523
82
    }
2524
2525
2.02k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2526
2
                    goto int_error;
2527
2528
2.01k
                if (ent->flags & XML_ENT_EXPANDING) {
2529
59
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2530
59
                    xmlHaltParser(ctxt);
2531
59
                    if (ent->content != NULL)
2532
33
                        ent->content[0] = 0;
2533
59
                    goto int_error;
2534
59
                }
2535
2536
1.96k
                ent->flags |= XML_ENT_EXPANDING;
2537
1.96k
    ctxt->depth++;
2538
1.96k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2539
1.96k
                        ent->length, what, 0, 0, 0, check);
2540
1.96k
    ctxt->depth--;
2541
1.96k
                ent->flags &= ~XML_ENT_EXPANDING;
2542
2543
1.96k
    if (rep == NULL) {
2544
56
                    if (ent->content != NULL)
2545
56
                        ent->content[0] = 0;
2546
56
                    goto int_error;
2547
56
                }
2548
1.90k
                current = rep;
2549
787k
                while (*current != 0) { /* non input consuming loop */
2550
785k
                    buffer[nbchars++] = *current++;
2551
785k
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2552
2.15k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2553
2.15k
                    }
2554
785k
                }
2555
1.90k
                xmlFree(rep);
2556
1.90k
                rep = NULL;
2557
1.90k
      }
2558
560M
  } else {
2559
560M
      COPY_BUF(l,buffer,nbchars,c);
2560
560M
      str += l;
2561
560M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2562
269k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2563
269k
      }
2564
560M
  }
2565
560M
  if (str < last)
2566
560M
      c = CUR_SCHAR(str, l);
2567
70.0k
  else
2568
70.0k
      c = 0;
2569
560M
    }
2570
74.5k
    buffer[nbchars] = 0;
2571
74.5k
    return(buffer);
2572
2573
26
mem_error:
2574
26
    xmlErrMemory(ctxt, NULL);
2575
6.33k
int_error:
2576
6.33k
    if (rep != NULL)
2577
1
        xmlFree(rep);
2578
6.33k
    if (buffer != NULL)
2579
6.31k
        xmlFree(buffer);
2580
6.33k
    return(NULL);
2581
26
}
2582
2583
/**
2584
 * xmlStringLenDecodeEntities:
2585
 * @ctxt:  the parser context
2586
 * @str:  the input string
2587
 * @len: the string length
2588
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2589
 * @end:  an end marker xmlChar, 0 if none
2590
 * @end2:  an end marker xmlChar, 0 if none
2591
 * @end3:  an end marker xmlChar, 0 if none
2592
 *
2593
 * DEPRECATED: Internal function, don't use.
2594
 *
2595
 * Takes a entity string content and process to do the adequate substitutions.
2596
 *
2597
 * [67] Reference ::= EntityRef | CharRef
2598
 *
2599
 * [69] PEReference ::= '%' Name ';'
2600
 *
2601
 * Returns A newly allocated string with the substitution done. The caller
2602
 *      must deallocate it !
2603
 */
2604
xmlChar *
2605
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2606
                           int what, xmlChar end, xmlChar  end2,
2607
0
                           xmlChar end3) {
2608
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2609
0
        return(NULL);
2610
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2611
0
                                      end, end2, end3, 0));
2612
0
}
2613
2614
/**
2615
 * xmlStringDecodeEntities:
2616
 * @ctxt:  the parser context
2617
 * @str:  the input string
2618
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2619
 * @end:  an end marker xmlChar, 0 if none
2620
 * @end2:  an end marker xmlChar, 0 if none
2621
 * @end3:  an end marker xmlChar, 0 if none
2622
 *
2623
 * DEPRECATED: Internal function, don't use.
2624
 *
2625
 * Takes a entity string content and process to do the adequate substitutions.
2626
 *
2627
 * [67] Reference ::= EntityRef | CharRef
2628
 *
2629
 * [69] PEReference ::= '%' Name ';'
2630
 *
2631
 * Returns A newly allocated string with the substitution done. The caller
2632
 *      must deallocate it !
2633
 */
2634
xmlChar *
2635
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2636
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2637
0
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2638
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2639
0
                                      end, end2, end3, 0));
2640
0
}
2641
2642
/************************************************************************
2643
 *                  *
2644
 *    Commodity functions, cleanup needed ?     *
2645
 *                  *
2646
 ************************************************************************/
2647
2648
/**
2649
 * areBlanks:
2650
 * @ctxt:  an XML parser context
2651
 * @str:  a xmlChar *
2652
 * @len:  the size of @str
2653
 * @blank_chars: we know the chars are blanks
2654
 *
2655
 * Is this a sequence of blank chars that one can ignore ?
2656
 *
2657
 * Returns 1 if ignorable 0 otherwise.
2658
 */
2659
2660
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2661
1.61M
                     int blank_chars) {
2662
1.61M
    int i, ret;
2663
1.61M
    xmlNodePtr lastChild;
2664
2665
    /*
2666
     * Don't spend time trying to differentiate them, the same callback is
2667
     * used !
2668
     */
2669
1.61M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2670
1.61M
  return(0);
2671
2672
    /*
2673
     * Check for xml:space value.
2674
     */
2675
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2676
0
        (*(ctxt->space) == -2))
2677
0
  return(0);
2678
2679
    /*
2680
     * Check that the string is made of blanks
2681
     */
2682
0
    if (blank_chars == 0) {
2683
0
  for (i = 0;i < len;i++)
2684
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2685
0
    }
2686
2687
    /*
2688
     * Look if the element is mixed content in the DTD if available
2689
     */
2690
0
    if (ctxt->node == NULL) return(0);
2691
0
    if (ctxt->myDoc != NULL) {
2692
0
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2693
0
        if (ret == 0) return(1);
2694
0
        if (ret == 1) return(0);
2695
0
    }
2696
2697
    /*
2698
     * Otherwise, heuristic :-\
2699
     */
2700
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2701
0
    if ((ctxt->node->children == NULL) &&
2702
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2703
2704
0
    lastChild = xmlGetLastChild(ctxt->node);
2705
0
    if (lastChild == NULL) {
2706
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2707
0
            (ctxt->node->content != NULL)) return(0);
2708
0
    } else if (xmlNodeIsText(lastChild))
2709
0
        return(0);
2710
0
    else if ((ctxt->node->children != NULL) &&
2711
0
             (xmlNodeIsText(ctxt->node->children)))
2712
0
        return(0);
2713
0
    return(1);
2714
0
}
2715
2716
/************************************************************************
2717
 *                  *
2718
 *    Extra stuff for namespace support     *
2719
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2720
 *                  *
2721
 ************************************************************************/
2722
2723
/**
2724
 * xmlSplitQName:
2725
 * @ctxt:  an XML parser context
2726
 * @name:  an XML parser context
2727
 * @prefix:  a xmlChar **
2728
 *
2729
 * parse an UTF8 encoded XML qualified name string
2730
 *
2731
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2732
 *
2733
 * [NS 6] Prefix ::= NCName
2734
 *
2735
 * [NS 7] LocalPart ::= NCName
2736
 *
2737
 * Returns the local part, and prefix is updated
2738
 *   to get the Prefix if any.
2739
 */
2740
2741
xmlChar *
2742
37.2k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2743
37.2k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2744
37.2k
    xmlChar *buffer = NULL;
2745
37.2k
    int len = 0;
2746
37.2k
    int max = XML_MAX_NAMELEN;
2747
37.2k
    xmlChar *ret = NULL;
2748
37.2k
    const xmlChar *cur = name;
2749
37.2k
    int c;
2750
2751
37.2k
    if (prefix == NULL) return(NULL);
2752
37.2k
    *prefix = NULL;
2753
2754
37.2k
    if (cur == NULL) return(NULL);
2755
2756
#ifndef XML_XML_NAMESPACE
2757
    /* xml: prefix is not really a namespace */
2758
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2759
        (cur[2] == 'l') && (cur[3] == ':'))
2760
  return(xmlStrdup(name));
2761
#endif
2762
2763
    /* nasty but well=formed */
2764
37.2k
    if (cur[0] == ':')
2765
4.08k
  return(xmlStrdup(name));
2766
2767
33.1k
    c = *cur++;
2768
374k
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2769
341k
  buf[len++] = c;
2770
341k
  c = *cur++;
2771
341k
    }
2772
33.1k
    if (len >= max) {
2773
  /*
2774
   * Okay someone managed to make a huge name, so he's ready to pay
2775
   * for the processing speed.
2776
   */
2777
1.39k
  max = len * 2;
2778
2779
1.39k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2780
1.39k
  if (buffer == NULL) {
2781
1
      xmlErrMemory(ctxt, NULL);
2782
1
      return(NULL);
2783
1
  }
2784
1.39k
  memcpy(buffer, buf, len);
2785
291k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2786
290k
      if (len + 10 > max) {
2787
1.26k
          xmlChar *tmp;
2788
2789
1.26k
    max *= 2;
2790
1.26k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
2791
1.26k
    if (tmp == NULL) {
2792
1
        xmlFree(buffer);
2793
1
        xmlErrMemory(ctxt, NULL);
2794
1
        return(NULL);
2795
1
    }
2796
1.26k
    buffer = tmp;
2797
1.26k
      }
2798
290k
      buffer[len++] = c;
2799
290k
      c = *cur++;
2800
290k
  }
2801
1.39k
  buffer[len] = 0;
2802
1.39k
    }
2803
2804
33.1k
    if ((c == ':') && (*cur == 0)) {
2805
764
        if (buffer != NULL)
2806
116
      xmlFree(buffer);
2807
764
  *prefix = NULL;
2808
764
  return(xmlStrdup(name));
2809
764
    }
2810
2811
32.3k
    if (buffer == NULL)
2812
31.1k
  ret = xmlStrndup(buf, len);
2813
1.27k
    else {
2814
1.27k
  ret = buffer;
2815
1.27k
  buffer = NULL;
2816
1.27k
  max = XML_MAX_NAMELEN;
2817
1.27k
    }
2818
2819
2820
32.3k
    if (c == ':') {
2821
13.1k
  c = *cur;
2822
13.1k
        *prefix = ret;
2823
13.1k
  if (c == 0) {
2824
0
      return(xmlStrndup(BAD_CAST "", 0));
2825
0
  }
2826
13.1k
  len = 0;
2827
2828
  /*
2829
   * Check that the first character is proper to start
2830
   * a new name
2831
   */
2832
13.1k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
2833
13.1k
        ((c >= 0x41) && (c <= 0x5A)) ||
2834
13.1k
        (c == '_') || (c == ':'))) {
2835
4.97k
      int l;
2836
4.97k
      int first = CUR_SCHAR(cur, l);
2837
2838
4.97k
      if (!IS_LETTER(first) && (first != '_')) {
2839
939
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2840
939
          "Name %s is not XML Namespace compliant\n",
2841
939
          name);
2842
939
      }
2843
4.97k
  }
2844
13.1k
  cur++;
2845
2846
237k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2847
223k
      buf[len++] = c;
2848
223k
      c = *cur++;
2849
223k
  }
2850
13.1k
  if (len >= max) {
2851
      /*
2852
       * Okay someone managed to make a huge name, so he's ready to pay
2853
       * for the processing speed.
2854
       */
2855
1.32k
      max = len * 2;
2856
2857
1.32k
      buffer = (xmlChar *) xmlMallocAtomic(max);
2858
1.32k
      if (buffer == NULL) {
2859
1
          xmlErrMemory(ctxt, NULL);
2860
1
    return(NULL);
2861
1
      }
2862
1.32k
      memcpy(buffer, buf, len);
2863
205k
      while (c != 0) { /* tested bigname2.xml */
2864
204k
    if (len + 10 > max) {
2865
1.06k
        xmlChar *tmp;
2866
2867
1.06k
        max *= 2;
2868
1.06k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
2869
1.06k
        if (tmp == NULL) {
2870
1
      xmlErrMemory(ctxt, NULL);
2871
1
      xmlFree(buffer);
2872
1
      return(NULL);
2873
1
        }
2874
1.06k
        buffer = tmp;
2875
1.06k
    }
2876
204k
    buffer[len++] = c;
2877
204k
    c = *cur++;
2878
204k
      }
2879
1.32k
      buffer[len] = 0;
2880
1.32k
  }
2881
2882
13.1k
  if (buffer == NULL)
2883
11.8k
      ret = xmlStrndup(buf, len);
2884
1.32k
  else {
2885
1.32k
      ret = buffer;
2886
1.32k
  }
2887
13.1k
    }
2888
2889
32.3k
    return(ret);
2890
32.3k
}
2891
2892
/************************************************************************
2893
 *                  *
2894
 *      The parser itself       *
2895
 *  Relates to http://www.w3.org/TR/REC-xml       *
2896
 *                  *
2897
 ************************************************************************/
2898
2899
/************************************************************************
2900
 *                  *
2901
 *  Routines to parse Name, NCName and NmToken      *
2902
 *                  *
2903
 ************************************************************************/
2904
#ifdef DEBUG
2905
static unsigned long nbParseName = 0;
2906
static unsigned long nbParseNmToken = 0;
2907
static unsigned long nbParseNCName = 0;
2908
static unsigned long nbParseNCNameComplex = 0;
2909
static unsigned long nbParseNameComplex = 0;
2910
static unsigned long nbParseStringName = 0;
2911
#endif
2912
2913
/*
2914
 * The two following functions are related to the change of accepted
2915
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2916
 * They correspond to the modified production [4] and the new production [4a]
2917
 * changes in that revision. Also note that the macros used for the
2918
 * productions Letter, Digit, CombiningChar and Extender are not needed
2919
 * anymore.
2920
 * We still keep compatibility to pre-revision5 parsing semantic if the
2921
 * new XML_PARSE_OLD10 option is given to the parser.
2922
 */
2923
static int
2924
431k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2925
431k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2926
        /*
2927
   * Use the new checks of production [4] [4a] amd [5] of the
2928
   * Update 5 of XML-1.0
2929
   */
2930
431k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2931
431k
      (((c >= 'a') && (c <= 'z')) ||
2932
431k
       ((c >= 'A') && (c <= 'Z')) ||
2933
431k
       (c == '_') || (c == ':') ||
2934
431k
       ((c >= 0xC0) && (c <= 0xD6)) ||
2935
431k
       ((c >= 0xD8) && (c <= 0xF6)) ||
2936
431k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
2937
431k
       ((c >= 0x370) && (c <= 0x37D)) ||
2938
431k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
2939
431k
       ((c >= 0x200C) && (c <= 0x200D)) ||
2940
431k
       ((c >= 0x2070) && (c <= 0x218F)) ||
2941
431k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2942
431k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
2943
431k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
2944
431k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2945
431k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
2946
335k
      return(1);
2947
431k
    } else {
2948
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
2949
0
      return(1);
2950
0
    }
2951
96.4k
    return(0);
2952
431k
}
2953
2954
static int
2955
82.8M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2956
82.8M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2957
        /*
2958
   * Use the new checks of production [4] [4a] amd [5] of the
2959
   * Update 5 of XML-1.0
2960
   */
2961
82.8M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2962
82.8M
      (((c >= 'a') && (c <= 'z')) ||
2963
82.8M
       ((c >= 'A') && (c <= 'Z')) ||
2964
82.8M
       ((c >= '0') && (c <= '9')) || /* !start */
2965
82.8M
       (c == '_') || (c == ':') ||
2966
82.8M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2967
82.8M
       ((c >= 0xC0) && (c <= 0xD6)) ||
2968
82.8M
       ((c >= 0xD8) && (c <= 0xF6)) ||
2969
82.8M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
2970
82.8M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2971
82.8M
       ((c >= 0x370) && (c <= 0x37D)) ||
2972
82.8M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
2973
82.8M
       ((c >= 0x200C) && (c <= 0x200D)) ||
2974
82.8M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2975
82.8M
       ((c >= 0x2070) && (c <= 0x218F)) ||
2976
82.8M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2977
82.8M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
2978
82.8M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
2979
82.8M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2980
82.8M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
2981
82.5M
       return(1);
2982
82.8M
    } else {
2983
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2984
0
            (c == '.') || (c == '-') ||
2985
0
      (c == '_') || (c == ':') ||
2986
0
      (IS_COMBINING(c)) ||
2987
0
      (IS_EXTENDER(c)))
2988
0
      return(1);
2989
0
    }
2990
287k
    return(0);
2991
82.8M
}
2992
2993
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2994
                                          int *len, int *alloc, int normalize);
2995
2996
static const xmlChar *
2997
240k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2998
240k
    int len = 0, l;
2999
240k
    int c;
3000
240k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3001
0
                    XML_MAX_TEXT_LENGTH :
3002
240k
                    XML_MAX_NAME_LENGTH;
3003
3004
#ifdef DEBUG
3005
    nbParseNameComplex++;
3006
#endif
3007
3008
    /*
3009
     * Handler for more complex cases
3010
     */
3011
240k
    c = CUR_CHAR(l);
3012
240k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3013
        /*
3014
   * Use the new checks of production [4] [4a] amd [5] of the
3015
   * Update 5 of XML-1.0
3016
   */
3017
240k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3018
240k
      (!(((c >= 'a') && (c <= 'z')) ||
3019
234k
         ((c >= 'A') && (c <= 'Z')) ||
3020
234k
         (c == '_') || (c == ':') ||
3021
234k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3022
234k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3023
234k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3024
234k
         ((c >= 0x370) && (c <= 0x37D)) ||
3025
234k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3026
234k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3027
234k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3028
234k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3029
234k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3030
234k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3031
234k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3032
234k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3033
92.7k
      return(NULL);
3034
92.7k
  }
3035
147k
  len += l;
3036
147k
  NEXTL(l);
3037
147k
  c = CUR_CHAR(l);
3038
32.9M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3039
32.9M
         (((c >= 'a') && (c <= 'z')) ||
3040
32.8M
          ((c >= 'A') && (c <= 'Z')) ||
3041
32.8M
          ((c >= '0') && (c <= '9')) || /* !start */
3042
32.8M
          (c == '_') || (c == ':') ||
3043
32.8M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3044
32.8M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3045
32.8M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3046
32.8M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3047
32.8M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3048
32.8M
          ((c >= 0x370) && (c <= 0x37D)) ||
3049
32.8M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3050
32.8M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3051
32.8M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3052
32.8M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3053
32.8M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3054
32.8M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3055
32.8M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3056
32.8M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3057
32.8M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3058
32.8M
    )) {
3059
32.7M
            if (len <= INT_MAX - l)
3060
32.7M
          len += l;
3061
32.7M
      NEXTL(l);
3062
32.7M
      c = CUR_CHAR(l);
3063
32.7M
  }
3064
147k
    } else {
3065
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3066
0
      (!IS_LETTER(c) && (c != '_') &&
3067
0
       (c != ':'))) {
3068
0
      return(NULL);
3069
0
  }
3070
0
  len += l;
3071
0
  NEXTL(l);
3072
0
  c = CUR_CHAR(l);
3073
3074
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3075
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3076
0
    (c == '.') || (c == '-') ||
3077
0
    (c == '_') || (c == ':') ||
3078
0
    (IS_COMBINING(c)) ||
3079
0
    (IS_EXTENDER(c)))) {
3080
0
            if (len <= INT_MAX - l)
3081
0
          len += l;
3082
0
      NEXTL(l);
3083
0
      c = CUR_CHAR(l);
3084
0
  }
3085
0
    }
3086
147k
    if (ctxt->instate == XML_PARSER_EOF)
3087
1.80k
        return(NULL);
3088
145k
    if (len > maxLength) {
3089
375
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3090
375
        return(NULL);
3091
375
    }
3092
145k
    if (ctxt->input->cur - ctxt->input->base < len) {
3093
        /*
3094
         * There were a couple of bugs where PERefs lead to to a change
3095
         * of the buffer. Check the buffer size to avoid passing an invalid
3096
         * pointer to xmlDictLookup.
3097
         */
3098
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3099
0
                    "unexpected change of input buffer");
3100
0
        return (NULL);
3101
0
    }
3102
145k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3103
593
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3104
144k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3105
145k
}
3106
3107
/**
3108
 * xmlParseName:
3109
 * @ctxt:  an XML parser context
3110
 *
3111
 * DEPRECATED: Internal function, don't use.
3112
 *
3113
 * parse an XML name.
3114
 *
3115
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3116
 *                  CombiningChar | Extender
3117
 *
3118
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3119
 *
3120
 * [6] Names ::= Name (#x20 Name)*
3121
 *
3122
 * Returns the Name parsed or NULL
3123
 */
3124
3125
const xmlChar *
3126
850k
xmlParseName(xmlParserCtxtPtr ctxt) {
3127
850k
    const xmlChar *in;
3128
850k
    const xmlChar *ret;
3129
850k
    size_t count = 0;
3130
850k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3131
0
                       XML_MAX_TEXT_LENGTH :
3132
850k
                       XML_MAX_NAME_LENGTH;
3133
3134
850k
    GROW;
3135
850k
    if (ctxt->instate == XML_PARSER_EOF)
3136
509
        return(NULL);
3137
3138
#ifdef DEBUG
3139
    nbParseName++;
3140
#endif
3141
3142
    /*
3143
     * Accelerator for simple ASCII names
3144
     */
3145
850k
    in = ctxt->input->cur;
3146
850k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3147
850k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3148
850k
  (*in == '_') || (*in == ':')) {
3149
671k
  in++;
3150
85.0M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3151
85.0M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3152
85.0M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3153
85.0M
         (*in == '_') || (*in == '-') ||
3154
85.0M
         (*in == ':') || (*in == '.'))
3155
84.3M
      in++;
3156
671k
  if ((*in > 0) && (*in < 0x80)) {
3157
610k
      count = in - ctxt->input->cur;
3158
610k
            if (count > maxLength) {
3159
744
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3160
744
                return(NULL);
3161
744
            }
3162
609k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3163
609k
      ctxt->input->cur = in;
3164
609k
      ctxt->input->col += count;
3165
609k
      if (ret == NULL)
3166
13
          xmlErrMemory(ctxt, NULL);
3167
609k
      return(ret);
3168
610k
  }
3169
671k
    }
3170
    /* accelerator for special cases */
3171
240k
    return(xmlParseNameComplex(ctxt));
3172
850k
}
3173
3174
static const xmlChar *
3175
330k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3176
330k
    int len = 0, l;
3177
330k
    int c;
3178
330k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3179
0
                    XML_MAX_TEXT_LENGTH :
3180
330k
                    XML_MAX_NAME_LENGTH;
3181
330k
    size_t startPosition = 0;
3182
3183
#ifdef DEBUG
3184
    nbParseNCNameComplex++;
3185
#endif
3186
3187
    /*
3188
     * Handler for more complex cases
3189
     */
3190
330k
    startPosition = CUR_PTR - BASE_PTR;
3191
330k
    c = CUR_CHAR(l);
3192
330k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3193
330k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3194
120k
  return(NULL);
3195
120k
    }
3196
3197
31.0M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3198
31.0M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3199
30.8M
        if (len <= INT_MAX - l)
3200
30.8M
      len += l;
3201
30.8M
  NEXTL(l);
3202
30.8M
  c = CUR_CHAR(l);
3203
30.8M
    }
3204
210k
    if (ctxt->instate == XML_PARSER_EOF)
3205
268
        return(NULL);
3206
210k
    if (len > maxLength) {
3207
515
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3208
515
        return(NULL);
3209
515
    }
3210
209k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3211
210k
}
3212
3213
/**
3214
 * xmlParseNCName:
3215
 * @ctxt:  an XML parser context
3216
 * @len:  length of the string parsed
3217
 *
3218
 * parse an XML name.
3219
 *
3220
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3221
 *                      CombiningChar | Extender
3222
 *
3223
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3224
 *
3225
 * Returns the Name parsed or NULL
3226
 */
3227
3228
static const xmlChar *
3229
6.93M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3230
6.93M
    const xmlChar *in, *e;
3231
6.93M
    const xmlChar *ret;
3232
6.93M
    size_t count = 0;
3233
6.93M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3234
0
                       XML_MAX_TEXT_LENGTH :
3235
6.93M
                       XML_MAX_NAME_LENGTH;
3236
3237
#ifdef DEBUG
3238
    nbParseNCName++;
3239
#endif
3240
3241
    /*
3242
     * Accelerator for simple ASCII names
3243
     */
3244
6.93M
    in = ctxt->input->cur;
3245
6.93M
    e = ctxt->input->end;
3246
6.93M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3247
6.93M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3248
6.93M
   (*in == '_')) && (in < e)) {
3249
6.74M
  in++;
3250
101M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3251
101M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3252
101M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3253
101M
          (*in == '_') || (*in == '-') ||
3254
101M
          (*in == '.')) && (in < e))
3255
94.6M
      in++;
3256
6.74M
  if (in >= e)
3257
1.46k
      goto complex;
3258
6.73M
  if ((*in > 0) && (*in < 0x80)) {
3259
6.60M
      count = in - ctxt->input->cur;
3260
6.60M
            if (count > maxLength) {
3261
799
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3262
799
                return(NULL);
3263
799
            }
3264
6.60M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3265
6.60M
      ctxt->input->cur = in;
3266
6.60M
      ctxt->input->col += count;
3267
6.60M
      if (ret == NULL) {
3268
15
          xmlErrMemory(ctxt, NULL);
3269
15
      }
3270
6.60M
      return(ret);
3271
6.60M
  }
3272
6.73M
    }
3273
330k
complex:
3274
330k
    return(xmlParseNCNameComplex(ctxt));
3275
6.93M
}
3276
3277
/**
3278
 * xmlParseNameAndCompare:
3279
 * @ctxt:  an XML parser context
3280
 *
3281
 * parse an XML name and compares for match
3282
 * (specialized for endtag parsing)
3283
 *
3284
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3285
 * and the name for mismatch
3286
 */
3287
3288
static const xmlChar *
3289
160k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3290
160k
    register const xmlChar *cmp = other;
3291
160k
    register const xmlChar *in;
3292
160k
    const xmlChar *ret;
3293
3294
160k
    GROW;
3295
160k
    if (ctxt->instate == XML_PARSER_EOF)
3296
253
        return(NULL);
3297
3298
159k
    in = ctxt->input->cur;
3299
728k
    while (*in != 0 && *in == *cmp) {
3300
568k
  ++in;
3301
568k
  ++cmp;
3302
568k
    }
3303
159k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3304
  /* success */
3305
150k
  ctxt->input->col += in - ctxt->input->cur;
3306
150k
  ctxt->input->cur = in;
3307
150k
  return (const xmlChar*) 1;
3308
150k
    }
3309
    /* failure (or end of input buffer), check with full function */
3310
9.65k
    ret = xmlParseName (ctxt);
3311
    /* strings coming from the dictionary direct compare possible */
3312
9.65k
    if (ret == other) {
3313
825
  return (const xmlChar*) 1;
3314
825
    }
3315
8.82k
    return ret;
3316
9.65k
}
3317
3318
/**
3319
 * xmlParseStringName:
3320
 * @ctxt:  an XML parser context
3321
 * @str:  a pointer to the string pointer (IN/OUT)
3322
 *
3323
 * parse an XML name.
3324
 *
3325
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3326
 *                  CombiningChar | Extender
3327
 *
3328
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3329
 *
3330
 * [6] Names ::= Name (#x20 Name)*
3331
 *
3332
 * Returns the Name parsed or NULL. The @str pointer
3333
 * is updated to the current location in the string.
3334
 */
3335
3336
static xmlChar *
3337
113k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3338
113k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3339
113k
    const xmlChar *cur = *str;
3340
113k
    int len = 0, l;
3341
113k
    int c;
3342
113k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3343
0
                    XML_MAX_TEXT_LENGTH :
3344
113k
                    XML_MAX_NAME_LENGTH;
3345
3346
#ifdef DEBUG
3347
    nbParseStringName++;
3348
#endif
3349
3350
113k
    c = CUR_SCHAR(cur, l);
3351
113k
    if (!xmlIsNameStartChar(ctxt, c)) {
3352
2.68k
  return(NULL);
3353
2.68k
    }
3354
3355
110k
    COPY_BUF(l,buf,len,c);
3356
110k
    cur += l;
3357
110k
    c = CUR_SCHAR(cur, l);
3358
423k
    while (xmlIsNameChar(ctxt, c)) {
3359
316k
  COPY_BUF(l,buf,len,c);
3360
316k
  cur += l;
3361
316k
  c = CUR_SCHAR(cur, l);
3362
316k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3363
      /*
3364
       * Okay someone managed to make a huge name, so he's ready to pay
3365
       * for the processing speed.
3366
       */
3367
3.64k
      xmlChar *buffer;
3368
3.64k
      int max = len * 2;
3369
3370
3.64k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3371
3.64k
      if (buffer == NULL) {
3372
2
          xmlErrMemory(ctxt, NULL);
3373
2
    return(NULL);
3374
2
      }
3375
3.64k
      memcpy(buffer, buf, len);
3376
2.88M
      while (xmlIsNameChar(ctxt, c)) {
3377
2.88M
    if (len + 10 > max) {
3378
10.6k
        xmlChar *tmp;
3379
3380
10.6k
        max *= 2;
3381
10.6k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3382
10.6k
        if (tmp == NULL) {
3383
1
      xmlErrMemory(ctxt, NULL);
3384
1
      xmlFree(buffer);
3385
1
      return(NULL);
3386
1
        }
3387
10.6k
        buffer = tmp;
3388
10.6k
    }
3389
2.88M
    COPY_BUF(l,buffer,len,c);
3390
2.88M
    cur += l;
3391
2.88M
    c = CUR_SCHAR(cur, l);
3392
2.88M
                if (len > maxLength) {
3393
86
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3394
86
                    xmlFree(buffer);
3395
86
                    return(NULL);
3396
86
                }
3397
2.88M
      }
3398
3.55k
      buffer[len] = 0;
3399
3.55k
      *str = cur;
3400
3.55k
      return(buffer);
3401
3.64k
  }
3402
316k
    }
3403
107k
    if (len > maxLength) {
3404
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3405
0
        return(NULL);
3406
0
    }
3407
107k
    *str = cur;
3408
107k
    return(xmlStrndup(buf, len));
3409
107k
}
3410
3411
/**
3412
 * xmlParseNmtoken:
3413
 * @ctxt:  an XML parser context
3414
 *
3415
 * DEPRECATED: Internal function, don't use.
3416
 *
3417
 * parse an XML Nmtoken.
3418
 *
3419
 * [7] Nmtoken ::= (NameChar)+
3420
 *
3421
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3422
 *
3423
 * Returns the Nmtoken parsed or NULL
3424
 */
3425
3426
xmlChar *
3427
22.8k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3428
22.8k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3429
22.8k
    int len = 0, l;
3430
22.8k
    int c;
3431
22.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3432
0
                    XML_MAX_TEXT_LENGTH :
3433
22.8k
                    XML_MAX_NAME_LENGTH;
3434
3435
#ifdef DEBUG
3436
    nbParseNmToken++;
3437
#endif
3438
3439
22.8k
    c = CUR_CHAR(l);
3440
3441
243k
    while (xmlIsNameChar(ctxt, c)) {
3442
223k
  COPY_BUF(l,buf,len,c);
3443
223k
  NEXTL(l);
3444
223k
  c = CUR_CHAR(l);
3445
223k
  if (len >= XML_MAX_NAMELEN) {
3446
      /*
3447
       * Okay someone managed to make a huge token, so he's ready to pay
3448
       * for the processing speed.
3449
       */
3450
2.47k
      xmlChar *buffer;
3451
2.47k
      int max = len * 2;
3452
3453
2.47k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3454
2.47k
      if (buffer == NULL) {
3455
11
          xmlErrMemory(ctxt, NULL);
3456
11
    return(NULL);
3457
11
      }
3458
2.46k
      memcpy(buffer, buf, len);
3459
48.2M
      while (xmlIsNameChar(ctxt, c)) {
3460
48.2M
    if (len + 10 > max) {
3461
10.0k
        xmlChar *tmp;
3462
3463
10.0k
        max *= 2;
3464
10.0k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3465
10.0k
        if (tmp == NULL) {
3466
1
      xmlErrMemory(ctxt, NULL);
3467
1
      xmlFree(buffer);
3468
1
      return(NULL);
3469
1
        }
3470
10.0k
        buffer = tmp;
3471
10.0k
    }
3472
48.2M
    COPY_BUF(l,buffer,len,c);
3473
48.2M
                if (len > maxLength) {
3474
990
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3475
990
                    xmlFree(buffer);
3476
990
                    return(NULL);
3477
990
                }
3478
48.2M
    NEXTL(l);
3479
48.2M
    c = CUR_CHAR(l);
3480
48.2M
      }
3481
1.47k
      buffer[len] = 0;
3482
1.47k
            if (ctxt->instate == XML_PARSER_EOF) {
3483
418
                xmlFree(buffer);
3484
418
                return(NULL);
3485
418
            }
3486
1.05k
      return(buffer);
3487
1.47k
  }
3488
223k
    }
3489
20.3k
    if (ctxt->instate == XML_PARSER_EOF)
3490
616
        return(NULL);
3491
19.7k
    if (len == 0)
3492
10.3k
        return(NULL);
3493
9.34k
    if (len > maxLength) {
3494
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3495
0
        return(NULL);
3496
0
    }
3497
9.34k
    return(xmlStrndup(buf, len));
3498
9.34k
}
3499
3500
/**
 * xmlParseEntityValue:
 * @ctxt:  an XML parser context
 * @orig:  if non-NULL store a copy of the original entity value
 *
 * DEPRECATED: Internal function, don't use.
 *
 * parse a value for ENTITY declarations
 *
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
 *	               "'" ([^%&'] | PEReference | Reference)* "'"
 *
 * The function works in three steps: the literal is copied into a
 * heap buffer up to the closing quote, the copy is checked so that every
 * '%' and every '&' not followed by '#' starts a "Name;" sequence, and
 * finally the parameter entity references are substituted.
 *
 * When the first two steps succeed and @orig is non-NULL, *@orig receives
 * the buffer holding the unsubstituted value instead of it being freed
 * here; on the earlier error paths *@orig is not written.
 *
 * Returns the EntityValue parsed with reference substituted or NULL
 */

xmlChar *
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
    xmlChar *buf = NULL;
    int len = 0;
    int size = XML_PARSER_BUFFER_SIZE;
    int c, l;
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
                    XML_MAX_HUGE_LENGTH :
                    XML_MAX_TEXT_LENGTH;
    xmlChar stop;
    xmlChar *ret = NULL;
    const xmlChar *cur = NULL;
    xmlParserInputPtr input;

    /* The opening quote decides which quote closes the literal. */
    if (RAW == '"') stop = '"';
    else if (RAW == '\'') stop = '\'';
    else {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
        return(NULL);
    }
    buf = (xmlChar *) xmlMallocAtomic(size);
    if (buf == NULL) {
        xmlErrMemory(ctxt, NULL);
        return(NULL);
    }

    /*
     * The content of the entity definition is copied in a buffer.
     */

    ctxt->instate = XML_PARSER_ENTITY_VALUE;
    /* Remember the input the literal started in, see the loop condition. */
    input = ctxt->input;
    GROW;
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;
    /* Skip the opening quote. */
    NEXT;
    c = CUR_CHAR(l);
    /*
     * NOTE: 4.4.5 Included in Literal
     * When a parameter entity reference appears in a literal entity
     * value, ... a single or double quote character in the replacement
     * text is always treated as a normal data character and will not
     * terminate the literal.
     * In practice it means we stop the loop only when back at parsing
     * the initial entity and the quote is found
     */
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
            (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
        /* Keep room for one multi-byte char plus the terminating 0. */
        if (len + 5 >= size) {
            xmlChar *tmp;

            size *= 2;
            tmp = (xmlChar *) xmlRealloc(buf, size);
            if (tmp == NULL) {
                xmlErrMemory(ctxt, NULL);
                goto error;
            }
            buf = tmp;
        }
        COPY_BUF(l,buf,len,c);
        NEXTL(l);

        GROW;
        c = CUR_CHAR(l);
        /* On a zero char, try once more to get additional input. */
        if (c == 0) {
            GROW;
            c = CUR_CHAR(l);
        }

        if (len > maxLength) {
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
                           "entity value too long\n");
            goto error;
        }
    }
    buf[len] = 0;
    if (ctxt->instate == XML_PARSER_EOF)
        goto error;
    /* The loop ended on something else than the closing quote. */
    if (c != stop) {
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
        goto error;
    }
    /* Skip the closing quote. */
    NEXT;

    /*
     * Raise problem w.r.t. '&' and '%' being used in non-entities
     * reference constructs. Note Charref will be handled in
     * xmlStringDecodeEntities()
     */
    cur = buf;
    while (*cur != 0) { /* non input consuming */
        if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
            xmlChar *name;
            xmlChar tmp = *cur;
            int nameOk = 0;

            cur++;
            /* Only the validity of the name matters, the copy is dropped. */
            name = xmlParseStringName(ctxt, &cur);
            if (name != NULL) {
                nameOk = 1;
                xmlFree(name);
            }
            if ((nameOk == 0) || (*cur != ';')) {
                xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
            "EntityValue: '%c' forbidden except for entities references\n",
                                  tmp);
                goto error;
            }
            /*
             * A parameter entity reference is rejected when inSubset is 1
             * and only one input is on the stack.
             */
            if ((tmp == '%') && (ctxt->inSubset == 1) &&
                (ctxt->inputNr == 1)) {
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
                goto error;
            }
            if (*cur == 0)
                break;
        }
        cur++;
    }

    /*
     * Then PEReference entities are substituted.
     *
     * NOTE: 4.4.7 Bypassed
     * When a general entity reference appears in the EntityValue in
     * an entity declaration, it is bypassed and left as is.
     * so XML_SUBSTITUTE_REF is not set here.
     */
    ++ctxt->depth;
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
                                     0, 0, 0, /* check */ 1);
    --ctxt->depth;

    /* Hand the raw value over to the caller instead of freeing it below. */
    if (orig != NULL) {
        *orig = buf;
        buf = NULL;
    }

    /* Common exit: ret is still NULL when reached through a goto. */
error:
    if (buf != NULL)
        xmlFree(buf);
    return(ret);
}
3657
3658
/**
3659
 * xmlParseAttValueComplex:
3660
 * @ctxt:  an XML parser context
3661
 * @len:   the resulting attribute len
3662
 * @normalize:  whether to apply the inner normalization
3663
 *
3664
 * parse a value for an attribute, this is the fallback function
3665
 * of xmlParseAttValue() when the attribute parsing requires handling
3666
 * of non-ASCII characters, or normalization compaction.
3667
 *
3668
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3669
 */
3670
static xmlChar *
3671
436k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3672
436k
    xmlChar limit = 0;
3673
436k
    xmlChar *buf = NULL;
3674
436k
    xmlChar *rep = NULL;
3675
436k
    size_t len = 0;
3676
436k
    size_t buf_size = 0;
3677
436k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3678
0
                       XML_MAX_HUGE_LENGTH :
3679
436k
                       XML_MAX_TEXT_LENGTH;
3680
436k
    int c, l, in_space = 0;
3681
436k
    xmlChar *current = NULL;
3682
436k
    xmlEntityPtr ent;
3683
3684
436k
    if (NXT(0) == '"') {
3685
424k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3686
424k
  limit = '"';
3687
424k
        NEXT;
3688
424k
    } else if (NXT(0) == '\'') {
3689
12.2k
  limit = '\'';
3690
12.2k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3691
12.2k
        NEXT;
3692
12.2k
    } else {
3693
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3694
0
  return(NULL);
3695
0
    }
3696
3697
    /*
3698
     * allocate a translation buffer.
3699
     */
3700
436k
    buf_size = XML_PARSER_BUFFER_SIZE;
3701
436k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3702
436k
    if (buf == NULL) goto mem_error;
3703
3704
    /*
3705
     * OK loop until we reach one of the ending char or a size limit.
3706
     */
3707
436k
    c = CUR_CHAR(l);
3708
108M
    while (((NXT(0) != limit) && /* checked */
3709
108M
            (IS_CHAR(c)) && (c != '<')) &&
3710
108M
            (ctxt->instate != XML_PARSER_EOF)) {
3711
108M
  if (c == '&') {
3712
236k
      in_space = 0;
3713
236k
      if (NXT(1) == '#') {
3714
168k
    int val = xmlParseCharRef(ctxt);
3715
3716
168k
    if (val == '&') {
3717
8.38k
        if (ctxt->replaceEntities) {
3718
8.38k
      if (len + 10 > buf_size) {
3719
225
          growBuffer(buf, 10);
3720
225
      }
3721
8.38k
      buf[len++] = '&';
3722
8.38k
        } else {
3723
      /*
3724
       * The reparsing will be done in xmlStringGetNodeList()
3725
       * called by the attribute() function in SAX.c
3726
       */
3727
0
      if (len + 10 > buf_size) {
3728
0
          growBuffer(buf, 10);
3729
0
      }
3730
0
      buf[len++] = '&';
3731
0
      buf[len++] = '#';
3732
0
      buf[len++] = '3';
3733
0
      buf[len++] = '8';
3734
0
      buf[len++] = ';';
3735
0
        }
3736
160k
    } else if (val != 0) {
3737
147k
        if (len + 10 > buf_size) {
3738
475
      growBuffer(buf, 10);
3739
475
        }
3740
147k
        len += xmlCopyChar(0, &buf[len], val);
3741
147k
    }
3742
168k
      } else {
3743
67.4k
    ent = xmlParseEntityRef(ctxt);
3744
67.4k
    if ((ent != NULL) &&
3745
67.4k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3746
882
        if (len + 10 > buf_size) {
3747
139
      growBuffer(buf, 10);
3748
139
        }
3749
881
        if ((ctxt->replaceEntities == 0) &&
3750
881
            (ent->content[0] == '&')) {
3751
0
      buf[len++] = '&';
3752
0
      buf[len++] = '#';
3753
0
      buf[len++] = '3';
3754
0
      buf[len++] = '8';
3755
0
      buf[len++] = ';';
3756
881
        } else {
3757
881
      buf[len++] = ent->content[0];
3758
881
        }
3759
66.5k
    } else if ((ent != NULL) &&
3760
66.5k
               (ctxt->replaceEntities != 0)) {
3761
25.7k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3762
25.7k
                        if (xmlParserEntityCheck(ctxt, ent->length))
3763
115
                            goto error;
3764
3765
25.6k
      ++ctxt->depth;
3766
25.6k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
3767
25.6k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
3768
25.6k
                                /* check */ 1);
3769
25.6k
      --ctxt->depth;
3770
25.6k
      if (rep != NULL) {
3771
20.0k
          current = rep;
3772
899M
          while (*current != 0) { /* non input consuming */
3773
899M
                                if ((*current == 0xD) || (*current == 0xA) ||
3774
899M
                                    (*current == 0x9)) {
3775
27.6k
                                    buf[len++] = 0x20;
3776
27.6k
                                    current++;
3777
27.6k
                                } else
3778
899M
                                    buf[len++] = *current++;
3779
899M
        if (len + 10 > buf_size) {
3780
43.0k
            growBuffer(buf, 10);
3781
43.0k
        }
3782
899M
          }
3783
20.0k
          xmlFree(rep);
3784
20.0k
          rep = NULL;
3785
20.0k
      }
3786
25.6k
        } else {
3787
0
      if (len + 10 > buf_size) {
3788
0
          growBuffer(buf, 10);
3789
0
      }
3790
0
      if (ent->content != NULL)
3791
0
          buf[len++] = ent->content[0];
3792
0
        }
3793
40.8k
    } else if (ent != NULL) {
3794
0
        int i = xmlStrlen(ent->name);
3795
0
        const xmlChar *cur = ent->name;
3796
3797
        /*
3798
                     * We also check for recursion and amplification
3799
                     * when entities are not substituted. They're
3800
                     * often expanded later.
3801
         */
3802
0
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3803
0
      (ent->content != NULL)) {
3804
0
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
3805
0
                            unsigned long oldCopy = ctxt->sizeentcopy;
3806
3807
0
                            ctxt->sizeentcopy = ent->length;
3808
3809
0
                            ++ctxt->depth;
3810
0
                            rep = xmlStringDecodeEntitiesInt(ctxt,
3811
0
                                    ent->content, ent->length,
3812
0
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
3813
0
                                    /* check */ 1);
3814
0
                            --ctxt->depth;
3815
3816
                            /*
3817
                             * If we're parsing DTD content, the entity
3818
                             * might reference other entities which
3819
                             * weren't defined yet, so the check isn't
3820
                             * reliable.
3821
                             */
3822
0
                            if (ctxt->inSubset == 0) {
3823
0
                                ent->flags |= XML_ENT_CHECKED;
3824
0
                                ent->expandedSize = ctxt->sizeentcopy;
3825
0
                            }
3826
3827
0
                            if (rep != NULL) {
3828
0
                                xmlFree(rep);
3829
0
                                rep = NULL;
3830
0
                            } else {
3831
0
                                ent->content[0] = 0;
3832
0
                            }
3833
3834
0
                            if (xmlParserEntityCheck(ctxt, oldCopy))
3835
0
                                goto error;
3836
0
                        } else {
3837
0
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
3838
0
                                goto error;
3839
0
                        }
3840
0
        }
3841
3842
        /*
3843
         * Just output the reference
3844
         */
3845
0
        buf[len++] = '&';
3846
0
        while (len + i + 10 > buf_size) {
3847
0
      growBuffer(buf, i + 10);
3848
0
        }
3849
0
        for (;i > 0;i--)
3850
0
      buf[len++] = *cur++;
3851
0
        buf[len++] = ';';
3852
0
    }
3853
67.4k
      }
3854
108M
  } else {
3855
108M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3856
28.0M
          if ((len != 0) || (!normalize)) {
3857
28.0M
        if ((!normalize) || (!in_space)) {
3858
28.0M
      COPY_BUF(l,buf,len,0x20);
3859
28.0M
      while (len + 10 > buf_size) {
3860
91.8k
          growBuffer(buf, 10);
3861
91.8k
      }
3862
28.0M
        }
3863
28.0M
        in_space = 1;
3864
28.0M
    }
3865
80.0M
      } else {
3866
80.0M
          in_space = 0;
3867
80.0M
    COPY_BUF(l,buf,len,c);
3868
80.0M
    if (len + 10 > buf_size) {
3869
331k
        growBuffer(buf, 10);
3870
331k
    }
3871
80.0M
      }
3872
108M
      NEXTL(l);
3873
108M
  }
3874
108M
  GROW;
3875
108M
  c = CUR_CHAR(l);
3876
108M
        if (len > maxLength) {
3877
3
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3878
3
                           "AttValue length too long\n");
3879
3
            goto mem_error;
3880
3
        }
3881
108M
    }
3882
436k
    if (ctxt->instate == XML_PARSER_EOF)
3883
676
        goto error;
3884
3885
435k
    if ((in_space) && (normalize)) {
3886
7.10k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
3887
2.78k
    }
3888
435k
    buf[len] = 0;
3889
435k
    if (RAW == '<') {
3890
19.7k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3891
416k
    } else if (RAW != limit) {
3892
10.3k
  if ((c != 0) && (!IS_CHAR(c))) {
3893
4.43k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3894
4.43k
         "invalid character in attribute value\n");
3895
5.91k
  } else {
3896
5.91k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3897
5.91k
         "AttValue: ' expected\n");
3898
5.91k
        }
3899
10.3k
    } else
3900
405k
  NEXT;
3901
3902
435k
    if (attlen != NULL) *attlen = len;
3903
435k
    return(buf);
3904
3905
135
mem_error:
3906
135
    xmlErrMemory(ctxt, NULL);
3907
926
error:
3908
926
    if (buf != NULL)
3909
813
        xmlFree(buf);
3910
926
    if (rep != NULL)
3911
5
        xmlFree(rep);
3912
926
    return(NULL);
3913
135
}
3914
3915
/**
3916
 * xmlParseAttValue:
3917
 * @ctxt:  an XML parser context
3918
 *
3919
 * DEPRECATED: Internal function, don't use.
3920
 *
3921
 * parse a value for an attribute
3922
 * Note: the parser won't do substitution of entities here, this
3923
 * will be handled later in xmlStringGetNodeList
3924
 *
3925
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3926
 *                   "'" ([^<&'] | Reference)* "'"
3927
 *
3928
 * 3.3.3 Attribute-Value Normalization:
3929
 * Before the value of an attribute is passed to the application or
3930
 * checked for validity, the XML processor must normalize it as follows:
3931
 * - a character reference is processed by appending the referenced
3932
 *   character to the attribute value
3933
 * - an entity reference is processed by recursively processing the
3934
 *   replacement text of the entity
3935
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3936
 *   appending #x20 to the normalized value, except that only a single
3937
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
3938
 *   parsed entity or the literal entity value of an internal parsed entity
3939
 * - other characters are processed by appending them to the normalized value
3940
 * If the declared value is not CDATA, then the XML processor must further
3941
 * process the normalized attribute value by discarding any leading and
3942
 * trailing space (#x20) characters, and by replacing sequences of space
3943
 * (#x20) characters by a single space (#x20) character.
3944
 * All attributes for which no declaration has been read should be treated
3945
 * by a non-validating parser as if declared CDATA.
3946
 *
3947
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3948
 */
3949
3950
3951
xmlChar *
3952
87.9k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3953
87.9k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3954
87.9k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3955
87.9k
}
3956
3957
/**
3958
 * xmlParseSystemLiteral:
3959
 * @ctxt:  an XML parser context
3960
 *
3961
 * DEPRECATED: Internal function, don't use.
3962
 *
3963
 * parse an XML Literal
3964
 *
3965
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3966
 *
3967
 * Returns the SystemLiteral parsed or NULL
3968
 */
3969
3970
xmlChar *
3971
35.1k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3972
35.1k
    xmlChar *buf = NULL;
3973
35.1k
    int len = 0;
3974
35.1k
    int size = XML_PARSER_BUFFER_SIZE;
3975
35.1k
    int cur, l;
3976
35.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3977
0
                    XML_MAX_TEXT_LENGTH :
3978
35.1k
                    XML_MAX_NAME_LENGTH;
3979
35.1k
    xmlChar stop;
3980
35.1k
    int state = ctxt->instate;
3981
3982
35.1k
    if (RAW == '"') {
3983
15.3k
        NEXT;
3984
15.3k
  stop = '"';
3985
19.8k
    } else if (RAW == '\'') {
3986
18.1k
        NEXT;
3987
18.1k
  stop = '\'';
3988
18.1k
    } else {
3989
1.64k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3990
1.64k
  return(NULL);
3991
1.64k
    }
3992
3993
33.4k
    buf = (xmlChar *) xmlMallocAtomic(size);
3994
33.4k
    if (buf == NULL) {
3995
13
        xmlErrMemory(ctxt, NULL);
3996
13
  return(NULL);
3997
13
    }
3998
33.4k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3999
33.4k
    cur = CUR_CHAR(l);
4000
5.91M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4001
5.88M
  if (len + 5 >= size) {
4002
8.27k
      xmlChar *tmp;
4003
4004
8.27k
      size *= 2;
4005
8.27k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4006
8.27k
      if (tmp == NULL) {
4007
3
          xmlFree(buf);
4008
3
    xmlErrMemory(ctxt, NULL);
4009
3
    ctxt->instate = (xmlParserInputState) state;
4010
3
    return(NULL);
4011
3
      }
4012
8.26k
      buf = tmp;
4013
8.26k
  }
4014
5.88M
  COPY_BUF(l,buf,len,cur);
4015
5.88M
        if (len > maxLength) {
4016
101
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4017
101
            xmlFree(buf);
4018
101
            ctxt->instate = (xmlParserInputState) state;
4019
101
            return(NULL);
4020
101
        }
4021
5.88M
  NEXTL(l);
4022
5.88M
  cur = CUR_CHAR(l);
4023
5.88M
    }
4024
33.3k
    buf[len] = 0;
4025
33.3k
    if (ctxt->instate == XML_PARSER_EOF) {
4026
280
        xmlFree(buf);
4027
280
        return(NULL);
4028
280
    }
4029
33.0k
    ctxt->instate = (xmlParserInputState) state;
4030
33.0k
    if (!IS_CHAR(cur)) {
4031
747
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4032
32.3k
    } else {
4033
32.3k
  NEXT;
4034
32.3k
    }
4035
33.0k
    return(buf);
4036
33.3k
}
4037
4038
/**
4039
 * xmlParsePubidLiteral:
4040
 * @ctxt:  an XML parser context
4041
 *
4042
 * DEPRECATED: Internal function, don't use.
4043
 *
4044
 * parse an XML public literal
4045
 *
4046
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4047
 *
4048
 * Returns the PubidLiteral parsed or NULL.
4049
 */
4050
4051
xmlChar *
4052
22.9k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4053
22.9k
    xmlChar *buf = NULL;
4054
22.9k
    int len = 0;
4055
22.9k
    int size = XML_PARSER_BUFFER_SIZE;
4056
22.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4057
0
                    XML_MAX_TEXT_LENGTH :
4058
22.9k
                    XML_MAX_NAME_LENGTH;
4059
22.9k
    xmlChar cur;
4060
22.9k
    xmlChar stop;
4061
22.9k
    xmlParserInputState oldstate = ctxt->instate;
4062
4063
22.9k
    if (RAW == '"') {
4064
6.86k
        NEXT;
4065
6.86k
  stop = '"';
4066
16.0k
    } else if (RAW == '\'') {
4067
13.4k
        NEXT;
4068
13.4k
  stop = '\'';
4069
13.4k
    } else {
4070
2.67k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4071
2.67k
  return(NULL);
4072
2.67k
    }
4073
20.2k
    buf = (xmlChar *) xmlMallocAtomic(size);
4074
20.2k
    if (buf == NULL) {
4075
6
  xmlErrMemory(ctxt, NULL);
4076
6
  return(NULL);
4077
6
    }
4078
20.2k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4079
20.2k
    cur = CUR;
4080
861k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4081
841k
  if (len + 1 >= size) {
4082
3.20k
      xmlChar *tmp;
4083
4084
3.20k
      size *= 2;
4085
3.20k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4086
3.20k
      if (tmp == NULL) {
4087
1
    xmlErrMemory(ctxt, NULL);
4088
1
    xmlFree(buf);
4089
1
    return(NULL);
4090
1
      }
4091
3.20k
      buf = tmp;
4092
3.20k
  }
4093
841k
  buf[len++] = cur;
4094
841k
        if (len > maxLength) {
4095
2
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4096
2
            xmlFree(buf);
4097
2
            return(NULL);
4098
2
        }
4099
841k
  NEXT;
4100
841k
  cur = CUR;
4101
841k
    }
4102
20.2k
    buf[len] = 0;
4103
20.2k
    if (ctxt->instate == XML_PARSER_EOF) {
4104
298
        xmlFree(buf);
4105
298
        return(NULL);
4106
298
    }
4107
19.9k
    if (cur != stop) {
4108
3.65k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4109
16.3k
    } else {
4110
16.3k
  NEXTL(1);
4111
16.3k
    }
4112
19.9k
    ctxt->instate = oldstate;
4113
19.9k
    return(buf);
4114
20.2k
}
4115
4116
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4117
4118
/*
 * Lookup table for the inner loop of the fast character data scanner.
 * Entry i is non-zero (equal to i) when byte i can be skipped without
 * further inspection, and zero when the scanner has to stop and look:
 * control characters other than TAB (CR and LF are handled separately),
 * '&', '<', ']' and every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 and above: non-ascii, always stop */
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4155
4156
/**
4157
 * xmlParseCharDataInternal:
4158
 * @ctxt:  an XML parser context
4159
 * @partial:  buffer may contain partial UTF-8 sequences
4160
 *
4161
 * Parse character data. Always makes progress if the first char isn't
4162
 * '<' or '&'.
4163
 *
4164
 * The right angle bracket (>) may be represented using the string "&gt;",
4165
 * and must, for compatibility, be escaped using "&gt;" or a character
4166
 * reference when it appears in the string "]]>" in content, when that
4167
 * string is not marking the end of a CDATA section.
4168
 *
4169
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4170
 */
4171
static void
4172
2.11M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4173
2.11M
    const xmlChar *in;
4174
2.11M
    int nbchar = 0;
4175
2.11M
    int line = ctxt->input->line;
4176
2.11M
    int col = ctxt->input->col;
4177
2.11M
    int ccol;
4178
4179
2.11M
    GROW;
4180
    /*
4181
     * Accelerated common case where input don't need to be
4182
     * modified before passing it to the handler.
4183
     */
4184
2.11M
    in = ctxt->input->cur;
4185
2.24M
    do {
4186
2.99M
get_more_space:
4187
4.73M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4188
2.99M
        if (*in == 0xA) {
4189
3.15M
            do {
4190
3.15M
                ctxt->input->line++; ctxt->input->col = 1;
4191
3.15M
                in++;
4192
3.15M
            } while (*in == 0xA);
4193
746k
            goto get_more_space;
4194
746k
        }
4195
2.24M
        if (*in == '<') {
4196
646k
            nbchar = in - ctxt->input->cur;
4197
646k
            if (nbchar > 0) {
4198
645k
                const xmlChar *tmp = ctxt->input->cur;
4199
645k
                ctxt->input->cur = in;
4200
4201
645k
                if ((ctxt->sax != NULL) &&
4202
645k
                    (ctxt->sax->ignorableWhitespace !=
4203
645k
                     ctxt->sax->characters)) {
4204
0
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4205
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4206
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4207
0
                                                   tmp, nbchar);
4208
0
                    } else {
4209
0
                        if (ctxt->sax->characters != NULL)
4210
0
                            ctxt->sax->characters(ctxt->userData,
4211
0
                                                  tmp, nbchar);
4212
0
                        if (*ctxt->space == -1)
4213
0
                            *ctxt->space = -2;
4214
0
                    }
4215
645k
                } else if ((ctxt->sax != NULL) &&
4216
645k
                           (ctxt->sax->characters != NULL)) {
4217
645k
                    ctxt->sax->characters(ctxt->userData,
4218
645k
                                          tmp, nbchar);
4219
645k
                }
4220
645k
            }
4221
646k
            return;
4222
646k
        }
4223
4224
2.09M
get_more:
4225
2.09M
        ccol = ctxt->input->col;
4226
39.0M
        while (test_char_data[*in]) {
4227
37.0M
            in++;
4228
37.0M
            ccol++;
4229
37.0M
        }
4230
2.09M
        ctxt->input->col = ccol;
4231
2.09M
        if (*in == 0xA) {
4232
11.0M
            do {
4233
11.0M
                ctxt->input->line++; ctxt->input->col = 1;
4234
11.0M
                in++;
4235
11.0M
            } while (*in == 0xA);
4236
278k
            goto get_more;
4237
278k
        }
4238
1.81M
        if (*in == ']') {
4239
216k
            if ((in[1] == ']') && (in[2] == '>')) {
4240
5.94k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4241
5.94k
                if (ctxt->instate != XML_PARSER_EOF)
4242
5.94k
                    ctxt->input->cur = in + 1;
4243
5.94k
                return;
4244
5.94k
            }
4245
210k
            in++;
4246
210k
            ctxt->input->col++;
4247
210k
            goto get_more;
4248
216k
        }
4249
1.59M
        nbchar = in - ctxt->input->cur;
4250
1.59M
        if (nbchar > 0) {
4251
1.22M
            if ((ctxt->sax != NULL) &&
4252
1.22M
                (ctxt->sax->ignorableWhitespace !=
4253
1.22M
                 ctxt->sax->characters) &&
4254
1.22M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4255
0
                const xmlChar *tmp = ctxt->input->cur;
4256
0
                ctxt->input->cur = in;
4257
4258
0
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4259
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4260
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4261
0
                                                       tmp, nbchar);
4262
0
                } else {
4263
0
                    if (ctxt->sax->characters != NULL)
4264
0
                        ctxt->sax->characters(ctxt->userData,
4265
0
                                              tmp, nbchar);
4266
0
                    if (*ctxt->space == -1)
4267
0
                        *ctxt->space = -2;
4268
0
                }
4269
0
                line = ctxt->input->line;
4270
0
                col = ctxt->input->col;
4271
1.22M
            } else if (ctxt->sax != NULL) {
4272
1.22M
                if (ctxt->sax->characters != NULL)
4273
1.22M
                    ctxt->sax->characters(ctxt->userData,
4274
1.22M
                                          ctxt->input->cur, nbchar);
4275
1.22M
                line = ctxt->input->line;
4276
1.22M
                col = ctxt->input->col;
4277
1.22M
            }
4278
1.22M
        }
4279
1.59M
        ctxt->input->cur = in;
4280
1.59M
        if (*in == 0xD) {
4281
158k
            in++;
4282
158k
            if (*in == 0xA) {
4283
142k
                ctxt->input->cur = in;
4284
142k
                in++;
4285
142k
                ctxt->input->line++; ctxt->input->col = 1;
4286
142k
                continue; /* while */
4287
142k
            }
4288
16.5k
            in--;
4289
16.5k
        }
4290
1.45M
        if (*in == '<') {
4291
531k
            return;
4292
531k
        }
4293
922k
        if (*in == '&') {
4294
57.1k
            return;
4295
57.1k
        }
4296
865k
        SHRINK;
4297
865k
        GROW;
4298
865k
        if (ctxt->instate == XML_PARSER_EOF)
4299
389
            return;
4300
865k
        in = ctxt->input->cur;
4301
1.00M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4302
1.00M
             (*in == 0x09) || (*in == 0x0a));
4303
872k
    ctxt->input->line = line;
4304
872k
    ctxt->input->col = col;
4305
872k
    xmlParseCharDataComplex(ctxt, partial);
4306
872k
}
4307
4308
/**
4309
 * xmlParseCharDataComplex:
4310
 * @ctxt:  an XML parser context
4311
 * @cdata:  int indicating whether we are within a CDATA section
4312
 *
4313
 * Always makes progress if the first char isn't '<' or '&'.
4314
 *
4315
 * parse a CharData section.this is the fallback function
4316
 * of xmlParseCharData() when the parsing requires handling
4317
 * of non-ASCII characters.
4318
 */
4319
static void
4320
872k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4321
872k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4322
872k
    int nbchar = 0;
4323
872k
    int cur, l;
4324
4325
872k
    cur = CUR_CHAR(l);
4326
355M
    while ((cur != '<') && /* checked */
4327
355M
           (cur != '&') &&
4328
355M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4329
354M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4330
1.34k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4331
1.34k
  }
4332
354M
  COPY_BUF(l,buf,nbchar,cur);
4333
  /* move current position before possible calling of ctxt->sax->characters */
4334
354M
  NEXTL(l);
4335
354M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4336
2.64M
      buf[nbchar] = 0;
4337
4338
      /*
4339
       * OK the segment is to be consumed as chars.
4340
       */
4341
2.64M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4342
1.04M
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4343
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4344
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4345
0
                                     buf, nbchar);
4346
1.04M
    } else {
4347
1.04M
        if (ctxt->sax->characters != NULL)
4348
1.04M
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4349
1.04M
        if ((ctxt->sax->characters !=
4350
1.04M
             ctxt->sax->ignorableWhitespace) &&
4351
1.04M
      (*ctxt->space == -1))
4352
0
      *ctxt->space = -2;
4353
1.04M
    }
4354
1.04M
      }
4355
2.64M
      nbchar = 0;
4356
            /* something really bad happened in the SAX callback */
4357
2.64M
            if (ctxt->instate != XML_PARSER_CONTENT)
4358
8
                return;
4359
2.64M
            SHRINK;
4360
2.64M
  }
4361
354M
  cur = CUR_CHAR(l);
4362
354M
    }
4363
872k
    if (ctxt->instate == XML_PARSER_EOF)
4364
1.63k
        return;
4365
870k
    if (nbchar != 0) {
4366
773k
        buf[nbchar] = 0;
4367
  /*
4368
   * OK the segment is to be consumed as chars.
4369
   */
4370
773k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4371
569k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4372
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4373
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4374
569k
      } else {
4375
569k
    if (ctxt->sax->characters != NULL)
4376
569k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4377
569k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4378
569k
        (*ctxt->space == -1))
4379
0
        *ctxt->space = -2;
4380
569k
      }
4381
569k
  }
4382
773k
    }
4383
    /*
4384
     * cur == 0 can mean
4385
     *
4386
     * - XML_PARSER_EOF or memory error. This is checked above.
4387
     * - An actual 0 character.
4388
     * - End of buffer.
4389
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4390
     */
4391
870k
    if (ctxt->input->cur < ctxt->input->end) {
4392
861k
        if ((cur == 0) && (CUR != 0)) {
4393
566
            if (partial == 0) {
4394
566
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4395
566
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4396
566
                NEXTL(1);
4397
566
            }
4398
860k
        } else if ((cur != '<') && (cur != '&')) {
4399
            /* Generate the error and skip the offending character */
4400
102k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4401
102k
                              "PCDATA invalid Char value %d\n", cur);
4402
102k
            NEXTL(l);
4403
102k
        }
4404
861k
    }
4405
870k
}
4406
4407
/**
4408
 * xmlParseCharData:
4409
 * @ctxt:  an XML parser context
4410
 * @cdata:  unused
4411
 *
4412
 * DEPRECATED: Internal function, don't use.
4413
 */
4414
void
4415
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4416
0
    xmlParseCharDataInternal(ctxt, 0);
4417
0
}
4418
4419
/**
4420
 * xmlParseExternalID:
4421
 * @ctxt:  an XML parser context
4422
 * @publicID:  a xmlChar** receiving PubidLiteral
4423
 * @strict: indicate whether we should restrict parsing to only
4424
 *          production [75], see NOTE below
4425
 *
4426
 * DEPRECATED: Internal function, don't use.
4427
 *
4428
 * Parse an External ID or a Public ID
4429
 *
4430
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4431
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4432
 *
4433
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4434
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4435
 *
4436
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4437
 *
4438
 * Returns the function returns SystemLiteral and in the second
4439
 *                case publicID receives PubidLiteral, is strict is off
4440
 *                it is possible to return NULL and have publicID set.
4441
 */
4442
4443
xmlChar *
4444
107k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4445
107k
    xmlChar *URI = NULL;
4446
4447
107k
    *publicID = NULL;
4448
107k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4449
18.8k
        SKIP(6);
4450
18.8k
  if (SKIP_BLANKS == 0) {
4451
280
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4452
280
                     "Space required after 'SYSTEM'\n");
4453
280
  }
4454
18.8k
  URI = xmlParseSystemLiteral(ctxt);
4455
18.8k
  if (URI == NULL) {
4456
288
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4457
288
        }
4458
88.9k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4459
22.9k
        SKIP(6);
4460
22.9k
  if (SKIP_BLANKS == 0) {
4461
2.49k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4462
2.49k
        "Space required after 'PUBLIC'\n");
4463
2.49k
  }
4464
22.9k
  *publicID = xmlParsePubidLiteral(ctxt);
4465
22.9k
  if (*publicID == NULL) {
4466
2.98k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4467
2.98k
  }
4468
22.9k
  if (strict) {
4469
      /*
4470
       * We don't handle [83] so "S SystemLiteral" is required.
4471
       */
4472
14.7k
      if (SKIP_BLANKS == 0) {
4473
1.96k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4474
1.96k
      "Space required after the Public Identifier\n");
4475
1.96k
      }
4476
14.7k
  } else {
4477
      /*
4478
       * We handle [83] so we return immediately, if
4479
       * "S SystemLiteral" is not detected. We skip blanks if no
4480
             * system literal was found, but this is harmless since we must
4481
             * be at the end of a NotationDecl.
4482
       */
4483
8.18k
      if (SKIP_BLANKS == 0) return(NULL);
4484
4.43k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4485
4.43k
  }
4486
16.3k
  URI = xmlParseSystemLiteral(ctxt);
4487
16.3k
  if (URI == NULL) {
4488
1.75k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4489
1.75k
        }
4490
16.3k
    }
4491
101k
    return(URI);
4492
107k
}
4493
4494
/**
4495
 * xmlParseCommentComplex:
4496
 * @ctxt:  an XML parser context
4497
 * @buf:  the already parsed part of the buffer
4498
 * @len:  number of bytes in the buffer
4499
 * @size:  allocated size of the buffer
4500
 *
4501
 * Skip an XML (SGML) comment <!-- .... -->
4502
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4503
 *  must not occur within comments. "
4504
 * This is the slow routine in case the accelerator for ascii didn't work
4505
 *
4506
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4507
 */
4508
static void
4509
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4510
34.6k
                       size_t len, size_t size) {
4511
34.6k
    int q, ql;
4512
34.6k
    int r, rl;
4513
34.6k
    int cur, l;
4514
34.6k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4515
0
                       XML_MAX_HUGE_LENGTH :
4516
34.6k
                       XML_MAX_TEXT_LENGTH;
4517
34.6k
    int inputid;
4518
4519
34.6k
    inputid = ctxt->input->id;
4520
4521
34.6k
    if (buf == NULL) {
4522
4.66k
        len = 0;
4523
4.66k
  size = XML_PARSER_BUFFER_SIZE;
4524
4.66k
  buf = (xmlChar *) xmlMallocAtomic(size);
4525
4.66k
  if (buf == NULL) {
4526
96
      xmlErrMemory(ctxt, NULL);
4527
96
      return;
4528
96
  }
4529
4.66k
    }
4530
34.5k
    q = CUR_CHAR(ql);
4531
34.5k
    if (q == 0)
4532
1.51k
        goto not_terminated;
4533
33.0k
    if (!IS_CHAR(q)) {
4534
1.36k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4535
1.36k
                          "xmlParseComment: invalid xmlChar value %d\n",
4536
1.36k
                    q);
4537
1.36k
  xmlFree (buf);
4538
1.36k
  return;
4539
1.36k
    }
4540
31.7k
    NEXTL(ql);
4541
31.7k
    r = CUR_CHAR(rl);
4542
31.7k
    if (r == 0)
4543
265
        goto not_terminated;
4544
31.4k
    if (!IS_CHAR(r)) {
4545
8.09k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4546
8.09k
                          "xmlParseComment: invalid xmlChar value %d\n",
4547
8.09k
                    r);
4548
8.09k
  xmlFree (buf);
4549
8.09k
  return;
4550
8.09k
    }
4551
23.3k
    NEXTL(rl);
4552
23.3k
    cur = CUR_CHAR(l);
4553
23.3k
    if (cur == 0)
4554
491
        goto not_terminated;
4555
10.3M
    while (IS_CHAR(cur) && /* checked */
4556
10.3M
           ((cur != '>') ||
4557
10.3M
      (r != '-') || (q != '-'))) {
4558
10.3M
  if ((r == '-') && (q == '-')) {
4559
14.5k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4560
14.5k
  }
4561
10.3M
  if (len + 5 >= size) {
4562
8.70k
      xmlChar *new_buf;
4563
8.70k
            size_t new_size;
4564
4565
8.70k
      new_size = size * 2;
4566
8.70k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4567
8.70k
      if (new_buf == NULL) {
4568
3
    xmlFree (buf);
4569
3
    xmlErrMemory(ctxt, NULL);
4570
3
    return;
4571
3
      }
4572
8.70k
      buf = new_buf;
4573
8.70k
            size = new_size;
4574
8.70k
  }
4575
10.3M
  COPY_BUF(ql,buf,len,q);
4576
10.3M
        if (len > maxLength) {
4577
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4578
0
                         "Comment too big found", NULL);
4579
0
            xmlFree (buf);
4580
0
            return;
4581
0
        }
4582
4583
10.3M
  q = r;
4584
10.3M
  ql = rl;
4585
10.3M
  r = cur;
4586
10.3M
  rl = l;
4587
4588
10.3M
  NEXTL(l);
4589
10.3M
  cur = CUR_CHAR(l);
4590
4591
10.3M
    }
4592
22.8k
    buf[len] = 0;
4593
22.8k
    if (ctxt->instate == XML_PARSER_EOF) {
4594
234
        xmlFree(buf);
4595
234
        return;
4596
234
    }
4597
22.6k
    if (cur == 0) {
4598
1.82k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4599
1.82k
                       "Comment not terminated \n<!--%.50s\n", buf);
4600
20.8k
    } else if (!IS_CHAR(cur)) {
4601
6.18k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4602
6.18k
                          "xmlParseComment: invalid xmlChar value %d\n",
4603
6.18k
                    cur);
4604
14.6k
    } else {
4605
14.6k
  if (inputid != ctxt->input->id) {
4606
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4607
0
               "Comment doesn't start and stop in the same"
4608
0
                           " entity\n");
4609
0
  }
4610
14.6k
        NEXT;
4611
14.6k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4612
14.6k
      (!ctxt->disableSAX))
4613
5.82k
      ctxt->sax->comment(ctxt->userData, buf);
4614
14.6k
    }
4615
22.6k
    xmlFree(buf);
4616
22.6k
    return;
4617
2.26k
not_terminated:
4618
2.26k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4619
2.26k
       "Comment not terminated\n", NULL);
4620
2.26k
    xmlFree(buf);
4621
2.26k
    return;
4622
22.8k
}
4623
4624
/**
4625
 * xmlParseComment:
4626
 * @ctxt:  an XML parser context
4627
 *
4628
 * DEPRECATED: Internal function, don't use.
4629
 *
4630
 * Parse an XML (SGML) comment. Always consumes '<!'.
4631
 *
4632
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4633
 *  must not occur within comments. "
4634
 *
4635
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4636
 */
4637
void
4638
144k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4639
144k
    xmlChar *buf = NULL;
4640
144k
    size_t size = XML_PARSER_BUFFER_SIZE;
4641
144k
    size_t len = 0;
4642
144k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4643
0
                       XML_MAX_HUGE_LENGTH :
4644
144k
                       XML_MAX_TEXT_LENGTH;
4645
144k
    xmlParserInputState state;
4646
144k
    const xmlChar *in;
4647
144k
    size_t nbchar = 0;
4648
144k
    int ccol;
4649
144k
    int inputid;
4650
4651
    /*
4652
     * Check that there is a comment right here.
4653
     */
4654
144k
    if ((RAW != '<') || (NXT(1) != '!'))
4655
0
        return;
4656
144k
    SKIP(2);
4657
144k
    if ((RAW != '-') || (NXT(1) != '-'))
4658
225
        return;
4659
143k
    state = ctxt->instate;
4660
143k
    ctxt->instate = XML_PARSER_COMMENT;
4661
143k
    inputid = ctxt->input->id;
4662
143k
    SKIP(2);
4663
143k
    GROW;
4664
4665
    /*
4666
     * Accelerated common case where input don't need to be
4667
     * modified before passing it to the handler.
4668
     */
4669
143k
    in = ctxt->input->cur;
4670
145k
    do {
4671
145k
  if (*in == 0xA) {
4672
3.31k
      do {
4673
3.31k
    ctxt->input->line++; ctxt->input->col = 1;
4674
3.31k
    in++;
4675
3.31k
      } while (*in == 0xA);
4676
3.03k
  }
4677
198k
get_more:
4678
198k
        ccol = ctxt->input->col;
4679
3.43M
  while (((*in > '-') && (*in <= 0x7F)) ||
4680
3.43M
         ((*in >= 0x20) && (*in < '-')) ||
4681
3.43M
         (*in == 0x09)) {
4682
3.23M
        in++;
4683
3.23M
        ccol++;
4684
3.23M
  }
4685
198k
  ctxt->input->col = ccol;
4686
198k
  if (*in == 0xA) {
4687
10.9k
      do {
4688
10.9k
    ctxt->input->line++; ctxt->input->col = 1;
4689
10.9k
    in++;
4690
10.9k
      } while (*in == 0xA);
4691
7.28k
      goto get_more;
4692
7.28k
  }
4693
191k
  nbchar = in - ctxt->input->cur;
4694
  /*
4695
   * save current set of data
4696
   */
4697
191k
  if (nbchar > 0) {
4698
182k
      if ((ctxt->sax != NULL) &&
4699
182k
    (ctxt->sax->comment != NULL)) {
4700
182k
    if (buf == NULL) {
4701
138k
        if ((*in == '-') && (in[1] == '-'))
4702
105k
            size = nbchar + 1;
4703
33.2k
        else
4704
33.2k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4705
138k
        buf = (xmlChar *) xmlMallocAtomic(size);
4706
138k
        if (buf == NULL) {
4707
281
            xmlErrMemory(ctxt, NULL);
4708
281
      ctxt->instate = state;
4709
281
      return;
4710
281
        }
4711
137k
        len = 0;
4712
137k
    } else if (len + nbchar + 1 >= size) {
4713
3.71k
        xmlChar *new_buf;
4714
3.71k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4715
3.71k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4716
3.71k
        if (new_buf == NULL) {
4717
1
            xmlFree (buf);
4718
1
      xmlErrMemory(ctxt, NULL);
4719
1
      ctxt->instate = state;
4720
1
      return;
4721
1
        }
4722
3.71k
        buf = new_buf;
4723
3.71k
    }
4724
182k
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4725
182k
    len += nbchar;
4726
182k
    buf[len] = 0;
4727
182k
      }
4728
182k
  }
4729
191k
        if (len > maxLength) {
4730
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4731
0
                         "Comment too big found", NULL);
4732
0
            xmlFree (buf);
4733
0
            return;
4734
0
        }
4735
191k
  ctxt->input->cur = in;
4736
191k
  if (*in == 0xA) {
4737
0
      in++;
4738
0
      ctxt->input->line++; ctxt->input->col = 1;
4739
0
  }
4740
191k
  if (*in == 0xD) {
4741
2.82k
      in++;
4742
2.82k
      if (*in == 0xA) {
4743
561
    ctxt->input->cur = in;
4744
561
    in++;
4745
561
    ctxt->input->line++; ctxt->input->col = 1;
4746
561
    goto get_more;
4747
561
      }
4748
2.26k
      in--;
4749
2.26k
  }
4750
190k
  SHRINK;
4751
190k
  GROW;
4752
190k
        if (ctxt->instate == XML_PARSER_EOF) {
4753
236
            xmlFree(buf);
4754
236
            return;
4755
236
        }
4756
190k
  in = ctxt->input->cur;
4757
190k
  if (*in == '-') {
4758
153k
      if (in[1] == '-') {
4759
138k
          if (in[2] == '>') {
4760
108k
        if (ctxt->input->id != inputid) {
4761
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4762
0
                     "comment doesn't start and stop in the"
4763
0
                                       " same entity\n");
4764
0
        }
4765
108k
        SKIP(3);
4766
108k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4767
108k
            (!ctxt->disableSAX)) {
4768
68.2k
      if (buf != NULL)
4769
67.9k
          ctxt->sax->comment(ctxt->userData, buf);
4770
302
      else
4771
302
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4772
68.2k
        }
4773
108k
        if (buf != NULL)
4774
107k
            xmlFree(buf);
4775
108k
        if (ctxt->instate != XML_PARSER_EOF)
4776
108k
      ctxt->instate = state;
4777
108k
        return;
4778
108k
    }
4779
29.9k
    if (buf != NULL) {
4780
28.8k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4781
28.8k
                          "Double hyphen within comment: "
4782
28.8k
                                      "<!--%.50s\n",
4783
28.8k
              buf);
4784
28.8k
    } else
4785
1.11k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4786
1.11k
                          "Double hyphen within comment\n", NULL);
4787
29.9k
                if (ctxt->instate == XML_PARSER_EOF) {
4788
0
                    xmlFree(buf);
4789
0
                    return;
4790
0
                }
4791
29.9k
    in++;
4792
29.9k
    ctxt->input->col++;
4793
29.9k
      }
4794
45.0k
      in++;
4795
45.0k
      ctxt->input->col++;
4796
45.0k
      goto get_more;
4797
153k
  }
4798
190k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4799
34.6k
    xmlParseCommentComplex(ctxt, buf, len, size);
4800
34.6k
    ctxt->instate = state;
4801
34.6k
    return;
4802
143k
}
4803
4804
4805
/**
4806
 * xmlParsePITarget:
4807
 * @ctxt:  an XML parser context
4808
 *
4809
 * DEPRECATED: Internal function, don't use.
4810
 *
4811
 * parse the name of a PI
4812
 *
4813
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4814
 *
4815
 * Returns the PITarget name or NULL
4816
 */
4817
4818
const xmlChar *
4819
116k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4820
116k
    const xmlChar *name;
4821
4822
116k
    name = xmlParseName(ctxt);
4823
116k
    if ((name != NULL) &&
4824
116k
        ((name[0] == 'x') || (name[0] == 'X')) &&
4825
116k
        ((name[1] == 'm') || (name[1] == 'M')) &&
4826
116k
        ((name[2] == 'l') || (name[2] == 'L'))) {
4827
9.53k
  int i;
4828
9.53k
  if ((name[0] == 'x') && (name[1] == 'm') &&
4829
9.53k
      (name[2] == 'l') && (name[3] == 0)) {
4830
1.20k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4831
1.20k
     "XML declaration allowed only at the start of the document\n");
4832
1.20k
      return(name);
4833
8.33k
  } else if (name[3] == 0) {
4834
586
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4835
586
      return(name);
4836
586
  }
4837
13.8k
  for (i = 0;;i++) {
4838
13.8k
      if (xmlW3CPIs[i] == NULL) break;
4839
10.8k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4840
4.74k
          return(name);
4841
10.8k
  }
4842
2.99k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4843
2.99k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
4844
2.99k
          NULL, NULL);
4845
2.99k
    }
4846
109k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4847
4.37k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
4848
4.37k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
4849
4.37k
    }
4850
109k
    return(name);
4851
116k
}
4852
4853
#ifdef LIBXML_CATALOG_ENABLED
4854
/**
4855
 * xmlParseCatalogPI:
4856
 * @ctxt:  an XML parser context
4857
 * @catalog:  the PI value string
4858
 *
4859
 * parse an XML Catalog Processing Instruction.
4860
 *
4861
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4862
 *
4863
 * Occurs only if allowed by the user and if happening in the Misc
4864
 * part of the document before any doctype information
4865
 * This will add the given catalog to the parsing context in order
4866
 * to be used if there is a resolution need further down in the document
4867
 */
4868
4869
static void
4870
5.10k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4871
5.10k
    xmlChar *URL = NULL;
4872
5.10k
    const xmlChar *tmp, *base;
4873
5.10k
    xmlChar marker;
4874
4875
5.10k
    tmp = catalog;
4876
5.10k
    while (IS_BLANK_CH(*tmp)) tmp++;
4877
5.10k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4878
1.06k
  goto error;
4879
4.04k
    tmp += 7;
4880
4.04k
    while (IS_BLANK_CH(*tmp)) tmp++;
4881
4.04k
    if (*tmp != '=') {
4882
641
  return;
4883
641
    }
4884
3.39k
    tmp++;
4885
3.39k
    while (IS_BLANK_CH(*tmp)) tmp++;
4886
3.39k
    marker = *tmp;
4887
3.39k
    if ((marker != '\'') && (marker != '"'))
4888
683
  goto error;
4889
2.71k
    tmp++;
4890
2.71k
    base = tmp;
4891
32.7k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
4892
2.71k
    if (*tmp == 0)
4893
235
  goto error;
4894
2.48k
    URL = xmlStrndup(base, tmp - base);
4895
2.48k
    tmp++;
4896
2.48k
    while (IS_BLANK_CH(*tmp)) tmp++;
4897
2.48k
    if (*tmp != 0)
4898
1.29k
  goto error;
4899
4900
1.18k
    if (URL != NULL) {
4901
1.18k
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4902
1.18k
  xmlFree(URL);
4903
1.18k
    }
4904
1.18k
    return;
4905
4906
3.28k
error:
4907
3.28k
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4908
3.28k
            "Catalog PI syntax error: %s\n",
4909
3.28k
      catalog, NULL);
4910
3.28k
    if (URL != NULL)
4911
1.29k
  xmlFree(URL);
4912
3.28k
}
4913
#endif
4914
4915
/**
4916
 * xmlParsePI:
4917
 * @ctxt:  an XML parser context
4918
 *
4919
 * DEPRECATED: Internal function, don't use.
4920
 *
4921
 * parse an XML Processing Instruction.
4922
 *
4923
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4924
 *
4925
 * The processing is transferred to SAX once parsed.
4926
 */
4927
4928
void
4929
116k
xmlParsePI(xmlParserCtxtPtr ctxt) {
4930
116k
    xmlChar *buf = NULL;
4931
116k
    size_t len = 0;
4932
116k
    size_t size = XML_PARSER_BUFFER_SIZE;
4933
116k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4934
0
                       XML_MAX_HUGE_LENGTH :
4935
116k
                       XML_MAX_TEXT_LENGTH;
4936
116k
    int cur, l;
4937
116k
    const xmlChar *target;
4938
116k
    xmlParserInputState state;
4939
4940
116k
    if ((RAW == '<') && (NXT(1) == '?')) {
4941
116k
  int inputid = ctxt->input->id;
4942
116k
  state = ctxt->instate;
4943
116k
        ctxt->instate = XML_PARSER_PI;
4944
  /*
4945
   * this is a Processing Instruction.
4946
   */
4947
116k
  SKIP(2);
4948
4949
  /*
4950
   * Parse the target name and check for special support like
4951
   * namespace.
4952
   */
4953
116k
        target = xmlParsePITarget(ctxt);
4954
116k
  if (target != NULL) {
4955
114k
      if ((RAW == '?') && (NXT(1) == '>')) {
4956
53.9k
    if (inputid != ctxt->input->id) {
4957
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4958
0
                             "PI declaration doesn't start and stop in"
4959
0
                                   " the same entity\n");
4960
0
    }
4961
53.9k
    SKIP(2);
4962
4963
    /*
4964
     * SAX: PI detected.
4965
     */
4966
53.9k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
4967
53.9k
        (ctxt->sax->processingInstruction != NULL))
4968
19.5k
        ctxt->sax->processingInstruction(ctxt->userData,
4969
19.5k
                                         target, NULL);
4970
53.9k
    if (ctxt->instate != XML_PARSER_EOF)
4971
53.9k
        ctxt->instate = state;
4972
53.9k
    return;
4973
53.9k
      }
4974
60.2k
      buf = (xmlChar *) xmlMallocAtomic(size);
4975
60.2k
      if (buf == NULL) {
4976
325
    xmlErrMemory(ctxt, NULL);
4977
325
    ctxt->instate = state;
4978
325
    return;
4979
325
      }
4980
59.9k
      if (SKIP_BLANKS == 0) {
4981
12.9k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4982
12.9k
        "ParsePI: PI %s space expected\n", target);
4983
12.9k
      }
4984
59.9k
      cur = CUR_CHAR(l);
4985
51.4M
      while (IS_CHAR(cur) && /* checked */
4986
51.4M
       ((cur != '?') || (NXT(1) != '>'))) {
4987
51.3M
    if (len + 5 >= size) {
4988
8.55k
        xmlChar *tmp;
4989
8.55k
                    size_t new_size = size * 2;
4990
8.55k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
4991
8.55k
        if (tmp == NULL) {
4992
2
      xmlErrMemory(ctxt, NULL);
4993
2
      xmlFree(buf);
4994
2
      ctxt->instate = state;
4995
2
      return;
4996
2
        }
4997
8.55k
        buf = tmp;
4998
8.55k
                    size = new_size;
4999
8.55k
    }
5000
51.3M
    COPY_BUF(l,buf,len,cur);
5001
51.3M
                if (len > maxLength) {
5002
2
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5003
2
                                      "PI %s too big found", target);
5004
2
                    xmlFree(buf);
5005
2
                    ctxt->instate = state;
5006
2
                    return;
5007
2
                }
5008
51.3M
    NEXTL(l);
5009
51.3M
    cur = CUR_CHAR(l);
5010
51.3M
      }
5011
59.9k
      buf[len] = 0;
5012
59.9k
            if (ctxt->instate == XML_PARSER_EOF) {
5013
295
                xmlFree(buf);
5014
295
                return;
5015
295
            }
5016
59.6k
      if (cur != '?') {
5017
9.88k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5018
9.88k
          "ParsePI: PI %s never end ...\n", target);
5019
49.7k
      } else {
5020
49.7k
    if (inputid != ctxt->input->id) {
5021
293
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5022
293
                             "PI declaration doesn't start and stop in"
5023
293
                                   " the same entity\n");
5024
293
    }
5025
49.7k
    SKIP(2);
5026
5027
49.7k
#ifdef LIBXML_CATALOG_ENABLED
5028
49.7k
    if (((state == XML_PARSER_MISC) ||
5029
49.7k
               (state == XML_PARSER_START)) &&
5030
49.7k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5031
5.10k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5032
5.10k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5033
5.10k
      (allow == XML_CATA_ALLOW_ALL))
5034
5.10k
      xmlParseCatalogPI(ctxt, buf);
5035
5.10k
    }
5036
49.7k
#endif
5037
5038
5039
    /*
5040
     * SAX: PI detected.
5041
     */
5042
49.7k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5043
49.7k
        (ctxt->sax->processingInstruction != NULL))
5044
25.6k
        ctxt->sax->processingInstruction(ctxt->userData,
5045
25.6k
                                         target, buf);
5046
49.7k
      }
5047
59.6k
      xmlFree(buf);
5048
59.6k
  } else {
5049
1.87k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5050
1.87k
  }
5051
61.5k
  if (ctxt->instate != XML_PARSER_EOF)
5052
61.2k
      ctxt->instate = state;
5053
61.5k
    }
5054
116k
}
5055
5056
/**
5057
 * xmlParseNotationDecl:
5058
 * @ctxt:  an XML parser context
5059
 *
5060
 * DEPRECATED: Internal function, don't use.
5061
 *
5062
 * Parse a notation declaration. Always consumes '<!'.
5063
 *
5064
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5065
 *
5066
 * Hence there is actually 3 choices:
5067
 *     'PUBLIC' S PubidLiteral
5068
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5069
 * and 'SYSTEM' S SystemLiteral
5070
 *
5071
 * See the NOTE on xmlParseExternalID().
5072
 */
5073
5074
void
5075
13.0k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5076
13.0k
    const xmlChar *name;
5077
13.0k
    xmlChar *Pubid;
5078
13.0k
    xmlChar *Systemid;
5079
5080
13.0k
    if ((CUR != '<') || (NXT(1) != '!'))
5081
0
        return;
5082
13.0k
    SKIP(2);
5083
5084
13.0k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5085
11.2k
  int inputid = ctxt->input->id;
5086
11.2k
  SKIP(8);
5087
11.2k
  if (SKIP_BLANKS == 0) {
5088
299
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5089
299
         "Space required after '<!NOTATION'\n");
5090
299
      return;
5091
299
  }
5092
5093
10.9k
        name = xmlParseName(ctxt);
5094
10.9k
  if (name == NULL) {
5095
281
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5096
281
      return;
5097
281
  }
5098
10.6k
  if (xmlStrchr(name, ':') != NULL) {
5099
3.17k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5100
3.17k
         "colons are forbidden from notation names '%s'\n",
5101
3.17k
         name, NULL, NULL);
5102
3.17k
  }
5103
10.6k
  if (SKIP_BLANKS == 0) {
5104
383
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5105
383
         "Space required after the NOTATION name'\n");
5106
383
      return;
5107
383
  }
5108
5109
  /*
5110
   * Parse the IDs.
5111
   */
5112
10.2k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5113
10.2k
  SKIP_BLANKS;
5114
5115
10.2k
  if (RAW == '>') {
5116
7.08k
      if (inputid != ctxt->input->id) {
5117
2.17k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5118
2.17k
                         "Notation declaration doesn't start and stop"
5119
2.17k
                               " in the same entity\n");
5120
2.17k
      }
5121
7.08k
      NEXT;
5122
7.08k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5123
7.08k
    (ctxt->sax->notationDecl != NULL))
5124
4.54k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5125
7.08k
  } else {
5126
3.15k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5127
3.15k
  }
5128
10.2k
  if (Systemid != NULL) xmlFree(Systemid);
5129
10.2k
  if (Pubid != NULL) xmlFree(Pubid);
5130
10.2k
    }
5131
13.0k
}
5132
5133
/**
5134
 * xmlParseEntityDecl:
5135
 * @ctxt:  an XML parser context
5136
 *
5137
 * DEPRECATED: Internal function, don't use.
5138
 *
5139
 * Parse an entity declaration. Always consumes '<!'.
5140
 *
5141
 * [70] EntityDecl ::= GEDecl | PEDecl
5142
 *
5143
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5144
 *
5145
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5146
 *
5147
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5148
 *
5149
 * [74] PEDef ::= EntityValue | ExternalID
5150
 *
5151
 * [76] NDataDecl ::= S 'NDATA' S Name
5152
 *
5153
 * [ VC: Notation Declared ]
5154
 * The Name must match the declared name of a notation.
5155
 */
5156
5157
void
5158
68.0k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5159
68.0k
    const xmlChar *name = NULL;
5160
68.0k
    xmlChar *value = NULL;
5161
68.0k
    xmlChar *URI = NULL, *literal = NULL;
5162
68.0k
    const xmlChar *ndata = NULL;
5163
68.0k
    int isParameter = 0;
5164
68.0k
    xmlChar *orig = NULL;
5165
5166
68.0k
    if ((CUR != '<') || (NXT(1) != '!'))
5167
0
        return;
5168
68.0k
    SKIP(2);
5169
5170
    /* GROW; done in the caller */
5171
68.0k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5172
67.1k
  int inputid = ctxt->input->id;
5173
67.1k
  SKIP(6);
5174
67.1k
  if (SKIP_BLANKS == 0) {
5175
2.98k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176
2.98k
         "Space required after '<!ENTITY'\n");
5177
2.98k
  }
5178
5179
67.1k
  if (RAW == '%') {
5180
16.0k
      NEXT;
5181
16.0k
      if (SKIP_BLANKS == 0) {
5182
547
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5183
547
             "Space required after '%%'\n");
5184
547
      }
5185
16.0k
      isParameter = 1;
5186
16.0k
  }
5187
5188
67.1k
        name = xmlParseName(ctxt);
5189
67.1k
  if (name == NULL) {
5190
1.67k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5191
1.67k
                     "xmlParseEntityDecl: no name\n");
5192
1.67k
            return;
5193
1.67k
  }
5194
65.4k
  if (xmlStrchr(name, ':') != NULL) {
5195
693
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5196
693
         "colons are forbidden from entities names '%s'\n",
5197
693
         name, NULL, NULL);
5198
693
  }
5199
65.4k
  if (SKIP_BLANKS == 0) {
5200
9.35k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5201
9.35k
         "Space required after the entity name\n");
5202
9.35k
  }
5203
5204
65.4k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5205
  /*
5206
   * handle the various case of definitions...
5207
   */
5208
65.4k
  if (isParameter) {
5209
15.9k
      if ((RAW == '"') || (RAW == '\'')) {
5210
9.25k
          value = xmlParseEntityValue(ctxt, &orig);
5211
9.25k
    if (value) {
5212
7.68k
        if ((ctxt->sax != NULL) &&
5213
7.68k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5214
5.20k
      ctxt->sax->entityDecl(ctxt->userData, name,
5215
5.20k
                        XML_INTERNAL_PARAMETER_ENTITY,
5216
5.20k
            NULL, NULL, value);
5217
7.68k
    }
5218
9.25k
      } else {
5219
6.67k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5220
6.67k
    if ((URI == NULL) && (literal == NULL)) {
5221
1.26k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5222
1.26k
    }
5223
6.67k
    if (URI) {
5224
5.17k
        xmlURIPtr uri;
5225
5226
5.17k
        uri = xmlParseURI((const char *) URI);
5227
5.17k
        if (uri == NULL) {
5228
1.77k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5229
1.77k
             "Invalid URI: %s\n", URI);
5230
      /*
5231
       * This really ought to be a well formedness error
5232
       * but the XML Core WG decided otherwise c.f. issue
5233
       * E26 of the XML erratas.
5234
       */
5235
3.39k
        } else {
5236
3.39k
      if (uri->fragment != NULL) {
5237
          /*
5238
           * Okay this is foolish to block those but not
5239
           * invalid URIs.
5240
           */
5241
237
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5242
3.16k
      } else {
5243
3.16k
          if ((ctxt->sax != NULL) &&
5244
3.16k
        (!ctxt->disableSAX) &&
5245
3.16k
        (ctxt->sax->entityDecl != NULL))
5246
2.20k
        ctxt->sax->entityDecl(ctxt->userData, name,
5247
2.20k
              XML_EXTERNAL_PARAMETER_ENTITY,
5248
2.20k
              literal, URI, NULL);
5249
3.16k
      }
5250
3.39k
      xmlFreeURI(uri);
5251
3.39k
        }
5252
5.17k
    }
5253
6.67k
      }
5254
49.5k
  } else {
5255
49.5k
      if ((RAW == '"') || (RAW == '\'')) {
5256
35.5k
          value = xmlParseEntityValue(ctxt, &orig);
5257
35.5k
    if ((ctxt->sax != NULL) &&
5258
35.5k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5259
16.9k
        ctxt->sax->entityDecl(ctxt->userData, name,
5260
16.9k
        XML_INTERNAL_GENERAL_ENTITY,
5261
16.9k
        NULL, NULL, value);
5262
    /*
5263
     * For expat compatibility in SAX mode.
5264
     */
5265
35.5k
    if ((ctxt->myDoc == NULL) ||
5266
35.5k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5267
13.6k
        if (ctxt->myDoc == NULL) {
5268
3.93k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5269
3.93k
      if (ctxt->myDoc == NULL) {
5270
7
          xmlErrMemory(ctxt, "New Doc failed");
5271
7
          goto done;
5272
7
      }
5273
3.93k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5274
3.93k
        }
5275
13.6k
        if (ctxt->myDoc->intSubset == NULL)
5276
3.93k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5277
3.93k
              BAD_CAST "fake", NULL, NULL);
5278
5279
13.6k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5280
13.6k
                    NULL, NULL, value);
5281
13.6k
    }
5282
35.5k
      } else {
5283
13.9k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5284
13.9k
    if ((URI == NULL) && (literal == NULL)) {
5285
1.42k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5286
1.42k
    }
5287
13.9k
    if (URI) {
5288
12.1k
        xmlURIPtr uri;
5289
5290
12.1k
        uri = xmlParseURI((const char *)URI);
5291
12.1k
        if (uri == NULL) {
5292
1.18k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5293
1.18k
             "Invalid URI: %s\n", URI);
5294
      /*
5295
       * This really ought to be a well formedness error
5296
       * but the XML Core WG decided otherwise c.f. issue
5297
       * E26 of the XML erratas.
5298
       */
5299
10.9k
        } else {
5300
10.9k
      if (uri->fragment != NULL) {
5301
          /*
5302
           * Okay this is foolish to block those but not
5303
           * invalid URIs.
5304
           */
5305
67
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5306
67
      }
5307
10.9k
      xmlFreeURI(uri);
5308
10.9k
        }
5309
12.1k
    }
5310
13.9k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5311
2.08k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5312
2.08k
           "Space required before 'NDATA'\n");
5313
2.08k
    }
5314
13.9k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5315
3.69k
        SKIP(5);
5316
3.69k
        if (SKIP_BLANKS == 0) {
5317
281
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5318
281
               "Space required after 'NDATA'\n");
5319
281
        }
5320
3.69k
        ndata = xmlParseName(ctxt);
5321
3.69k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5322
3.69k
            (ctxt->sax->unparsedEntityDecl != NULL))
5323
2.51k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5324
2.51k
            literal, URI, ndata);
5325
10.2k
    } else {
5326
10.2k
        if ((ctxt->sax != NULL) &&
5327
10.2k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5328
6.82k
      ctxt->sax->entityDecl(ctxt->userData, name,
5329
6.82k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5330
6.82k
            literal, URI, NULL);
5331
        /*
5332
         * For expat compatibility in SAX mode.
5333
         * assuming the entity replacement was asked for
5334
         */
5335
10.2k
        if ((ctxt->replaceEntities != 0) &&
5336
10.2k
      ((ctxt->myDoc == NULL) ||
5337
10.2k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5338
1.38k
      if (ctxt->myDoc == NULL) {
5339
1.05k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5340
1.05k
          if (ctxt->myDoc == NULL) {
5341
4
              xmlErrMemory(ctxt, "New Doc failed");
5342
4
        goto done;
5343
4
          }
5344
1.05k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5345
1.05k
      }
5346
5347
1.38k
      if (ctxt->myDoc->intSubset == NULL)
5348
1.05k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5349
1.05k
            BAD_CAST "fake", NULL, NULL);
5350
1.38k
      xmlSAX2EntityDecl(ctxt, name,
5351
1.38k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5352
1.38k
                  literal, URI, NULL);
5353
1.38k
        }
5354
10.2k
    }
5355
13.9k
      }
5356
49.5k
  }
5357
65.4k
  if (ctxt->instate == XML_PARSER_EOF)
5358
922
      goto done;
5359
64.5k
  SKIP_BLANKS;
5360
64.5k
  if (RAW != '>') {
5361
7.57k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5362
7.57k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5363
7.57k
      xmlHaltParser(ctxt);
5364
56.9k
  } else {
5365
56.9k
      if (inputid != ctxt->input->id) {
5366
233
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5367
233
                         "Entity declaration doesn't start and stop in"
5368
233
                               " the same entity\n");
5369
233
      }
5370
56.9k
      NEXT;
5371
56.9k
  }
5372
64.5k
  if (orig != NULL) {
5373
      /*
5374
       * Ugly mechanism to save the raw entity value.
5375
       */
5376
40.0k
      xmlEntityPtr cur = NULL;
5377
5378
40.0k
      if (isParameter) {
5379
7.76k
          if ((ctxt->sax != NULL) &&
5380
7.76k
        (ctxt->sax->getParameterEntity != NULL))
5381
7.76k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5382
32.2k
      } else {
5383
32.2k
          if ((ctxt->sax != NULL) &&
5384
32.2k
        (ctxt->sax->getEntity != NULL))
5385
32.2k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5386
32.2k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5387
2.55k
        cur = xmlSAX2GetEntity(ctxt, name);
5388
2.55k
    }
5389
32.2k
      }
5390
40.0k
            if ((cur != NULL) && (cur->orig == NULL)) {
5391
27.6k
    cur->orig = orig;
5392
27.6k
                orig = NULL;
5393
27.6k
      }
5394
40.0k
  }
5395
5396
65.4k
done:
5397
65.4k
  if (value != NULL) xmlFree(value);
5398
65.4k
  if (URI != NULL) xmlFree(URI);
5399
65.4k
  if (literal != NULL) xmlFree(literal);
5400
65.4k
        if (orig != NULL) xmlFree(orig);
5401
65.4k
    }
5402
68.0k
}
5403
5404
/**
5405
 * xmlParseDefaultDecl:
5406
 * @ctxt:  an XML parser context
5407
 * @value:  Receive a possible fixed default value for the attribute
5408
 *
5409
 * DEPRECATED: Internal function, don't use.
5410
 *
5411
 * Parse an attribute default declaration
5412
 *
5413
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5414
 *
5415
 * [ VC: Required Attribute ]
5416
 * if the default declaration is the keyword #REQUIRED, then the
5417
 * attribute must be specified for all elements of the type in the
5418
 * attribute-list declaration.
5419
 *
5420
 * [ VC: Attribute Default Legal ]
5421
 * The declared default value must meet the lexical constraints of
5422
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5423
 *
5424
 * [ VC: Fixed Attribute Default ]
5425
 * if an attribute has a default value declared with the #FIXED
5426
 * keyword, instances of that attribute must match the default value.
5427
 *
5428
 * [ WFC: No < in Attribute Values ]
5429
 * handled in xmlParseAttValue()
5430
 *
5431
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5432
 *          or XML_ATTRIBUTE_FIXED.
5433
 */
5434
5435
int
5436
88.6k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5437
88.6k
    int val;
5438
88.6k
    xmlChar *ret;
5439
5440
88.6k
    *value = NULL;
5441
88.6k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5442
258
  SKIP(9);
5443
258
  return(XML_ATTRIBUTE_REQUIRED);
5444
258
    }
5445
88.4k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5446
478
  SKIP(8);
5447
478
  return(XML_ATTRIBUTE_IMPLIED);
5448
478
    }
5449
87.9k
    val = XML_ATTRIBUTE_NONE;
5450
87.9k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5451
1.65k
  SKIP(6);
5452
1.65k
  val = XML_ATTRIBUTE_FIXED;
5453
1.65k
  if (SKIP_BLANKS == 0) {
5454
234
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455
234
         "Space required after '#FIXED'\n");
5456
234
  }
5457
1.65k
    }
5458
87.9k
    ret = xmlParseAttValue(ctxt);
5459
87.9k
    ctxt->instate = XML_PARSER_DTD;
5460
87.9k
    if (ret == NULL) {
5461
6.56k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5462
6.56k
           "Attribute default value declaration error\n");
5463
6.56k
    } else
5464
81.3k
        *value = ret;
5465
87.9k
    return(val);
5466
88.4k
}
5467
5468
/**
5469
 * xmlParseNotationType:
5470
 * @ctxt:  an XML parser context
5471
 *
5472
 * DEPRECATED: Internal function, don't use.
5473
 *
5474
 * parse an Notation attribute type.
5475
 *
5476
 * Note: the leading 'NOTATION' S part has already being parsed...
5477
 *
5478
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5479
 *
5480
 * [ VC: Notation Attributes ]
5481
 * Values of this type must match one of the notation names included
5482
 * in the declaration; all notation names in the declaration must be declared.
5483
 *
5484
 * Returns: the notation attribute tree built while parsing
5485
 */
5486
5487
xmlEnumerationPtr
5488
2.41k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5489
2.41k
    const xmlChar *name;
5490
2.41k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5491
5492
2.41k
    if (RAW != '(') {
5493
512
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5494
512
  return(NULL);
5495
512
    }
5496
3.75k
    do {
5497
3.75k
        NEXT;
5498
3.75k
  SKIP_BLANKS;
5499
3.75k
        name = xmlParseName(ctxt);
5500
3.75k
  if (name == NULL) {
5501
627
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5502
627
         "Name expected in NOTATION declaration\n");
5503
627
            xmlFreeEnumeration(ret);
5504
627
      return(NULL);
5505
627
  }
5506
3.12k
  tmp = ret;
5507
6.89k
  while (tmp != NULL) {
5508
4.39k
      if (xmlStrEqual(name, tmp->name)) {
5509
630
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5510
630
    "standalone: attribute notation value token %s duplicated\n",
5511
630
         name, NULL);
5512
630
    if (!xmlDictOwns(ctxt->dict, name))
5513
0
        xmlFree((xmlChar *) name);
5514
630
    break;
5515
630
      }
5516
3.76k
      tmp = tmp->next;
5517
3.76k
  }
5518
3.12k
  if (tmp == NULL) {
5519
2.49k
      cur = xmlCreateEnumeration(name);
5520
2.49k
      if (cur == NULL) {
5521
4
                xmlFreeEnumeration(ret);
5522
4
                return(NULL);
5523
4
            }
5524
2.49k
      if (last == NULL) ret = last = cur;
5525
876
      else {
5526
876
    last->next = cur;
5527
876
    last = cur;
5528
876
      }
5529
2.49k
  }
5530
3.12k
  SKIP_BLANKS;
5531
3.12k
    } while (RAW == '|');
5532
1.27k
    if (RAW != ')') {
5533
883
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5534
883
        xmlFreeEnumeration(ret);
5535
883
  return(NULL);
5536
883
    }
5537
390
    NEXT;
5538
390
    return(ret);
5539
1.27k
}
5540
5541
/**
5542
 * xmlParseEnumerationType:
5543
 * @ctxt:  an XML parser context
5544
 *
5545
 * DEPRECATED: Internal function, don't use.
5546
 *
5547
 * parse an Enumeration attribute type.
5548
 *
5549
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5550
 *
5551
 * [ VC: Enumeration ]
5552
 * Values of this type must match one of the Nmtoken tokens in
5553
 * the declaration
5554
 *
5555
 * Returns: the enumeration attribute tree built while parsing
5556
 */
5557
5558
xmlEnumerationPtr
5559
13.3k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5560
13.3k
    xmlChar *name;
5561
13.3k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5562
5563
13.3k
    if (RAW != '(') {
5564
9.29k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5565
9.29k
  return(NULL);
5566
9.29k
    }
5567
6.45k
    do {
5568
6.45k
        NEXT;
5569
6.45k
  SKIP_BLANKS;
5570
6.45k
        name = xmlParseNmtoken(ctxt);
5571
6.45k
  if (name == NULL) {
5572
1.29k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5573
1.29k
      return(ret);
5574
1.29k
  }
5575
5.16k
  tmp = ret;
5576
8.27k
  while (tmp != NULL) {
5577
4.55k
      if (xmlStrEqual(name, tmp->name)) {
5578
1.44k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5579
1.44k
    "standalone: attribute enumeration value token %s duplicated\n",
5580
1.44k
         name, NULL);
5581
1.44k
    if (!xmlDictOwns(ctxt->dict, name))
5582
1.44k
        xmlFree(name);
5583
1.44k
    break;
5584
1.44k
      }
5585
3.10k
      tmp = tmp->next;
5586
3.10k
  }
5587
5.16k
  if (tmp == NULL) {
5588
3.72k
      cur = xmlCreateEnumeration(name);
5589
3.72k
      if (!xmlDictOwns(ctxt->dict, name))
5590
3.72k
    xmlFree(name);
5591
3.72k
      if (cur == NULL) {
5592
2
                xmlFreeEnumeration(ret);
5593
2
                return(NULL);
5594
2
            }
5595
3.71k
      if (last == NULL) ret = last = cur;
5596
900
      else {
5597
900
    last->next = cur;
5598
900
    last = cur;
5599
900
      }
5600
3.71k
  }
5601
5.16k
  SKIP_BLANKS;
5602
5.16k
    } while (RAW == '|');
5603
2.80k
    if (RAW != ')') {
5604
1.18k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5605
1.18k
  return(ret);
5606
1.18k
    }
5607
1.61k
    NEXT;
5608
1.61k
    return(ret);
5609
2.80k
}
5610
5611
/**
5612
 * xmlParseEnumeratedType:
5613
 * @ctxt:  an XML parser context
5614
 * @tree:  the enumeration tree built while parsing
5615
 *
5616
 * DEPRECATED: Internal function, don't use.
5617
 *
5618
 * parse an Enumerated attribute type.
5619
 *
5620
 * [57] EnumeratedType ::= NotationType | Enumeration
5621
 *
5622
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5623
 *
5624
 *
5625
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5626
 */
5627
5628
int
5629
16.0k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5630
16.0k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5631
2.69k
  SKIP(8);
5632
2.69k
  if (SKIP_BLANKS == 0) {
5633
275
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5634
275
         "Space required after 'NOTATION'\n");
5635
275
      return(0);
5636
275
  }
5637
2.41k
  *tree = xmlParseNotationType(ctxt);
5638
2.41k
  if (*tree == NULL) return(0);
5639
390
  return(XML_ATTRIBUTE_NOTATION);
5640
2.41k
    }
5641
13.3k
    *tree = xmlParseEnumerationType(ctxt);
5642
13.3k
    if (*tree == NULL) return(0);
5643
2.81k
    return(XML_ATTRIBUTE_ENUMERATION);
5644
13.3k
}
5645
5646
/**
5647
 * xmlParseAttributeType:
5648
 * @ctxt:  an XML parser context
5649
 * @tree:  the enumeration tree built while parsing
5650
 *
5651
 * DEPRECATED: Internal function, don't use.
5652
 *
5653
 * parse the Attribute list def for an element
5654
 *
5655
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5656
 *
5657
 * [55] StringType ::= 'CDATA'
5658
 *
5659
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5660
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5661
 *
5662
 * Validity constraints for attribute values syntax are checked in
5663
 * xmlValidateAttributeValue()
5664
 *
5665
 * [ VC: ID ]
5666
 * Values of type ID must match the Name production. A name must not
5667
 * appear more than once in an XML document as a value of this type;
5668
 * i.e., ID values must uniquely identify the elements which bear them.
5669
 *
5670
 * [ VC: One ID per Element Type ]
5671
 * No element type may have more than one ID attribute specified.
5672
 *
5673
 * [ VC: ID Attribute Default ]
5674
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5675
 *
5676
 * [ VC: IDREF ]
5677
 * Values of type IDREF must match the Name production, and values
5678
 * of type IDREFS must match Names; each IDREF Name must match the value
5679
 * of an ID attribute on some element in the XML document; i.e. IDREF
5680
 * values must match the value of some ID attribute.
5681
 *
5682
 * [ VC: Entity Name ]
5683
 * Values of type ENTITY must match the Name production, values
5684
 * of type ENTITIES must match Names; each Entity Name must match the
5685
 * name of an unparsed entity declared in the DTD.
5686
 *
5687
 * [ VC: Name Token ]
5688
 * Values of type NMTOKEN must match the Nmtoken production; values
5689
 * of type NMTOKENS must match Nmtokens.
5690
 *
5691
 * Returns the attribute type
5692
 */
5693
int
5694
105k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5695
105k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5696
25.5k
  SKIP(5);
5697
25.5k
  return(XML_ATTRIBUTE_CDATA);
5698
79.7k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5699
4.89k
  SKIP(6);
5700
4.89k
  return(XML_ATTRIBUTE_IDREFS);
5701
74.8k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5702
10.6k
  SKIP(5);
5703
10.6k
  return(XML_ATTRIBUTE_IDREF);
5704
64.2k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5705
45.4k
        SKIP(2);
5706
45.4k
  return(XML_ATTRIBUTE_ID);
5707
45.4k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5708
397
  SKIP(6);
5709
397
  return(XML_ATTRIBUTE_ENTITY);
5710
18.3k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5711
806
  SKIP(8);
5712
806
  return(XML_ATTRIBUTE_ENTITIES);
5713
17.5k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5714
536
  SKIP(8);
5715
536
  return(XML_ATTRIBUTE_NMTOKENS);
5716
17.0k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5717
956
  SKIP(7);
5718
956
  return(XML_ATTRIBUTE_NMTOKEN);
5719
956
     }
5720
16.0k
     return(xmlParseEnumeratedType(ctxt, tree));
5721
105k
}
5722
5723
/**
5724
 * xmlParseAttributeListDecl:
5725
 * @ctxt:  an XML parser context
5726
 *
5727
 * DEPRECATED: Internal function, don't use.
5728
 *
5729
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5730
 *
5731
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5732
 *
5733
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5734
 *
5735
 */
5736
void
5737
55.4k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5738
55.4k
    const xmlChar *elemName;
5739
55.4k
    const xmlChar *attrName;
5740
55.4k
    xmlEnumerationPtr tree;
5741
5742
55.4k
    if ((CUR != '<') || (NXT(1) != '!'))
5743
0
        return;
5744
55.4k
    SKIP(2);
5745
5746
55.4k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5747
53.8k
  int inputid = ctxt->input->id;
5748
5749
53.8k
  SKIP(7);
5750
53.8k
  if (SKIP_BLANKS == 0) {
5751
9.98k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5752
9.98k
                     "Space required after '<!ATTLIST'\n");
5753
9.98k
  }
5754
53.8k
        elemName = xmlParseName(ctxt);
5755
53.8k
  if (elemName == NULL) {
5756
1.33k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5757
1.33k
         "ATTLIST: no name for Element\n");
5758
1.33k
      return;
5759
1.33k
  }
5760
52.5k
  SKIP_BLANKS;
5761
52.5k
  GROW;
5762
130k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5763
114k
      int type;
5764
114k
      int def;
5765
114k
      xmlChar *defaultValue = NULL;
5766
5767
114k
      GROW;
5768
114k
            tree = NULL;
5769
114k
      attrName = xmlParseName(ctxt);
5770
114k
      if (attrName == NULL) {
5771
5.91k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5772
5.91k
             "ATTLIST: no name for Attribute\n");
5773
5.91k
    break;
5774
5.91k
      }
5775
108k
      GROW;
5776
108k
      if (SKIP_BLANKS == 0) {
5777
2.81k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5778
2.81k
            "Space required after the attribute name\n");
5779
2.81k
    break;
5780
2.81k
      }
5781
5782
105k
      type = xmlParseAttributeType(ctxt, &tree);
5783
105k
      if (type <= 0) {
5784
12.8k
          break;
5785
12.8k
      }
5786
5787
92.4k
      GROW;
5788
92.4k
      if (SKIP_BLANKS == 0) {
5789
3.79k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5790
3.79k
             "Space required after the attribute type\n");
5791
3.79k
          if (tree != NULL)
5792
1.20k
        xmlFreeEnumeration(tree);
5793
3.79k
    break;
5794
3.79k
      }
5795
5796
88.6k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
5797
88.6k
      if (def <= 0) {
5798
0
                if (defaultValue != NULL)
5799
0
        xmlFree(defaultValue);
5800
0
          if (tree != NULL)
5801
0
        xmlFreeEnumeration(tree);
5802
0
          break;
5803
0
      }
5804
88.6k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5805
56.1k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
5806
5807
88.6k
      GROW;
5808
88.6k
            if (RAW != '>') {
5809
72.8k
    if (SKIP_BLANKS == 0) {
5810
11.1k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5811
11.1k
      "Space required after the attribute default value\n");
5812
11.1k
        if (defaultValue != NULL)
5813
4.87k
      xmlFree(defaultValue);
5814
11.1k
        if (tree != NULL)
5815
983
      xmlFreeEnumeration(tree);
5816
11.1k
        break;
5817
11.1k
    }
5818
72.8k
      }
5819
77.4k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5820
77.4k
    (ctxt->sax->attributeDecl != NULL))
5821
37.2k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5822
37.2k
                          type, def, defaultValue, tree);
5823
40.2k
      else if (tree != NULL)
5824
389
    xmlFreeEnumeration(tree);
5825
5826
77.4k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
5827
77.4k
          (def != XML_ATTRIBUTE_IMPLIED) &&
5828
77.4k
    (def != XML_ATTRIBUTE_REQUIRED)) {
5829
76.4k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5830
76.4k
      }
5831
77.4k
      if (ctxt->sax2) {
5832
77.4k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5833
77.4k
      }
5834
77.4k
      if (defaultValue != NULL)
5835
76.4k
          xmlFree(defaultValue);
5836
77.4k
      GROW;
5837
77.4k
  }
5838
52.5k
  if (RAW == '>') {
5839
16.5k
      if (inputid != ctxt->input->id) {
5840
133
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5841
133
                               "Attribute list declaration doesn't start and"
5842
133
                               " stop in the same entity\n");
5843
133
      }
5844
16.5k
      NEXT;
5845
16.5k
  }
5846
52.5k
    }
5847
55.4k
}
5848
5849
/**
5850
 * xmlParseElementMixedContentDecl:
5851
 * @ctxt:  an XML parser context
5852
 * @inputchk:  the input used for the current entity, needed for boundary checks
5853
 *
5854
 * DEPRECATED: Internal function, don't use.
5855
 *
5856
 * parse the declaration for a Mixed Element content
5857
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5858
 *
5859
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5860
 *                '(' S? '#PCDATA' S? ')'
5861
 *
5862
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5863
 *
5864
 * [ VC: No Duplicate Types ]
5865
 * The same name must not appear more than once in a single
5866
 * mixed-content declaration.
5867
 *
5868
 * returns: the list of the xmlElementContentPtr describing the element choices
5869
 */
5870
xmlElementContentPtr
5871
4.50k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5872
4.50k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
5873
4.50k
    const xmlChar *elem = NULL;
5874
5875
4.50k
    GROW;
5876
4.50k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5877
4.50k
  SKIP(7);
5878
4.50k
  SKIP_BLANKS;
5879
4.50k
  if (RAW == ')') {
5880
2.33k
      if (ctxt->input->id != inputchk) {
5881
83
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5882
83
                               "Element content declaration doesn't start and"
5883
83
                               " stop in the same entity\n");
5884
83
      }
5885
2.33k
      NEXT;
5886
2.33k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5887
2.33k
      if (ret == NULL)
5888
5
          return(NULL);
5889
2.33k
      if (RAW == '*') {
5890
11
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
5891
11
    NEXT;
5892
11
      }
5893
2.33k
      return(ret);
5894
2.33k
  }
5895
2.16k
  if ((RAW == '(') || (RAW == '|')) {
5896
946
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5897
946
      if (ret == NULL) return(NULL);
5898
946
  }
5899
3.41k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
5900
1.50k
      NEXT;
5901
1.50k
      if (elem == NULL) {
5902
726
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5903
726
    if (ret == NULL) {
5904
1
        xmlFreeDocElementContent(ctxt->myDoc, cur);
5905
1
                    return(NULL);
5906
1
                }
5907
725
    ret->c1 = cur;
5908
725
    if (cur != NULL)
5909
725
        cur->parent = ret;
5910
725
    cur = ret;
5911
775
      } else {
5912
775
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5913
775
    if (n == NULL) {
5914
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
5915
0
                    return(NULL);
5916
0
                }
5917
775
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5918
775
    if (n->c1 != NULL)
5919
775
        n->c1->parent = n;
5920
775
          cur->c2 = n;
5921
775
    if (n != NULL)
5922
775
        n->parent = cur;
5923
775
    cur = n;
5924
775
      }
5925
1.50k
      SKIP_BLANKS;
5926
1.50k
      elem = xmlParseName(ctxt);
5927
1.50k
      if (elem == NULL) {
5928
249
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5929
249
      "xmlParseElementMixedContentDecl : Name expected\n");
5930
249
    xmlFreeDocElementContent(ctxt->myDoc, ret);
5931
249
    return(NULL);
5932
249
      }
5933
1.25k
      SKIP_BLANKS;
5934
1.25k
      GROW;
5935
1.25k
  }
5936
1.91k
  if ((RAW == ')') && (NXT(1) == '*')) {
5937
220
      if (elem != NULL) {
5938
220
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5939
220
                                   XML_ELEMENT_CONTENT_ELEMENT);
5940
220
    if (cur->c2 != NULL)
5941
220
        cur->c2->parent = cur;
5942
220
            }
5943
220
            if (ret != NULL)
5944
220
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
5945
220
      if (ctxt->input->id != inputchk) {
5946
13
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5947
13
                               "Element content declaration doesn't start and"
5948
13
                               " stop in the same entity\n");
5949
13
      }
5950
220
      SKIP(2);
5951
1.69k
  } else {
5952
1.69k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
5953
1.69k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5954
1.69k
      return(NULL);
5955
1.69k
  }
5956
5957
1.91k
    } else {
5958
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5959
0
    }
5960
220
    return(ret);
5961
4.50k
}
5962
5963
/**
5964
 * xmlParseElementChildrenContentDeclPriv:
5965
 * @ctxt:  an XML parser context
5966
 * @inputchk:  the input used for the current entity, needed for boundary checks
5967
 * @depth: the level of recursion
5968
 *
5969
 * parse the declaration for a Mixed Element content
5970
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5971
 *
5972
 *
5973
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5974
 *
5975
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5976
 *
5977
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5978
 *
5979
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5980
 *
5981
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5982
 * TODO Parameter-entity replacement text must be properly nested
5983
 *  with parenthesized groups. That is to say, if either of the
5984
 *  opening or closing parentheses in a choice, seq, or Mixed
5985
 *  construct is contained in the replacement text for a parameter
5986
 *  entity, both must be contained in the same replacement text. For
5987
 *  interoperability, if a parameter-entity reference appears in a
5988
 *  choice, seq, or Mixed construct, its replacement text should not
5989
 *  be empty, and neither the first nor last non-blank character of
5990
 *  the replacement text should be a connector (| or ,).
5991
 *
5992
 * Returns the tree of xmlElementContentPtr describing the element
5993
 *          hierarchy.
5994
 */
5995
static xmlElementContentPtr
5996
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5997
92.7k
                                       int depth) {
5998
92.7k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5999
92.7k
    const xmlChar *elem;
6000
92.7k
    xmlChar type = 0;
6001
6002
92.7k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6003
92.7k
        (depth >  2048)) {
6004
219
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6005
219
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6006
219
                          depth);
6007
219
  return(NULL);
6008
219
    }
6009
92.5k
    SKIP_BLANKS;
6010
92.5k
    GROW;
6011
92.5k
    if (RAW == '(') {
6012
55.5k
  int inputid = ctxt->input->id;
6013
6014
        /* Recurse on first child */
6015
55.5k
  NEXT;
6016
55.5k
  SKIP_BLANKS;
6017
55.5k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6018
55.5k
                                                           depth + 1);
6019
55.5k
        if (cur == NULL)
6020
34.0k
            return(NULL);
6021
21.5k
  SKIP_BLANKS;
6022
21.5k
  GROW;
6023
36.9k
    } else {
6024
36.9k
  elem = xmlParseName(ctxt);
6025
36.9k
  if (elem == NULL) {
6026
2.64k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6027
2.64k
      return(NULL);
6028
2.64k
  }
6029
34.3k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6030
34.3k
  if (cur == NULL) {
6031
10
      xmlErrMemory(ctxt, NULL);
6032
10
      return(NULL);
6033
10
  }
6034
34.3k
  GROW;
6035
34.3k
  if (RAW == '?') {
6036
3.54k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6037
3.54k
      NEXT;
6038
30.7k
  } else if (RAW == '*') {
6039
3.77k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6040
3.77k
      NEXT;
6041
26.9k
  } else if (RAW == '+') {
6042
4.75k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6043
4.75k
      NEXT;
6044
22.2k
  } else {
6045
22.2k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6046
22.2k
  }
6047
34.3k
  GROW;
6048
34.3k
    }
6049
55.8k
    SKIP_BLANKS;
6050
89.9k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6051
        /*
6052
   * Each loop we parse one separator and one element.
6053
   */
6054
47.4k
        if (RAW == ',') {
6055
3.28k
      if (type == 0) type = CUR;
6056
6057
      /*
6058
       * Detect "Name | Name , Name" error
6059
       */
6060
765
      else if (type != CUR) {
6061
315
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6062
315
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6063
315
                      type);
6064
315
    if ((last != NULL) && (last != ret))
6065
315
        xmlFreeDocElementContent(ctxt->myDoc, last);
6066
315
    if (ret != NULL)
6067
315
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6068
315
    return(NULL);
6069
315
      }
6070
2.96k
      NEXT;
6071
6072
2.96k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6073
2.96k
      if (op == NULL) {
6074
5
    if ((last != NULL) && (last != ret))
6075
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6076
5
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6077
5
    return(NULL);
6078
5
      }
6079
2.96k
      if (last == NULL) {
6080
2.51k
    op->c1 = ret;
6081
2.51k
    if (ret != NULL)
6082
2.51k
        ret->parent = op;
6083
2.51k
    ret = cur = op;
6084
2.51k
      } else {
6085
449
          cur->c2 = op;
6086
449
    if (op != NULL)
6087
449
        op->parent = cur;
6088
449
    op->c1 = last;
6089
449
    if (last != NULL)
6090
449
        last->parent = op;
6091
449
    cur =op;
6092
449
    last = NULL;
6093
449
      }
6094
44.1k
  } else if (RAW == '|') {
6095
41.6k
      if (type == 0) type = CUR;
6096
6097
      /*
6098
       * Detect "Name , Name | Name" error
6099
       */
6100
16.9k
      else if (type != CUR) {
6101
204
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6102
204
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6103
204
          type);
6104
204
    if ((last != NULL) && (last != ret))
6105
204
        xmlFreeDocElementContent(ctxt->myDoc, last);
6106
204
    if (ret != NULL)
6107
204
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6108
204
    return(NULL);
6109
204
      }
6110
41.4k
      NEXT;
6111
6112
41.4k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6113
41.4k
      if (op == NULL) {
6114
5
    if ((last != NULL) && (last != ret))
6115
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6116
5
    if (ret != NULL)
6117
5
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6118
5
    return(NULL);
6119
5
      }
6120
41.4k
      if (last == NULL) {
6121
24.7k
    op->c1 = ret;
6122
24.7k
    if (ret != NULL)
6123
24.7k
        ret->parent = op;
6124
24.7k
    ret = cur = op;
6125
24.7k
      } else {
6126
16.7k
          cur->c2 = op;
6127
16.7k
    if (op != NULL)
6128
16.7k
        op->parent = cur;
6129
16.7k
    op->c1 = last;
6130
16.7k
    if (last != NULL)
6131
16.7k
        last->parent = op;
6132
16.7k
    cur =op;
6133
16.7k
    last = NULL;
6134
16.7k
      }
6135
41.4k
  } else {
6136
2.50k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6137
2.50k
      if ((last != NULL) && (last != ret))
6138
1.10k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6139
2.50k
      if (ret != NULL)
6140
2.50k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6141
2.50k
      return(NULL);
6142
2.50k
  }
6143
44.3k
  GROW;
6144
44.3k
  SKIP_BLANKS;
6145
44.3k
  GROW;
6146
44.3k
  if (RAW == '(') {
6147
26.9k
      int inputid = ctxt->input->id;
6148
      /* Recurse on second child */
6149
26.9k
      NEXT;
6150
26.9k
      SKIP_BLANKS;
6151
26.9k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6152
26.9k
                                                          depth + 1);
6153
26.9k
            if (last == NULL) {
6154
9.82k
    if (ret != NULL)
6155
9.82k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6156
9.82k
    return(NULL);
6157
9.82k
            }
6158
17.1k
      SKIP_BLANKS;
6159
17.4k
  } else {
6160
17.4k
      elem = xmlParseName(ctxt);
6161
17.4k
      if (elem == NULL) {
6162
407
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6163
407
    if (ret != NULL)
6164
407
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6165
407
    return(NULL);
6166
407
      }
6167
17.0k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6168
17.0k
      if (last == NULL) {
6169
3
    if (ret != NULL)
6170
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6171
3
    return(NULL);
6172
3
      }
6173
17.0k
      if (RAW == '?') {
6174
8.67k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6175
8.67k
    NEXT;
6176
8.67k
      } else if (RAW == '*') {
6177
1.62k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6178
1.62k
    NEXT;
6179
6.72k
      } else if (RAW == '+') {
6180
240
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6181
240
    NEXT;
6182
6.48k
      } else {
6183
6.48k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6184
6.48k
      }
6185
17.0k
  }
6186
34.1k
  SKIP_BLANKS;
6187
34.1k
  GROW;
6188
34.1k
    }
6189
42.5k
    if ((cur != NULL) && (last != NULL)) {
6190
15.3k
        cur->c2 = last;
6191
15.3k
  if (last != NULL)
6192
15.3k
      last->parent = cur;
6193
15.3k
    }
6194
42.5k
    if (ctxt->input->id != inputchk) {
6195
77
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6196
77
                       "Element content declaration doesn't start and stop in"
6197
77
                       " the same entity\n");
6198
77
    }
6199
42.5k
    NEXT;
6200
42.5k
    if (RAW == '?') {
6201
2.18k
  if (ret != NULL) {
6202
2.18k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6203
2.18k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6204
1.35k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6205
833
      else
6206
833
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6207
2.18k
  }
6208
2.18k
  NEXT;
6209
40.3k
    } else if (RAW == '*') {
6210
4.36k
  if (ret != NULL) {
6211
4.36k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6212
4.36k
      cur = ret;
6213
      /*
6214
       * Some normalization:
6215
       * (a | b* | c?)* == (a | b | c)*
6216
       */
6217
6.50k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6218
2.13k
    if ((cur->c1 != NULL) &&
6219
2.13k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6220
2.13k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6221
1.17k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6222
2.13k
    if ((cur->c2 != NULL) &&
6223
2.13k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6224
2.13k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6225
1.05k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6226
2.13k
    cur = cur->c2;
6227
2.13k
      }
6228
4.36k
  }
6229
4.36k
  NEXT;
6230
36.0k
    } else if (RAW == '+') {
6231
19.2k
  if (ret != NULL) {
6232
19.2k
      int found = 0;
6233
6234
19.2k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6235
19.2k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6236
4.55k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6237
14.6k
      else
6238
14.6k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6239
      /*
6240
       * Some normalization:
6241
       * (a | b*)+ == (a | b)*
6242
       * (a | b?)+ == (a | b)*
6243
       */
6244
46.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6245
27.3k
    if ((cur->c1 != NULL) &&
6246
27.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6247
27.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6248
7.98k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6249
7.98k
        found = 1;
6250
7.98k
    }
6251
27.3k
    if ((cur->c2 != NULL) &&
6252
27.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6253
27.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6254
11.0k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6255
11.0k
        found = 1;
6256
11.0k
    }
6257
27.3k
    cur = cur->c2;
6258
27.3k
      }
6259
19.2k
      if (found)
6260
11.7k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6261
19.2k
  }
6262
19.2k
  NEXT;
6263
19.2k
    }
6264
42.5k
    return(ret);
6265
55.8k
}
6266
6267
/**
6268
 * xmlParseElementChildrenContentDecl:
6269
 * @ctxt:  an XML parser context
6270
 * @inputchk:  the input used for the current entity, needed for boundary checks
6271
 *
6272
 * DEPRECATED: Internal function, don't use.
6273
 *
6274
 * Parse the declaration for an element's children content (not Mixed).
6275
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6276
 *
6277
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6278
 *
6279
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6280
 *
6281
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6282
 *
6283
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6284
 *
6285
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6286
 * TODO Parameter-entity replacement text must be properly nested
6287
 *  with parenthesized groups. That is to say, if either of the
6288
 *  opening or closing parentheses in a choice, seq, or Mixed
6289
 *  construct is contained in the replacement text for a parameter
6290
 *  entity, both must be contained in the same replacement text. For
6291
 *  interoperability, if a parameter-entity reference appears in a
6292
 *  choice, seq, or Mixed construct, its replacement text should not
6293
 *  be empty, and neither the first nor last non-blank character of
6294
 *  the replacement text should be a connector (| or ,).
6295
 *
6296
 * Returns the tree of xmlElementContentPtr describing the element
6297
 *          hierarchy.
6298
 */
6299
xmlElementContentPtr
6300
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6301
    /* stub left for API/ABI compat */
6302
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6303
0
}
6304
6305
/**
6306
 * xmlParseElementContentDecl:
6307
 * @ctxt:  an XML parser context
6308
 * @name:  the name of the element being defined.
6309
 * @result:  the Element Content pointer will be stored here if any
6310
 *
6311
 * DEPRECATED: Internal function, don't use.
6312
 *
6313
 * parse the declaration for an Element content either Mixed or Children,
6314
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6315
 *
6316
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6317
 *
6318
 * Returns the type of element content XML_ELEMENT_TYPE_xxx, or -1 in case of error
6319
 */
6320
6321
int
6322
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6323
15.0k
                           xmlElementContentPtr *result) {
6324
6325
15.0k
    xmlElementContentPtr tree = NULL;
6326
15.0k
    int inputid = ctxt->input->id;
6327
15.0k
    int res;
6328
6329
15.0k
    *result = NULL;
6330
6331
15.0k
    if (RAW != '(') {
6332
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6333
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6334
0
  return(-1);
6335
0
    }
6336
15.0k
    NEXT;
6337
15.0k
    GROW;
6338
15.0k
    if (ctxt->instate == XML_PARSER_EOF)
6339
336
        return(-1);
6340
14.7k
    SKIP_BLANKS;
6341
14.7k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6342
4.50k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6343
4.50k
  res = XML_ELEMENT_TYPE_MIXED;
6344
10.2k
    } else {
6345
10.2k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6346
10.2k
  res = XML_ELEMENT_TYPE_ELEMENT;
6347
10.2k
    }
6348
14.7k
    SKIP_BLANKS;
6349
14.7k
    *result = tree;
6350
14.7k
    return(res);
6351
15.0k
}
6352
6353
/**
6354
 * xmlParseElementDecl:
6355
 * @ctxt:  an XML parser context
6356
 *
6357
 * DEPRECATED: Internal function, don't use.
6358
 *
6359
 * Parse an element declaration. Always consumes '<!'.
6360
 *
6361
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6362
 *
6363
 * [ VC: Unique Element Type Declaration ]
6364
 * No element type may be declared more than once
6365
 *
6366
 * Returns the type of the element, or -1 in case of error
6367
 */
6368
int
6369
22.5k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6370
22.5k
    const xmlChar *name;
6371
22.5k
    int ret = -1;
6372
22.5k
    xmlElementContentPtr content  = NULL;
6373
6374
22.5k
    if ((CUR != '<') || (NXT(1) != '!'))
6375
0
        return(ret);
6376
22.5k
    SKIP(2);
6377
6378
    /* GROW; done in the caller */
6379
22.5k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6380
21.3k
  int inputid = ctxt->input->id;
6381
6382
21.3k
  SKIP(7);
6383
21.3k
  if (SKIP_BLANKS == 0) {
6384
239
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6385
239
               "Space required after 'ELEMENT'\n");
6386
239
      return(-1);
6387
239
  }
6388
21.1k
        name = xmlParseName(ctxt);
6389
21.1k
  if (name == NULL) {
6390
247
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6391
247
         "xmlParseElementDecl: no name for Element\n");
6392
247
      return(-1);
6393
247
  }
6394
20.8k
  if (SKIP_BLANKS == 0) {
6395
4.34k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6396
4.34k
         "Space required after the element name\n");
6397
4.34k
  }
6398
20.8k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6399
2.42k
      SKIP(5);
6400
      /*
6401
       * Element must always be empty.
6402
       */
6403
2.42k
      ret = XML_ELEMENT_TYPE_EMPTY;
6404
18.4k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6405
18.4k
             (NXT(2) == 'Y')) {
6406
1.34k
      SKIP(3);
6407
      /*
6408
       * Element is a generic container.
6409
       */
6410
1.34k
      ret = XML_ELEMENT_TYPE_ANY;
6411
17.0k
  } else if (RAW == '(') {
6412
15.0k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6413
15.0k
  } else {
6414
      /*
6415
       * [ WFC: PEs in Internal Subset ] error handling.
6416
       */
6417
2.03k
      if ((RAW == '%') && (ctxt->external == 0) &&
6418
2.03k
          (ctxt->inputNr == 1)) {
6419
216
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6420
216
    "PEReference: forbidden within markup decl in internal subset\n");
6421
1.81k
      } else {
6422
1.81k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6423
1.81k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6424
1.81k
            }
6425
2.03k
      return(-1);
6426
2.03k
  }
6427
6428
18.8k
  SKIP_BLANKS;
6429
6430
18.8k
  if (RAW != '>') {
6431
8.57k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6432
8.57k
      if (content != NULL) {
6433
524
    xmlFreeDocElementContent(ctxt->myDoc, content);
6434
524
      }
6435
10.2k
  } else {
6436
10.2k
      if (inputid != ctxt->input->id) {
6437
580
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6438
580
                               "Element declaration doesn't start and stop in"
6439
580
                               " the same entity\n");
6440
580
      }
6441
6442
10.2k
      NEXT;
6443
10.2k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6444
10.2k
    (ctxt->sax->elementDecl != NULL)) {
6445
5.84k
    if (content != NULL)
6446
3.16k
        content->parent = NULL;
6447
5.84k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6448
5.84k
                           content);
6449
5.84k
    if ((content != NULL) && (content->parent == NULL)) {
6450
        /*
6451
         * this is a trick: if xmlAddElementDecl is called,
6452
         * instead of copying the full tree it is plugged directly
6453
         * if called from the parser. Avoid duplicating the
6454
         * interfaces or change the API/ABI
6455
         */
6456
1.04k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6457
1.04k
    }
6458
5.84k
      } else if (content != NULL) {
6459
2.76k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6460
2.76k
      }
6461
10.2k
  }
6462
18.8k
    }
6463
20.0k
    return(ret);
6464
22.5k
}
6465
6466
/**
6467
 * xmlParseConditionalSections:
6468
 * @ctxt:  an XML parser context
6469
 *
6470
 * Parse a conditional section. Always consumes '<!['.
6471
 *
6472
 * [61] conditionalSect ::= includeSect | ignoreSect
6473
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6474
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6475
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6476
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6477
 */
6478
6479
static void
6480
3.95k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6481
3.95k
    int *inputIds = NULL;
6482
3.95k
    size_t inputIdsSize = 0;
6483
3.95k
    size_t depth = 0;
6484
6485
14.2k
    while (ctxt->instate != XML_PARSER_EOF) {
6486
14.1k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6487
5.82k
            int id = ctxt->input->id;
6488
6489
5.82k
            SKIP(3);
6490
5.82k
            SKIP_BLANKS;
6491
6492
5.82k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6493
2.92k
                SKIP(7);
6494
2.92k
                SKIP_BLANKS;
6495
2.92k
                if (RAW != '[') {
6496
251
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6497
251
                    xmlHaltParser(ctxt);
6498
251
                    goto error;
6499
251
                }
6500
2.67k
                if (ctxt->input->id != id) {
6501
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6502
0
                                   "All markup of the conditional section is"
6503
0
                                   " not in the same entity\n");
6504
0
                }
6505
2.67k
                NEXT;
6506
6507
2.67k
                if (inputIdsSize <= depth) {
6508
1.34k
                    int *tmp;
6509
6510
1.34k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6511
1.34k
                    tmp = (int *) xmlRealloc(inputIds,
6512
1.34k
                            inputIdsSize * sizeof(int));
6513
1.34k
                    if (tmp == NULL) {
6514
3
                        xmlErrMemory(ctxt, NULL);
6515
3
                        goto error;
6516
3
                    }
6517
1.33k
                    inputIds = tmp;
6518
1.33k
                }
6519
2.67k
                inputIds[depth] = id;
6520
2.67k
                depth++;
6521
2.89k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6522
850
                size_t ignoreDepth = 0;
6523
6524
850
                SKIP(6);
6525
850
                SKIP_BLANKS;
6526
850
                if (RAW != '[') {
6527
4
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6528
4
                    xmlHaltParser(ctxt);
6529
4
                    goto error;
6530
4
                }
6531
846
                if (ctxt->input->id != id) {
6532
33
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6533
33
                                   "All markup of the conditional section is"
6534
33
                                   " not in the same entity\n");
6535
33
                }
6536
846
                NEXT;
6537
6538
26.0k
                while (RAW != 0) {
6539
25.4k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6540
742
                        SKIP(3);
6541
742
                        ignoreDepth++;
6542
                        /* Check for integer overflow */
6543
742
                        if (ignoreDepth == 0) {
6544
0
                            xmlErrMemory(ctxt, NULL);
6545
0
                            goto error;
6546
0
                        }
6547
24.6k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6548
24.6k
                               (NXT(2) == '>')) {
6549
689
                        if (ignoreDepth == 0)
6550
241
                            break;
6551
448
                        SKIP(3);
6552
448
                        ignoreDepth--;
6553
24.0k
                    } else {
6554
24.0k
                        NEXT;
6555
24.0k
                    }
6556
25.4k
                }
6557
6558
846
    if (RAW == 0) {
6559
605
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6560
605
                    goto error;
6561
605
    }
6562
241
                if (ctxt->input->id != id) {
6563
28
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6564
28
                                   "All markup of the conditional section is"
6565
28
                                   " not in the same entity\n");
6566
28
                }
6567
241
                SKIP(3);
6568
2.04k
            } else {
6569
2.04k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6570
2.04k
                xmlHaltParser(ctxt);
6571
2.04k
                goto error;
6572
2.04k
            }
6573
8.35k
        } else if ((depth > 0) &&
6574
8.35k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6575
114
            depth--;
6576
114
            if (ctxt->input->id != inputIds[depth]) {
6577
3
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6578
3
                               "All markup of the conditional section is not"
6579
3
                               " in the same entity\n");
6580
3
            }
6581
114
            SKIP(3);
6582
8.23k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6583
7.60k
            xmlParseMarkupDecl(ctxt);
6584
7.60k
        } else {
6585
629
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6586
629
            xmlHaltParser(ctxt);
6587
629
            goto error;
6588
629
        }
6589
6590
10.6k
        if (depth == 0)
6591
355
            break;
6592
6593
10.2k
        SKIP_BLANKS;
6594
10.2k
        SHRINK;
6595
10.2k
        GROW;
6596
10.2k
    }
6597
6598
3.95k
error:
6599
3.95k
    xmlFree(inputIds);
6600
3.95k
}
6601
6602
/**
6603
 * xmlParseMarkupDecl:
6604
 * @ctxt:  an XML parser context
6605
 *
6606
 * DEPRECATED: Internal function, don't use.
6607
 *
6608
 * Parse markup declarations. Always consumes '<!' or '<?'.
6609
 *
6610
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6611
 *                     NotationDecl | PI | Comment
6612
 *
6613
 * [ VC: Proper Declaration/PE Nesting ]
6614
 * Parameter-entity replacement text must be properly nested with
6615
 * markup declarations. That is to say, if either the first character
6616
 * or the last character of a markup declaration (markupdecl above) is
6617
 * contained in the replacement text for a parameter-entity reference,
6618
 * both must be contained in the same replacement text.
6619
 *
6620
 * [ WFC: PEs in Internal Subset ]
6621
 * In the internal DTD subset, parameter-entity references can occur
6622
 * only where markup declarations can occur, not within markup declarations.
6623
 * (This does not apply to references that occur in external parameter
6624
 * entities or to the external subset.)
6625
 */
6626
void
6627
201k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6628
201k
    GROW;
6629
201k
    if (CUR == '<') {
6630
200k
        if (NXT(1) == '!') {
6631
177k
      switch (NXT(2)) {
6632
91.0k
          case 'E':
6633
91.0k
        if (NXT(3) == 'L')
6634
22.5k
      xmlParseElementDecl(ctxt);
6635
68.4k
        else if (NXT(3) == 'N')
6636
68.0k
      xmlParseEntityDecl(ctxt);
6637
443
                    else
6638
443
                        SKIP(2);
6639
91.0k
        break;
6640
55.4k
          case 'A':
6641
55.4k
        xmlParseAttributeListDecl(ctxt);
6642
55.4k
        break;
6643
13.0k
          case 'N':
6644
13.0k
        xmlParseNotationDecl(ctxt);
6645
13.0k
        break;
6646
14.4k
          case '-':
6647
14.4k
        xmlParseComment(ctxt);
6648
14.4k
        break;
6649
3.90k
    default:
6650
        /* there is an error but it will be detected later */
6651
3.90k
                    SKIP(2);
6652
3.90k
        break;
6653
177k
      }
6654
177k
  } else if (NXT(1) == '?') {
6655
23.0k
      xmlParsePI(ctxt);
6656
23.0k
  }
6657
200k
    }
6658
6659
    /*
6660
     * detect requirement to exit there and act accordingly
6661
     * and avoid having instate overridden later on
6662
     */
6663
201k
    if (ctxt->instate == XML_PARSER_EOF)
6664
10.5k
        return;
6665
6666
190k
    ctxt->instate = XML_PARSER_DTD;
6667
190k
}
6668
6669
/**
6670
 * xmlParseTextDecl:
6671
 * @ctxt:  an XML parser context
6672
 *
6673
 * DEPRECATED: Internal function, don't use.
6674
 *
6675
 * parse an XML declaration header for external entities
6676
 *
6677
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6678
 */
6679
6680
void
6681
10.8k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6682
10.8k
    xmlChar *version;
6683
10.8k
    const xmlChar *encoding;
6684
10.8k
    int oldstate;
6685
6686
    /*
6687
     * We know that '<?xml' is here.
6688
     */
6689
10.8k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6690
10.3k
  SKIP(5);
6691
10.3k
    } else {
6692
461
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6693
461
  return;
6694
461
    }
6695
6696
    /* Avoid expansion of parameter entities when skipping blanks. */
6697
10.3k
    oldstate = ctxt->instate;
6698
10.3k
    ctxt->instate = XML_PARSER_START;
6699
6700
10.3k
    if (SKIP_BLANKS == 0) {
6701
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6702
0
           "Space needed after '<?xml'\n");
6703
0
    }
6704
6705
    /*
6706
     * We may have the VersionInfo here.
6707
     */
6708
10.3k
    version = xmlParseVersionInfo(ctxt);
6709
10.3k
    if (version == NULL)
6710
5.96k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6711
4.37k
    else {
6712
4.37k
  if (SKIP_BLANKS == 0) {
6713
589
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6714
589
               "Space needed here\n");
6715
589
  }
6716
4.37k
    }
6717
10.3k
    ctxt->input->version = version;
6718
6719
    /*
6720
     * We must have the encoding declaration
6721
     */
6722
10.3k
    encoding = xmlParseEncodingDecl(ctxt);
6723
10.3k
    if (ctxt->instate == XML_PARSER_EOF)
6724
45
        return;
6725
10.2k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6726
  /*
6727
   * The XML REC instructs us to stop parsing right here
6728
   */
6729
370
        ctxt->instate = oldstate;
6730
370
        return;
6731
370
    }
6732
9.92k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6733
266
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6734
266
           "Missing encoding in text declaration\n");
6735
266
    }
6736
6737
9.92k
    SKIP_BLANKS;
6738
9.92k
    if ((RAW == '?') && (NXT(1) == '>')) {
6739
2.57k
        SKIP(2);
6740
7.35k
    } else if (RAW == '>') {
6741
        /* Deprecated old WD ... */
6742
660
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6743
660
  NEXT;
6744
6.69k
    } else {
6745
6.69k
        int c;
6746
6747
6.69k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6748
34.2M
        while ((c = CUR) != 0) {
6749
34.2M
            NEXT;
6750
34.2M
            if (c == '>')
6751
2.11k
                break;
6752
34.2M
        }
6753
6.69k
    }
6754
6755
9.92k
    ctxt->instate = oldstate;
6756
9.92k
}
6757
6758
/**
6759
 * xmlParseExternalSubset:
6760
 * @ctxt:  an XML parser context
6761
 * @ExternalID: the external identifier
6762
 * @SystemID: the system identifier (or URL)
6763
 *
6764
 * parse Markup declarations from an external subset
6765
 *
6766
 * [30] extSubset ::= textDecl? extSubsetDecl
6767
 *
6768
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6769
 */
6770
void
6771
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6772
6.82k
                       const xmlChar *SystemID) {
6773
6.82k
    xmlDetectSAX2(ctxt);
6774
6.82k
    GROW;
6775
6776
6.82k
    if ((ctxt->encoding == NULL) &&
6777
6.82k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
6778
6.60k
        xmlChar start[4];
6779
6.60k
  xmlCharEncoding enc;
6780
6781
6.60k
  start[0] = RAW;
6782
6.60k
  start[1] = NXT(1);
6783
6.60k
  start[2] = NXT(2);
6784
6.60k
  start[3] = NXT(3);
6785
6.60k
  enc = xmlDetectCharEncoding(start, 4);
6786
6.60k
  if (enc != XML_CHAR_ENCODING_NONE)
6787
2.15k
      xmlSwitchEncoding(ctxt, enc);
6788
6.60k
    }
6789
6790
6.82k
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6791
1.91k
  xmlParseTextDecl(ctxt);
6792
1.91k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6793
      /*
6794
       * The XML REC instructs us to stop parsing right here
6795
       */
6796
243
      xmlHaltParser(ctxt);
6797
243
      return;
6798
243
  }
6799
1.91k
    }
6800
6.58k
    if (ctxt->myDoc == NULL) {
6801
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6802
0
  if (ctxt->myDoc == NULL) {
6803
0
      xmlErrMemory(ctxt, "New Doc failed");
6804
0
      return;
6805
0
  }
6806
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
6807
0
    }
6808
6.58k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6809
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6810
6811
6.58k
    ctxt->instate = XML_PARSER_DTD;
6812
6.58k
    ctxt->external = 1;
6813
6.58k
    SKIP_BLANKS;
6814
19.9k
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
6815
15.6k
  GROW;
6816
15.6k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6817
2.97k
            xmlParseConditionalSections(ctxt);
6818
12.6k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6819
10.4k
            xmlParseMarkupDecl(ctxt);
6820
10.4k
        } else {
6821
2.27k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6822
2.27k
            xmlHaltParser(ctxt);
6823
2.27k
            return;
6824
2.27k
        }
6825
13.3k
        SKIP_BLANKS;
6826
13.3k
        SHRINK;
6827
13.3k
    }
6828
6829
4.30k
    if (RAW != 0) {
6830
9
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6831
9
    }
6832
6833
4.30k
}
6834
6835
/**
6836
 * xmlParseReference:
6837
 * @ctxt:  an XML parser context
6838
 *
6839
 * DEPRECATED: Internal function, don't use.
6840
 *
6841
 * Parse and handle entity references in content. Depending on the SAX
6842
 * interface, this may end up in a call to characters() if this is a
6843
 * CharRef or a predefined entity, if there is no reference() callback,
6844
 * or if the parser was asked to switch to that mode.
6845
 *
6846
 * Always consumes '&'.
6847
 *
6848
 * [67] Reference ::= EntityRef | CharRef
6849
 */
6850
void
6851
176k
xmlParseReference(xmlParserCtxtPtr ctxt) {
6852
176k
    xmlEntityPtr ent;
6853
176k
    xmlChar *val;
6854
176k
    int was_checked;
6855
176k
    xmlNodePtr list = NULL;
6856
176k
    xmlParserErrors ret = XML_ERR_OK;
6857
6858
6859
176k
    if (RAW != '&')
6860
0
        return;
6861
6862
    /*
6863
     * Simple case of a CharRef
6864
     */
6865
176k
    if (NXT(1) == '#') {
6866
85.3k
  int i = 0;
6867
85.3k
  xmlChar out[16];
6868
85.3k
  int hex = NXT(2);
6869
85.3k
  int value = xmlParseCharRef(ctxt);
6870
6871
85.3k
  if (value == 0)
6872
12.9k
      return;
6873
72.4k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6874
      /*
6875
       * So we are using non-UTF-8 buffers
6876
       * Check that the char fits in 8 bits, if not
6877
       * generate a CharRef.
6878
       */
6879
26.2k
      if (value <= 0xFF) {
6880
12.9k
    out[0] = value;
6881
12.9k
    out[1] = 0;
6882
12.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6883
12.9k
        (!ctxt->disableSAX))
6884
0
        ctxt->sax->characters(ctxt->userData, out, 1);
6885
13.2k
      } else {
6886
13.2k
    if ((hex == 'x') || (hex == 'X'))
6887
11.4k
        snprintf((char *)out, sizeof(out), "#x%X", value);
6888
1.85k
    else
6889
1.85k
        snprintf((char *)out, sizeof(out), "#%d", value);
6890
13.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6891
13.2k
        (!ctxt->disableSAX))
6892
0
        ctxt->sax->reference(ctxt->userData, out);
6893
13.2k
      }
6894
46.2k
  } else {
6895
      /*
6896
       * Just encode the value in UTF-8
6897
       */
6898
46.2k
      COPY_BUF(0 ,out, i, value);
6899
46.2k
      out[i] = 0;
6900
46.2k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6901
46.2k
    (!ctxt->disableSAX))
6902
22.1k
    ctxt->sax->characters(ctxt->userData, out, i);
6903
46.2k
  }
6904
72.4k
  return;
6905
85.3k
    }
6906
6907
    /*
6908
     * We are seeing an entity reference
6909
     */
6910
91.4k
    ent = xmlParseEntityRef(ctxt);
6911
91.4k
    if (ent == NULL) return;
6912
57.4k
    if (!ctxt->wellFormed)
6913
19.3k
  return;
6914
38.0k
    was_checked = ent->flags & XML_ENT_PARSED;
6915
6916
    /* special case of predefined entities */
6917
38.0k
    if ((ent->name == NULL) ||
6918
38.0k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6919
13.2k
  val = ent->content;
6920
13.2k
  if (val == NULL) return;
6921
  /*
6922
   * inline the entity.
6923
   */
6924
13.2k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6925
13.2k
      (!ctxt->disableSAX))
6926
13.2k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6927
13.2k
  return;
6928
13.2k
    }
6929
6930
    /*
6931
     * The first reference to the entity triggers a parsing phase
6932
     * where the ent->children is filled with the result from
6933
     * the parsing.
6934
     * Note: external parsed entities will not be loaded, it is not
6935
     * required for a non-validating parser, unless the parsing option
6936
     * of validating, or substituting entities were given. Doing so is
6937
     * far more secure as the parser will only process data coming from
6938
     * the document entity by default.
6939
     */
6940
24.7k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
6941
24.7k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
6942
8.37k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
6943
8.37k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
6944
6945
  /*
6946
   * This is a bit hackish but this seems the best
6947
   * way to make sure both SAX and DOM entity support
6948
   * behaves okay.
6949
   */
6950
8.37k
  void *user_data;
6951
8.37k
  if (ctxt->userData == ctxt)
6952
8.37k
      user_data = NULL;
6953
0
  else
6954
0
      user_data = ctxt->userData;
6955
6956
        /* Avoid overflow as much as possible */
6957
8.37k
        ctxt->sizeentcopy = 0;
6958
6959
8.37k
        if (ent->flags & XML_ENT_EXPANDING) {
6960
294
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6961
294
            xmlHaltParser(ctxt);
6962
294
            return;
6963
294
        }
6964
6965
8.08k
        ent->flags |= XML_ENT_EXPANDING;
6966
6967
  /*
6968
   * Check that this entity is well formed
6969
   * 4.3.2: An internal general parsed entity is well-formed
6970
   * if its replacement text matches the production labeled
6971
   * content.
6972
   */
6973
8.08k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6974
2.96k
      ctxt->depth++;
6975
2.96k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6976
2.96k
                                                user_data, &list);
6977
2.96k
      ctxt->depth--;
6978
6979
5.12k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6980
5.12k
      ctxt->depth++;
6981
5.12k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6982
5.12k
                                     user_data, ctxt->depth, ent->URI,
6983
5.12k
             ent->ExternalID, &list);
6984
5.12k
      ctxt->depth--;
6985
5.12k
  } else {
6986
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
6987
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6988
0
       "invalid entity type found\n", NULL);
6989
0
  }
6990
6991
8.08k
        ent->flags &= ~XML_ENT_EXPANDING;
6992
8.08k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
6993
8.08k
        ent->expandedSize = ctxt->sizeentcopy;
6994
8.08k
  if (ret == XML_ERR_ENTITY_LOOP) {
6995
591
            xmlHaltParser(ctxt);
6996
591
      xmlFreeNodeList(list);
6997
591
      return;
6998
591
  }
6999
7.49k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7000
0
      xmlFreeNodeList(list);
7001
0
      return;
7002
0
  }
7003
7004
7.49k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7005
1.95k
            ent->children = list;
7006
            /*
7007
             * Prune it directly in the generated document
7008
             * except for single text nodes.
7009
             */
7010
1.95k
            if ((ctxt->replaceEntities == 0) ||
7011
1.95k
                (ctxt->parseMode == XML_PARSE_READER) ||
7012
1.95k
                ((list->type == XML_TEXT_NODE) &&
7013
1.95k
                 (list->next == NULL))) {
7014
600
                ent->owner = 1;
7015
1.20k
                while (list != NULL) {
7016
600
                    list->parent = (xmlNodePtr) ent;
7017
600
                    if (list->doc != ent->doc)
7018
0
                        xmlSetTreeDoc(list, ent->doc);
7019
600
                    if (list->next == NULL)
7020
600
                        ent->last = list;
7021
600
                    list = list->next;
7022
600
                }
7023
600
                list = NULL;
7024
1.35k
            } else {
7025
1.35k
                ent->owner = 0;
7026
8.79k
                while (list != NULL) {
7027
7.44k
                    list->parent = (xmlNodePtr) ctxt->node;
7028
7.44k
                    list->doc = ctxt->myDoc;
7029
7.44k
                    if (list->next == NULL)
7030
1.35k
                        ent->last = list;
7031
7.44k
                    list = list->next;
7032
7.44k
                }
7033
1.35k
                list = ent->children;
7034
#ifdef LIBXML_LEGACY_ENABLED
7035
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7036
                    xmlAddEntityReference(ent, list, NULL);
7037
#endif /* LIBXML_LEGACY_ENABLED */
7038
1.35k
            }
7039
5.54k
  } else if ((ret != XML_ERR_OK) &&
7040
5.54k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7041
4.87k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7042
4.87k
         "Entity '%s' failed to parse\n", ent->name);
7043
4.87k
            if (ent->content != NULL)
7044
886
                ent->content[0] = 0;
7045
4.87k
  } else if (list != NULL) {
7046
0
      xmlFreeNodeList(list);
7047
0
      list = NULL;
7048
0
  }
7049
7050
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7051
7.49k
        was_checked = 0;
7052
7.49k
    }
7053
7054
    /*
7055
     * Now that the entity content has been gathered
7056
     * provide it to the application, this can take different forms based
7057
     * on the parsing modes.
7058
     */
7059
23.8k
    if (ent->children == NULL) {
7060
  /*
7061
   * Probably running in SAX mode and the callbacks don't
7062
   * build the entity content. So unless we already went
7063
   * though parsing for first checking go though the entity
7064
   * content to generate callbacks associated to the entity
7065
   */
7066
11.9k
  if (was_checked != 0) {
7067
6.39k
      void *user_data;
7068
      /*
7069
       * This is a bit hackish but this seems the best
7070
       * way to make sure both SAX and DOM entity support
7071
       * behaves okay.
7072
       */
7073
6.39k
      if (ctxt->userData == ctxt)
7074
6.39k
    user_data = NULL;
7075
0
      else
7076
0
    user_data = ctxt->userData;
7077
7078
6.39k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7079
31
    ctxt->depth++;
7080
31
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7081
31
           ent->content, user_data, NULL);
7082
31
    ctxt->depth--;
7083
6.36k
      } else if (ent->etype ==
7084
6.36k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7085
6.36k
          unsigned long oldsizeentities = ctxt->sizeentities;
7086
7087
6.36k
    ctxt->depth++;
7088
6.36k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7089
6.36k
         ctxt->sax, user_data, ctxt->depth,
7090
6.36k
         ent->URI, ent->ExternalID, NULL);
7091
6.36k
    ctxt->depth--;
7092
7093
                /* Undo the change to sizeentities */
7094
6.36k
                ctxt->sizeentities = oldsizeentities;
7095
6.36k
      } else {
7096
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7097
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7098
0
           "invalid entity type found\n", NULL);
7099
0
      }
7100
6.39k
      if (ret == XML_ERR_ENTITY_LOOP) {
7101
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7102
0
    return;
7103
0
      }
7104
6.39k
            if (xmlParserEntityCheck(ctxt, 0))
7105
0
                return;
7106
6.39k
  }
7107
11.9k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7108
11.9k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7109
      /*
7110
       * Entity reference callback comes second, it's somewhat
7111
       * superfluous but a compatibility to historical behaviour
7112
       */
7113
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7114
0
  }
7115
11.9k
  return;
7116
11.9k
    }
7117
7118
    /*
7119
     * We also check for amplification if entities aren't substituted.
7120
     * They might be expanded later.
7121
     */
7122
11.9k
    if ((was_checked != 0) &&
7123
11.9k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7124
28
        return;
7125
7126
    /*
7127
     * If we didn't get any children for the entity being built
7128
     */
7129
11.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7130
11.8k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7131
  /*
7132
   * Create a node.
7133
   */
7134
0
  ctxt->sax->reference(ctxt->userData, ent->name);
7135
0
  return;
7136
0
    }
7137
7138
11.8k
    if (ctxt->replaceEntities)  {
7139
  /*
7140
   * There is a problem on the handling of _private for entities
7141
   * (bug 155816): Should we copy the content of the field from
7142
   * the entity (possibly overwriting some value set by the user
7143
   * when a copy is created), should we leave it alone, or should
7144
   * we try to take care of different situations?  The problem
7145
   * is exacerbated by the usage of this field by the xmlReader.
7146
   * To fix this bug, we look at _private on the created node
7147
   * and, if it's NULL, we copy in whatever was in the entity.
7148
   * If it's not NULL we leave it alone.  This is somewhat of a
7149
   * hack - maybe we should have further tests to determine
7150
   * what to do.
7151
   */
7152
11.8k
  if (ctxt->node != NULL) {
7153
      /*
7154
       * Seems we are generating the DOM content, do
7155
       * a simple tree copy for all references except the first
7156
       * In the first occurrence list contains the replacement.
7157
       */
7158
11.8k
      if (((list == NULL) && (ent->owner == 0)) ||
7159
11.8k
    (ctxt->parseMode == XML_PARSE_READER)) {
7160
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7161
7162
    /*
7163
     * when operating on a reader, the entities definitions
7164
     * are always owning the entities subtree.
7165
    if (ctxt->parseMode == XML_PARSE_READER)
7166
        ent->owner = 1;
7167
     */
7168
7169
0
    cur = ent->children;
7170
0
    while (cur != NULL) {
7171
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7172
0
        if (nw != NULL) {
7173
0
      if (nw->_private == NULL)
7174
0
          nw->_private = cur->_private;
7175
0
      if (firstChild == NULL){
7176
0
          firstChild = nw;
7177
0
      }
7178
0
      nw = xmlAddChild(ctxt->node, nw);
7179
0
        }
7180
0
        if (cur == ent->last) {
7181
      /*
7182
       * needed to detect some strange empty
7183
       * node cases in the reader tests
7184
       */
7185
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7186
0
          (nw != NULL) &&
7187
0
          (nw->type == XML_ELEMENT_NODE) &&
7188
0
          (nw->children == NULL))
7189
0
          nw->extra = 1;
7190
7191
0
      break;
7192
0
        }
7193
0
        cur = cur->next;
7194
0
    }
7195
#ifdef LIBXML_LEGACY_ENABLED
7196
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7197
      xmlAddEntityReference(ent, firstChild, nw);
7198
#endif /* LIBXML_LEGACY_ENABLED */
7199
11.8k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7200
11.8k
    xmlNodePtr nw = NULL, cur, next, last,
7201
11.8k
         firstChild = NULL;
7202
7203
    /*
7204
     * Copy the entity child list and make it the new
7205
     * entity child list. The goal is to make sure any
7206
     * ID or REF referenced will be the one from the
7207
     * document content and not the entity copy.
7208
     */
7209
11.8k
    cur = ent->children;
7210
11.8k
    ent->children = NULL;
7211
11.8k
    last = ent->last;
7212
11.8k
    ent->last = NULL;
7213
33.0k
    while (cur != NULL) {
7214
33.0k
        next = cur->next;
7215
33.0k
        cur->next = NULL;
7216
33.0k
        cur->parent = NULL;
7217
33.0k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7218
33.0k
        if (nw != NULL) {
7219
32.8k
      if (nw->_private == NULL)
7220
32.8k
          nw->_private = cur->_private;
7221
32.8k
      if (firstChild == NULL){
7222
11.8k
          firstChild = cur;
7223
11.8k
      }
7224
32.8k
      xmlAddChild((xmlNodePtr) ent, nw);
7225
32.8k
        }
7226
33.0k
        xmlAddChild(ctxt->node, cur);
7227
33.0k
        if (cur == last)
7228
11.8k
      break;
7229
21.1k
        cur = next;
7230
21.1k
    }
7231
11.8k
    if (ent->owner == 0)
7232
1.35k
        ent->owner = 1;
7233
#ifdef LIBXML_LEGACY_ENABLED
7234
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7235
      xmlAddEntityReference(ent, firstChild, nw);
7236
#endif /* LIBXML_LEGACY_ENABLED */
7237
11.8k
      } else {
7238
0
    const xmlChar *nbktext;
7239
7240
    /*
7241
     * the name change is to avoid coalescing of the
7242
     * node with a possible previous text one which
7243
     * would make ent->children a dangling pointer
7244
     */
7245
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7246
0
          -1);
7247
0
    if (ent->children->type == XML_TEXT_NODE)
7248
0
        ent->children->name = nbktext;
7249
0
    if ((ent->last != ent->children) &&
7250
0
        (ent->last->type == XML_TEXT_NODE))
7251
0
        ent->last->name = nbktext;
7252
0
    xmlAddChildList(ctxt->node, ent->children);
7253
0
      }
7254
7255
      /*
7256
       * This is to avoid a nasty side effect, see
7257
       * characters() in SAX.c
7258
       */
7259
11.8k
      ctxt->nodemem = 0;
7260
11.8k
      ctxt->nodelen = 0;
7261
11.8k
      return;
7262
11.8k
  }
7263
11.8k
    }
7264
11.8k
}
7265
7266
/**
7267
 * xmlParseEntityRef:
7268
 * @ctxt:  an XML parser context
7269
 *
7270
 * DEPRECATED: Internal function, don't use.
7271
 *
7272
 * Parse an entitiy reference. Always consumes '&'.
7273
 *
7274
 * [68] EntityRef ::= '&' Name ';'
7275
 *
7276
 * [ WFC: Entity Declared ]
7277
 * In a document without any DTD, a document with only an internal DTD
7278
 * subset which contains no parameter entity references, or a document
7279
 * with "standalone='yes'", the Name given in the entity reference
7280
 * must match that in an entity declaration, except that well-formed
7281
 * documents need not declare any of the following entities: amp, lt,
7282
 * gt, apos, quot.  The declaration of a parameter entity must precede
7283
 * any reference to it.  Similarly, the declaration of a general entity
7284
 * must precede any reference to it which appears in a default value in an
7285
 * attribute-list declaration. Note that if entities are declared in the
7286
 * external subset or in external parameter entities, a non-validating
7287
 * processor is not obligated to read and process their declarations;
7288
 * for such documents, the rule that an entity must be declared is a
7289
 * well-formedness constraint only if standalone='yes'.
7290
 *
7291
 * [ WFC: Parsed Entity ]
7292
 * An entity reference must not contain the name of an unparsed entity
7293
 *
7294
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7295
 */
7296
xmlEntityPtr
7297
158k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7298
158k
    const xmlChar *name;
7299
158k
    xmlEntityPtr ent = NULL;
7300
7301
158k
    GROW;
7302
158k
    if (ctxt->instate == XML_PARSER_EOF)
7303
220
        return(NULL);
7304
7305
158k
    if (RAW != '&')
7306
0
        return(NULL);
7307
158k
    NEXT;
7308
158k
    name = xmlParseName(ctxt);
7309
158k
    if (name == NULL) {
7310
21.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7311
21.8k
           "xmlParseEntityRef: no name\n");
7312
21.8k
        return(NULL);
7313
21.8k
    }
7314
136k
    if (RAW != ';') {
7315
16.3k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7316
16.3k
  return(NULL);
7317
16.3k
    }
7318
120k
    NEXT;
7319
7320
    /*
7321
     * Predefined entities override any extra definition
7322
     */
7323
120k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7324
120k
        ent = xmlGetPredefinedEntity(name);
7325
120k
        if (ent != NULL)
7326
18.4k
            return(ent);
7327
120k
    }
7328
7329
    /*
7330
     * Ask first SAX for entity resolution, otherwise try the
7331
     * entities which may have stored in the parser context.
7332
     */
7333
101k
    if (ctxt->sax != NULL) {
7334
101k
  if (ctxt->sax->getEntity != NULL)
7335
101k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7336
101k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7337
101k
      (ctxt->options & XML_PARSE_OLDSAX))
7338
0
      ent = xmlGetPredefinedEntity(name);
7339
101k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7340
101k
      (ctxt->userData==ctxt)) {
7341
22.9k
      ent = xmlSAX2GetEntity(ctxt, name);
7342
22.9k
  }
7343
101k
    }
7344
101k
    if (ctxt->instate == XML_PARSER_EOF)
7345
279
  return(NULL);
7346
    /*
7347
     * [ WFC: Entity Declared ]
7348
     * In a document without any DTD, a document with only an
7349
     * internal DTD subset which contains no parameter entity
7350
     * references, or a document with "standalone='yes'", the
7351
     * Name given in the entity reference must match that in an
7352
     * entity declaration, except that well-formed documents
7353
     * need not declare any of the following entities: amp, lt,
7354
     * gt, apos, quot.
7355
     * The declaration of a parameter entity must precede any
7356
     * reference to it.
7357
     * Similarly, the declaration of a general entity must
7358
     * precede any reference to it which appears in a default
7359
     * value in an attribute-list declaration. Note that if
7360
     * entities are declared in the external subset or in
7361
     * external parameter entities, a non-validating processor
7362
     * is not obligated to read and process their declarations;
7363
     * for such documents, the rule that an entity must be
7364
     * declared is a well-formedness constraint only if
7365
     * standalone='yes'.
7366
     */
7367
101k
    if (ent == NULL) {
7368
36.0k
  if ((ctxt->standalone == 1) ||
7369
36.0k
      ((ctxt->hasExternalSubset == 0) &&
7370
34.5k
       (ctxt->hasPErefs == 0))) {
7371
10.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7372
10.9k
         "Entity '%s' not defined\n", name);
7373
25.1k
  } else {
7374
25.1k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7375
25.1k
         "Entity '%s' not defined\n", name);
7376
25.1k
      if ((ctxt->inSubset == 0) &&
7377
25.1k
    (ctxt->sax != NULL) &&
7378
25.1k
    (ctxt->sax->reference != NULL)) {
7379
24.5k
    ctxt->sax->reference(ctxt->userData, name);
7380
24.5k
      }
7381
25.1k
  }
7382
36.0k
  ctxt->valid = 0;
7383
36.0k
    }
7384
7385
    /*
7386
     * [ WFC: Parsed Entity ]
7387
     * An entity reference must not contain the name of an
7388
     * unparsed entity
7389
     */
7390
65.5k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7391
301
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7392
301
     "Entity reference to unparsed entity %s\n", name);
7393
301
    }
7394
7395
    /*
7396
     * [ WFC: No External Entity References ]
7397
     * Attribute values cannot contain direct or indirect
7398
     * entity references to external entities.
7399
     */
7400
65.2k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7401
65.2k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7402
3.61k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7403
3.61k
       "Attribute references external entity '%s'\n", name);
7404
3.61k
    }
7405
    /*
7406
     * [ WFC: No < in Attribute Values ]
7407
     * The replacement text of any entity referred to directly or
7408
     * indirectly in an attribute value (other than "&lt;") must
7409
     * not contain a <.
7410
     */
7411
61.6k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7412
61.6k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7413
22.1k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7414
8.50k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7415
1.12k
                ent->flags |= XML_ENT_CONTAINS_LT;
7416
8.50k
            ent->flags |= XML_ENT_CHECKED_LT;
7417
8.50k
        }
7418
22.1k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7419
12.6k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7420
12.6k
                    "'<' in entity '%s' is not allowed in attributes "
7421
12.6k
                    "values\n", name);
7422
22.1k
    }
7423
7424
    /*
7425
     * Internal check, no parameter entities here ...
7426
     */
7427
39.5k
    else {
7428
39.5k
  switch (ent->etype) {
7429
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7430
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7431
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7432
0
       "Attempt to reference the parameter entity '%s'\n",
7433
0
            name);
7434
0
      break;
7435
39.5k
      default:
7436
39.5k
      break;
7437
39.5k
  }
7438
39.5k
    }
7439
7440
    /*
7441
     * [ WFC: No Recursion ]
7442
     * A parsed entity must not contain a recursive reference
7443
     * to itself, either directly or indirectly.
7444
     * Done somewhere else
7445
     */
7446
101k
    return(ent);
7447
101k
}
7448
7449
/**
7450
 * xmlParseStringEntityRef:
7451
 * @ctxt:  an XML parser context
7452
 * @str:  a pointer to an index in the string
7453
 *
7454
 * parse ENTITY references declarations, but this version parses it from
7455
 * a string value.
7456
 *
7457
 * [68] EntityRef ::= '&' Name ';'
7458
 *
7459
 * [ WFC: Entity Declared ]
7460
 * In a document without any DTD, a document with only an internal DTD
7461
 * subset which contains no parameter entity references, or a document
7462
 * with "standalone='yes'", the Name given in the entity reference
7463
 * must match that in an entity declaration, except that well-formed
7464
 * documents need not declare any of the following entities: amp, lt,
7465
 * gt, apos, quot.  The declaration of a parameter entity must precede
7466
 * any reference to it.  Similarly, the declaration of a general entity
7467
 * must precede any reference to it which appears in a default value in an
7468
 * attribute-list declaration. Note that if entities are declared in the
7469
 * external subset or in external parameter entities, a non-validating
7470
 * processor is not obligated to read and process their declarations;
7471
 * for such documents, the rule that an entity must be declared is a
7472
 * well-formedness constraint only if standalone='yes'.
7473
 *
7474
 * [ WFC: Parsed Entity ]
7475
 * An entity reference must not contain the name of an unparsed entity
7476
 *
7477
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7478
 * is updated to the current location in the string.
7479
 */
7480
static xmlEntityPtr
7481
79.2k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7482
79.2k
    xmlChar *name;
7483
79.2k
    const xmlChar *ptr;
7484
79.2k
    xmlChar cur;
7485
79.2k
    xmlEntityPtr ent = NULL;
7486
7487
79.2k
    if ((str == NULL) || (*str == NULL))
7488
0
        return(NULL);
7489
79.2k
    ptr = *str;
7490
79.2k
    cur = *ptr;
7491
79.2k
    if (cur != '&')
7492
0
  return(NULL);
7493
7494
79.2k
    ptr++;
7495
79.2k
    name = xmlParseStringName(ctxt, &ptr);
7496
79.2k
    if (name == NULL) {
7497
916
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7498
916
           "xmlParseStringEntityRef: no name\n");
7499
916
  *str = ptr;
7500
916
  return(NULL);
7501
916
    }
7502
78.3k
    if (*ptr != ';') {
7503
6.51k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7504
6.51k
        xmlFree(name);
7505
6.51k
  *str = ptr;
7506
6.51k
  return(NULL);
7507
6.51k
    }
7508
71.8k
    ptr++;
7509
7510
7511
    /*
7512
     * Predefined entities override any extra definition
7513
     */
7514
71.8k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7515
71.8k
        ent = xmlGetPredefinedEntity(name);
7516
71.8k
        if (ent != NULL) {
7517
34.6k
            xmlFree(name);
7518
34.6k
            *str = ptr;
7519
34.6k
            return(ent);
7520
34.6k
        }
7521
71.8k
    }
7522
7523
    /*
7524
     * Ask first SAX for entity resolution, otherwise try the
7525
     * entities which may have stored in the parser context.
7526
     */
7527
37.1k
    if (ctxt->sax != NULL) {
7528
37.1k
  if (ctxt->sax->getEntity != NULL)
7529
37.1k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7530
37.1k
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7531
0
      ent = xmlGetPredefinedEntity(name);
7532
37.1k
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7533
16.3k
      ent = xmlSAX2GetEntity(ctxt, name);
7534
16.3k
  }
7535
37.1k
    }
7536
37.1k
    if (ctxt->instate == XML_PARSER_EOF) {
7537
0
  xmlFree(name);
7538
0
  return(NULL);
7539
0
    }
7540
7541
    /*
7542
     * [ WFC: Entity Declared ]
7543
     * In a document without any DTD, a document with only an
7544
     * internal DTD subset which contains no parameter entity
7545
     * references, or a document with "standalone='yes'", the
7546
     * Name given in the entity reference must match that in an
7547
     * entity declaration, except that well-formed documents
7548
     * need not declare any of the following entities: amp, lt,
7549
     * gt, apos, quot.
7550
     * The declaration of a parameter entity must precede any
7551
     * reference to it.
7552
     * Similarly, the declaration of a general entity must
7553
     * precede any reference to it which appears in a default
7554
     * value in an attribute-list declaration. Note that if
7555
     * entities are declared in the external subset or in
7556
     * external parameter entities, a non-validating processor
7557
     * is not obligated to read and process their declarations;
7558
     * for such documents, the rule that an entity must be
7559
     * declared is a well-formedness constraint only if
7560
     * standalone='yes'.
7561
     */
7562
37.1k
    if (ent == NULL) {
7563
16.3k
  if ((ctxt->standalone == 1) ||
7564
16.3k
      ((ctxt->hasExternalSubset == 0) &&
7565
16.1k
       (ctxt->hasPErefs == 0))) {
7566
16.0k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7567
16.0k
         "Entity '%s' not defined\n", name);
7568
16.0k
  } else {
7569
380
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7570
380
        "Entity '%s' not defined\n",
7571
380
        name);
7572
380
  }
7573
  /* TODO ? check regressions ctxt->valid = 0; */
7574
16.3k
    }
7575
7576
    /*
7577
     * [ WFC: Parsed Entity ]
7578
     * An entity reference must not contain the name of an
7579
     * unparsed entity
7580
     */
7581
20.7k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7582
198
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7583
198
     "Entity reference to unparsed entity %s\n", name);
7584
198
    }
7585
7586
    /*
7587
     * [ WFC: No External Entity References ]
7588
     * Attribute values cannot contain direct or indirect
7589
     * entity references to external entities.
7590
     */
7591
20.5k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7592
20.5k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7593
289
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7594
289
   "Attribute references external entity '%s'\n", name);
7595
289
    }
7596
    /*
7597
     * [ WFC: No < in Attribute Values ]
7598
     * The replacement text of any entity referred to directly or
7599
     * indirectly in an attribute value (other than "&lt;") must
7600
     * not contain a <.
7601
     */
7602
20.3k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7603
20.3k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7604
20.3k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7605
1.47k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7606
425
                ent->flags |= XML_ENT_CONTAINS_LT;
7607
1.47k
            ent->flags |= XML_ENT_CHECKED_LT;
7608
1.47k
        }
7609
20.3k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7610
13.0k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7611
13.0k
                    "'<' in entity '%s' is not allowed in attributes "
7612
13.0k
                    "values\n", name);
7613
20.3k
    }
7614
7615
    /*
7616
     * Internal check, no parameter entities here ...
7617
     */
7618
0
    else {
7619
0
  switch (ent->etype) {
7620
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7621
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7622
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7623
0
       "Attempt to reference the parameter entity '%s'\n",
7624
0
          name);
7625
0
      break;
7626
0
      default:
7627
0
      break;
7628
0
  }
7629
0
    }
7630
7631
    /*
7632
     * [ WFC: No Recursion ]
7633
     * A parsed entity must not contain a recursive reference
7634
     * to itself, either directly or indirectly.
7635
     * Done somewhere else
7636
     */
7637
7638
37.1k
    xmlFree(name);
7639
37.1k
    *str = ptr;
7640
37.1k
    return(ent);
7641
37.1k
}
7642
7643
/**
7644
 * xmlParsePEReference:
7645
 * @ctxt:  an XML parser context
7646
 *
7647
 * DEPRECATED: Internal function, don't use.
7648
 *
7649
 * Parse a parameter entity reference. Always consumes '%'.
7650
 *
7651
 * The entity content is handled directly by pushing it's content as
7652
 * a new input stream.
7653
 *
7654
 * [69] PEReference ::= '%' Name ';'
7655
 *
7656
 * [ WFC: No Recursion ]
7657
 * A parsed entity must not contain a recursive
7658
 * reference to itself, either directly or indirectly.
7659
 *
7660
 * [ WFC: Entity Declared ]
7661
 * In a document without any DTD, a document with only an internal DTD
7662
 * subset which contains no parameter entity references, or a document
7663
 * with "standalone='yes'", ...  ... The declaration of a parameter
7664
 * entity must precede any reference to it...
7665
 *
7666
 * [ VC: Entity Declared ]
7667
 * In a document with an external subset or external parameter entities
7668
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7669
 * must precede any reference to it...
7670
 *
7671
 * [ WFC: In DTD ]
7672
 * Parameter-entity references may only appear in the DTD.
7673
 * NOTE: misleading but this is handled.
7674
 */
7675
void
7676
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7677
125k
{
7678
125k
    const xmlChar *name;
7679
125k
    xmlEntityPtr entity = NULL;
7680
125k
    xmlParserInputPtr input;
7681
7682
125k
    if (RAW != '%')
7683
0
        return;
7684
125k
    NEXT;
7685
125k
    name = xmlParseName(ctxt);
7686
125k
    if (name == NULL) {
7687
48.0k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7688
48.0k
  return;
7689
48.0k
    }
7690
77.0k
    if (xmlParserDebugEntities)
7691
0
  xmlGenericError(xmlGenericErrorContext,
7692
0
    "PEReference: %s\n", name);
7693
77.0k
    if (RAW != ';') {
7694
21.0k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7695
21.0k
        return;
7696
21.0k
    }
7697
7698
55.9k
    NEXT;
7699
7700
    /*
7701
     * Request the entity from SAX
7702
     */
7703
55.9k
    if ((ctxt->sax != NULL) &&
7704
55.9k
  (ctxt->sax->getParameterEntity != NULL))
7705
55.9k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7706
55.9k
    if (ctxt->instate == XML_PARSER_EOF)
7707
296
  return;
7708
55.6k
    if (entity == NULL) {
7709
  /*
7710
   * [ WFC: Entity Declared ]
7711
   * In a document without any DTD, a document with only an
7712
   * internal DTD subset which contains no parameter entity
7713
   * references, or a document with "standalone='yes'", ...
7714
   * ... The declaration of a parameter entity must precede
7715
   * any reference to it...
7716
   */
7717
13.5k
  if ((ctxt->standalone == 1) ||
7718
13.5k
      ((ctxt->hasExternalSubset == 0) &&
7719
13.3k
       (ctxt->hasPErefs == 0))) {
7720
1.36k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7721
1.36k
            "PEReference: %%%s; not found\n",
7722
1.36k
            name);
7723
12.1k
  } else {
7724
      /*
7725
       * [ VC: Entity Declared ]
7726
       * In a document with an external subset or external
7727
       * parameter entities with "standalone='no'", ...
7728
       * ... The declaration of a parameter entity must
7729
       * precede any reference to it...
7730
       */
7731
12.1k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7732
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7733
0
                                 "PEReference: %%%s; not found\n",
7734
0
                                 name, NULL);
7735
0
            } else
7736
12.1k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7737
12.1k
                              "PEReference: %%%s; not found\n",
7738
12.1k
                              name, NULL);
7739
12.1k
            ctxt->valid = 0;
7740
12.1k
  }
7741
42.1k
    } else {
7742
  /*
7743
   * Internal checking in case the entity quest barfed
7744
   */
7745
42.1k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7746
42.1k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7747
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7748
0
      "Internal: %%%s; is not a parameter entity\n",
7749
0
        name, NULL);
7750
42.1k
  } else {
7751
42.1k
            xmlChar start[4];
7752
42.1k
            xmlCharEncoding enc;
7753
42.1k
            unsigned long parentConsumed;
7754
42.1k
            xmlEntityPtr oldEnt;
7755
7756
42.1k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7757
42.1k
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7758
42.1k
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7759
42.1k
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7760
42.1k
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7761
42.1k
    (ctxt->replaceEntities == 0) &&
7762
42.1k
    (ctxt->validate == 0))
7763
0
    return;
7764
7765
42.1k
            if (entity->flags & XML_ENT_EXPANDING) {
7766
214
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7767
214
                xmlHaltParser(ctxt);
7768
214
                return;
7769
214
            }
7770
7771
            /* Must be computed from old input before pushing new input. */
7772
41.9k
            parentConsumed = ctxt->input->parentConsumed;
7773
41.9k
            oldEnt = ctxt->input->entity;
7774
41.9k
            if ((oldEnt == NULL) ||
7775
41.9k
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7776
41.9k
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
7777
41.9k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
7778
41.9k
                xmlSaturatedAddSizeT(&parentConsumed,
7779
41.9k
                                     ctxt->input->cur - ctxt->input->base);
7780
41.9k
            }
7781
7782
41.9k
      input = xmlNewEntityInputStream(ctxt, entity);
7783
41.9k
      if (xmlPushInput(ctxt, input) < 0) {
7784
7.05k
                xmlFreeInputStream(input);
7785
7.05k
    return;
7786
7.05k
            }
7787
7788
34.8k
            entity->flags |= XML_ENT_EXPANDING;
7789
7790
34.8k
            input->parentConsumed = parentConsumed;
7791
7792
34.8k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7793
                /*
7794
                 * Get the 4 first bytes and decode the charset
7795
                 * if enc != XML_CHAR_ENCODING_NONE
7796
                 * plug some encoding conversion routines.
7797
                 * Note that, since we may have some non-UTF8
7798
                 * encoding (like UTF16, bug 135229), the 'length'
7799
                 * is not known, but we can calculate based upon
7800
                 * the amount of data in the buffer.
7801
                 */
7802
17.4k
                GROW
7803
17.4k
                if (ctxt->instate == XML_PARSER_EOF)
7804
0
                    return;
7805
17.4k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
7806
17.1k
                    start[0] = RAW;
7807
17.1k
                    start[1] = NXT(1);
7808
17.1k
                    start[2] = NXT(2);
7809
17.1k
                    start[3] = NXT(3);
7810
17.1k
                    enc = xmlDetectCharEncoding(start, 4);
7811
17.1k
                    if (enc != XML_CHAR_ENCODING_NONE) {
7812
13.6k
                        xmlSwitchEncoding(ctxt, enc);
7813
13.6k
                    }
7814
17.1k
                }
7815
7816
17.4k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7817
17.4k
                    (IS_BLANK_CH(NXT(5)))) {
7818
6.64k
                    xmlParseTextDecl(ctxt);
7819
6.64k
                }
7820
17.4k
            }
7821
34.8k
  }
7822
42.1k
    }
7823
48.4k
    ctxt->hasPErefs = 1;
7824
48.4k
}
7825
7826
/**
7827
 * xmlLoadEntityContent:
7828
 * @ctxt:  an XML parser context
7829
 * @entity: an unloaded system entity
7830
 *
7831
 * Load the original content of the given system entity from the
7832
 * ExternalID/SystemID given. This is to be used for Included in Literal
7833
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entity references
7834
 *
7835
 * Returns 0 in case of success and -1 in case of failure
7836
 */
7837
static int
7838
82
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7839
82
    xmlParserInputPtr input;
7840
82
    xmlBufferPtr buf;
7841
82
    int l, c;
7842
7843
82
    if ((ctxt == NULL) || (entity == NULL) ||
7844
82
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7845
82
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7846
82
  (entity->content != NULL)) {
7847
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7848
0
              "xmlLoadEntityContent parameter error");
7849
0
        return(-1);
7850
0
    }
7851
7852
82
    if (xmlParserDebugEntities)
7853
0
  xmlGenericError(xmlGenericErrorContext,
7854
0
    "Reading %s entity content input\n", entity->name);
7855
7856
82
    buf = xmlBufferCreate();
7857
82
    if (buf == NULL) {
7858
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7859
0
              "xmlLoadEntityContent parameter error");
7860
0
        return(-1);
7861
0
    }
7862
82
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
7863
7864
82
    input = xmlNewEntityInputStream(ctxt, entity);
7865
82
    if (input == NULL) {
7866
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7867
0
              "xmlLoadEntityContent input error");
7868
0
  xmlBufferFree(buf);
7869
0
        return(-1);
7870
0
    }
7871
7872
    /*
7873
     * Push the entity as the current input, read char by char
7874
     * saving to the buffer until the end of the entity or an error
7875
     */
7876
82
    if (xmlPushInput(ctxt, input) < 0) {
7877
0
        xmlBufferFree(buf);
7878
0
  xmlFreeInputStream(input);
7879
0
  return(-1);
7880
0
    }
7881
7882
82
    GROW;
7883
82
    c = CUR_CHAR(l);
7884
300k
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7885
300k
           (IS_CHAR(c))) {
7886
300k
        xmlBufferAdd(buf, ctxt->input->cur, l);
7887
300k
  NEXTL(l);
7888
300k
  c = CUR_CHAR(l);
7889
300k
    }
7890
82
    if (ctxt->instate == XML_PARSER_EOF) {
7891
0
  xmlBufferFree(buf);
7892
0
  return(-1);
7893
0
    }
7894
7895
82
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7896
56
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
7897
56
        xmlPopInput(ctxt);
7898
56
    } else if (!IS_CHAR(c)) {
7899
26
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7900
26
                          "xmlLoadEntityContent: invalid char value %d\n",
7901
26
                    c);
7902
26
  xmlBufferFree(buf);
7903
26
  return(-1);
7904
26
    }
7905
56
    entity->content = buf->content;
7906
56
    entity->length = buf->use;
7907
56
    buf->content = NULL;
7908
56
    xmlBufferFree(buf);
7909
7910
56
    return(0);
7911
82
}
7912
7913
/**
7914
 * xmlParseStringPEReference:
7915
 * @ctxt:  an XML parser context
7916
 * @str:  a pointer to an index in the string
7917
 *
7918
 * parse PEReference declarations
7919
 *
7920
 * [69] PEReference ::= '%' Name ';'
7921
 *
7922
 * [ WFC: No Recursion ]
7923
 * A parsed entity must not contain a recursive
7924
 * reference to itself, either directly or indirectly.
7925
 *
7926
 * [ WFC: Entity Declared ]
7927
 * In a document without any DTD, a document with only an internal DTD
7928
 * subset which contains no parameter entity references, or a document
7929
 * with "standalone='yes'", ...  ... The declaration of a parameter
7930
 * entity must precede any reference to it...
7931
 *
7932
 * [ VC: Entity Declared ]
7933
 * In a document with an external subset or external parameter entities
7934
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7935
 * must precede any reference to it...
7936
 *
7937
 * [ WFC: In DTD ]
7938
 * Parameter-entity references may only appear in the DTD.
7939
 * NOTE: misleading but this is handled.
7940
 *
7941
 * Returns the string of the entity content.
7942
 *         str is updated to the current value of the index
7943
 */
7944
static xmlEntityPtr
7945
4.09k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7946
4.09k
    const xmlChar *ptr;
7947
4.09k
    xmlChar cur;
7948
4.09k
    xmlChar *name;
7949
4.09k
    xmlEntityPtr entity = NULL;
7950
7951
4.09k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7952
4.09k
    ptr = *str;
7953
4.09k
    cur = *ptr;
7954
4.09k
    if (cur != '%')
7955
0
        return(NULL);
7956
4.09k
    ptr++;
7957
4.09k
    name = xmlParseStringName(ctxt, &ptr);
7958
4.09k
    if (name == NULL) {
7959
315
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7960
315
           "xmlParseStringPEReference: no name\n");
7961
315
  *str = ptr;
7962
315
  return(NULL);
7963
315
    }
7964
3.77k
    cur = *ptr;
7965
3.77k
    if (cur != ';') {
7966
32
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7967
32
  xmlFree(name);
7968
32
  *str = ptr;
7969
32
  return(NULL);
7970
32
    }
7971
3.74k
    ptr++;
7972
7973
    /*
7974
     * Request the entity from SAX
7975
     */
7976
3.74k
    if ((ctxt->sax != NULL) &&
7977
3.74k
  (ctxt->sax->getParameterEntity != NULL))
7978
3.74k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7979
3.74k
    if (ctxt->instate == XML_PARSER_EOF) {
7980
0
  xmlFree(name);
7981
0
  *str = ptr;
7982
0
  return(NULL);
7983
0
    }
7984
3.74k
    if (entity == NULL) {
7985
  /*
7986
   * [ WFC: Entity Declared ]
7987
   * In a document without any DTD, a document with only an
7988
   * internal DTD subset which contains no parameter entity
7989
   * references, or a document with "standalone='yes'", ...
7990
   * ... The declaration of a parameter entity must precede
7991
   * any reference to it...
7992
   */
7993
1.72k
  if ((ctxt->standalone == 1) ||
7994
1.72k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7995
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7996
0
     "PEReference: %%%s; not found\n", name);
7997
1.72k
  } else {
7998
      /*
7999
       * [ VC: Entity Declared ]
8000
       * In a document with an external subset or external
8001
       * parameter entities with "standalone='no'", ...
8002
       * ... The declaration of a parameter entity must
8003
       * precede any reference to it...
8004
       */
8005
1.72k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8006
1.72k
        "PEReference: %%%s; not found\n",
8007
1.72k
        name, NULL);
8008
1.72k
      ctxt->valid = 0;
8009
1.72k
  }
8010
2.02k
    } else {
8011
  /*
8012
   * Internal checking in case the entity quest barfed
8013
   */
8014
2.02k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8015
2.02k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8016
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8017
0
        "%%%s; is not a parameter entity\n",
8018
0
        name, NULL);
8019
0
  }
8020
2.02k
    }
8021
3.74k
    ctxt->hasPErefs = 1;
8022
3.74k
    xmlFree(name);
8023
3.74k
    *str = ptr;
8024
3.74k
    return(entity);
8025
3.74k
}
8026
8027
/**
8028
 * xmlParseDocTypeDecl:
8029
 * @ctxt:  an XML parser context
8030
 *
8031
 * DEPRECATED: Internal function, don't use.
8032
 *
8033
 * parse a DOCTYPE declaration
8034
 *
8035
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8036
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8037
 *
8038
 * [ VC: Root Element Type ]
8039
 * The Name in the document type declaration must match the element
8040
 * type of the root element.
8041
 */
8042
8043
void
8044
76.8k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8045
76.8k
    const xmlChar *name = NULL;
8046
76.8k
    xmlChar *ExternalID = NULL;
8047
76.8k
    xmlChar *URI = NULL;
8048
8049
    /*
8050
     * We know that '<!DOCTYPE' has been detected.
8051
     */
8052
76.8k
    SKIP(9);
8053
8054
76.8k
    SKIP_BLANKS;
8055
8056
    /*
8057
     * Parse the DOCTYPE name.
8058
     */
8059
76.8k
    name = xmlParseName(ctxt);
8060
76.8k
    if (name == NULL) {
8061
3.14k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8062
3.14k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8063
3.14k
    }
8064
76.8k
    ctxt->intSubName = name;
8065
8066
76.8k
    SKIP_BLANKS;
8067
8068
    /*
8069
     * Check for SystemID and ExternalID
8070
     */
8071
76.8k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8072
8073
76.8k
    if ((URI != NULL) || (ExternalID != NULL)) {
8074
13.5k
        ctxt->hasExternalSubset = 1;
8075
13.5k
    }
8076
76.8k
    ctxt->extSubURI = URI;
8077
76.8k
    ctxt->extSubSystem = ExternalID;
8078
8079
76.8k
    SKIP_BLANKS;
8080
8081
    /*
8082
     * Create and update the internal subset.
8083
     */
8084
76.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8085
76.8k
  (!ctxt->disableSAX))
8086
59.0k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8087
76.8k
    if (ctxt->instate == XML_PARSER_EOF)
8088
762
  return;
8089
8090
    /*
8091
     * Is there any internal subset declarations ?
8092
     * they are handled separately in xmlParseInternalSubset()
8093
     */
8094
76.1k
    if (RAW == '[')
8095
61.5k
  return;
8096
8097
    /*
8098
     * We should be at the end of the DOCTYPE declaration.
8099
     */
8100
14.6k
    if (RAW != '>') {
8101
5.90k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8102
5.90k
    }
8103
14.6k
    NEXT;
8104
14.6k
}
8105
8106
/**
8107
 * xmlParseInternalSubset:
8108
 * @ctxt:  an XML parser context
8109
 *
8110
 * parse the internal subset declaration
8111
 *
8112
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8113
 */
8114
8115
static void
8116
62.7k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8117
    /*
8118
     * Is there any DTD definition ?
8119
     */
8120
62.7k
    if (RAW == '[') {
8121
62.7k
        int baseInputNr = ctxt->inputNr;
8122
62.7k
        ctxt->instate = XML_PARSER_DTD;
8123
62.7k
        NEXT;
8124
  /*
8125
   * Parse the succession of Markup declarations and
8126
   * PEReferences.
8127
   * Subsequence (markupdecl | PEReference | S)*
8128
   */
8129
62.7k
  SKIP_BLANKS;
8130
296k
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8131
296k
               (ctxt->instate != XML_PARSER_EOF)) {
8132
8133
            /*
8134
             * Conditional sections are allowed from external entities included
8135
             * by PE References in the internal subset.
8136
             */
8137
270k
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8138
270k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8139
977
                xmlParseConditionalSections(ctxt);
8140
269k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8141
183k
          xmlParseMarkupDecl(ctxt);
8142
183k
            } else if (RAW == '%') {
8143
50.0k
          xmlParsePEReference(ctxt);
8144
50.0k
            } else {
8145
36.2k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
36.2k
                        "xmlParseInternalSubset: error detected in"
8147
36.2k
                        " Markup declaration\n");
8148
36.2k
                xmlHaltParser(ctxt);
8149
36.2k
                return;
8150
36.2k
            }
8151
234k
      SKIP_BLANKS;
8152
234k
            SHRINK;
8153
234k
            GROW;
8154
234k
  }
8155
26.5k
  if (RAW == ']') {
8156
16.1k
      NEXT;
8157
16.1k
      SKIP_BLANKS;
8158
16.1k
  }
8159
26.5k
    }
8160
8161
    /*
8162
     * We should be at the end of the DOCTYPE declaration.
8163
     */
8164
26.5k
    if (RAW != '>') {
8165
11.3k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8166
11.3k
  return;
8167
11.3k
    }
8168
15.1k
    NEXT;
8169
15.1k
}
8170
8171
#ifdef LIBXML_SAX1_ENABLED
8172
/**
8173
 * xmlParseAttribute:
8174
 * @ctxt:  an XML parser context
8175
 * @value:  a xmlChar ** used to store the value of the attribute
8176
 *
8177
 * DEPRECATED: Internal function, don't use.
8178
 *
8179
 * parse an attribute
8180
 *
8181
 * [41] Attribute ::= Name Eq AttValue
8182
 *
8183
 * [ WFC: No External Entity References ]
8184
 * Attribute values cannot contain direct or indirect entity references
8185
 * to external entities.
8186
 *
8187
 * [ WFC: No < in Attribute Values ]
8188
 * The replacement text of any entity referred to directly or indirectly in
8189
 * an attribute value (other than "&lt;") must not contain a <.
8190
 *
8191
 * [ VC: Attribute Value Type ]
8192
 * The attribute must have been declared; the value must be of the type
8193
 * declared for it.
8194
 *
8195
 * [25] Eq ::= S? '=' S?
8196
 *
8197
 * With namespace:
8198
 *
8199
 * [NS 11] Attribute ::= QName Eq AttValue
8200
 *
8201
 * Also the case QName == xmlns:??? is handled independently as a namespace
8202
 * definition.
8203
 *
8204
 * Returns the attribute name, and the value in *value.
8205
 */
8206
8207
const xmlChar *
8208
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8209
    const xmlChar *name;
8210
    xmlChar *val;
8211
8212
    *value = NULL;
8213
    GROW;
8214
    name = xmlParseName(ctxt);
8215
    if (name == NULL) {
8216
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8217
                 "error parsing attribute name\n");
8218
        return(NULL);
8219
    }
8220
8221
    /*
8222
     * read the value
8223
     */
8224
    SKIP_BLANKS;
8225
    if (RAW == '=') {
8226
        NEXT;
8227
  SKIP_BLANKS;
8228
  val = xmlParseAttValue(ctxt);
8229
  ctxt->instate = XML_PARSER_CONTENT;
8230
    } else {
8231
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8232
         "Specification mandates value for attribute %s\n", name);
8233
  return(name);
8234
    }
8235
8236
    /*
8237
     * Check that xml:lang conforms to the specification
8238
     * No more registered as an error, just generate a warning now
8239
     * since this was deprecated in XML second edition
8240
     */
8241
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8242
  if (!xmlCheckLanguageID(val)) {
8243
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8244
              "Malformed value for xml:lang : %s\n",
8245
        val, NULL);
8246
  }
8247
    }
8248
8249
    /*
8250
     * Check that xml:space conforms to the specification
8251
     */
8252
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8253
  if (xmlStrEqual(val, BAD_CAST "default"))
8254
      *(ctxt->space) = 0;
8255
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8256
      *(ctxt->space) = 1;
8257
  else {
8258
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8259
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8260
                                 val, NULL);
8261
  }
8262
    }
8263
8264
    *value = val;
8265
    return(name);
8266
}
8267
8268
/**
8269
 * xmlParseStartTag:
8270
 * @ctxt:  an XML parser context
8271
 *
8272
 * DEPRECATED: Internal function, don't use.
8273
 *
8274
 * Parse a start tag. Always consumes '<'.
8275
 *
8276
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8277
 *
8278
 * [ WFC: Unique Att Spec ]
8279
 * No attribute name may appear more than once in the same start-tag or
8280
 * empty-element tag.
8281
 *
8282
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8283
 *
8284
 * [ WFC: Unique Att Spec ]
8285
 * No attribute name may appear more than once in the same start-tag or
8286
 * empty-element tag.
8287
 *
8288
 * With namespace:
8289
 *
8290
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8291
 *
8292
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8293
 *
8294
 * Returns the element name parsed
8295
 */
8296
8297
const xmlChar *
8298
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8299
    const xmlChar *name;
8300
    const xmlChar *attname;
8301
    xmlChar *attvalue;
8302
    const xmlChar **atts = ctxt->atts;
8303
    int nbatts = 0;
8304
    int maxatts = ctxt->maxatts;
8305
    int i;
8306
8307
    if (RAW != '<') return(NULL);
8308
    NEXT1;
8309
8310
    name = xmlParseName(ctxt);
8311
    if (name == NULL) {
8312
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8313
       "xmlParseStartTag: invalid element name\n");
8314
        return(NULL);
8315
    }
8316
8317
    /*
8318
     * Now parse the attributes, it ends up with the ending
8319
     *
8320
     * (S Attribute)* S?
8321
     */
8322
    SKIP_BLANKS;
8323
    GROW;
8324
8325
    while (((RAW != '>') &&
8326
     ((RAW != '/') || (NXT(1) != '>')) &&
8327
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8328
  attname = xmlParseAttribute(ctxt, &attvalue);
8329
        if (attname == NULL) {
8330
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8331
         "xmlParseStartTag: problem parsing attributes\n");
8332
      break;
8333
  }
8334
        if (attvalue != NULL) {
8335
      /*
8336
       * [ WFC: Unique Att Spec ]
8337
       * No attribute name may appear more than once in the same
8338
       * start-tag or empty-element tag.
8339
       */
8340
      for (i = 0; i < nbatts;i += 2) {
8341
          if (xmlStrEqual(atts[i], attname)) {
8342
        xmlErrAttributeDup(ctxt, NULL, attname);
8343
        xmlFree(attvalue);
8344
        goto failed;
8345
    }
8346
      }
8347
      /*
8348
       * Add the pair to atts
8349
       */
8350
      if (atts == NULL) {
8351
          maxatts = 22; /* allow for 10 attrs by default */
8352
          atts = (const xmlChar **)
8353
           xmlMalloc(maxatts * sizeof(xmlChar *));
8354
    if (atts == NULL) {
8355
        xmlErrMemory(ctxt, NULL);
8356
        if (attvalue != NULL)
8357
      xmlFree(attvalue);
8358
        goto failed;
8359
    }
8360
    ctxt->atts = atts;
8361
    ctxt->maxatts = maxatts;
8362
      } else if (nbatts + 4 > maxatts) {
8363
          const xmlChar **n;
8364
8365
          maxatts *= 2;
8366
          n = (const xmlChar **) xmlRealloc((void *) atts,
8367
               maxatts * sizeof(const xmlChar *));
8368
    if (n == NULL) {
8369
        xmlErrMemory(ctxt, NULL);
8370
        if (attvalue != NULL)
8371
      xmlFree(attvalue);
8372
        goto failed;
8373
    }
8374
    atts = n;
8375
    ctxt->atts = atts;
8376
    ctxt->maxatts = maxatts;
8377
      }
8378
      atts[nbatts++] = attname;
8379
      atts[nbatts++] = attvalue;
8380
      atts[nbatts] = NULL;
8381
      atts[nbatts + 1] = NULL;
8382
  } else {
8383
      if (attvalue != NULL)
8384
    xmlFree(attvalue);
8385
  }
8386
8387
failed:
8388
8389
  GROW
8390
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8391
      break;
8392
  if (SKIP_BLANKS == 0) {
8393
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8394
         "attributes construct error\n");
8395
  }
8396
  SHRINK;
8397
        GROW;
8398
    }
8399
8400
    /*
8401
     * SAX: Start of Element !
8402
     */
8403
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8404
  (!ctxt->disableSAX)) {
8405
  if (nbatts > 0)
8406
      ctxt->sax->startElement(ctxt->userData, name, atts);
8407
  else
8408
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8409
    }
8410
8411
    if (atts != NULL) {
8412
        /* Free only the content strings */
8413
        for (i = 1;i < nbatts;i+=2)
8414
      if (atts[i] != NULL)
8415
         xmlFree((xmlChar *) atts[i]);
8416
    }
8417
    return(name);
8418
}
8419
8420
/**
8421
 * xmlParseEndTag1:
8422
 * @ctxt:  an XML parser context
8423
 * @line:  line of the start tag
8424
 * @nsNr:  number of namespaces on the start tag
8425
 *
8426
 * Parse an end tag. Always consumes '</'.
8427
 *
8428
 * [42] ETag ::= '</' Name S? '>'
8429
 *
8430
 * With namespace
8431
 *
8432
 * [NS 9] ETag ::= '</' QName S? '>'
8433
 */
8434
8435
static void
8436
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8437
    const xmlChar *name;
8438
8439
    GROW;
8440
    if ((RAW != '<') || (NXT(1) != '/')) {
8441
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8442
           "xmlParseEndTag: '</' not found\n");
8443
  return;
8444
    }
8445
    SKIP(2);
8446
8447
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8448
8449
    /*
8450
     * We should definitely be at the ending "S? '>'" part
8451
     */
8452
    GROW;
8453
    SKIP_BLANKS;
8454
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8455
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8456
    } else
8457
  NEXT1;
8458
8459
    /*
8460
     * [ WFC: Element Type Match ]
8461
     * The Name in an element's end-tag must match the element type in the
8462
     * start-tag.
8463
     *
8464
     */
8465
    if (name != (xmlChar*)1) {
8466
        if (name == NULL) name = BAD_CAST "unparsable";
8467
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8468
         "Opening and ending tag mismatch: %s line %d and %s\n",
8469
                    ctxt->name, line, name);
8470
    }
8471
8472
    /*
8473
     * SAX: End of Tag
8474
     */
8475
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8476
  (!ctxt->disableSAX))
8477
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8478
8479
    namePop(ctxt);
8480
    spacePop(ctxt);
8481
    return;
8482
}
8483
8484
/**
8485
 * xmlParseEndTag:
8486
 * @ctxt:  an XML parser context
8487
 *
8488
 * DEPRECATED: Internal function, don't use.
8489
 *
8490
 * parse an end of tag
8491
 *
8492
 * [42] ETag ::= '</' Name S? '>'
8493
 *
8494
 * With namespace
8495
 *
8496
 * [NS 9] ETag ::= '</' QName S? '>'
8497
 */
8498
8499
void
8500
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8501
    xmlParseEndTag1(ctxt, 0);
8502
}
8503
#endif /* LIBXML_SAX1_ENABLED */
8504
8505
/************************************************************************
8506
 *                  *
8507
 *          SAX 2 specific operations       *
8508
 *                  *
8509
 ************************************************************************/
8510
8511
/*
8512
 * xmlGetNamespace:
8513
 * @ctxt:  an XML parser context
8514
 * @prefix:  the prefix to lookup
8515
 *
8516
 * Lookup the namespace name for the @prefix (which ca be NULL)
8517
 * The prefix must come from the @ctxt->dict dictionary
8518
 *
8519
 * Returns the namespace name or NULL if not bound
8520
 */
8521
static const xmlChar *
8522
2.50M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8523
2.50M
    int i;
8524
8525
2.50M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8526
3.32M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8527
2.36M
        if (ctxt->nsTab[i] == prefix) {
8528
1.26M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8529
3.49k
          return(NULL);
8530
1.25M
      return(ctxt->nsTab[i + 1]);
8531
1.26M
  }
8532
963k
    return(NULL);
8533
2.22M
}
8534
8535
/**
8536
 * xmlParseQName:
8537
 * @ctxt:  an XML parser context
8538
 * @prefix:  pointer to store the prefix part
8539
 *
8540
 * parse an XML Namespace QName
8541
 *
8542
 * [6]  QName  ::= (Prefix ':')? LocalPart
8543
 * [7]  Prefix  ::= NCName
8544
 * [8]  LocalPart  ::= NCName
8545
 *
8546
 * Returns the Name parsed or NULL
8547
 */
8548
8549
static const xmlChar *
8550
4.82M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8551
4.82M
    const xmlChar *l, *p;
8552
8553
4.82M
    GROW;
8554
4.82M
    if (ctxt->instate == XML_PARSER_EOF)
8555
251
        return(NULL);
8556
8557
4.82M
    l = xmlParseNCName(ctxt);
8558
4.82M
    if (l == NULL) {
8559
105k
        if (CUR == ':') {
8560
9.58k
      l = xmlParseName(ctxt);
8561
9.58k
      if (l != NULL) {
8562
9.43k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8563
9.43k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8564
9.43k
    *prefix = NULL;
8565
9.43k
    return(l);
8566
9.43k
      }
8567
9.58k
  }
8568
95.5k
        return(NULL);
8569
105k
    }
8570
4.71M
    if (CUR == ':') {
8571
2.11M
        NEXT;
8572
2.11M
  p = l;
8573
2.11M
  l = xmlParseNCName(ctxt);
8574
2.11M
  if (l == NULL) {
8575
16.8k
      xmlChar *tmp;
8576
8577
16.8k
            if (ctxt->instate == XML_PARSER_EOF)
8578
467
                return(NULL);
8579
16.3k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8580
16.3k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8581
16.3k
      l = xmlParseNmtoken(ctxt);
8582
16.3k
      if (l == NULL) {
8583
11.1k
                if (ctxt->instate == XML_PARSER_EOF)
8584
599
                    return(NULL);
8585
10.5k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8586
10.5k
            } else {
8587
5.19k
    tmp = xmlBuildQName(l, p, NULL, 0);
8588
5.19k
    xmlFree((char *)l);
8589
5.19k
      }
8590
15.7k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8591
15.7k
      if (tmp != NULL) xmlFree(tmp);
8592
15.7k
      *prefix = NULL;
8593
15.7k
      return(p);
8594
16.3k
  }
8595
2.10M
  if (CUR == ':') {
8596
24.3k
      xmlChar *tmp;
8597
8598
24.3k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8599
24.3k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8600
24.3k
      NEXT;
8601
24.3k
      tmp = (xmlChar *) xmlParseName(ctxt);
8602
24.3k
      if (tmp != NULL) {
8603
17.2k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8604
17.2k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8605
17.2k
    if (tmp != NULL) xmlFree(tmp);
8606
17.2k
    *prefix = p;
8607
17.2k
    return(l);
8608
17.2k
      }
8609
7.01k
            if (ctxt->instate == XML_PARSER_EOF)
8610
229
                return(NULL);
8611
6.78k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8612
6.78k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8613
6.78k
      if (tmp != NULL) xmlFree(tmp);
8614
6.78k
      *prefix = p;
8615
6.78k
      return(l);
8616
7.01k
  }
8617
2.07M
  *prefix = p;
8618
2.07M
    } else
8619
2.59M
        *prefix = NULL;
8620
4.67M
    return(l);
8621
4.71M
}
8622
8623
/**
8624
 * xmlParseQNameAndCompare:
8625
 * @ctxt:  an XML parser context
8626
 * @name:  the localname
8627
 * @prefix:  the prefix, if any.
8628
 *
8629
 * parse an XML name and compares for match
8630
 * (specialized for endtag parsing)
8631
 *
8632
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8633
 * and the name for mismatch
8634
 */
8635
8636
static const xmlChar *
8637
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8638
446k
                        xmlChar const *prefix) {
8639
446k
    const xmlChar *cmp;
8640
446k
    const xmlChar *in;
8641
446k
    const xmlChar *ret;
8642
446k
    const xmlChar *prefix2;
8643
8644
446k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8645
8646
446k
    GROW;
8647
446k
    in = ctxt->input->cur;
8648
8649
446k
    cmp = prefix;
8650
1.24M
    while (*in != 0 && *in == *cmp) {
8651
798k
  ++in;
8652
798k
  ++cmp;
8653
798k
    }
8654
446k
    if ((*cmp == 0) && (*in == ':')) {
8655
440k
        in++;
8656
440k
  cmp = name;
8657
4.14M
  while (*in != 0 && *in == *cmp) {
8658
3.70M
      ++in;
8659
3.70M
      ++cmp;
8660
3.70M
  }
8661
440k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8662
      /* success */
8663
435k
            ctxt->input->col += in - ctxt->input->cur;
8664
435k
      ctxt->input->cur = in;
8665
435k
      return((const xmlChar*) 1);
8666
435k
  }
8667
440k
    }
8668
    /*
8669
     * all strings coms from the dictionary, equality can be done directly
8670
     */
8671
11.2k
    ret = xmlParseQName (ctxt, &prefix2);
8672
11.2k
    if ((ret == name) && (prefix == prefix2))
8673
471
  return((const xmlChar*) 1);
8674
10.7k
    return ret;
8675
11.2k
}
8676
8677
/**
8678
 * xmlParseAttValueInternal:
8679
 * @ctxt:  an XML parser context
8680
 * @len:  attribute len result
8681
 * @alloc:  whether the attribute was reallocated as a new string
8682
 * @normalize:  if 1 then further non-CDATA normalization must be done
8683
 *
8684
 * parse a value for an attribute.
8685
 * NOTE: if no normalization is needed, the routine will return pointers
8686
 *       directly from the data buffer.
8687
 *
8688
 * 3.3.3 Attribute-Value Normalization:
8689
 * Before the value of an attribute is passed to the application or
8690
 * checked for validity, the XML processor must normalize it as follows:
8691
 * - a character reference is processed by appending the referenced
8692
 *   character to the attribute value
8693
 * - an entity reference is processed by recursively processing the
8694
 *   replacement text of the entity
8695
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8696
 *   appending #x20 to the normalized value, except that only a single
8697
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8698
 *   parsed entity or the literal entity value of an internal parsed entity
8699
 * - other characters are processed by appending them to the normalized value
8700
 * If the declared value is not CDATA, then the XML processor must further
8701
 * process the normalized attribute value by discarding any leading and
8702
 * trailing space (#x20) characters, and by replacing sequences of space
8703
 * (#x20) characters by a single space (#x20) character.
8704
 * All attributes for which no declaration has been read should be treated
8705
 * by a non-validating parser as if declared CDATA.
8706
 *
8707
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8708
 *     caller if it was copied, this can be detected by val[*len] == 0.
8709
 */
8710
8711
/*
 * GROW_PARSE_ATT_VALUE_INTERNAL:
 *
 * Grow the input and keep the scanning pointers of the enclosing
 * function consistent with it. If the input base pointer changed during
 * GROW, @start and @in are shifted by the same distance; @end is then
 * reloaded from the input.
 *
 * The expansion declares a local variable named oldbase and executes
 * return(NULL) when the parser is found in EOF state, so it can only be
 * used once per scope, in a function returning a pointer. It already ends
 * with a semicolon.
 */
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
    const xmlChar *oldbase = ctxt->input->base;\
    GROW;\
    if (ctxt->instate == XML_PARSER_EOF)\
        return(NULL);\
    if (ctxt->input->base != oldbase) {\
        ptrdiff_t shift = ctxt->input->base - oldbase;\
        start += shift;\
        in += shift;\
    }\
    end = ctxt->input->end;
8722
8723
static xmlChar *
8724
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8725
                         int normalize)
8726
2.69M
{
8727
2.69M
    xmlChar limit = 0;
8728
2.69M
    const xmlChar *in = NULL, *start, *end, *last;
8729
2.69M
    xmlChar *ret = NULL;
8730
2.69M
    int line, col;
8731
2.69M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8732
0
                    XML_MAX_HUGE_LENGTH :
8733
2.69M
                    XML_MAX_TEXT_LENGTH;
8734
8735
2.69M
    GROW;
8736
2.69M
    in = (xmlChar *) CUR_PTR;
8737
2.69M
    line = ctxt->input->line;
8738
2.69M
    col = ctxt->input->col;
8739
2.69M
    if (*in != '"' && *in != '\'') {
8740
9.31k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8741
9.31k
        return (NULL);
8742
9.31k
    }
8743
2.69M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8744
8745
    /*
8746
     * try to handle in this routine the most common case where no
8747
     * allocation of a new string is required and where content is
8748
     * pure ASCII.
8749
     */
8750
2.69M
    limit = *in++;
8751
2.69M
    col++;
8752
2.69M
    end = ctxt->input->end;
8753
2.69M
    start = in;
8754
2.69M
    if (in >= end) {
8755
265
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8756
265
    }
8757
2.69M
    if (normalize) {
8758
        /*
8759
   * Skip any leading spaces
8760
   */
8761
22.7k
  while ((in < end) && (*in != limit) &&
8762
22.7k
         ((*in == 0x20) || (*in == 0x9) ||
8763
21.6k
          (*in == 0xA) || (*in == 0xD))) {
8764
15.0k
      if (*in == 0xA) {
8765
13.4k
          line++; col = 1;
8766
13.4k
      } else {
8767
1.60k
          col++;
8768
1.60k
      }
8769
15.0k
      in++;
8770
15.0k
      start = in;
8771
15.0k
      if (in >= end) {
8772
253
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8773
253
                if ((in - start) > maxLength) {
8774
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8775
0
                                   "AttValue length too long\n");
8776
0
                    return(NULL);
8777
0
                }
8778
253
      }
8779
15.0k
  }
8780
45.1k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8781
45.1k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8782
37.8k
      col++;
8783
37.8k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
8784
37.5k
      if (in >= end) {
8785
282
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8786
282
                if ((in - start) > maxLength) {
8787
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8788
0
                                   "AttValue length too long\n");
8789
0
                    return(NULL);
8790
0
                }
8791
282
      }
8792
37.5k
  }
8793
7.67k
  last = in;
8794
  /*
8795
   * skip the trailing blanks
8796
   */
8797
8.11k
  while ((last[-1] == 0x20) && (last > start)) last--;
8798
14.5k
  while ((in < end) && (*in != limit) &&
8799
14.5k
         ((*in == 0x20) || (*in == 0x9) ||
8800
11.8k
          (*in == 0xA) || (*in == 0xD))) {
8801
6.91k
      if (*in == 0xA) {
8802
5.24k
          line++, col = 1;
8803
5.24k
      } else {
8804
1.66k
          col++;
8805
1.66k
      }
8806
6.91k
      in++;
8807
6.91k
      if (in >= end) {
8808
239
    const xmlChar *oldbase = ctxt->input->base;
8809
239
    GROW;
8810
239
                if (ctxt->instate == XML_PARSER_EOF)
8811
0
                    return(NULL);
8812
239
    if (oldbase != ctxt->input->base) {
8813
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
8814
0
        start = start + delta;
8815
0
        in = in + delta;
8816
0
        last = last + delta;
8817
0
    }
8818
239
    end = ctxt->input->end;
8819
239
                if ((in - start) > maxLength) {
8820
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8821
0
                                   "AttValue length too long\n");
8822
0
                    return(NULL);
8823
0
                }
8824
239
      }
8825
6.91k
  }
8826
7.67k
        if ((in - start) > maxLength) {
8827
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8828
0
                           "AttValue length too long\n");
8829
0
            return(NULL);
8830
0
        }
8831
7.67k
  if (*in != limit) goto need_complex;
8832
2.68M
    } else {
8833
35.9M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8834
35.9M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8835
33.2M
      in++;
8836
33.2M
      col++;
8837
33.2M
      if (in >= end) {
8838
856
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8839
856
                if ((in - start) > maxLength) {
8840
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8841
0
                                   "AttValue length too long\n");
8842
0
                    return(NULL);
8843
0
                }
8844
856
      }
8845
33.2M
  }
8846
2.68M
  last = in;
8847
2.68M
        if ((in - start) > maxLength) {
8848
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8849
0
                           "AttValue length too long\n");
8850
0
            return(NULL);
8851
0
        }
8852
2.68M
  if (*in != limit) goto need_complex;
8853
2.68M
    }
8854
2.25M
    in++;
8855
2.25M
    col++;
8856
2.25M
    if (len != NULL) {
8857
2.18M
        if (alloc) *alloc = 0;
8858
2.18M
        *len = last - start;
8859
2.18M
        ret = (xmlChar *) start;
8860
2.18M
    } else {
8861
67.2k
        if (alloc) *alloc = 1;
8862
67.2k
        ret = xmlStrndup(start, last - start);
8863
67.2k
    }
8864
2.25M
    CUR_PTR = in;
8865
2.25M
    ctxt->input->line = line;
8866
2.25M
    ctxt->input->col = col;
8867
2.25M
    return ret;
8868
436k
need_complex:
8869
436k
    if (alloc) *alloc = 1;
8870
436k
    return xmlParseAttValueComplex(ctxt, len, normalize);
8871
2.69M
}
8872
8873
/**
8874
 * xmlParseAttribute2:
8875
 * @ctxt:  an XML parser context
8876
 * @pref:  the element prefix
8877
 * @elem:  the element name
8878
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8879
 * @value:  a xmlChar ** used to store the value of the attribute
8880
 * @len:  an int * to save the length of the attribute
8881
 * @alloc:  an int * to indicate if the attribute was allocated
8882
 *
8883
 * parse an attribute in the new SAX2 framework.
8884
 *
8885
 * Returns the attribute name, and the value in *value.
8886
 */
8887
8888
static const xmlChar *
8889
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8890
                   const xmlChar * pref, const xmlChar * elem,
8891
                   const xmlChar ** prefix, xmlChar ** value,
8892
                   int *len, int *alloc)
8893
2.67M
{
8894
2.67M
    const xmlChar *name;
8895
2.67M
    xmlChar *val, *internal_val = NULL;
8896
2.67M
    int normalize = 0;
8897
8898
2.67M
    *value = NULL;
8899
2.67M
    GROW;
8900
2.67M
    name = xmlParseQName(ctxt, prefix);
8901
2.67M
    if (name == NULL) {
8902
39.5k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8903
39.5k
                       "error parsing attribute name\n");
8904
39.5k
        return (NULL);
8905
39.5k
    }
8906
8907
    /*
8908
     * get the type if needed
8909
     */
8910
2.63M
    if (ctxt->attsSpecial != NULL) {
8911
16.7k
        int type;
8912
8913
16.7k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8914
16.7k
                                                 pref, elem, *prefix, name);
8915
16.7k
        if (type != 0)
8916
8.36k
            normalize = 1;
8917
16.7k
    }
8918
8919
    /*
8920
     * read the value
8921
     */
8922
2.63M
    SKIP_BLANKS;
8923
2.63M
    if (RAW == '=') {
8924
2.61M
        NEXT;
8925
2.61M
        SKIP_BLANKS;
8926
2.61M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8927
2.61M
        if (val == NULL)
8928
3.86k
            return (NULL);
8929
2.60M
  if (normalize) {
8930
      /*
8931
       * Sometimes a second normalisation pass for spaces is needed
8932
       * but that only happens if charrefs or entity references
8933
       * have been used in the attribute value, i.e. the attribute
8934
       * value has been extracted into an allocated string already.
8935
       */
8936
7.67k
      if (*alloc) {
8937
5.73k
          const xmlChar *val2;
8938
8939
5.73k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8940
5.73k
    if ((val2 != NULL) && (val2 != val)) {
8941
317
        xmlFree(val);
8942
317
        val = (xmlChar *) val2;
8943
317
    }
8944
5.73k
      }
8945
7.67k
  }
8946
2.60M
        ctxt->instate = XML_PARSER_CONTENT;
8947
2.60M
    } else {
8948
21.4k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8949
21.4k
                          "Specification mandates value for attribute %s\n",
8950
21.4k
                          name);
8951
21.4k
        return (name);
8952
21.4k
    }
8953
8954
2.60M
    if (*prefix == ctxt->str_xml) {
8955
        /*
8956
         * Check that xml:lang conforms to the specification
8957
         * No longer registered as an error, just generate a warning now
8958
         * since this was deprecated in XML second edition
8959
         */
8960
268k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8961
0
            internal_val = xmlStrndup(val, *len);
8962
0
            if (!xmlCheckLanguageID(internal_val)) {
8963
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8964
0
                              "Malformed value for xml:lang : %s\n",
8965
0
                              internal_val, NULL);
8966
0
            }
8967
0
        }
8968
8969
        /*
8970
         * Check that xml:space conforms to the specification
8971
         */
8972
268k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8973
1.65k
            internal_val = xmlStrndup(val, *len);
8974
1.65k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8975
284
                *(ctxt->space) = 0;
8976
1.37k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8977
729
                *(ctxt->space) = 1;
8978
643
            else {
8979
643
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8980
643
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8981
643
                              internal_val, NULL);
8982
643
            }
8983
1.65k
        }
8984
268k
        if (internal_val) {
8985
1.63k
            xmlFree(internal_val);
8986
1.63k
        }
8987
268k
    }
8988
8989
2.60M
    *value = val;
8990
2.60M
    return (name);
8991
2.63M
}
8992
/**
8993
 * xmlParseStartTag2:
8994
 * @ctxt:  an XML parser context
8995
 *
8996
 * Parse a start tag. Always consumes '<'.
8997
 *
8998
 * This routine is called when running SAX2 parsing
8999
 *
9000
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9001
 *
9002
 * [ WFC: Unique Att Spec ]
9003
 * No attribute name may appear more than once in the same start-tag or
9004
 * empty-element tag.
9005
 *
9006
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9007
 *
9008
 * [ WFC: Unique Att Spec ]
9009
 * No attribute name may appear more than once in the same start-tag or
9010
 * empty-element tag.
9011
 *
9012
 * With namespace:
9013
 *
9014
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9015
 *
9016
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9017
 *
9018
 * Returns the element name parsed
9019
 */
9020
9021
static const xmlChar *
9022
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9023
2.13M
                  const xmlChar **URI, int *tlen) {
9024
2.13M
    const xmlChar *localname;
9025
2.13M
    const xmlChar *prefix;
9026
2.13M
    const xmlChar *attname;
9027
2.13M
    const xmlChar *aprefix;
9028
2.13M
    const xmlChar *nsname;
9029
2.13M
    xmlChar *attvalue;
9030
2.13M
    const xmlChar **atts = ctxt->atts;
9031
2.13M
    int maxatts = ctxt->maxatts;
9032
2.13M
    int nratts, nbatts, nbdef, inputid;
9033
2.13M
    int i, j, nbNs, attval;
9034
2.13M
    size_t cur;
9035
2.13M
    int nsNr = ctxt->nsNr;
9036
9037
2.13M
    if (RAW != '<') return(NULL);
9038
2.13M
    NEXT1;
9039
9040
2.13M
    cur = ctxt->input->cur - ctxt->input->base;
9041
2.13M
    inputid = ctxt->input->id;
9042
2.13M
    nbatts = 0;
9043
2.13M
    nratts = 0;
9044
2.13M
    nbdef = 0;
9045
2.13M
    nbNs = 0;
9046
2.13M
    attval = 0;
9047
    /* Forget any namespaces added during an earlier parse of this element. */
9048
2.13M
    ctxt->nsNr = nsNr;
9049
9050
2.13M
    localname = xmlParseQName(ctxt, &prefix);
9051
2.13M
    if (localname == NULL) {
9052
58.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9053
58.4k
           "StartTag: invalid element name\n");
9054
58.4k
        return(NULL);
9055
58.4k
    }
9056
2.07M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9057
9058
    /*
9059
     * Now parse the attributes, up to the end of the tag:
9060
     *
9061
     * (S Attribute)* S?
9062
     */
9063
2.07M
    SKIP_BLANKS;
9064
2.07M
    GROW;
9065
9066
3.52M
    while (((RAW != '>') &&
9067
3.52M
     ((RAW != '/') || (NXT(1) != '>')) &&
9068
3.52M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9069
2.67M
  int len = -1, alloc = 0;
9070
9071
2.67M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9072
2.67M
                               &aprefix, &attvalue, &len, &alloc);
9073
2.67M
        if (attname == NULL) {
9074
43.4k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9075
43.4k
           "xmlParseStartTag: problem parsing attributes\n");
9076
43.4k
      break;
9077
43.4k
  }
9078
2.62M
        if (attvalue == NULL)
9079
21.4k
            goto next_attr;
9080
2.60M
  if (len < 0) len = xmlStrlen(attvalue);
9081
9082
2.60M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9083
23.9k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9084
23.9k
            xmlURIPtr uri;
9085
9086
23.9k
            if (URL == NULL) {
9087
4
                xmlErrMemory(ctxt, "dictionary allocation failure");
9088
4
                if ((attvalue != NULL) && (alloc != 0))
9089
1
                    xmlFree(attvalue);
9090
4
                localname = NULL;
9091
4
                goto done;
9092
4
            }
9093
23.9k
            if (*URL != 0) {
9094
22.9k
                uri = xmlParseURI((const char *) URL);
9095
22.9k
                if (uri == NULL) {
9096
11.4k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9097
11.4k
                             "xmlns: '%s' is not a valid URI\n",
9098
11.4k
                                       URL, NULL, NULL);
9099
11.5k
                } else {
9100
11.5k
                    if (uri->scheme == NULL) {
9101
6.52k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9102
6.52k
                                  "xmlns: URI %s is not absolute\n",
9103
6.52k
                                  URL, NULL, NULL);
9104
6.52k
                    }
9105
11.5k
                    xmlFreeURI(uri);
9106
11.5k
                }
9107
22.9k
                if (URL == ctxt->str_xml_ns) {
9108
203
                    if (attname != ctxt->str_xml) {
9109
203
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9110
203
                     "xml namespace URI cannot be the default namespace\n",
9111
203
                                 NULL, NULL, NULL);
9112
203
                    }
9113
203
                    goto next_attr;
9114
203
                }
9115
22.7k
                if ((len == 29) &&
9116
22.7k
                    (xmlStrEqual(URL,
9117
629
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9118
233
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9119
233
                         "reuse of the xmlns namespace name is forbidden\n",
9120
233
                             NULL, NULL, NULL);
9121
233
                    goto next_attr;
9122
233
                }
9123
22.7k
            }
9124
            /*
9125
             * check that it's not a defined namespace
9126
             */
9127
26.4k
            for (j = 1;j <= nbNs;j++)
9128
3.80k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9129
820
                    break;
9130
23.5k
            if (j <= nbNs)
9131
820
                xmlErrAttributeDup(ctxt, NULL, attname);
9132
22.6k
            else
9133
22.6k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9134
9135
2.58M
        } else if (aprefix == ctxt->str_xmlns) {
9136
342k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9137
342k
            xmlURIPtr uri;
9138
9139
342k
            if (attname == ctxt->str_xml) {
9140
987
                if (URL != ctxt->str_xml_ns) {
9141
914
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9142
914
                             "xml namespace prefix mapped to wrong URI\n",
9143
914
                             NULL, NULL, NULL);
9144
914
                }
9145
                /*
9146
                 * Do not keep a namespace definition node
9147
                 */
9148
987
                goto next_attr;
9149
987
            }
9150
341k
            if (URL == ctxt->str_xml_ns) {
9151
334
                if (attname != ctxt->str_xml) {
9152
334
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9153
334
                             "xml namespace URI mapped to wrong prefix\n",
9154
334
                             NULL, NULL, NULL);
9155
334
                }
9156
334
                goto next_attr;
9157
334
            }
9158
341k
            if (attname == ctxt->str_xmlns) {
9159
463
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9160
463
                         "redefinition of the xmlns prefix is forbidden\n",
9161
463
                         NULL, NULL, NULL);
9162
463
                goto next_attr;
9163
463
            }
9164
340k
            if ((len == 29) &&
9165
340k
                (xmlStrEqual(URL,
9166
1.68k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9167
446
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9168
446
                         "reuse of the xmlns namespace name is forbidden\n",
9169
446
                         NULL, NULL, NULL);
9170
446
                goto next_attr;
9171
446
            }
9172
340k
            if ((URL == NULL) || (URL[0] == 0)) {
9173
1.59k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9174
1.59k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9175
1.59k
                              attname, NULL, NULL);
9176
1.59k
                goto next_attr;
9177
338k
            } else {
9178
338k
                uri = xmlParseURI((const char *) URL);
9179
338k
                if (uri == NULL) {
9180
15.2k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9181
15.2k
                         "xmlns:%s: '%s' is not a valid URI\n",
9182
15.2k
                                       attname, URL, NULL);
9183
323k
                } else {
9184
323k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9185
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9186
0
                                  "xmlns:%s: URI %s is not absolute\n",
9187
0
                                  attname, URL, NULL);
9188
0
                    }
9189
323k
                    xmlFreeURI(uri);
9190
323k
                }
9191
338k
            }
9192
9193
            /*
9194
             * check that it's not a defined namespace
9195
             */
9196
411k
            for (j = 1;j <= nbNs;j++)
9197
75.5k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9198
2.44k
                    break;
9199
338k
            if (j <= nbNs)
9200
2.44k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9201
336k
            else
9202
336k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9203
9204
2.24M
        } else {
9205
            /*
9206
             * Add the pair to atts
9207
             */
9208
2.24M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9209
207k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9210
49
                    goto next_attr;
9211
49
                }
9212
207k
                maxatts = ctxt->maxatts;
9213
207k
                atts = ctxt->atts;
9214
207k
            }
9215
2.24M
            ctxt->attallocs[nratts++] = alloc;
9216
2.24M
            atts[nbatts++] = attname;
9217
2.24M
            atts[nbatts++] = aprefix;
9218
            /*
9219
             * The namespace URI field is used temporarily to point at the
9220
             * base of the current input buffer for non-alloced attributes.
9221
             * When the input buffer is reallocated, all the pointers become
9222
             * invalid, but they can be reconstructed later.
9223
             */
9224
2.24M
            if (alloc)
9225
389k
                atts[nbatts++] = NULL;
9226
1.85M
            else
9227
1.85M
                atts[nbatts++] = ctxt->input->base;
9228
2.24M
            atts[nbatts++] = attvalue;
9229
2.24M
            attvalue += len;
9230
2.24M
            atts[nbatts++] = attvalue;
9231
            /*
9232
             * tag if some deallocation is needed
9233
             */
9234
2.24M
            if (alloc != 0) attval = 1;
9235
2.24M
            attvalue = NULL; /* moved into atts */
9236
2.24M
        }
9237
9238
2.62M
next_attr:
9239
2.62M
        if ((attvalue != NULL) && (alloc != 0)) {
9240
32.1k
            xmlFree(attvalue);
9241
32.1k
            attvalue = NULL;
9242
32.1k
        }
9243
9244
2.62M
  GROW
9245
2.62M
        if (ctxt->instate == XML_PARSER_EOF)
9246
584
            break;
9247
2.62M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9248
1.12M
      break;
9249
1.50M
  if (SKIP_BLANKS == 0) {
9250
50.8k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9251
50.8k
         "attributes construct error\n");
9252
50.8k
      break;
9253
50.8k
  }
9254
1.45M
        GROW;
9255
1.45M
    }
9256
9257
2.07M
    if (ctxt->input->id != inputid) {
9258
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9259
0
                    "Unexpected change of input\n");
9260
0
        localname = NULL;
9261
0
        goto done;
9262
0
    }
9263
9264
    /* Reconstruct attribute value pointers. */
9265
4.31M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9266
2.24M
        if (atts[i+2] != NULL) {
9267
            /*
9268
             * Arithmetic on dangling pointers is technically undefined
9269
             * behavior, but well...
9270
             */
9271
1.85M
            const xmlChar *old = atts[i+2];
9272
1.85M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9273
1.85M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9274
1.85M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9275
1.85M
        }
9276
2.24M
    }
9277
9278
    /*
9279
     * The attributes defaulting
9280
     */
9281
2.07M
    if (ctxt->attsDefault != NULL) {
9282
30.2k
        xmlDefAttrsPtr defaults;
9283
9284
30.2k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9285
30.2k
  if (defaults != NULL) {
9286
64.2k
      for (i = 0;i < defaults->nbAttrs;i++) {
9287
43.5k
          attname = defaults->values[5 * i];
9288
43.5k
    aprefix = defaults->values[5 * i + 1];
9289
9290
                /*
9291
     * special handling for defaulted namespace declarations
9292
     */
9293
43.5k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9294
        /*
9295
         * check that it's not a defined namespace
9296
         */
9297
5.87k
        for (j = 1;j <= nbNs;j++)
9298
2.79k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9299
1.33k
          break;
9300
4.41k
              if (j <= nbNs) continue;
9301
9302
3.07k
        nsname = xmlGetNamespace(ctxt, NULL);
9303
3.07k
        if (nsname != defaults->values[5 * i + 2]) {
9304
2.16k
      if (nsPush(ctxt, NULL,
9305
2.16k
                 defaults->values[5 * i + 2]) > 0)
9306
2.16k
          nbNs++;
9307
2.16k
        }
9308
39.1k
    } else if (aprefix == ctxt->str_xmlns) {
9309
        /*
9310
         * check that it's not a defined namespace
9311
         */
9312
4.90k
        for (j = 1;j <= nbNs;j++)
9313
1.76k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9314
359
          break;
9315
3.50k
              if (j <= nbNs) continue;
9316
9317
3.14k
        nsname = xmlGetNamespace(ctxt, attname);
9318
3.14k
        if (nsname != defaults->values[5 * i + 2]) {
9319
2.51k
      if (nsPush(ctxt, attname,
9320
2.51k
                 defaults->values[5 * i + 2]) > 0)
9321
2.51k
          nbNs++;
9322
2.51k
        }
9323
35.6k
    } else {
9324
        /*
9325
         * check that it's not a defined attribute
9326
         */
9327
96.3k
        for (j = 0;j < nbatts;j+=5) {
9328
65.8k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9329
5.18k
          break;
9330
65.8k
        }
9331
35.6k
        if (j < nbatts) continue;
9332
9333
30.4k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9334
1.29k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9335
6
                            localname = NULL;
9336
6
                            goto done;
9337
6
      }
9338
1.29k
      maxatts = ctxt->maxatts;
9339
1.29k
      atts = ctxt->atts;
9340
1.29k
        }
9341
30.4k
        atts[nbatts++] = attname;
9342
30.4k
        atts[nbatts++] = aprefix;
9343
30.4k
        if (aprefix == NULL)
9344
9.03k
      atts[nbatts++] = NULL;
9345
21.4k
        else
9346
21.4k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9347
30.4k
        atts[nbatts++] = defaults->values[5 * i + 2];
9348
30.4k
        atts[nbatts++] = defaults->values[5 * i + 3];
9349
30.4k
        if ((ctxt->standalone == 1) &&
9350
30.4k
            (defaults->values[5 * i + 4] != NULL)) {
9351
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9352
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9353
0
                                   attname, localname);
9354
0
        }
9355
30.4k
        nbdef++;
9356
30.4k
    }
9357
43.5k
      }
9358
20.6k
  }
9359
30.2k
    }
9360
9361
    /*
9362
     * The attribute checks
9363
     */
9364
4.34M
    for (i = 0; i < nbatts;i += 5) {
9365
        /*
9366
  * The default namespace does not apply to attribute names.
9367
  */
9368
2.27M
  if (atts[i + 1] != NULL) {
9369
395k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9370
395k
      if (nsname == NULL) {
9371
120k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9372
120k
        "Namespace prefix %s for %s on %s is not defined\n",
9373
120k
        atts[i + 1], atts[i], localname);
9374
120k
      }
9375
395k
      atts[i + 2] = nsname;
9376
395k
  } else
9377
1.87M
      nsname = NULL;
9378
  /*
9379
   * [ WFC: Unique Att Spec ]
9380
   * No attribute name may appear more than once in the same
9381
   * start-tag or empty-element tag.
9382
   * As extended by the Namespace in XML REC.
9383
   */
9384
3.74M
        for (j = 0; j < i;j += 5) {
9385
1.47M
      if (atts[i] == atts[j]) {
9386
9.59k
          if (atts[i+1] == atts[j+1]) {
9387
3.46k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9388
3.46k
        break;
9389
3.46k
    }
9390
6.13k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9391
627
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9392
627
           "Namespaced Attribute %s in '%s' redefined\n",
9393
627
           atts[i], nsname, NULL);
9394
627
        break;
9395
627
    }
9396
6.13k
      }
9397
1.47M
  }
9398
2.27M
    }
9399
9400
2.07M
    nsname = xmlGetNamespace(ctxt, prefix);
9401
2.07M
    if ((prefix != NULL) && (nsname == NULL)) {
9402
167k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9403
167k
           "Namespace prefix %s on %s is not defined\n",
9404
167k
     prefix, localname, NULL);
9405
167k
    }
9406
2.07M
    *pref = prefix;
9407
2.07M
    *URI = nsname;
9408
9409
    /*
9410
     * SAX: Start of Element !
9411
     */
9412
2.07M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9413
2.07M
  (!ctxt->disableSAX)) {
9414
1.68M
  if (nbNs > 0)
9415
246k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9416
246k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9417
246k
        nbatts / 5, nbdef, atts);
9418
1.43M
  else
9419
1.43M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9420
1.43M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9421
1.68M
    }
9422
9423
2.07M
done:
9424
    /*
9425
     * Free up attribute allocated strings if needed
9426
     */
9427
2.07M
    if (attval != 0) {
9428
1.05M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9429
735k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9430
389k
          xmlFree((xmlChar *) atts[i]);
9431
322k
    }
9432
9433
2.07M
    return(localname);
9434
2.07M
}
9435
9436
/**
9437
 * xmlParseEndTag2:
9438
 * @ctxt:  an XML parser context
9439
 * @tag:  the start tag of the element being closed; its prefix, URI,
9440
 *        line and nsNr fields are used here
9441
 *
9442
 * Parse an end tag. Always consumes '</'.
9443
 *
9444
 * [42] ETag ::= '</' Name S? '>'
9445
 *
9446
 * With namespace
9447
 *
9448
 * [NS 9] ETag ::= '</' QName S? '>'
9449
 */
9450
9451
static void
9452
606k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9453
606k
    const xmlChar *name;
9454
9455
606k
    GROW;
9456
606k
    if ((RAW != '<') || (NXT(1) != '/')) {
9457
229
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9458
229
  return;
9459
229
    }
9460
606k
    SKIP(2);
9461
9462
606k
    if (tag->prefix == NULL)
9463
160k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9464
446k
    else
9465
446k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9466
9467
    /*
9468
     * We should definitely be at the ending "S? '>'" part
9469
     */
9470
606k
    GROW;
9471
606k
    if (ctxt->instate == XML_PARSER_EOF)
9472
486
        return;
9473
606k
    SKIP_BLANKS;
9474
606k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9475
9.01k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9476
9.01k
    } else
9477
597k
  NEXT1;
9478
9479
    /*
9480
     * [ WFC: Element Type Match ]
9481
     * The Name in an element's end-tag must match the element type in the
9482
     * start-tag.
9483
     *
9484
     */
9485
606k
    if (name != (xmlChar*)1) {
9486
19.4k
        if (name == NULL) name = BAD_CAST "unparsable";
9487
19.4k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9488
19.4k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9489
19.4k
                    ctxt->name, tag->line, name);
9490
19.4k
    }
9491
9492
    /*
9493
     * SAX: End of Tag
9494
     */
9495
606k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9496
606k
  (!ctxt->disableSAX))
9497
545k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9498
545k
                                tag->URI);
9499
9500
606k
    spacePop(ctxt);
9501
606k
    if (tag->nsNr != 0)
9502
177k
  nsPop(ctxt, tag->nsNr);
9503
606k
}
9504
9505
/**
9506
 * xmlParseCDSect:
9507
 * @ctxt:  an XML parser context
9508
 *
9509
 * DEPRECATED: Internal function, don't use.
9510
 *
9511
 * Parse escaped pure raw content. Always consumes '<!['.
9512
 *
9513
 * [18] CDSect ::= CDStart CData CDEnd
9514
 *
9515
 * [19] CDStart ::= '<![CDATA['
9516
 *
9517
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9518
 *
9519
 * [21] CDEnd ::= ']]>'
9520
 */
9521
void
9522
17.2k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9523
17.2k
    xmlChar *buf = NULL;
9524
17.2k
    int len = 0;
9525
17.2k
    int size = XML_PARSER_BUFFER_SIZE;
9526
17.2k
    int r, rl;
9527
17.2k
    int s, sl;
9528
17.2k
    int cur, l;
9529
17.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9530
0
                    XML_MAX_HUGE_LENGTH :
9531
17.2k
                    XML_MAX_TEXT_LENGTH;
9532
9533
17.2k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9534
0
        return;
9535
17.2k
    SKIP(3);
9536
9537
17.2k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9538
0
        return;
9539
17.2k
    SKIP(6);
9540
9541
17.2k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9542
17.2k
    r = CUR_CHAR(rl);
9543
17.2k
    if (!IS_CHAR(r)) {
9544
1.39k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9545
1.39k
        goto out;
9546
1.39k
    }
9547
15.8k
    NEXTL(rl);
9548
15.8k
    s = CUR_CHAR(sl);
9549
15.8k
    if (!IS_CHAR(s)) {
9550
2.21k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9551
2.21k
        goto out;
9552
2.21k
    }
9553
13.5k
    NEXTL(sl);
9554
13.5k
    cur = CUR_CHAR(l);
9555
13.5k
    buf = (xmlChar *) xmlMallocAtomic(size);
9556
13.5k
    if (buf == NULL) {
9557
13
  xmlErrMemory(ctxt, NULL);
9558
13
        goto out;
9559
13
    }
9560
36.4M
    while (IS_CHAR(cur) &&
9561
36.4M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9562
36.3M
  if (len + 5 >= size) {
9563
3.55k
      xmlChar *tmp;
9564
9565
3.55k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9566
3.55k
      if (tmp == NULL) {
9567
1
    xmlErrMemory(ctxt, NULL);
9568
1
                goto out;
9569
1
      }
9570
3.55k
      buf = tmp;
9571
3.55k
      size *= 2;
9572
3.55k
  }
9573
36.3M
  COPY_BUF(rl,buf,len,r);
9574
36.3M
        if (len > maxLength) {
9575
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9576
0
                           "CData section too big found\n");
9577
0
            goto out;
9578
0
        }
9579
36.3M
  r = s;
9580
36.3M
  rl = sl;
9581
36.3M
  s = cur;
9582
36.3M
  sl = l;
9583
36.3M
  NEXTL(l);
9584
36.3M
  cur = CUR_CHAR(l);
9585
36.3M
    }
9586
13.5k
    buf[len] = 0;
9587
13.5k
    if (ctxt->instate == XML_PARSER_EOF) {
9588
285
        xmlFree(buf);
9589
285
        return;
9590
285
    }
9591
13.2k
    if (cur != '>') {
9592
4.80k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9593
4.80k
                       "CData section not finished\n%.50s\n", buf);
9594
4.80k
        goto out;
9595
4.80k
    }
9596
8.48k
    NEXTL(l);
9597
9598
    /*
9599
     * OK the buffer is to be consumed as cdata.
9600
     */
9601
8.48k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9602
7.11k
  if (ctxt->sax->cdataBlock != NULL)
9603
0
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9604
7.11k
  else if (ctxt->sax->characters != NULL)
9605
7.11k
      ctxt->sax->characters(ctxt->userData, buf, len);
9606
7.11k
    }
9607
9608
16.9k
out:
9609
16.9k
    if (ctxt->instate != XML_PARSER_EOF)
9610
16.6k
        ctxt->instate = XML_PARSER_CONTENT;
9611
16.9k
    xmlFree(buf);
9612
16.9k
}
9613
9614
/**
9615
 * xmlParseContentInternal:
9616
 * @ctxt:  an XML parser context
9617
 *
9618
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9619
 * unexpected EOF to the caller.
9620
 */
9621
9622
static void
9623
231k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9624
231k
    int nameNr = ctxt->nameNr;
9625
9626
231k
    GROW;
9627
5.02M
    while ((RAW != 0) &&
9628
5.02M
     (ctxt->instate != XML_PARSER_EOF)) {
9629
4.98M
  const xmlChar *cur = ctxt->input->cur;
9630
9631
  /*
9632
   * First case : a Processing Instruction.
9633
   */
9634
4.98M
  if ((*cur == '<') && (cur[1] == '?')) {
9635
54.3k
      xmlParsePI(ctxt);
9636
54.3k
  }
9637
9638
  /*
9639
   * Second case : a CDSection
9640
   */
9641
  /* 2.6.0 test was *cur not RAW */
9642
4.92M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9643
17.2k
      xmlParseCDSect(ctxt);
9644
17.2k
  }
9645
9646
  /*
9647
   * Third case :  a comment
9648
   */
9649
4.90M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9650
4.90M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9651
123k
      xmlParseComment(ctxt);
9652
123k
      ctxt->instate = XML_PARSER_CONTENT;
9653
123k
  }
9654
9655
  /*
9656
   * Fourth case :  a sub-element.
9657
   */
9658
4.78M
  else if (*cur == '<') {
9659
2.49M
            if (NXT(1) == '/') {
9660
607k
                if (ctxt->nameNr <= nameNr)
9661
191k
                    break;
9662
415k
          xmlParseElementEnd(ctxt);
9663
1.88M
            } else {
9664
1.88M
          xmlParseElementStart(ctxt);
9665
1.88M
            }
9666
2.49M
  }
9667
9668
  /*
9669
   * Fifth case : a reference. If if has not been resolved,
9670
   *    parsing returns it's Name, create the node
9671
   */
9672
9673
2.29M
  else if (*cur == '&') {
9674
176k
      xmlParseReference(ctxt);
9675
176k
  }
9676
9677
  /*
9678
   * Last case, text. Note that References are handled directly.
9679
   */
9680
2.11M
  else {
9681
2.11M
      xmlParseCharDataInternal(ctxt, 0);
9682
2.11M
  }
9683
9684
4.78M
  SHRINK;
9685
4.78M
  GROW;
9686
4.78M
    }
9687
231k
}
9688
9689
/**
9690
 * xmlParseContent:
9691
 * @ctxt:  an XML parser context
9692
 *
9693
 * Parse a content sequence. Stops at EOF or '</'.
9694
 *
9695
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9696
 */
9697
9698
void
9699
8.88k
xmlParseContent(xmlParserCtxtPtr ctxt) {
9700
8.88k
    int nameNr = ctxt->nameNr;
9701
9702
8.88k
    xmlParseContentInternal(ctxt);
9703
9704
8.88k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9705
2.08k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9706
2.08k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9707
2.08k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9708
2.08k
                "Premature end of data in tag %s line %d\n",
9709
2.08k
    name, line, NULL);
9710
2.08k
    }
9711
8.88k
}
9712
9713
/**
9714
 * xmlParseElement:
9715
 * @ctxt:  an XML parser context
9716
 *
9717
 * DEPRECATED: Internal function, don't use.
9718
 *
9719
 * parse an XML element
9720
 *
9721
 * [39] element ::= EmptyElemTag | STag content ETag
9722
 *
9723
 * [ WFC: Element Type Match ]
9724
 * The Name in an element's end-tag must match the element type in the
9725
 * start-tag.
9726
 *
9727
 */
9728
9729
void
9730
248k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9731
248k
    if (xmlParseElementStart(ctxt) != 0)
9732
25.6k
        return;
9733
9734
223k
    xmlParseContentInternal(ctxt);
9735
223k
    if (ctxt->instate == XML_PARSER_EOF)
9736
7.13k
  return;
9737
9738
215k
    if (CUR == 0) {
9739
24.6k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9740
24.6k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9741
24.6k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9742
24.6k
                "Premature end of data in tag %s line %d\n",
9743
24.6k
    name, line, NULL);
9744
24.6k
        return;
9745
24.6k
    }
9746
9747
191k
    xmlParseElementEnd(ctxt);
9748
191k
}
9749
9750
/**
9751
 * xmlParseElementStart:
9752
 * @ctxt:  an XML parser context
9753
 *
9754
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9755
 * opening tag was parsed, 1 if an empty element was parsed.
9756
 *
9757
 * Always consumes '<'.
9758
 */
9759
static int
9760
2.13M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9761
2.13M
    const xmlChar *name;
9762
2.13M
    const xmlChar *prefix = NULL;
9763
2.13M
    const xmlChar *URI = NULL;
9764
2.13M
    xmlParserNodeInfo node_info;
9765
2.13M
    int line, tlen = 0;
9766
2.13M
    xmlNodePtr cur;
9767
2.13M
    int nsNr = ctxt->nsNr;
9768
9769
2.13M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9770
2.13M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9771
419
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9772
419
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9773
419
        xmlParserMaxDepth);
9774
419
  xmlHaltParser(ctxt);
9775
419
  return(-1);
9776
419
    }
9777
9778
    /* Capture start position */
9779
2.13M
    if (ctxt->record_info) {
9780
0
        node_info.begin_pos = ctxt->input->consumed +
9781
0
                          (CUR_PTR - ctxt->input->base);
9782
0
  node_info.begin_line = ctxt->input->line;
9783
0
    }
9784
9785
2.13M
    if (ctxt->spaceNr == 0)
9786
0
  spacePush(ctxt, -1);
9787
2.13M
    else if (*ctxt->space == -2)
9788
0
  spacePush(ctxt, -1);
9789
2.13M
    else
9790
2.13M
  spacePush(ctxt, *ctxt->space);
9791
9792
2.13M
    line = ctxt->input->line;
9793
#ifdef LIBXML_SAX1_ENABLED
9794
    if (ctxt->sax2)
9795
#endif /* LIBXML_SAX1_ENABLED */
9796
2.13M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9797
#ifdef LIBXML_SAX1_ENABLED
9798
    else
9799
  name = xmlParseStartTag(ctxt);
9800
#endif /* LIBXML_SAX1_ENABLED */
9801
2.13M
    if (ctxt->instate == XML_PARSER_EOF)
9802
4.08k
  return(-1);
9803
2.13M
    if (name == NULL) {
9804
57.6k
  spacePop(ctxt);
9805
57.6k
        return(-1);
9806
57.6k
    }
9807
2.07M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
9808
2.07M
    cur = ctxt->node;
9809
9810
#ifdef LIBXML_VALID_ENABLED
9811
    /*
9812
     * [ VC: Root Element Type ]
9813
     * The Name in the document type declaration must match the element
9814
     * type of the root element.
9815
     */
9816
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9817
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9818
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9819
#endif /* LIBXML_VALID_ENABLED */
9820
9821
    /*
9822
     * Check for an Empty Element.
9823
     */
9824
2.07M
    if ((RAW == '/') && (NXT(1) == '>')) {
9825
1.12M
        SKIP(2);
9826
1.12M
  if (ctxt->sax2) {
9827
1.12M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9828
1.12M
    (!ctxt->disableSAX))
9829
1.00M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9830
#ifdef LIBXML_SAX1_ENABLED
9831
  } else {
9832
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9833
    (!ctxt->disableSAX))
9834
    ctxt->sax->endElement(ctxt->userData, name);
9835
#endif /* LIBXML_SAX1_ENABLED */
9836
1.12M
  }
9837
1.12M
  namePop(ctxt);
9838
1.12M
  spacePop(ctxt);
9839
1.12M
  if (nsNr != ctxt->nsNr)
9840
78.9k
      nsPop(ctxt, ctxt->nsNr - nsNr);
9841
1.12M
  if (cur != NULL && ctxt->record_info) {
9842
0
            node_info.node = cur;
9843
0
            node_info.end_pos = ctxt->input->consumed +
9844
0
                                (CUR_PTR - ctxt->input->base);
9845
0
            node_info.end_line = ctxt->input->line;
9846
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9847
0
  }
9848
1.12M
  return(1);
9849
1.12M
    }
9850
952k
    if (RAW == '>') {
9851
854k
        NEXT1;
9852
854k
        if (cur != NULL && ctxt->record_info) {
9853
0
            node_info.node = cur;
9854
0
            node_info.end_pos = 0;
9855
0
            node_info.end_line = 0;
9856
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9857
0
        }
9858
854k
    } else {
9859
97.6k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9860
97.6k
         "Couldn't find end of Start Tag %s line %d\n",
9861
97.6k
                    name, line, NULL);
9862
9863
  /*
9864
   * end of parsing of this node.
9865
   */
9866
97.6k
  nodePop(ctxt);
9867
97.6k
  namePop(ctxt);
9868
97.6k
  spacePop(ctxt);
9869
97.6k
  if (nsNr != ctxt->nsNr)
9870
20.3k
      nsPop(ctxt, ctxt->nsNr - nsNr);
9871
97.6k
  return(-1);
9872
97.6k
    }
9873
9874
854k
    return(0);
9875
952k
}
9876
9877
/**
9878
 * xmlParseElementEnd:
9879
 * @ctxt:  an XML parser context
9880
 *
9881
 * Parse the end of an XML element. Always consumes '</'.
9882
 */
9883
static void
9884
606k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9885
606k
    xmlNodePtr cur = ctxt->node;
9886
9887
606k
    if (ctxt->nameNr <= 0) {
9888
0
        if ((RAW == '<') && (NXT(1) == '/'))
9889
0
            SKIP(2);
9890
0
        return;
9891
0
    }
9892
9893
    /*
9894
     * parse the end of tag: '</' should be here.
9895
     */
9896
606k
    if (ctxt->sax2) {
9897
606k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9898
606k
  namePop(ctxt);
9899
606k
    }
9900
#ifdef LIBXML_SAX1_ENABLED
9901
    else
9902
  xmlParseEndTag1(ctxt, 0);
9903
#endif /* LIBXML_SAX1_ENABLED */
9904
9905
    /*
9906
     * Capture end position
9907
     */
9908
606k
    if (cur != NULL && ctxt->record_info) {
9909
0
        xmlParserNodeInfoPtr node_info;
9910
9911
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9912
0
        if (node_info != NULL) {
9913
0
            node_info->end_pos = ctxt->input->consumed +
9914
0
                                 (CUR_PTR - ctxt->input->base);
9915
0
            node_info->end_line = ctxt->input->line;
9916
0
        }
9917
0
    }
9918
606k
}
9919
9920
/**
9921
 * xmlParseVersionNum:
9922
 * @ctxt:  an XML parser context
9923
 *
9924
 * DEPRECATED: Internal function, don't use.
9925
 *
9926
 * parse the XML version value.
9927
 *
9928
 * [26] VersionNum ::= '1.' [0-9]+
9929
 *
9930
 * In practice allow [0-9].[0-9]+ at that level
9931
 *
9932
 * Returns the string giving the XML version number, or NULL
9933
 */
9934
xmlChar *
9935
193k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9936
193k
    xmlChar *buf = NULL;
9937
193k
    int len = 0;
9938
193k
    int size = 10;
9939
193k
    xmlChar cur;
9940
9941
193k
    buf = (xmlChar *) xmlMallocAtomic(size);
9942
193k
    if (buf == NULL) {
9943
40
  xmlErrMemory(ctxt, NULL);
9944
40
  return(NULL);
9945
40
    }
9946
193k
    cur = CUR;
9947
193k
    if (!((cur >= '0') && (cur <= '9'))) {
9948
1.49k
  xmlFree(buf);
9949
1.49k
  return(NULL);
9950
1.49k
    }
9951
191k
    buf[len++] = cur;
9952
191k
    NEXT;
9953
191k
    cur=CUR;
9954
191k
    if (cur != '.') {
9955
541
  xmlFree(buf);
9956
541
  return(NULL);
9957
541
    }
9958
191k
    buf[len++] = cur;
9959
191k
    NEXT;
9960
191k
    cur=CUR;
9961
277k
    while ((cur >= '0') && (cur <= '9')) {
9962
86.2k
  if (len + 1 >= size) {
9963
2.01k
      xmlChar *tmp;
9964
9965
2.01k
      size *= 2;
9966
2.01k
      tmp = (xmlChar *) xmlRealloc(buf, size);
9967
2.01k
      if (tmp == NULL) {
9968
1
          xmlFree(buf);
9969
1
    xmlErrMemory(ctxt, NULL);
9970
1
    return(NULL);
9971
1
      }
9972
2.01k
      buf = tmp;
9973
2.01k
  }
9974
86.2k
  buf[len++] = cur;
9975
86.2k
  NEXT;
9976
86.2k
  cur=CUR;
9977
86.2k
    }
9978
191k
    buf[len] = 0;
9979
191k
    return(buf);
9980
191k
}
9981
9982
/**
9983
 * xmlParseVersionInfo:
9984
 * @ctxt:  an XML parser context
9985
 *
9986
 * DEPRECATED: Internal function, don't use.
9987
 *
9988
 * parse the XML version.
9989
 *
9990
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9991
 *
9992
 * [25] Eq ::= S? '=' S?
9993
 *
9994
 * Returns the version string, e.g. "1.0"
9995
 */
9996
9997
xmlChar *
9998
210k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9999
210k
    xmlChar *version = NULL;
10000
10001
210k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10002
197k
  SKIP(7);
10003
197k
  SKIP_BLANKS;
10004
197k
  if (RAW != '=') {
10005
2.91k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10006
2.91k
      return(NULL);
10007
2.91k
        }
10008
194k
  NEXT;
10009
194k
  SKIP_BLANKS;
10010
194k
  if (RAW == '"') {
10011
189k
      NEXT;
10012
189k
      version = xmlParseVersionNum(ctxt);
10013
189k
      if (RAW != '"') {
10014
3.61k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10015
3.61k
      } else
10016
185k
          NEXT;
10017
189k
  } else if (RAW == '\''){
10018
3.76k
      NEXT;
10019
3.76k
      version = xmlParseVersionNum(ctxt);
10020
3.76k
      if (RAW != '\'') {
10021
1.35k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10022
1.35k
      } else
10023
2.41k
          NEXT;
10024
3.76k
  } else {
10025
806
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10026
806
  }
10027
194k
    }
10028
207k
    return(version);
10029
210k
}
10030
10031
/**
10032
 * xmlParseEncName:
10033
 * @ctxt:  an XML parser context
10034
 *
10035
 * DEPRECATED: Internal function, don't use.
10036
 *
10037
 * parse the XML encoding name
10038
 *
10039
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10040
 *
10041
 * Returns the encoding name value or NULL
10042
 */
10043
xmlChar *
10044
181k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10045
181k
    xmlChar *buf = NULL;
10046
181k
    int len = 0;
10047
181k
    int size = 10;
10048
181k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10049
0
                    XML_MAX_TEXT_LENGTH :
10050
181k
                    XML_MAX_NAME_LENGTH;
10051
181k
    xmlChar cur;
10052
10053
181k
    cur = CUR;
10054
181k
    if (((cur >= 'a') && (cur <= 'z')) ||
10055
181k
        ((cur >= 'A') && (cur <= 'Z'))) {
10056
181k
  buf = (xmlChar *) xmlMallocAtomic(size);
10057
181k
  if (buf == NULL) {
10058
116
      xmlErrMemory(ctxt, NULL);
10059
116
      return(NULL);
10060
116
  }
10061
10062
180k
  buf[len++] = cur;
10063
180k
  NEXT;
10064
180k
  cur = CUR;
10065
3.09M
  while (((cur >= 'a') && (cur <= 'z')) ||
10066
3.09M
         ((cur >= 'A') && (cur <= 'Z')) ||
10067
3.09M
         ((cur >= '0') && (cur <= '9')) ||
10068
3.09M
         (cur == '.') || (cur == '_') ||
10069
3.09M
         (cur == '-')) {
10070
2.91M
      if (len + 1 >= size) {
10071
25.8k
          xmlChar *tmp;
10072
10073
25.8k
    size *= 2;
10074
25.8k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10075
25.8k
    if (tmp == NULL) {
10076
2
        xmlErrMemory(ctxt, NULL);
10077
2
        xmlFree(buf);
10078
2
        return(NULL);
10079
2
    }
10080
25.8k
    buf = tmp;
10081
25.8k
      }
10082
2.91M
      buf[len++] = cur;
10083
2.91M
            if (len > maxLength) {
10084
35
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10085
35
                xmlFree(buf);
10086
35
                return(NULL);
10087
35
            }
10088
2.91M
      NEXT;
10089
2.91M
      cur = CUR;
10090
2.91M
        }
10091
180k
  buf[len] = 0;
10092
180k
    } else {
10093
652
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10094
652
    }
10095
181k
    return(buf);
10096
181k
}
10097
10098
/**
10099
 * xmlParseEncodingDecl:
10100
 * @ctxt:  an XML parser context
10101
 *
10102
 * DEPRECATED: Internal function, don't use.
10103
 *
10104
 * parse the XML encoding declaration
10105
 *
10106
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10107
 *
10108
 * this setups the conversion filters.
10109
 *
10110
 * Returns the encoding value or NULL
10111
 */
10112
10113
const xmlChar *
10114
200k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10115
200k
    xmlChar *encoding = NULL;
10116
10117
200k
    SKIP_BLANKS;
10118
200k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10119
183k
  SKIP(8);
10120
183k
  SKIP_BLANKS;
10121
183k
  if (RAW != '=') {
10122
829
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10123
829
      return(NULL);
10124
829
        }
10125
182k
  NEXT;
10126
182k
  SKIP_BLANKS;
10127
182k
  if (RAW == '"') {
10128
177k
      NEXT;
10129
177k
      encoding = xmlParseEncName(ctxt);
10130
177k
      if (RAW != '"') {
10131
1.91k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10132
1.91k
    xmlFree((xmlChar *) encoding);
10133
1.91k
    return(NULL);
10134
1.91k
      } else
10135
175k
          NEXT;
10136
177k
  } else if (RAW == '\''){
10137
3.89k
      NEXT;
10138
3.89k
      encoding = xmlParseEncName(ctxt);
10139
3.89k
      if (RAW != '\'') {
10140
259
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10141
259
    xmlFree((xmlChar *) encoding);
10142
259
    return(NULL);
10143
259
      } else
10144
3.63k
          NEXT;
10145
3.89k
  } else {
10146
549
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10147
549
  }
10148
10149
        /*
10150
         * Non standard parsing, allowing the user to ignore encoding
10151
         */
10152
180k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10153
0
      xmlFree((xmlChar *) encoding);
10154
0
            return(NULL);
10155
0
  }
10156
10157
  /*
10158
   * UTF-16 encoding switch has already taken place at this stage,
10159
   * more over the little-endian/big-endian selection is already done
10160
   */
10161
180k
        if ((encoding != NULL) &&
10162
180k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10163
179k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10164
      /*
10165
       * If no encoding was passed to the parser, that we are
10166
       * using UTF-16 and no decoder is present i.e. the
10167
       * document is apparently UTF-8 compatible, then raise an
10168
       * encoding mismatch fatal error
10169
       */
10170
477
      if ((ctxt->encoding == NULL) &&
10171
477
          (ctxt->input->buf != NULL) &&
10172
477
          (ctxt->input->buf->encoder == NULL)) {
10173
391
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10174
391
      "Document labelled UTF-16 but has UTF-8 content\n");
10175
391
      }
10176
477
      if (ctxt->encoding != NULL)
10177
86
    xmlFree((xmlChar *) ctxt->encoding);
10178
477
      ctxt->encoding = encoding;
10179
477
  }
10180
  /*
10181
   * UTF-8 encoding is handled natively
10182
   */
10183
179k
        else if ((encoding != NULL) &&
10184
179k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10185
179k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10186
            /* TODO: Check for encoding mismatch. */
10187
1.40k
      if (ctxt->encoding != NULL)
10188
6
    xmlFree((xmlChar *) ctxt->encoding);
10189
1.40k
      ctxt->encoding = encoding;
10190
1.40k
  }
10191
178k
  else if (encoding != NULL) {
10192
177k
      xmlCharEncodingHandlerPtr handler;
10193
10194
177k
      if (ctxt->input->encoding != NULL)
10195
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10196
177k
      ctxt->input->encoding = encoding;
10197
10198
177k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10199
177k
      if (handler != NULL) {
10200
176k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10201
        /* failed to convert */
10202
320
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10203
320
        return(NULL);
10204
320
    }
10205
176k
      } else {
10206
1.36k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10207
1.36k
      "Unsupported encoding %s\n", encoding);
10208
1.36k
    return(NULL);
10209
1.36k
      }
10210
177k
  }
10211
180k
    }
10212
195k
    return(encoding);
10213
200k
}
10214
10215
/**
10216
 * xmlParseSDDecl:
10217
 * @ctxt:  an XML parser context
10218
 *
10219
 * DEPRECATED: Internal function, don't use.
10220
 *
10221
 * parse the XML standalone declaration
10222
 *
10223
 * [32] SDDecl ::= S 'standalone' Eq
10224
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10225
 *
10226
 * [ VC: Standalone Document Declaration ]
10227
 * TODO The standalone document declaration must have the value "no"
10228
 * if any external markup declarations contain declarations of:
10229
 *  - attributes with default values, if elements to which these
10230
 *    attributes apply appear in the document without specifications
10231
 *    of values for these attributes, or
10232
 *  - entities (other than amp, lt, gt, apos, quot), if references
10233
 *    to those entities appear in the document, or
10234
 *  - attributes with values subject to normalization, where the
10235
 *    attribute appears in the document with a value which will change
10236
 *    as a result of normalization, or
10237
 *  - element types with element content, if white space occurs directly
10238
 *    within any instance of those types.
10239
 *
10240
 * Returns:
10241
 *   1 if standalone="yes"
10242
 *   0 if standalone="no"
10243
 *  -2 if standalone attribute is missing or invalid
10244
 *    (A standalone value of -2 means that the XML declaration was found,
10245
 *     but no value was specified for the standalone attribute).
10246
 */
10247
10248
int
10249
31.1k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10250
31.1k
    int standalone = -2;
10251
10252
31.1k
    SKIP_BLANKS;
10253
31.1k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10254
7.97k
  SKIP(10);
10255
7.97k
        SKIP_BLANKS;
10256
7.97k
  if (RAW != '=') {
10257
286
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10258
286
      return(standalone);
10259
286
        }
10260
7.69k
  NEXT;
10261
7.69k
  SKIP_BLANKS;
10262
7.69k
        if (RAW == '\''){
10263
3.21k
      NEXT;
10264
3.21k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10265
1.52k
          standalone = 0;
10266
1.52k
                SKIP(2);
10267
1.68k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10268
1.68k
                 (NXT(2) == 's')) {
10269
535
          standalone = 1;
10270
535
    SKIP(3);
10271
1.15k
            } else {
10272
1.15k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10273
1.15k
      }
10274
3.21k
      if (RAW != '\'') {
10275
1.78k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10276
1.78k
      } else
10277
1.43k
          NEXT;
10278
4.47k
  } else if (RAW == '"'){
10279
4.27k
      NEXT;
10280
4.27k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10281
2.91k
          standalone = 0;
10282
2.91k
    SKIP(2);
10283
2.91k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10284
1.36k
                 (NXT(2) == 's')) {
10285
519
          standalone = 1;
10286
519
                SKIP(3);
10287
842
            } else {
10288
842
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10289
842
      }
10290
4.27k
      if (RAW != '"') {
10291
1.43k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10292
1.43k
      } else
10293
2.83k
          NEXT;
10294
4.27k
  } else {
10295
206
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10296
206
        }
10297
7.69k
    }
10298
30.8k
    return(standalone);
10299
31.1k
}
10300
10301
/**
10302
 * xmlParseXMLDecl:
10303
 * @ctxt:  an XML parser context
10304
 *
10305
 * DEPRECATED: Internal function, don't use.
10306
 *
10307
 * parse an XML declaration header
10308
 *
10309
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10310
 */
10311
10312
void
10313
200k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10314
200k
    xmlChar *version;
10315
10316
    /*
10317
     * This value for standalone indicates that the document has an
10318
     * XML declaration but it does not have a standalone attribute.
10319
     * It will be overwritten later if a standalone attribute is found.
10320
     */
10321
200k
    ctxt->input->standalone = -2;
10322
10323
    /*
10324
     * We know that '<?xml' is here.
10325
     */
10326
200k
    SKIP(5);
10327
10328
200k
    if (!IS_BLANK_CH(RAW)) {
10329
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10330
0
                 "Blank needed after '<?xml'\n");
10331
0
    }
10332
200k
    SKIP_BLANKS;
10333
10334
    /*
10335
     * We must have the VersionInfo here.
10336
     */
10337
200k
    version = xmlParseVersionInfo(ctxt);
10338
200k
    if (version == NULL) {
10339
13.5k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10340
186k
    } else {
10341
186k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10342
      /*
10343
       * Changed here for XML-1.0 5th edition
10344
       */
10345
150k
      if (ctxt->options & XML_PARSE_OLD10) {
10346
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10347
0
                "Unsupported version '%s'\n",
10348
0
                version);
10349
150k
      } else {
10350
150k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10351
143k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10352
143k
                      "Unsupported version '%s'\n",
10353
143k
          version, NULL);
10354
143k
    } else {
10355
7.46k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10356
7.46k
              "Unsupported version '%s'\n",
10357
7.46k
              version);
10358
7.46k
    }
10359
150k
      }
10360
150k
  }
10361
186k
  if (ctxt->version != NULL)
10362
0
      xmlFree((void *) ctxt->version);
10363
186k
  ctxt->version = version;
10364
186k
    }
10365
10366
    /*
10367
     * We may have the encoding declaration
10368
     */
10369
200k
    if (!IS_BLANK_CH(RAW)) {
10370
24.5k
        if ((RAW == '?') && (NXT(1) == '>')) {
10371
10.4k
      SKIP(2);
10372
10.4k
      return;
10373
10.4k
  }
10374
14.1k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10375
14.1k
    }
10376
189k
    xmlParseEncodingDecl(ctxt);
10377
189k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10378
189k
         (ctxt->instate == XML_PARSER_EOF)) {
10379
  /*
10380
   * The XML REC instructs us to stop parsing right here
10381
   */
10382
1.68k
        return;
10383
1.68k
    }
10384
10385
    /*
10386
     * We may have the standalone status.
10387
     */
10388
188k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10389
161k
        if ((RAW == '?') && (NXT(1) == '>')) {
10390
157k
      SKIP(2);
10391
157k
      return;
10392
157k
  }
10393
4.62k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10394
4.62k
    }
10395
10396
    /*
10397
     * We can grow the input buffer freely at that point
10398
     */
10399
31.1k
    GROW;
10400
10401
31.1k
    SKIP_BLANKS;
10402
31.1k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10403
10404
31.1k
    SKIP_BLANKS;
10405
31.1k
    if ((RAW == '?') && (NXT(1) == '>')) {
10406
5.43k
        SKIP(2);
10407
25.7k
    } else if (RAW == '>') {
10408
        /* Deprecated old WD ... */
10409
2.14k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10410
2.14k
  NEXT;
10411
23.5k
    } else {
10412
23.5k
        int c;
10413
10414
23.5k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10415
1.20M
        while ((c = CUR) != 0) {
10416
1.20M
            NEXT;
10417
1.20M
            if (c == '>')
10418
16.9k
                break;
10419
1.20M
        }
10420
23.5k
    }
10421
31.1k
}
10422
10423
/**
10424
 * xmlParseMisc:
10425
 * @ctxt:  an XML parser context
10426
 *
10427
 * DEPRECATED: Internal function, don't use.
10428
 *
10429
 * parse an XML Misc* optional field.
10430
 *
10431
 * [27] Misc ::= Comment | PI |  S
10432
 */
10433
10434
void
10435
599k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10436
643k
    while (ctxt->instate != XML_PARSER_EOF) {
10437
642k
        SKIP_BLANKS;
10438
642k
        GROW;
10439
642k
        if ((RAW == '<') && (NXT(1) == '?')) {
10440
38.7k
      xmlParsePI(ctxt);
10441
604k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10442
5.84k
      xmlParseComment(ctxt);
10443
598k
        } else {
10444
598k
            break;
10445
598k
        }
10446
642k
    }
10447
599k
}
10448
10449
/**
10450
 * xmlParseDocument:
10451
 * @ctxt:  an XML parser context
10452
 *
10453
 * parse an XML document (and build a tree if using the standard SAX
10454
 * interface).
10455
 *
10456
 * [1] document ::= prolog element Misc*
10457
 *
10458
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10459
 *
10460
 * Returns 0 on success, -1 in case of error. The parser context is augmented
10461
 *                as a result of the parsing.
10462
 */
10463
10464
int
10465
332k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10466
332k
    xmlChar start[4];
10467
332k
    xmlCharEncoding enc;
10468
10469
332k
    xmlInitParser();
10470
10471
332k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10472
0
        return(-1);
10473
10474
332k
    GROW;
10475
10476
    /*
10477
     * SAX: detecting the level.
10478
     */
10479
332k
    xmlDetectSAX2(ctxt);
10480
10481
    /*
10482
     * SAX: beginning of the document processing.
10483
     */
10484
332k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10485
332k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10486
332k
    if (ctxt->instate == XML_PARSER_EOF)
10487
0
  return(-1);
10488
10489
332k
    if ((ctxt->encoding == NULL) &&
10490
332k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10491
  /*
10492
   * Get the 4 first bytes and decode the charset
10493
   * if enc != XML_CHAR_ENCODING_NONE
10494
   * plug some encoding conversion routines.
10495
   */
10496
330k
  start[0] = RAW;
10497
330k
  start[1] = NXT(1);
10498
330k
  start[2] = NXT(2);
10499
330k
  start[3] = NXT(3);
10500
330k
  enc = xmlDetectCharEncoding(&start[0], 4);
10501
330k
  if (enc != XML_CHAR_ENCODING_NONE) {
10502
217k
      xmlSwitchEncoding(ctxt, enc);
10503
217k
  }
10504
330k
    }
10505
10506
10507
332k
    if (CUR == 0) {
10508
2.00k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10509
2.00k
  return(-1);
10510
2.00k
    }
10511
10512
330k
    GROW;
10513
330k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10514
10515
  /*
10516
   * Note that we will switch encoding on the fly.
10517
   */
10518
200k
  xmlParseXMLDecl(ctxt);
10519
200k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10520
200k
      (ctxt->instate == XML_PARSER_EOF)) {
10521
      /*
10522
       * The XML REC instructs us to stop parsing right here
10523
       */
10524
2.22k
      return(-1);
10525
2.22k
  }
10526
198k
  ctxt->standalone = ctxt->input->standalone;
10527
198k
  SKIP_BLANKS;
10528
198k
    } else {
10529
129k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10530
129k
    }
10531
328k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10532
296k
        ctxt->sax->startDocument(ctxt->userData);
10533
328k
    if (ctxt->instate == XML_PARSER_EOF)
10534
783
  return(-1);
10535
327k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10536
327k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10537
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10538
0
    }
10539
10540
    /*
10541
     * The Misc part of the Prolog
10542
     */
10543
327k
    xmlParseMisc(ctxt);
10544
10545
    /*
10546
     * Then possibly doc type declaration(s) and more Misc
10547
     * (doctypedecl Misc*)?
10548
     */
10549
327k
    GROW;
10550
327k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10551
10552
76.8k
  ctxt->inSubset = 1;
10553
76.8k
  xmlParseDocTypeDecl(ctxt);
10554
76.8k
  if (RAW == '[') {
10555
62.7k
      ctxt->instate = XML_PARSER_DTD;
10556
62.7k
      xmlParseInternalSubset(ctxt);
10557
62.7k
      if (ctxt->instate == XML_PARSER_EOF)
10558
46.6k
    return(-1);
10559
62.7k
  }
10560
10561
  /*
10562
   * Create and update the external subset.
10563
   */
10564
30.2k
  ctxt->inSubset = 2;
10565
30.2k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10566
30.2k
      (!ctxt->disableSAX))
10567
20.4k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10568
20.4k
                                ctxt->extSubSystem, ctxt->extSubURI);
10569
30.2k
  if (ctxt->instate == XML_PARSER_EOF)
10570
7.13k
      return(-1);
10571
23.1k
  ctxt->inSubset = 0;
10572
10573
23.1k
        xmlCleanSpecialAttr(ctxt);
10574
10575
23.1k
  ctxt->instate = XML_PARSER_PROLOG;
10576
23.1k
  xmlParseMisc(ctxt);
10577
23.1k
    }
10578
10579
    /*
10580
     * Time to start parsing the tree itself
10581
     */
10582
273k
    GROW;
10583
273k
    if (RAW != '<') {
10584
24.8k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10585
24.8k
           "Start tag expected, '<' not found\n");
10586
248k
    } else {
10587
248k
  ctxt->instate = XML_PARSER_CONTENT;
10588
248k
  xmlParseElement(ctxt);
10589
248k
  ctxt->instate = XML_PARSER_EPILOG;
10590
10591
10592
  /*
10593
   * The Misc part at the end
10594
   */
10595
248k
  xmlParseMisc(ctxt);
10596
10597
248k
  if (RAW != 0) {
10598
15.2k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10599
15.2k
  }
10600
248k
  ctxt->instate = XML_PARSER_EOF;
10601
248k
    }
10602
10603
    /*
10604
     * SAX: end of the document processing.
10605
     */
10606
273k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10607
273k
        ctxt->sax->endDocument(ctxt->userData);
10608
10609
    /*
10610
     * Remove locally kept entity definitions if the tree was not built
10611
     */
10612
273k
    if ((ctxt->myDoc != NULL) &&
10613
273k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10614
2.19k
  xmlFreeDoc(ctxt->myDoc);
10615
2.19k
  ctxt->myDoc = NULL;
10616
2.19k
    }
10617
10618
273k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10619
192k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10620
192k
  if (ctxt->valid)
10621
192k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10622
192k
  if (ctxt->nsWellFormed)
10623
116k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10624
192k
  if (ctxt->options & XML_PARSE_OLD10)
10625
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10626
192k
    }
10627
273k
    if (! ctxt->wellFormed) {
10628
80.7k
  ctxt->valid = 0;
10629
80.7k
  return(-1);
10630
80.7k
    }
10631
192k
    return(0);
10632
273k
}
10633
10634
/**
10635
 * xmlParseExtParsedEnt:
10636
 * @ctxt:  an XML parser context
10637
 *
10638
 * parse a general parsed entity
10639
 * An external general parsed entity is well-formed if it matches the
10640
 * production labeled extParsedEnt.
10641
 *
10642
 * [78] extParsedEnt ::= TextDecl? content
10643
 *
10644
 * Returns 0 on success, -1 in case of error. The parser context is augmented
10645
 *                as a result of the parsing.
10646
 */
10647
10648
int
10649
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10650
0
    xmlChar start[4];
10651
0
    xmlCharEncoding enc;
10652
10653
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10654
0
        return(-1);
10655
10656
0
    xmlDetectSAX2(ctxt);
10657
10658
0
    GROW;
10659
10660
    /*
10661
     * SAX: beginning of the document processing.
10662
     */
10663
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10664
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10665
10666
    /*
10667
     * Get the 4 first bytes and decode the charset
10668
     * if enc != XML_CHAR_ENCODING_NONE
10669
     * plug some encoding conversion routines.
10670
     */
10671
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10672
0
  start[0] = RAW;
10673
0
  start[1] = NXT(1);
10674
0
  start[2] = NXT(2);
10675
0
  start[3] = NXT(3);
10676
0
  enc = xmlDetectCharEncoding(start, 4);
10677
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10678
0
      xmlSwitchEncoding(ctxt, enc);
10679
0
  }
10680
0
    }
10681
10682
10683
0
    if (CUR == 0) {
10684
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10685
0
    }
10686
10687
    /*
10688
     * Check for the XMLDecl in the Prolog.
10689
     */
10690
0
    GROW;
10691
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10692
10693
  /*
10694
   * Note that we will switch encoding on the fly.
10695
   */
10696
0
  xmlParseXMLDecl(ctxt);
10697
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10698
      /*
10699
       * The XML REC instructs us to stop parsing right here
10700
       */
10701
0
      return(-1);
10702
0
  }
10703
0
  SKIP_BLANKS;
10704
0
    } else {
10705
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10706
0
    }
10707
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10708
0
        ctxt->sax->startDocument(ctxt->userData);
10709
0
    if (ctxt->instate == XML_PARSER_EOF)
10710
0
  return(-1);
10711
10712
    /*
10713
     * Doing validity checking on chunk doesn't make sense
10714
     */
10715
0
    ctxt->instate = XML_PARSER_CONTENT;
10716
0
    ctxt->validate = 0;
10717
0
    ctxt->loadsubset = 0;
10718
0
    ctxt->depth = 0;
10719
10720
0
    xmlParseContent(ctxt);
10721
0
    if (ctxt->instate == XML_PARSER_EOF)
10722
0
  return(-1);
10723
10724
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10725
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10726
0
    } else if (RAW != 0) {
10727
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10728
0
    }
10729
10730
    /*
10731
     * SAX: end of the document processing.
10732
     */
10733
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10734
0
        ctxt->sax->endDocument(ctxt->userData);
10735
10736
0
    if (! ctxt->wellFormed) return(-1);
10737
0
    return(0);
10738
0
}
10739
10740
#ifdef LIBXML_PUSH_ENABLED
10741
/************************************************************************
10742
 *                  *
10743
 *    Progressive parsing interfaces        *
10744
 *                  *
10745
 ************************************************************************/
10746
10747
/**
10748
 * xmlParseLookupChar:
10749
 * @ctxt:  an XML parser context
10750
 * @c:  character
10751
 *
10752
 * Check whether the input buffer contains a character.
10753
 */
10754
static int
10755
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10756
    const xmlChar *cur;
10757
10758
    if (ctxt->checkIndex == 0) {
10759
        cur = ctxt->input->cur + 1;
10760
    } else {
10761
        cur = ctxt->input->cur + ctxt->checkIndex;
10762
    }
10763
10764
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10765
        size_t index = ctxt->input->end - ctxt->input->cur;
10766
10767
        if (index > LONG_MAX) {
10768
            ctxt->checkIndex = 0;
10769
            return(1);
10770
        }
10771
        ctxt->checkIndex = index;
10772
        return(0);
10773
    } else {
10774
        ctxt->checkIndex = 0;
10775
        return(1);
10776
    }
10777
}
10778
10779
/**
10780
 * xmlParseLookupString:
10781
 * @ctxt:  an XML parser context
10782
 * @startDelta: delta to apply at the start
10783
 * @str:  string
10784
 * @strLen:  length of string
10785
 *
10786
 * Check whether the input buffer contains a string.
10787
 */
10788
static const xmlChar *
10789
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10790
                     const char *str, size_t strLen) {
10791
    const xmlChar *cur, *term;
10792
10793
    if (ctxt->checkIndex == 0) {
10794
        cur = ctxt->input->cur + startDelta;
10795
    } else {
10796
        cur = ctxt->input->cur + ctxt->checkIndex;
10797
    }
10798
10799
    term = BAD_CAST strstr((const char *) cur, str);
10800
    if (term == NULL) {
10801
        const xmlChar *end = ctxt->input->end;
10802
        size_t index;
10803
10804
        /* Rescan (strLen - 1) characters. */
10805
        if ((size_t) (end - cur) < strLen)
10806
            end = cur;
10807
        else
10808
            end -= strLen - 1;
10809
        index = end - ctxt->input->cur;
10810
        if (index > LONG_MAX) {
10811
            ctxt->checkIndex = 0;
10812
            return(ctxt->input->end - strLen);
10813
        }
10814
        ctxt->checkIndex = index;
10815
    } else {
10816
        ctxt->checkIndex = 0;
10817
    }
10818
10819
    return(term);
10820
}
10821
10822
/**
10823
 * xmlParseLookupCharData:
10824
 * @ctxt:  an XML parser context
10825
 *
10826
 * Check whether the input buffer contains terminated char data.
10827
 */
10828
static int
10829
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10830
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10831
    const xmlChar *end = ctxt->input->end;
10832
    size_t index;
10833
10834
    while (cur < end) {
10835
        if ((*cur == '<') || (*cur == '&')) {
10836
            ctxt->checkIndex = 0;
10837
            return(1);
10838
        }
10839
        cur++;
10840
    }
10841
10842
    index = cur - ctxt->input->cur;
10843
    if (index > LONG_MAX) {
10844
        ctxt->checkIndex = 0;
10845
        return(1);
10846
    }
10847
    ctxt->checkIndex = index;
10848
    return(0);
10849
}
10850
10851
/**
10852
 * xmlParseLookupGt:
10853
 * @ctxt:  an XML parser context
10854
 *
10855
 * Check whether there's enough data in the input buffer to finish parsing
10856
 * a start tag. This has to take quotes into account.
10857
 */
10858
static int
10859
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10860
    const xmlChar *cur;
10861
    const xmlChar *end = ctxt->input->end;
10862
    int state = ctxt->endCheckState;
10863
    size_t index;
10864
10865
    if (ctxt->checkIndex == 0)
10866
        cur = ctxt->input->cur + 1;
10867
    else
10868
        cur = ctxt->input->cur + ctxt->checkIndex;
10869
10870
    while (cur < end) {
10871
        if (state) {
10872
            if (*cur == state)
10873
                state = 0;
10874
        } else if (*cur == '\'' || *cur == '"') {
10875
            state = *cur;
10876
        } else if (*cur == '>') {
10877
            ctxt->checkIndex = 0;
10878
            ctxt->endCheckState = 0;
10879
            return(1);
10880
        }
10881
        cur++;
10882
    }
10883
10884
    index = cur - ctxt->input->cur;
10885
    if (index > LONG_MAX) {
10886
        ctxt->checkIndex = 0;
10887
        ctxt->endCheckState = 0;
10888
        return(1);
10889
    }
10890
    ctxt->checkIndex = index;
10891
    ctxt->endCheckState = state;
10892
    return(0);
10893
}
10894
10895
/**
10896
 * xmlParseLookupInternalSubset:
10897
 * @ctxt:  an XML parser context
10898
 *
10899
 * Check whether there's enough data in the input buffer to finish parsing
10900
 * the internal subset.
10901
 */
10902
static int
10903
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10904
    /*
10905
     * Sorry, but progressive parsing of the internal subset is not
10906
     * supported. We first check that the full content of the internal
10907
     * subset is available and parsing is launched only at that point.
10908
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10909
     * not in a ']]>' sequence which are conditional sections.
10910
     */
10911
    const xmlChar *cur, *start;
10912
    const xmlChar *end = ctxt->input->end;
10913
    int state = ctxt->endCheckState;
10914
    size_t index;
10915
10916
    if (ctxt->checkIndex == 0) {
10917
        cur = ctxt->input->cur + 1;
10918
    } else {
10919
        cur = ctxt->input->cur + ctxt->checkIndex;
10920
    }
10921
    start = cur;
10922
10923
    while (cur < end) {
10924
        if (state == '-') {
10925
            if ((*cur == '-') &&
10926
                (cur[1] == '-') &&
10927
                (cur[2] == '>')) {
10928
                state = 0;
10929
                cur += 3;
10930
                start = cur;
10931
                continue;
10932
            }
10933
        }
10934
        else if (state == ']') {
10935
            if (*cur == '>') {
10936
                ctxt->checkIndex = 0;
10937
                ctxt->endCheckState = 0;
10938
                return(1);
10939
            }
10940
            if (IS_BLANK_CH(*cur)) {
10941
                state = ' ';
10942
            } else if (*cur != ']') {
10943
                state = 0;
10944
                start = cur;
10945
                continue;
10946
            }
10947
        }
10948
        else if (state == ' ') {
10949
            if (*cur == '>') {
10950
                ctxt->checkIndex = 0;
10951
                ctxt->endCheckState = 0;
10952
                return(1);
10953
            }
10954
            if (!IS_BLANK_CH(*cur)) {
10955
                state = 0;
10956
                start = cur;
10957
                continue;
10958
            }
10959
        }
10960
        else if (state != 0) {
10961
            if (*cur == state) {
10962
                state = 0;
10963
                start = cur + 1;
10964
            }
10965
        }
10966
        else if (*cur == '<') {
10967
            if ((cur[1] == '!') &&
10968
                (cur[2] == '-') &&
10969
                (cur[3] == '-')) {
10970
                state = '-';
10971
                cur += 4;
10972
                /* Don't treat <!--> as comment */
10973
                start = cur;
10974
                continue;
10975
            }
10976
        }
10977
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10978
            state = *cur;
10979
        }
10980
10981
        cur++;
10982
    }
10983
10984
    /*
10985
     * Rescan the three last characters to detect "<!--" and "-->"
10986
     * split across chunks.
10987
     */
10988
    if ((state == 0) || (state == '-')) {
10989
        if (cur - start < 3)
10990
            cur = start;
10991
        else
10992
            cur -= 3;
10993
    }
10994
    index = cur - ctxt->input->cur;
10995
    if (index > LONG_MAX) {
10996
        ctxt->checkIndex = 0;
10997
        ctxt->endCheckState = 0;
10998
        return(1);
10999
    }
11000
    ctxt->checkIndex = index;
11001
    ctxt->endCheckState = state;
11002
    return(0);
11003
}
11004
11005
/**
11006
 * xmlCheckCdataPush:
11007
 * @utf: pointer to the block of characters
11008
 * @len: length of the block in bytes
11009
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11010
 *
11011
 * Check that the block of characters is okay as SCdata content [20]
11012
 *
11013
 * Returns the number of bytes to pass if okay, a negative index where an
11014
 *         UTF-8 error occurred otherwise
11015
 */
11016
static int
11017
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11018
    int ix;
11019
    unsigned char c;
11020
    int codepoint;
11021
11022
    if ((utf == NULL) || (len <= 0))
11023
        return(0);
11024
11025
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11026
        c = utf[ix];
11027
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11028
      if (c >= 0x20)
11029
    ix++;
11030
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11031
          ix++;
11032
      else
11033
          return(-ix);
11034
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11035
      if (ix + 2 > len) return(complete ? -ix : ix);
11036
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11037
          return(-ix);
11038
      codepoint = (utf[ix] & 0x1f) << 6;
11039
      codepoint |= utf[ix+1] & 0x3f;
11040
      if (!xmlIsCharQ(codepoint))
11041
          return(-ix);
11042
      ix += 2;
11043
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11044
      if (ix + 3 > len) return(complete ? -ix : ix);
11045
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11046
          ((utf[ix+2] & 0xc0) != 0x80))
11047
        return(-ix);
11048
      codepoint = (utf[ix] & 0xf) << 12;
11049
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11050
      codepoint |= utf[ix+2] & 0x3f;
11051
      if (!xmlIsCharQ(codepoint))
11052
          return(-ix);
11053
      ix += 3;
11054
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11055
      if (ix + 4 > len) return(complete ? -ix : ix);
11056
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11057
          ((utf[ix+2] & 0xc0) != 0x80) ||
11058
    ((utf[ix+3] & 0xc0) != 0x80))
11059
        return(-ix);
11060
      codepoint = (utf[ix] & 0x7) << 18;
11061
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11062
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11063
      codepoint |= utf[ix+3] & 0x3f;
11064
      if (!xmlIsCharQ(codepoint))
11065
          return(-ix);
11066
      ix += 4;
11067
  } else        /* unknown encoding */
11068
      return(-ix);
11069
      }
11070
      return(ix);
11071
}
11072
11073
/**
11074
 * xmlParseTryOrFinish:
11075
 * @ctxt:  an XML parser context
11076
 * @terminate:  last chunk indicator
11077
 *
11078
 * Try to progress on parsing
11079
 *
11080
 * Returns zero if no parsing was possible
11081
 */
11082
static int
11083
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11084
    int ret = 0;
11085
    int tlen;
11086
    size_t avail;
11087
    xmlChar cur, next;
11088
11089
    if (ctxt->input == NULL)
11090
        return(0);
11091
11092
#ifdef DEBUG_PUSH
11093
    switch (ctxt->instate) {
11094
  case XML_PARSER_EOF:
11095
      xmlGenericError(xmlGenericErrorContext,
11096
        "PP: try EOF\n"); break;
11097
  case XML_PARSER_START:
11098
      xmlGenericError(xmlGenericErrorContext,
11099
        "PP: try START\n"); break;
11100
  case XML_PARSER_MISC:
11101
      xmlGenericError(xmlGenericErrorContext,
11102
        "PP: try MISC\n");break;
11103
  case XML_PARSER_COMMENT:
11104
      xmlGenericError(xmlGenericErrorContext,
11105
        "PP: try COMMENT\n");break;
11106
  case XML_PARSER_PROLOG:
11107
      xmlGenericError(xmlGenericErrorContext,
11108
        "PP: try PROLOG\n");break;
11109
  case XML_PARSER_START_TAG:
11110
      xmlGenericError(xmlGenericErrorContext,
11111
        "PP: try START_TAG\n");break;
11112
  case XML_PARSER_CONTENT:
11113
      xmlGenericError(xmlGenericErrorContext,
11114
        "PP: try CONTENT\n");break;
11115
  case XML_PARSER_CDATA_SECTION:
11116
      xmlGenericError(xmlGenericErrorContext,
11117
        "PP: try CDATA_SECTION\n");break;
11118
  case XML_PARSER_END_TAG:
11119
      xmlGenericError(xmlGenericErrorContext,
11120
        "PP: try END_TAG\n");break;
11121
  case XML_PARSER_ENTITY_DECL:
11122
      xmlGenericError(xmlGenericErrorContext,
11123
        "PP: try ENTITY_DECL\n");break;
11124
  case XML_PARSER_ENTITY_VALUE:
11125
      xmlGenericError(xmlGenericErrorContext,
11126
        "PP: try ENTITY_VALUE\n");break;
11127
  case XML_PARSER_ATTRIBUTE_VALUE:
11128
      xmlGenericError(xmlGenericErrorContext,
11129
        "PP: try ATTRIBUTE_VALUE\n");break;
11130
  case XML_PARSER_DTD:
11131
      xmlGenericError(xmlGenericErrorContext,
11132
        "PP: try DTD\n");break;
11133
  case XML_PARSER_EPILOG:
11134
      xmlGenericError(xmlGenericErrorContext,
11135
        "PP: try EPILOG\n");break;
11136
  case XML_PARSER_PI:
11137
      xmlGenericError(xmlGenericErrorContext,
11138
        "PP: try PI\n");break;
11139
        case XML_PARSER_IGNORE:
11140
            xmlGenericError(xmlGenericErrorContext,
11141
        "PP: try IGNORE\n");break;
11142
    }
11143
#endif
11144
11145
    if ((ctxt->input != NULL) &&
11146
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11147
        xmlParserShrink(ctxt);
11148
    }
11149
11150
    while (ctxt->instate != XML_PARSER_EOF) {
11151
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11152
      return(0);
11153
11154
  if (ctxt->input == NULL) break;
11155
  if (ctxt->input->buf != NULL) {
11156
      /*
11157
       * If we are operating on converted input, try to flush
11158
       * remaining chars to avoid them stalling in the non-converted
11159
       * buffer.
11160
       */
11161
      if ((ctxt->input->buf->raw != NULL) &&
11162
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11163
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11164
                                                 ctxt->input);
11165
    size_t current = ctxt->input->cur - ctxt->input->base;
11166
                int res;
11167
11168
    res = xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11169
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11170
                                      base, current);
11171
                if (res < 0) {
11172
                    xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
11173
                    xmlHaltParser(ctxt);
11174
                    return(0);
11175
                }
11176
      }
11177
  }
11178
        avail = ctxt->input->end - ctxt->input->cur;
11179
        if (avail < 1)
11180
      goto done;
11181
        switch (ctxt->instate) {
11182
            case XML_PARSER_EOF:
11183
          /*
11184
     * Document parsing is done !
11185
     */
11186
          goto done;
11187
            case XML_PARSER_START:
11188
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11189
        xmlChar start[4];
11190
        xmlCharEncoding enc;
11191
11192
        /*
11193
         * Very first chars read from the document flow.
11194
         */
11195
        if (avail < 4)
11196
      goto done;
11197
11198
        /*
11199
         * Get the 4 first bytes and decode the charset
11200
         * if enc != XML_CHAR_ENCODING_NONE
11201
         * plug some encoding conversion routines,
11202
         * else xmlSwitchEncoding will set to (default)
11203
         * UTF8.
11204
         */
11205
        start[0] = RAW;
11206
        start[1] = NXT(1);
11207
        start[2] = NXT(2);
11208
        start[3] = NXT(3);
11209
        enc = xmlDetectCharEncoding(start, 4);
11210
                    /*
11211
                     * We need more bytes to detect EBCDIC code pages.
11212
                     * See xmlDetectEBCDIC.
11213
                     */
11214
                    if ((enc == XML_CHAR_ENCODING_EBCDIC) &&
11215
                        (!terminate) && (avail < 200))
11216
                        goto done;
11217
        xmlSwitchEncoding(ctxt, enc);
11218
        break;
11219
    }
11220
11221
    if (avail < 2)
11222
        goto done;
11223
    cur = ctxt->input->cur[0];
11224
    next = ctxt->input->cur[1];
11225
    if (cur == 0) {
11226
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11227
      ctxt->sax->setDocumentLocator(ctxt->userData,
11228
                  &xmlDefaultSAXLocator);
11229
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11230
        xmlHaltParser(ctxt);
11231
#ifdef DEBUG_PUSH
11232
        xmlGenericError(xmlGenericErrorContext,
11233
          "PP: entering EOF\n");
11234
#endif
11235
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11236
      ctxt->sax->endDocument(ctxt->userData);
11237
        goto done;
11238
    }
11239
          if ((cur == '<') && (next == '?')) {
11240
        /* PI or XML decl */
11241
        if (avail < 5) goto done;
11242
        if ((!terminate) &&
11243
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11244
      goto done;
11245
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11246
      ctxt->sax->setDocumentLocator(ctxt->userData,
11247
                  &xmlDefaultSAXLocator);
11248
        if ((ctxt->input->cur[2] == 'x') &&
11249
      (ctxt->input->cur[3] == 'm') &&
11250
      (ctxt->input->cur[4] == 'l') &&
11251
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11252
      ret += 5;
11253
#ifdef DEBUG_PUSH
11254
      xmlGenericError(xmlGenericErrorContext,
11255
        "PP: Parsing XML Decl\n");
11256
#endif
11257
      xmlParseXMLDecl(ctxt);
11258
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11259
          /*
11260
           * The XML REC instructs us to stop parsing right
11261
           * here
11262
           */
11263
          xmlHaltParser(ctxt);
11264
          return(0);
11265
      }
11266
      ctxt->standalone = ctxt->input->standalone;
11267
      if ((ctxt->encoding == NULL) &&
11268
          (ctxt->input->encoding != NULL))
11269
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11270
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11271
          (!ctxt->disableSAX))
11272
          ctxt->sax->startDocument(ctxt->userData);
11273
      ctxt->instate = XML_PARSER_MISC;
11274
#ifdef DEBUG_PUSH
11275
      xmlGenericError(xmlGenericErrorContext,
11276
        "PP: entering MISC\n");
11277
#endif
11278
        } else {
11279
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11280
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11281
          (!ctxt->disableSAX))
11282
          ctxt->sax->startDocument(ctxt->userData);
11283
      ctxt->instate = XML_PARSER_MISC;
11284
#ifdef DEBUG_PUSH
11285
      xmlGenericError(xmlGenericErrorContext,
11286
        "PP: entering MISC\n");
11287
#endif
11288
        }
11289
    } else {
11290
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11291
      ctxt->sax->setDocumentLocator(ctxt->userData,
11292
                  &xmlDefaultSAXLocator);
11293
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11294
        if (ctxt->version == NULL) {
11295
            xmlErrMemory(ctxt, NULL);
11296
      break;
11297
        }
11298
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11299
            (!ctxt->disableSAX))
11300
      ctxt->sax->startDocument(ctxt->userData);
11301
        ctxt->instate = XML_PARSER_MISC;
11302
#ifdef DEBUG_PUSH
11303
        xmlGenericError(xmlGenericErrorContext,
11304
          "PP: entering MISC\n");
11305
#endif
11306
    }
11307
    break;
11308
            case XML_PARSER_START_TAG: {
11309
          const xmlChar *name;
11310
    const xmlChar *prefix = NULL;
11311
    const xmlChar *URI = NULL;
11312
                int line = ctxt->input->line;
11313
    int nsNr = ctxt->nsNr;
11314
11315
    if ((avail < 2) && (ctxt->inputNr == 1))
11316
        goto done;
11317
    cur = ctxt->input->cur[0];
11318
          if (cur != '<') {
11319
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11320
        xmlHaltParser(ctxt);
11321
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11322
      ctxt->sax->endDocument(ctxt->userData);
11323
        goto done;
11324
    }
11325
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11326
                    goto done;
11327
    if (ctxt->spaceNr == 0)
11328
        spacePush(ctxt, -1);
11329
    else if (*ctxt->space == -2)
11330
        spacePush(ctxt, -1);
11331
    else
11332
        spacePush(ctxt, *ctxt->space);
11333
#ifdef LIBXML_SAX1_ENABLED
11334
    if (ctxt->sax2)
11335
#endif /* LIBXML_SAX1_ENABLED */
11336
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11337
#ifdef LIBXML_SAX1_ENABLED
11338
    else
11339
        name = xmlParseStartTag(ctxt);
11340
#endif /* LIBXML_SAX1_ENABLED */
11341
    if (ctxt->instate == XML_PARSER_EOF)
11342
        goto done;
11343
    if (name == NULL) {
11344
        spacePop(ctxt);
11345
        xmlHaltParser(ctxt);
11346
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11347
      ctxt->sax->endDocument(ctxt->userData);
11348
        goto done;
11349
    }
11350
#ifdef LIBXML_VALID_ENABLED
11351
    /*
11352
     * [ VC: Root Element Type ]
11353
     * The Name in the document type declaration must match
11354
     * the element type of the root element.
11355
     */
11356
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11357
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11358
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11359
#endif /* LIBXML_VALID_ENABLED */
11360
11361
    /*
11362
     * Check for an Empty Element.
11363
     */
11364
    if ((RAW == '/') && (NXT(1) == '>')) {
11365
        SKIP(2);
11366
11367
        if (ctxt->sax2) {
11368
      if ((ctxt->sax != NULL) &&
11369
          (ctxt->sax->endElementNs != NULL) &&
11370
          (!ctxt->disableSAX))
11371
          ctxt->sax->endElementNs(ctxt->userData, name,
11372
                                  prefix, URI);
11373
      if (ctxt->nsNr - nsNr > 0)
11374
          nsPop(ctxt, ctxt->nsNr - nsNr);
11375
#ifdef LIBXML_SAX1_ENABLED
11376
        } else {
11377
      if ((ctxt->sax != NULL) &&
11378
          (ctxt->sax->endElement != NULL) &&
11379
          (!ctxt->disableSAX))
11380
          ctxt->sax->endElement(ctxt->userData, name);
11381
#endif /* LIBXML_SAX1_ENABLED */
11382
        }
11383
        if (ctxt->instate == XML_PARSER_EOF)
11384
      goto done;
11385
        spacePop(ctxt);
11386
        if (ctxt->nameNr == 0) {
11387
      ctxt->instate = XML_PARSER_EPILOG;
11388
        } else {
11389
      ctxt->instate = XML_PARSER_CONTENT;
11390
        }
11391
        break;
11392
    }
11393
    if (RAW == '>') {
11394
        NEXT;
11395
    } else {
11396
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11397
           "Couldn't find end of Start Tag %s\n",
11398
           name);
11399
        nodePop(ctxt);
11400
        spacePop(ctxt);
11401
    }
11402
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11403
11404
    ctxt->instate = XML_PARSER_CONTENT;
11405
                break;
11406
      }
11407
            case XML_PARSER_CONTENT: {
11408
    if ((avail < 2) && (ctxt->inputNr == 1))
11409
        goto done;
11410
    cur = ctxt->input->cur[0];
11411
    next = ctxt->input->cur[1];
11412
11413
    if ((cur == '<') && (next == '/')) {
11414
        ctxt->instate = XML_PARSER_END_TAG;
11415
        break;
11416
          } else if ((cur == '<') && (next == '?')) {
11417
        if ((!terminate) &&
11418
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11419
      goto done;
11420
        xmlParsePI(ctxt);
11421
        ctxt->instate = XML_PARSER_CONTENT;
11422
    } else if ((cur == '<') && (next != '!')) {
11423
        ctxt->instate = XML_PARSER_START_TAG;
11424
        break;
11425
    } else if ((cur == '<') && (next == '!') &&
11426
               (ctxt->input->cur[2] == '-') &&
11427
         (ctxt->input->cur[3] == '-')) {
11428
        if ((!terminate) &&
11429
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11430
      goto done;
11431
        xmlParseComment(ctxt);
11432
        ctxt->instate = XML_PARSER_CONTENT;
11433
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11434
        (ctxt->input->cur[2] == '[') &&
11435
        (ctxt->input->cur[3] == 'C') &&
11436
        (ctxt->input->cur[4] == 'D') &&
11437
        (ctxt->input->cur[5] == 'A') &&
11438
        (ctxt->input->cur[6] == 'T') &&
11439
        (ctxt->input->cur[7] == 'A') &&
11440
        (ctxt->input->cur[8] == '[')) {
11441
        SKIP(9);
11442
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11443
        break;
11444
    } else if ((cur == '<') && (next == '!') &&
11445
               (avail < 9)) {
11446
        goto done;
11447
    } else if (cur == '<') {
11448
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11449
                    "detected an error in element content\n");
11450
                    SKIP(1);
11451
    } else if (cur == '&') {
11452
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11453
      goto done;
11454
        xmlParseReference(ctxt);
11455
    } else {
11456
        /* TODO Avoid the extra copy, handle directly !!! */
11457
        /*
11458
         * Goal of the following test is:
11459
         *  - minimize calls to the SAX 'character' callback
11460
         *    when they are mergeable
11461
         *  - handle a problem for isBlank when we only parse
11462
         *    a sequence of blank chars and the next one is
11463
         *    not available to check against '<' presence.
11464
         *  - tries to homogenize the differences in SAX
11465
         *    callbacks between the push and pull versions
11466
         *    of the parser.
11467
         */
11468
        if ((ctxt->inputNr == 1) &&
11469
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11470
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11471
          goto done;
11472
                    }
11473
                    ctxt->checkIndex = 0;
11474
        xmlParseCharDataInternal(ctxt, !terminate);
11475
    }
11476
    break;
11477
      }
11478
            case XML_PARSER_END_TAG:
11479
    if (avail < 2)
11480
        goto done;
11481
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11482
        goto done;
11483
    if (ctxt->sax2) {
11484
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11485
        nameNsPop(ctxt);
11486
    }
11487
#ifdef LIBXML_SAX1_ENABLED
11488
      else
11489
        xmlParseEndTag1(ctxt, 0);
11490
#endif /* LIBXML_SAX1_ENABLED */
11491
    if (ctxt->instate == XML_PARSER_EOF) {
11492
        /* Nothing */
11493
    } else if (ctxt->nameNr == 0) {
11494
        ctxt->instate = XML_PARSER_EPILOG;
11495
    } else {
11496
        ctxt->instate = XML_PARSER_CONTENT;
11497
    }
11498
    break;
11499
            case XML_PARSER_CDATA_SECTION: {
11500
          /*
11501
     * The Push mode need to have the SAX callback for
11502
     * cdataBlock merge back contiguous callbacks.
11503
     */
11504
    const xmlChar *term;
11505
11506
                if (terminate) {
11507
                    /*
11508
                     * Don't call xmlParseLookupString. If 'terminate'
11509
                     * is set, checkIndex is invalid.
11510
                     */
11511
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11512
                                           "]]>");
11513
                } else {
11514
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11515
                }
11516
11517
    if (term == NULL) {
11518
        int tmp, size;
11519
11520
                    if (terminate) {
11521
                        /* Unfinished CDATA section */
11522
                        size = ctxt->input->end - ctxt->input->cur;
11523
                    } else {
11524
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11525
                            goto done;
11526
                        ctxt->checkIndex = 0;
11527
                        /* XXX: Why don't we pass the full buffer? */
11528
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11529
                    }
11530
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11531
                    if (tmp <= 0) {
11532
                        tmp = -tmp;
11533
                        ctxt->input->cur += tmp;
11534
                        goto encoding_error;
11535
                    }
11536
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11537
                        if (ctxt->sax->cdataBlock != NULL)
11538
                            ctxt->sax->cdataBlock(ctxt->userData,
11539
                                                  ctxt->input->cur, tmp);
11540
                        else if (ctxt->sax->characters != NULL)
11541
                            ctxt->sax->characters(ctxt->userData,
11542
                                                  ctxt->input->cur, tmp);
11543
                    }
11544
                    if (ctxt->instate == XML_PARSER_EOF)
11545
                        goto done;
11546
                    SKIPL(tmp);
11547
    } else {
11548
                    int base = term - CUR_PTR;
11549
        int tmp;
11550
11551
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11552
        if ((tmp < 0) || (tmp != base)) {
11553
      tmp = -tmp;
11554
      ctxt->input->cur += tmp;
11555
      goto encoding_error;
11556
        }
11557
        if ((ctxt->sax != NULL) && (base == 0) &&
11558
            (ctxt->sax->cdataBlock != NULL) &&
11559
            (!ctxt->disableSAX)) {
11560
      /*
11561
       * Special case to provide identical behaviour
11562
       * between pull and push parsers on empty CDATA
11563
       * sections
11564
       */
11565
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11566
           (!strncmp((const char *)&ctxt->input->cur[-9],
11567
                     "<![CDATA[", 9)))
11568
           ctxt->sax->cdataBlock(ctxt->userData,
11569
                                 BAD_CAST "", 0);
11570
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11571
      (!ctxt->disableSAX)) {
11572
      if (ctxt->sax->cdataBlock != NULL)
11573
          ctxt->sax->cdataBlock(ctxt->userData,
11574
              ctxt->input->cur, base);
11575
      else if (ctxt->sax->characters != NULL)
11576
          ctxt->sax->characters(ctxt->userData,
11577
              ctxt->input->cur, base);
11578
        }
11579
        if (ctxt->instate == XML_PARSER_EOF)
11580
      goto done;
11581
        SKIPL(base + 3);
11582
        ctxt->instate = XML_PARSER_CONTENT;
11583
#ifdef DEBUG_PUSH
11584
        xmlGenericError(xmlGenericErrorContext,
11585
          "PP: entering CONTENT\n");
11586
#endif
11587
    }
11588
    break;
11589
      }
11590
            case XML_PARSER_MISC:
11591
            case XML_PARSER_PROLOG:
11592
            case XML_PARSER_EPILOG:
11593
    SKIP_BLANKS;
11594
                avail = ctxt->input->end - ctxt->input->cur;
11595
    if (avail < 2)
11596
        goto done;
11597
    cur = ctxt->input->cur[0];
11598
    next = ctxt->input->cur[1];
11599
          if ((cur == '<') && (next == '?')) {
11600
        if ((!terminate) &&
11601
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11602
      goto done;
11603
#ifdef DEBUG_PUSH
11604
        xmlGenericError(xmlGenericErrorContext,
11605
          "PP: Parsing PI\n");
11606
#endif
11607
        xmlParsePI(ctxt);
11608
        if (ctxt->instate == XML_PARSER_EOF)
11609
      goto done;
11610
    } else if ((cur == '<') && (next == '!') &&
11611
        (ctxt->input->cur[2] == '-') &&
11612
        (ctxt->input->cur[3] == '-')) {
11613
        if ((!terminate) &&
11614
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11615
      goto done;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: Parsing Comment\n");
11619
#endif
11620
        xmlParseComment(ctxt);
11621
        if (ctxt->instate == XML_PARSER_EOF)
11622
      goto done;
11623
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11624
                    (cur == '<') && (next == '!') &&
11625
        (ctxt->input->cur[2] == 'D') &&
11626
        (ctxt->input->cur[3] == 'O') &&
11627
        (ctxt->input->cur[4] == 'C') &&
11628
        (ctxt->input->cur[5] == 'T') &&
11629
        (ctxt->input->cur[6] == 'Y') &&
11630
        (ctxt->input->cur[7] == 'P') &&
11631
        (ctxt->input->cur[8] == 'E')) {
11632
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11633
                        goto done;
11634
#ifdef DEBUG_PUSH
11635
        xmlGenericError(xmlGenericErrorContext,
11636
          "PP: Parsing internal subset\n");
11637
#endif
11638
        ctxt->inSubset = 1;
11639
        xmlParseDocTypeDecl(ctxt);
11640
        if (ctxt->instate == XML_PARSER_EOF)
11641
      goto done;
11642
        if (RAW == '[') {
11643
      ctxt->instate = XML_PARSER_DTD;
11644
#ifdef DEBUG_PUSH
11645
      xmlGenericError(xmlGenericErrorContext,
11646
        "PP: entering DTD\n");
11647
#endif
11648
        } else {
11649
      /*
11650
       * Create and update the external subset.
11651
       */
11652
      ctxt->inSubset = 2;
11653
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11654
          (ctxt->sax->externalSubset != NULL))
11655
          ctxt->sax->externalSubset(ctxt->userData,
11656
            ctxt->intSubName, ctxt->extSubSystem,
11657
            ctxt->extSubURI);
11658
      ctxt->inSubset = 0;
11659
      xmlCleanSpecialAttr(ctxt);
11660
      ctxt->instate = XML_PARSER_PROLOG;
11661
#ifdef DEBUG_PUSH
11662
      xmlGenericError(xmlGenericErrorContext,
11663
        "PP: entering PROLOG\n");
11664
#endif
11665
        }
11666
    } else if ((cur == '<') && (next == '!') &&
11667
               (avail <
11668
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11669
        goto done;
11670
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11671
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11672
        xmlHaltParser(ctxt);
11673
#ifdef DEBUG_PUSH
11674
        xmlGenericError(xmlGenericErrorContext,
11675
          "PP: entering EOF\n");
11676
#endif
11677
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11678
      ctxt->sax->endDocument(ctxt->userData);
11679
        goto done;
11680
                } else {
11681
        ctxt->instate = XML_PARSER_START_TAG;
11682
#ifdef DEBUG_PUSH
11683
        xmlGenericError(xmlGenericErrorContext,
11684
          "PP: entering START_TAG\n");
11685
#endif
11686
    }
11687
    break;
11688
            case XML_PARSER_DTD: {
11689
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11690
                    goto done;
11691
    xmlParseInternalSubset(ctxt);
11692
    if (ctxt->instate == XML_PARSER_EOF)
11693
        goto done;
11694
    ctxt->inSubset = 2;
11695
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11696
        (ctxt->sax->externalSubset != NULL))
11697
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11698
          ctxt->extSubSystem, ctxt->extSubURI);
11699
    ctxt->inSubset = 0;
11700
    xmlCleanSpecialAttr(ctxt);
11701
    if (ctxt->instate == XML_PARSER_EOF)
11702
        goto done;
11703
    ctxt->instate = XML_PARSER_PROLOG;
11704
#ifdef DEBUG_PUSH
11705
    xmlGenericError(xmlGenericErrorContext,
11706
      "PP: entering PROLOG\n");
11707
#endif
11708
                break;
11709
      }
11710
            case XML_PARSER_COMMENT:
11711
    xmlGenericError(xmlGenericErrorContext,
11712
      "PP: internal error, state == COMMENT\n");
11713
    ctxt->instate = XML_PARSER_CONTENT;
11714
#ifdef DEBUG_PUSH
11715
    xmlGenericError(xmlGenericErrorContext,
11716
      "PP: entering CONTENT\n");
11717
#endif
11718
    break;
11719
            case XML_PARSER_IGNORE:
11720
    xmlGenericError(xmlGenericErrorContext,
11721
      "PP: internal error, state == IGNORE");
11722
          ctxt->instate = XML_PARSER_DTD;
11723
#ifdef DEBUG_PUSH
11724
    xmlGenericError(xmlGenericErrorContext,
11725
      "PP: entering DTD\n");
11726
#endif
11727
          break;
11728
            case XML_PARSER_PI:
11729
    xmlGenericError(xmlGenericErrorContext,
11730
      "PP: internal error, state == PI\n");
11731
    ctxt->instate = XML_PARSER_CONTENT;
11732
#ifdef DEBUG_PUSH
11733
    xmlGenericError(xmlGenericErrorContext,
11734
      "PP: entering CONTENT\n");
11735
#endif
11736
    break;
11737
            case XML_PARSER_ENTITY_DECL:
11738
    xmlGenericError(xmlGenericErrorContext,
11739
      "PP: internal error, state == ENTITY_DECL\n");
11740
    ctxt->instate = XML_PARSER_DTD;
11741
#ifdef DEBUG_PUSH
11742
    xmlGenericError(xmlGenericErrorContext,
11743
      "PP: entering DTD\n");
11744
#endif
11745
    break;
11746
            case XML_PARSER_ENTITY_VALUE:
11747
    xmlGenericError(xmlGenericErrorContext,
11748
      "PP: internal error, state == ENTITY_VALUE\n");
11749
    ctxt->instate = XML_PARSER_CONTENT;
11750
#ifdef DEBUG_PUSH
11751
    xmlGenericError(xmlGenericErrorContext,
11752
      "PP: entering DTD\n");
11753
#endif
11754
    break;
11755
            case XML_PARSER_ATTRIBUTE_VALUE:
11756
    xmlGenericError(xmlGenericErrorContext,
11757
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
11758
    ctxt->instate = XML_PARSER_START_TAG;
11759
#ifdef DEBUG_PUSH
11760
    xmlGenericError(xmlGenericErrorContext,
11761
      "PP: entering START_TAG\n");
11762
#endif
11763
    break;
11764
            case XML_PARSER_SYSTEM_LITERAL:
11765
    xmlGenericError(xmlGenericErrorContext,
11766
      "PP: internal error, state == SYSTEM_LITERAL\n");
11767
    ctxt->instate = XML_PARSER_START_TAG;
11768
#ifdef DEBUG_PUSH
11769
    xmlGenericError(xmlGenericErrorContext,
11770
      "PP: entering START_TAG\n");
11771
#endif
11772
    break;
11773
            case XML_PARSER_PUBLIC_LITERAL:
11774
    xmlGenericError(xmlGenericErrorContext,
11775
      "PP: internal error, state == PUBLIC_LITERAL\n");
11776
    ctxt->instate = XML_PARSER_START_TAG;
11777
#ifdef DEBUG_PUSH
11778
    xmlGenericError(xmlGenericErrorContext,
11779
      "PP: entering START_TAG\n");
11780
#endif
11781
    break;
11782
  }
11783
    }
11784
done:
11785
#ifdef DEBUG_PUSH
11786
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11787
#endif
11788
    return(ret);
11789
encoding_error:
11790
    if (ctxt->input->end - ctxt->input->cur < 4) {
11791
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11792
         "Input is not proper UTF-8, indicate encoding !\n",
11793
         NULL, NULL);
11794
    } else {
11795
        char buffer[150];
11796
11797
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11798
      ctxt->input->cur[0], ctxt->input->cur[1],
11799
      ctxt->input->cur[2], ctxt->input->cur[3]);
11800
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11801
         "Input is not proper UTF-8, indicate encoding !\n%s",
11802
         BAD_CAST buffer, NULL);
11803
    }
11804
    return(0);
11805
}
11806
11807
/**
11808
 * xmlParseChunk:
11809
 * @ctxt:  an XML parser context
11810
 * @chunk:  an char array
11811
 * @size:  the size in byte of the chunk
11812
 * @terminate:  last chunk indicator
11813
 *
11814
 * Parse a Chunk of memory
11815
 *
11816
 * Returns zero if no error, the xmlParserErrors otherwise.
11817
 */
11818
int
11819
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11820
              int terminate) {
11821
    int end_in_lf = 0;
11822
11823
    if (ctxt == NULL)
11824
        return(XML_ERR_INTERNAL_ERROR);
11825
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11826
        return(ctxt->errNo);
11827
    if (ctxt->instate == XML_PARSER_EOF)
11828
        return(-1);
11829
    if (ctxt->input == NULL)
11830
        return(-1);
11831
11832
    ctxt->progressive = 1;
11833
    if (ctxt->instate == XML_PARSER_START)
11834
        xmlDetectSAX2(ctxt);
11835
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11836
        (chunk[size - 1] == '\r')) {
11837
  end_in_lf = 1;
11838
  size--;
11839
    }
11840
11841
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11842
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
11843
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
11844
  size_t cur = ctxt->input->cur - ctxt->input->base;
11845
  int res;
11846
11847
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11848
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
11849
  if (res < 0) {
11850
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
11851
      xmlHaltParser(ctxt);
11852
      return(ctxt->errNo);
11853
  }
11854
#ifdef DEBUG_PUSH
11855
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11856
#endif
11857
11858
    } else if (ctxt->instate != XML_PARSER_EOF) {
11859
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11860
      xmlParserInputBufferPtr in = ctxt->input->buf;
11861
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
11862
        (in->raw != NULL)) {
11863
    int nbchars;
11864
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
11865
    size_t current = ctxt->input->cur - ctxt->input->base;
11866
11867
    nbchars = xmlCharEncInput(in, terminate);
11868
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
11869
    if (nbchars < 0) {
11870
              xmlFatalErr(ctxt, in->error, NULL);
11871
                    xmlHaltParser(ctxt);
11872
        return(ctxt->errNo);
11873
    }
11874
      }
11875
  }
11876
    }
11877
11878
    xmlParseTryOrFinish(ctxt, terminate);
11879
    if (ctxt->instate == XML_PARSER_EOF)
11880
        return(ctxt->errNo);
11881
11882
    if ((ctxt->input != NULL) &&
11883
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
11884
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
11885
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
11886
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
11887
        xmlHaltParser(ctxt);
11888
    }
11889
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11890
        return(ctxt->errNo);
11891
11892
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11893
        (ctxt->input->buf != NULL)) {
11894
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11895
           ctxt->input);
11896
  size_t current = ctxt->input->cur - ctxt->input->base;
11897
        int res;
11898
11899
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11900
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11901
            base, current);
11902
        if (res < 0) {
11903
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
11904
            xmlHaltParser(ctxt);
11905
            return(ctxt->errNo);
11906
        }
11907
    }
11908
    if (terminate) {
11909
  /*
11910
   * Check for termination
11911
   */
11912
  if ((ctxt->instate != XML_PARSER_EOF) &&
11913
      (ctxt->instate != XML_PARSER_EPILOG)) {
11914
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11915
  }
11916
  if ((ctxt->instate == XML_PARSER_EPILOG) &&
11917
            (ctxt->input->cur < ctxt->input->end)) {
11918
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11919
  }
11920
  if (ctxt->instate != XML_PARSER_EOF) {
11921
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11922
    ctxt->sax->endDocument(ctxt->userData);
11923
  }
11924
  ctxt->instate = XML_PARSER_EOF;
11925
    }
11926
    if (ctxt->wellFormed == 0)
11927
  return((xmlParserErrors) ctxt->errNo);
11928
    else
11929
        return(0);
11930
}
11931
11932
/************************************************************************
11933
 *                  *
11934
 *    I/O front end functions to the parser     *
11935
 *                  *
11936
 ************************************************************************/
11937
11938
/**
11939
 * xmlCreatePushParserCtxt:
11940
 * @sax:  a SAX handler
11941
 * @user_data:  The user data returned on SAX callbacks
11942
 * @chunk:  a pointer to an array of chars
11943
 * @size:  number of chars in the array
11944
 * @filename:  an optional file name or URI
11945
 *
11946
 * Create a parser context for using the XML parser in push mode.
11947
 * If @buffer and @size are non-NULL, the data is used to detect
11948
 * the encoding.  The remaining characters will be parsed so they
11949
 * don't need to be fed in again through xmlParseChunk.
11950
 * To allow content encoding detection, @size should be >= 4
11951
 * The value of @filename is used for fetching external entities
11952
 * and error/warning reports.
11953
 *
11954
 * Returns the new parser context or NULL
11955
 */
11956
11957
xmlParserCtxtPtr
11958
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11959
                        const char *chunk, int size, const char *filename) {
11960
    xmlParserCtxtPtr ctxt;
11961
    xmlParserInputPtr inputStream;
11962
    xmlParserInputBufferPtr buf;
11963
11964
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
11965
    if (buf == NULL) return(NULL);
11966
11967
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11968
    if (ctxt == NULL) {
11969
        xmlErrMemory(NULL, "creating parser: out of memory\n");
11970
  xmlFreeParserInputBuffer(buf);
11971
  return(NULL);
11972
    }
11973
    ctxt->dictNames = 1;
11974
    if (filename == NULL) {
11975
  ctxt->directory = NULL;
11976
    } else {
11977
        ctxt->directory = xmlParserGetDirectory(filename);
11978
    }
11979
11980
    inputStream = xmlNewInputStream(ctxt);
11981
    if (inputStream == NULL) {
11982
  xmlFreeParserCtxt(ctxt);
11983
  xmlFreeParserInputBuffer(buf);
11984
  return(NULL);
11985
    }
11986
11987
    if (filename == NULL)
11988
  inputStream->filename = NULL;
11989
    else {
11990
  inputStream->filename = (char *)
11991
      xmlCanonicPath((const xmlChar *) filename);
11992
  if (inputStream->filename == NULL) {
11993
            xmlFreeInputStream(inputStream);
11994
      xmlFreeParserCtxt(ctxt);
11995
      xmlFreeParserInputBuffer(buf);
11996
      return(NULL);
11997
  }
11998
    }
11999
    inputStream->buf = buf;
12000
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12001
    inputPush(ctxt, inputStream);
12002
12003
    /*
12004
     * If the caller didn't provide an initial 'chunk' for determining
12005
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12006
     * that it can be automatically determined later
12007
     */
12008
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12009
12010
    if ((size != 0) && (chunk != NULL) &&
12011
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12012
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12013
  size_t cur = ctxt->input->cur - ctxt->input->base;
12014
        int res;
12015
12016
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12017
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12018
        if (res < 0) {
12019
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12020
            xmlHaltParser(ctxt);
12021
        }
12022
#ifdef DEBUG_PUSH
12023
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12024
#endif
12025
    }
12026
12027
    return(ctxt);
12028
}
12029
#endif /* LIBXML_PUSH_ENABLED */
12030
12031
/**
12032
 * xmlStopParser:
12033
 * @ctxt:  an XML parser context
12034
 *
12035
 * Blocks further parser processing
12036
 */
12037
void
12038
10.1M
xmlStopParser(xmlParserCtxtPtr ctxt) {
12039
10.1M
    if (ctxt == NULL)
12040
10.1M
        return;
12041
893
    xmlHaltParser(ctxt);
12042
893
    ctxt->errNo = XML_ERR_USER_STOP;
12043
893
}
12044
12045
/**
12046
 * xmlCreateIOParserCtxt:
12047
 * @sax:  a SAX handler
12048
 * @user_data:  The user data returned on SAX callbacks
12049
 * @ioread:  an I/O read function
12050
 * @ioclose:  an I/O close function
12051
 * @ioctx:  an I/O handler
12052
 * @enc:  the charset encoding if known
12053
 *
12054
 * Create a parser context for using the XML parser with an existing
12055
 * I/O stream
12056
 *
12057
 * Returns the new parser context or NULL
12058
 */
12059
xmlParserCtxtPtr
12060
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12061
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12062
0
  void *ioctx, xmlCharEncoding enc) {
12063
0
    xmlParserCtxtPtr ctxt;
12064
0
    xmlParserInputPtr inputStream;
12065
0
    xmlParserInputBufferPtr buf;
12066
12067
0
    if (ioread == NULL) return(NULL);
12068
12069
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12070
0
    if (buf == NULL) {
12071
0
        if (ioclose != NULL)
12072
0
            ioclose(ioctx);
12073
0
        return (NULL);
12074
0
    }
12075
12076
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12077
0
    if (ctxt == NULL) {
12078
0
  xmlFreeParserInputBuffer(buf);
12079
0
  return(NULL);
12080
0
    }
12081
12082
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12083
0
    if (inputStream == NULL) {
12084
0
  xmlFreeParserCtxt(ctxt);
12085
0
  return(NULL);
12086
0
    }
12087
0
    inputPush(ctxt, inputStream);
12088
12089
0
    return(ctxt);
12090
0
}
12091
12092
#ifdef LIBXML_VALID_ENABLED
12093
/************************************************************************
12094
 *                  *
12095
 *    Front ends when parsing a DTD       *
12096
 *                  *
12097
 ************************************************************************/
12098
12099
/**
12100
 * xmlIOParseDTD:
12101
 * @sax:  the SAX handler block or NULL
12102
 * @input:  an Input Buffer
12103
 * @enc:  the charset encoding if known
12104
 *
12105
 * Load and parse a DTD
12106
 *
12107
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12108
 * @input will be freed by the function in any case.
12109
 */
12110
12111
xmlDtdPtr
12112
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12113
        xmlCharEncoding enc) {
12114
    xmlDtdPtr ret = NULL;
12115
    xmlParserCtxtPtr ctxt;
12116
    xmlParserInputPtr pinput = NULL;
12117
    xmlChar start[4];
12118
12119
    if (input == NULL)
12120
  return(NULL);
12121
12122
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12123
    if (ctxt == NULL) {
12124
        xmlFreeParserInputBuffer(input);
12125
  return(NULL);
12126
    }
12127
12128
    /* We are loading a DTD */
12129
    ctxt->options |= XML_PARSE_DTDLOAD;
12130
12131
    xmlDetectSAX2(ctxt);
12132
12133
    /*
12134
     * generate a parser input from the I/O handler
12135
     */
12136
12137
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12138
    if (pinput == NULL) {
12139
        xmlFreeParserInputBuffer(input);
12140
  xmlFreeParserCtxt(ctxt);
12141
  return(NULL);
12142
    }
12143
12144
    /*
12145
     * plug some encoding conversion routines here.
12146
     */
12147
    if (xmlPushInput(ctxt, pinput) < 0) {
12148
  xmlFreeParserCtxt(ctxt);
12149
  return(NULL);
12150
    }
12151
    if (enc != XML_CHAR_ENCODING_NONE) {
12152
        xmlSwitchEncoding(ctxt, enc);
12153
    }
12154
12155
    pinput->filename = NULL;
12156
    pinput->line = 1;
12157
    pinput->col = 1;
12158
    pinput->base = ctxt->input->cur;
12159
    pinput->cur = ctxt->input->cur;
12160
    pinput->free = NULL;
12161
12162
    /*
12163
     * let's parse that entity knowing it's an external subset.
12164
     */
12165
    ctxt->inSubset = 2;
12166
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12167
    if (ctxt->myDoc == NULL) {
12168
  xmlErrMemory(ctxt, "New Doc failed");
12169
  return(NULL);
12170
    }
12171
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12172
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12173
                                 BAD_CAST "none", BAD_CAST "none");
12174
12175
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12176
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12177
  /*
12178
   * Get the 4 first bytes and decode the charset
12179
   * if enc != XML_CHAR_ENCODING_NONE
12180
   * plug some encoding conversion routines.
12181
   */
12182
  start[0] = RAW;
12183
  start[1] = NXT(1);
12184
  start[2] = NXT(2);
12185
  start[3] = NXT(3);
12186
  enc = xmlDetectCharEncoding(start, 4);
12187
  if (enc != XML_CHAR_ENCODING_NONE) {
12188
      xmlSwitchEncoding(ctxt, enc);
12189
  }
12190
    }
12191
12192
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12193
12194
    if (ctxt->myDoc != NULL) {
12195
  if (ctxt->wellFormed) {
12196
      ret = ctxt->myDoc->extSubset;
12197
      ctxt->myDoc->extSubset = NULL;
12198
      if (ret != NULL) {
12199
    xmlNodePtr tmp;
12200
12201
    ret->doc = NULL;
12202
    tmp = ret->children;
12203
    while (tmp != NULL) {
12204
        tmp->doc = NULL;
12205
        tmp = tmp->next;
12206
    }
12207
      }
12208
  } else {
12209
      ret = NULL;
12210
  }
12211
        xmlFreeDoc(ctxt->myDoc);
12212
        ctxt->myDoc = NULL;
12213
    }
12214
    xmlFreeParserCtxt(ctxt);
12215
12216
    return(ret);
12217
}
12218
12219
/**
12220
 * xmlSAXParseDTD:
12221
 * @sax:  the SAX handler block
12222
 * @ExternalID:  a NAME* containing the External ID of the DTD
12223
 * @SystemID:  a NAME* containing the URL to the DTD
12224
 *
12225
 * DEPRECATED: Don't use.
12226
 *
12227
 * Load and parse an external subset.
12228
 *
12229
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12230
 */
12231
12232
xmlDtdPtr
12233
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12234
                          const xmlChar *SystemID) {
12235
    xmlDtdPtr ret = NULL;
12236
    xmlParserCtxtPtr ctxt;
12237
    xmlParserInputPtr input = NULL;
12238
    xmlCharEncoding enc;
12239
    xmlChar* systemIdCanonic;
12240
12241
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12242
12243
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12244
    if (ctxt == NULL) {
12245
  return(NULL);
12246
    }
12247
12248
    /* We are loading a DTD */
12249
    ctxt->options |= XML_PARSE_DTDLOAD;
12250
12251
    /*
12252
     * Canonicalise the system ID
12253
     */
12254
    systemIdCanonic = xmlCanonicPath(SystemID);
12255
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12256
  xmlFreeParserCtxt(ctxt);
12257
  return(NULL);
12258
    }
12259
12260
    /*
12261
     * Ask the Entity resolver to load the damn thing
12262
     */
12263
12264
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12265
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12266
                                   systemIdCanonic);
12267
    if (input == NULL) {
12268
  xmlFreeParserCtxt(ctxt);
12269
  if (systemIdCanonic != NULL)
12270
      xmlFree(systemIdCanonic);
12271
  return(NULL);
12272
    }
12273
12274
    /*
12275
     * plug some encoding conversion routines here.
12276
     */
12277
    if (xmlPushInput(ctxt, input) < 0) {
12278
  xmlFreeParserCtxt(ctxt);
12279
  if (systemIdCanonic != NULL)
12280
      xmlFree(systemIdCanonic);
12281
  return(NULL);
12282
    }
12283
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12284
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12285
  xmlSwitchEncoding(ctxt, enc);
12286
    }
12287
12288
    if (input->filename == NULL)
12289
  input->filename = (char *) systemIdCanonic;
12290
    else
12291
  xmlFree(systemIdCanonic);
12292
    input->line = 1;
12293
    input->col = 1;
12294
    input->base = ctxt->input->cur;
12295
    input->cur = ctxt->input->cur;
12296
    input->free = NULL;
12297
12298
    /*
12299
     * let's parse that entity knowing it's an external subset.
12300
     */
12301
    ctxt->inSubset = 2;
12302
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12303
    if (ctxt->myDoc == NULL) {
12304
  xmlErrMemory(ctxt, "New Doc failed");
12305
  xmlFreeParserCtxt(ctxt);
12306
  return(NULL);
12307
    }
12308
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12309
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12310
                                 ExternalID, SystemID);
12311
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12312
12313
    if (ctxt->myDoc != NULL) {
12314
  if (ctxt->wellFormed) {
12315
      ret = ctxt->myDoc->extSubset;
12316
      ctxt->myDoc->extSubset = NULL;
12317
      if (ret != NULL) {
12318
    xmlNodePtr tmp;
12319
12320
    ret->doc = NULL;
12321
    tmp = ret->children;
12322
    while (tmp != NULL) {
12323
        tmp->doc = NULL;
12324
        tmp = tmp->next;
12325
    }
12326
      }
12327
  } else {
12328
      ret = NULL;
12329
  }
12330
        xmlFreeDoc(ctxt->myDoc);
12331
        ctxt->myDoc = NULL;
12332
    }
12333
    xmlFreeParserCtxt(ctxt);
12334
12335
    return(ret);
12336
}
12337
12338
12339
/**
12340
 * xmlParseDTD:
12341
 * @ExternalID:  a NAME* containing the External ID of the DTD
12342
 * @SystemID:  a NAME* containing the URL to the DTD
12343
 *
12344
 * Load and parse an external subset.
12345
 *
12346
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12347
 */
12348
12349
xmlDtdPtr
12350
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12351
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12352
}
12353
#endif /* LIBXML_VALID_ENABLED */
12354
12355
/************************************************************************
12356
 *                  *
12357
 *    Front ends when parsing an Entity     *
12358
 *                  *
12359
 ************************************************************************/
12360
12361
/**
12362
 * xmlParseCtxtExternalEntity:
12363
 * @ctx:  the existing parsing context
12364
 * @URL:  the URL for the entity to load
12365
 * @ID:  the System ID for the entity to load
12366
 * @lst:  the return value for the set of parsed nodes
12367
 *
12368
 * Parse an external general entity within an existing parsing context
12369
 * An external general parsed entity is well-formed if it matches the
12370
 * production labeled extParsedEnt.
12371
 *
12372
 * [78] extParsedEnt ::= TextDecl? content
12373
 *
12374
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12375
 *    the parser error code otherwise
12376
 */
12377
12378
int
12379
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12380
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12381
0
    void *userData;
12382
12383
0
    if (ctx == NULL) return(-1);
12384
    /*
12385
     * If the user provided their own SAX callbacks, then reuse the
12386
     * userData callback field, otherwise the expected setup in a
12387
     * DOM builder is to have userData == ctxt
12388
     */
12389
0
    if (ctx->userData == ctx)
12390
0
        userData = NULL;
12391
0
    else
12392
0
        userData = ctx->userData;
12393
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12394
0
                                         userData, ctx->depth + 1,
12395
0
                                         URL, ID, lst);
12396
0
}
12397
12398
/**
12399
 * xmlParseExternalEntityPrivate:
12400
 * @doc:  the document the chunk pertains to
12401
 * @oldctxt:  the previous parser context if available
12402
 * @sax:  the SAX handler block (possibly NULL)
12403
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12404
 * @depth:  Used for loop detection, use 0
12405
 * @URL:  the URL for the entity to load
12406
 * @ID:  the System ID for the entity to load
12407
 * @list:  the return value for the set of parsed nodes
12408
 *
12409
 * Private version of xmlParseExternalEntity()
12410
 *
12411
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12412
 *    the parser error code otherwise
12413
 */
12414
12415
static xmlParserErrors
12416
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12417
                xmlSAXHandlerPtr sax,
12418
          void *user_data, int depth, const xmlChar *URL,
12419
11.4k
          const xmlChar *ID, xmlNodePtr *list) {
12420
11.4k
    xmlParserCtxtPtr ctxt;
12421
11.4k
    xmlDocPtr newDoc;
12422
11.4k
    xmlNodePtr newRoot;
12423
11.4k
    xmlParserErrors ret = XML_ERR_OK;
12424
11.4k
    xmlChar start[4];
12425
11.4k
    xmlCharEncoding enc;
12426
12427
11.4k
    if (((depth > 40) &&
12428
11.4k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12429
11.4k
  (depth > 100)) {
12430
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12431
0
                       "Maximum entity nesting depth exceeded");
12432
0
        return(XML_ERR_ENTITY_LOOP);
12433
0
    }
12434
12435
11.4k
    if (list != NULL)
12436
5.12k
        *list = NULL;
12437
11.4k
    if ((URL == NULL) && (ID == NULL))
12438
0
  return(XML_ERR_INTERNAL_ERROR);
12439
11.4k
    if (doc == NULL)
12440
0
  return(XML_ERR_INTERNAL_ERROR);
12441
12442
11.4k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12443
11.4k
                                             oldctxt);
12444
11.4k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12445
5.95k
    if (oldctxt != NULL) {
12446
5.95k
        ctxt->nbErrors = oldctxt->nbErrors;
12447
5.95k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12448
5.95k
    }
12449
5.95k
    xmlDetectSAX2(ctxt);
12450
12451
5.95k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12452
5.95k
    if (newDoc == NULL) {
12453
5
  xmlFreeParserCtxt(ctxt);
12454
5
  return(XML_ERR_INTERNAL_ERROR);
12455
5
    }
12456
5.95k
    newDoc->properties = XML_DOC_INTERNAL;
12457
5.95k
    if (doc) {
12458
5.95k
        newDoc->intSubset = doc->intSubset;
12459
5.95k
        newDoc->extSubset = doc->extSubset;
12460
5.95k
        if (doc->dict) {
12461
5.95k
            newDoc->dict = doc->dict;
12462
5.95k
            xmlDictReference(newDoc->dict);
12463
5.95k
        }
12464
5.95k
        if (doc->URL != NULL) {
12465
5.95k
            newDoc->URL = xmlStrdup(doc->URL);
12466
5.95k
        }
12467
5.95k
    }
12468
5.95k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12469
5.95k
    if (newRoot == NULL) {
12470
1
  if (sax != NULL)
12471
1
  xmlFreeParserCtxt(ctxt);
12472
1
  newDoc->intSubset = NULL;
12473
1
  newDoc->extSubset = NULL;
12474
1
        xmlFreeDoc(newDoc);
12475
1
  return(XML_ERR_INTERNAL_ERROR);
12476
1
    }
12477
5.94k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12478
5.94k
    nodePush(ctxt, newDoc->children);
12479
5.94k
    if (doc == NULL) {
12480
0
        ctxt->myDoc = newDoc;
12481
5.94k
    } else {
12482
5.94k
        ctxt->myDoc = doc;
12483
5.94k
        newRoot->doc = doc;
12484
5.94k
    }
12485
12486
    /*
12487
     * Get the 4 first bytes and decode the charset
12488
     * if enc != XML_CHAR_ENCODING_NONE
12489
     * plug some encoding conversion routines.
12490
     */
12491
5.94k
    GROW;
12492
5.94k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12493
5.07k
  start[0] = RAW;
12494
5.07k
  start[1] = NXT(1);
12495
5.07k
  start[2] = NXT(2);
12496
5.07k
  start[3] = NXT(3);
12497
5.07k
  enc = xmlDetectCharEncoding(start, 4);
12498
5.07k
  if (enc != XML_CHAR_ENCODING_NONE) {
12499
2.61k
      xmlSwitchEncoding(ctxt, enc);
12500
2.61k
  }
12501
5.07k
    }
12502
12503
    /*
12504
     * Parse a possible text declaration first
12505
     */
12506
5.94k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12507
2.25k
  xmlParseTextDecl(ctxt);
12508
        /*
12509
         * An XML-1.0 document can't reference an entity not XML-1.0
12510
         */
12511
2.25k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12512
2.25k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12513
38
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12514
38
                           "Version mismatch between document and entity\n");
12515
38
        }
12516
2.25k
    }
12517
12518
5.94k
    ctxt->instate = XML_PARSER_CONTENT;
12519
5.94k
    ctxt->depth = depth;
12520
5.94k
    if (oldctxt != NULL) {
12521
5.94k
  ctxt->_private = oldctxt->_private;
12522
5.94k
  ctxt->loadsubset = oldctxt->loadsubset;
12523
5.94k
  ctxt->validate = oldctxt->validate;
12524
5.94k
  ctxt->valid = oldctxt->valid;
12525
5.94k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12526
5.94k
        if (oldctxt->validate) {
12527
0
            ctxt->vctxt.error = oldctxt->vctxt.error;
12528
0
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12529
0
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12530
0
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12531
0
        }
12532
5.94k
  ctxt->external = oldctxt->external;
12533
5.94k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12534
5.94k
        ctxt->dict = oldctxt->dict;
12535
5.94k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12536
5.94k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12537
5.94k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12538
5.94k
        ctxt->dictNames = oldctxt->dictNames;
12539
5.94k
        ctxt->attsDefault = oldctxt->attsDefault;
12540
5.94k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12541
5.94k
        ctxt->linenumbers = oldctxt->linenumbers;
12542
5.94k
  ctxt->record_info = oldctxt->record_info;
12543
5.94k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12544
5.94k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12545
5.94k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12546
5.94k
    } else {
12547
  /*
12548
   * Doing validity checking on chunk without context
12549
   * doesn't make sense
12550
   */
12551
0
  ctxt->_private = NULL;
12552
0
  ctxt->validate = 0;
12553
0
  ctxt->external = 2;
12554
0
  ctxt->loadsubset = 0;
12555
0
    }
12556
12557
5.94k
    xmlParseContent(ctxt);
12558
12559
5.94k
    if ((RAW == '<') && (NXT(1) == '/')) {
12560
341
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12561
5.60k
    } else if (RAW != 0) {
12562
31
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12563
31
    }
12564
5.94k
    if (ctxt->node != newDoc->children) {
12565
2.22k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12566
2.22k
    }
12567
12568
5.94k
    if (!ctxt->wellFormed) {
12569
3.98k
  ret = (xmlParserErrors)ctxt->errNo;
12570
3.98k
        if (oldctxt != NULL) {
12571
3.98k
            oldctxt->errNo = ctxt->errNo;
12572
3.98k
            oldctxt->wellFormed = 0;
12573
3.98k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12574
3.98k
        }
12575
3.98k
    } else {
12576
1.96k
  if (list != NULL) {
12577
567
      xmlNodePtr cur;
12578
12579
      /*
12580
       * Return the newly created nodeset after unlinking it from
12581
       * they pseudo parent.
12582
       */
12583
567
      cur = newDoc->children->children;
12584
567
      *list = cur;
12585
1.66k
      while (cur != NULL) {
12586
1.09k
    cur->parent = NULL;
12587
1.09k
    cur = cur->next;
12588
1.09k
      }
12589
567
            newDoc->children->children = NULL;
12590
567
  }
12591
1.96k
  ret = XML_ERR_OK;
12592
1.96k
    }
12593
12594
    /*
12595
     * Also record the size of the entity parsed
12596
     */
12597
5.94k
    if (ctxt->input != NULL && oldctxt != NULL) {
12598
5.94k
        unsigned long consumed = ctxt->input->consumed;
12599
12600
5.94k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12601
12602
5.94k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12603
5.94k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12604
12605
5.94k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12606
5.94k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12607
5.94k
    }
12608
12609
5.94k
    if (oldctxt != NULL) {
12610
5.94k
        ctxt->dict = NULL;
12611
5.94k
        ctxt->attsDefault = NULL;
12612
5.94k
        ctxt->attsSpecial = NULL;
12613
5.94k
        oldctxt->nbErrors = ctxt->nbErrors;
12614
5.94k
        oldctxt->nbWarnings = ctxt->nbWarnings;
12615
5.94k
        oldctxt->validate = ctxt->validate;
12616
5.94k
        oldctxt->valid = ctxt->valid;
12617
5.94k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12618
5.94k
        oldctxt->node_seq.length = ctxt->node_seq.length;
12619
5.94k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12620
5.94k
    }
12621
5.94k
    ctxt->node_seq.maximum = 0;
12622
5.94k
    ctxt->node_seq.length = 0;
12623
5.94k
    ctxt->node_seq.buffer = NULL;
12624
5.94k
    xmlFreeParserCtxt(ctxt);
12625
5.94k
    newDoc->intSubset = NULL;
12626
5.94k
    newDoc->extSubset = NULL;
12627
5.94k
    xmlFreeDoc(newDoc);
12628
12629
5.94k
    return(ret);
12630
5.95k
}
12631
12632
#ifdef LIBXML_SAX1_ENABLED
12633
/**
12634
 * xmlParseExternalEntity:
12635
 * @doc:  the document the chunk pertains to
12636
 * @sax:  the SAX handler block (possibly NULL)
12637
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12638
 * @depth:  Used for loop detection, use 0
12639
 * @URL:  the URL for the entity to load
12640
 * @ID:  the System ID for the entity to load
12641
 * @lst:  the return value for the set of parsed nodes
12642
 *
12643
 * Parse an external general entity
12644
 * An external general parsed entity is well-formed if it matches the
12645
 * production labeled extParsedEnt.
12646
 *
12647
 * [78] extParsedEnt ::= TextDecl? content
12648
 *
12649
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12650
 *    the parser error code otherwise
12651
 */
12652
12653
int
12654
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12655
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12656
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12657
                           ID, lst));
12658
}
12659
12660
/**
12661
 * xmlParseBalancedChunkMemory:
12662
 * @doc:  the document the chunk pertains to (must not be NULL)
12663
 * @sax:  the SAX handler block (possibly NULL)
12664
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12665
 * @depth:  Used for loop detection, use 0
12666
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12667
 * @lst:  the return value for the set of parsed nodes
12668
 *
12669
 * Parse a well-balanced chunk of an XML document
12670
 * called by the parser
12671
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12672
 * the content production in the XML grammar:
12673
 *
12674
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12675
 *
12676
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12677
 *    the parser error code otherwise
12678
 */
12679
12680
int
12681
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12682
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12683
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12684
                                                depth, string, lst, 0 );
12685
}
12686
#endif /* LIBXML_SAX1_ENABLED */
12687
12688
/**
12689
 * xmlParseBalancedChunkMemoryInternal:
12690
 * @oldctxt:  the existing parsing context
12691
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12692
 * @user_data:  the user data field for the parser context
12693
 * @lst:  the return value for the set of parsed nodes
12694
 *
12695
 *
12696
 * Parse a well-balanced chunk of an XML document
12697
 * called by the parser
12698
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12699
 * the content production in the XML grammar:
12700
 *
12701
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12702
 *
12703
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12704
 * error code otherwise
12705
 *
12706
 * In case recover is set to 1, the nodelist will not be empty even if
12707
 * the parsed chunk is not well balanced.
12708
 */
12709
static xmlParserErrors
12710
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12711
2.99k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12712
2.99k
    xmlParserCtxtPtr ctxt;
12713
2.99k
    xmlDocPtr newDoc = NULL;
12714
2.99k
    xmlNodePtr newRoot;
12715
2.99k
    xmlSAXHandlerPtr oldsax = NULL;
12716
2.99k
    xmlNodePtr content = NULL;
12717
2.99k
    xmlNodePtr last = NULL;
12718
2.99k
    int size;
12719
2.99k
    xmlParserErrors ret = XML_ERR_OK;
12720
2.99k
#ifdef SAX2
12721
2.99k
    int i;
12722
2.99k
#endif
12723
12724
2.99k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12725
2.99k
        (oldctxt->depth >  100)) {
12726
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12727
0
                       "Maximum entity nesting depth exceeded");
12728
0
  return(XML_ERR_ENTITY_LOOP);
12729
0
    }
12730
12731
12732
2.99k
    if (lst != NULL)
12733
2.96k
        *lst = NULL;
12734
2.99k
    if (string == NULL)
12735
0
        return(XML_ERR_INTERNAL_ERROR);
12736
12737
2.99k
    size = xmlStrlen(string);
12738
12739
2.99k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12740
2.99k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12741
2.94k
    ctxt->nbErrors = oldctxt->nbErrors;
12742
2.94k
    ctxt->nbWarnings = oldctxt->nbWarnings;
12743
2.94k
    if (user_data != NULL)
12744
0
  ctxt->userData = user_data;
12745
2.94k
    else
12746
2.94k
  ctxt->userData = ctxt;
12747
2.94k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12748
2.94k
    ctxt->dict = oldctxt->dict;
12749
2.94k
    ctxt->input_id = oldctxt->input_id;
12750
2.94k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12751
2.94k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12752
2.94k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12753
12754
2.94k
#ifdef SAX2
12755
    /* propagate namespaces down the entity */
12756
5.04k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
12757
2.09k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12758
2.09k
    }
12759
2.94k
#endif
12760
12761
2.94k
    oldsax = ctxt->sax;
12762
2.94k
    ctxt->sax = oldctxt->sax;
12763
2.94k
    xmlDetectSAX2(ctxt);
12764
2.94k
    ctxt->replaceEntities = oldctxt->replaceEntities;
12765
2.94k
    ctxt->options = oldctxt->options;
12766
12767
2.94k
    ctxt->_private = oldctxt->_private;
12768
2.94k
    if (oldctxt->myDoc == NULL) {
12769
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
12770
0
  if (newDoc == NULL) {
12771
0
      ctxt->sax = oldsax;
12772
0
      ctxt->dict = NULL;
12773
0
      xmlFreeParserCtxt(ctxt);
12774
0
      return(XML_ERR_INTERNAL_ERROR);
12775
0
  }
12776
0
  newDoc->properties = XML_DOC_INTERNAL;
12777
0
  newDoc->dict = ctxt->dict;
12778
0
  xmlDictReference(newDoc->dict);
12779
0
  ctxt->myDoc = newDoc;
12780
2.94k
    } else {
12781
2.94k
  ctxt->myDoc = oldctxt->myDoc;
12782
2.94k
        content = ctxt->myDoc->children;
12783
2.94k
  last = ctxt->myDoc->last;
12784
2.94k
    }
12785
2.94k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12786
2.94k
    if (newRoot == NULL) {
12787
8
  ctxt->sax = oldsax;
12788
8
  ctxt->dict = NULL;
12789
8
  xmlFreeParserCtxt(ctxt);
12790
8
  if (newDoc != NULL) {
12791
0
      xmlFreeDoc(newDoc);
12792
0
  }
12793
8
  return(XML_ERR_INTERNAL_ERROR);
12794
8
    }
12795
2.93k
    ctxt->myDoc->children = NULL;
12796
2.93k
    ctxt->myDoc->last = NULL;
12797
2.93k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12798
2.93k
    nodePush(ctxt, ctxt->myDoc->children);
12799
2.93k
    ctxt->instate = XML_PARSER_CONTENT;
12800
2.93k
    ctxt->depth = oldctxt->depth;
12801
12802
2.93k
    ctxt->validate = 0;
12803
2.93k
    ctxt->loadsubset = oldctxt->loadsubset;
12804
2.93k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12805
  /*
12806
   * ID/IDREF registration will be done in xmlValidateElement below
12807
   */
12808
2.93k
  ctxt->loadsubset |= XML_SKIP_IDS;
12809
2.93k
    }
12810
2.93k
    ctxt->dictNames = oldctxt->dictNames;
12811
2.93k
    ctxt->attsDefault = oldctxt->attsDefault;
12812
2.93k
    ctxt->attsSpecial = oldctxt->attsSpecial;
12813
12814
2.93k
    xmlParseContent(ctxt);
12815
2.93k
    if ((RAW == '<') && (NXT(1) == '/')) {
12816
277
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12817
2.66k
    } else if (RAW != 0) {
12818
30
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12819
30
    }
12820
2.93k
    if (ctxt->node != ctxt->myDoc->children) {
12821
431
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12822
431
    }
12823
12824
2.93k
    if (!ctxt->wellFormed) {
12825
1.46k
  ret = (xmlParserErrors)ctxt->errNo;
12826
1.46k
        oldctxt->errNo = ctxt->errNo;
12827
1.46k
        oldctxt->wellFormed = 0;
12828
1.46k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12829
1.46k
    } else {
12830
1.46k
        ret = XML_ERR_OK;
12831
1.46k
    }
12832
12833
2.93k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
12834
1.46k
  xmlNodePtr cur;
12835
12836
  /*
12837
   * Return the newly created nodeset after unlinking it from
12838
   * they pseudo parent.
12839
   */
12840
1.46k
  cur = ctxt->myDoc->children->children;
12841
1.46k
  *lst = cur;
12842
8.41k
  while (cur != NULL) {
12843
#ifdef LIBXML_VALID_ENABLED
12844
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12845
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12846
    (cur->type == XML_ELEMENT_NODE)) {
12847
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12848
      oldctxt->myDoc, cur);
12849
      }
12850
#endif /* LIBXML_VALID_ENABLED */
12851
6.94k
      cur->parent = NULL;
12852
6.94k
      cur = cur->next;
12853
6.94k
  }
12854
1.46k
  ctxt->myDoc->children->children = NULL;
12855
1.46k
    }
12856
2.93k
    if (ctxt->myDoc != NULL) {
12857
2.93k
  xmlFreeNode(ctxt->myDoc->children);
12858
2.93k
        ctxt->myDoc->children = content;
12859
2.93k
        ctxt->myDoc->last = last;
12860
2.93k
    }
12861
12862
    /*
12863
     * Also record the size of the entity parsed
12864
     */
12865
2.93k
    if (ctxt->input != NULL && oldctxt != NULL) {
12866
2.93k
        unsigned long consumed = ctxt->input->consumed;
12867
12868
2.93k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12869
12870
2.93k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12871
2.93k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12872
2.93k
    }
12873
12874
2.93k
    oldctxt->nbErrors = ctxt->nbErrors;
12875
2.93k
    oldctxt->nbWarnings = ctxt->nbWarnings;
12876
2.93k
    ctxt->sax = oldsax;
12877
2.93k
    ctxt->dict = NULL;
12878
2.93k
    ctxt->attsDefault = NULL;
12879
2.93k
    ctxt->attsSpecial = NULL;
12880
2.93k
    xmlFreeParserCtxt(ctxt);
12881
2.93k
    if (newDoc != NULL) {
12882
0
  xmlFreeDoc(newDoc);
12883
0
    }
12884
12885
2.93k
    return(ret);
12886
2.94k
}
12887
12888
/**
12889
 * xmlParseInNodeContext:
12890
 * @node:  the context node
12891
 * @data:  the input string
12892
 * @datalen:  the input string length in bytes
12893
 * @options:  a combination of xmlParserOption
12894
 * @lst:  the return value for the set of parsed nodes
12895
 *
12896
 * Parse a well-balanced chunk of an XML document
12897
 * within the context (DTD, namespaces, etc ...) of the given node.
12898
 *
12899
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12900
 * the content production in the XML grammar:
12901
 *
12902
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12903
 *
12904
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12905
 * error code otherwise
12906
 */
12907
xmlParserErrors
12908
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12909
0
                      int options, xmlNodePtr *lst) {
12910
0
#ifdef SAX2
12911
0
    xmlParserCtxtPtr ctxt;
12912
0
    xmlDocPtr doc = NULL;
12913
0
    xmlNodePtr fake, cur;
12914
0
    int nsnr = 0;
12915
12916
0
    xmlParserErrors ret = XML_ERR_OK;
12917
12918
    /*
12919
     * check all input parameters, grab the document
12920
     */
12921
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12922
0
        return(XML_ERR_INTERNAL_ERROR);
12923
0
    switch (node->type) {
12924
0
        case XML_ELEMENT_NODE:
12925
0
        case XML_ATTRIBUTE_NODE:
12926
0
        case XML_TEXT_NODE:
12927
0
        case XML_CDATA_SECTION_NODE:
12928
0
        case XML_ENTITY_REF_NODE:
12929
0
        case XML_PI_NODE:
12930
0
        case XML_COMMENT_NODE:
12931
0
        case XML_DOCUMENT_NODE:
12932
0
        case XML_HTML_DOCUMENT_NODE:
12933
0
      break;
12934
0
  default:
12935
0
      return(XML_ERR_INTERNAL_ERROR);
12936
12937
0
    }
12938
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12939
0
           (node->type != XML_DOCUMENT_NODE) &&
12940
0
     (node->type != XML_HTML_DOCUMENT_NODE))
12941
0
  node = node->parent;
12942
0
    if (node == NULL)
12943
0
  return(XML_ERR_INTERNAL_ERROR);
12944
0
    if (node->type == XML_ELEMENT_NODE)
12945
0
  doc = node->doc;
12946
0
    else
12947
0
        doc = (xmlDocPtr) node;
12948
0
    if (doc == NULL)
12949
0
  return(XML_ERR_INTERNAL_ERROR);
12950
12951
    /*
12952
     * allocate a context and set-up everything not related to the
12953
     * node position in the tree
12954
     */
12955
0
    if (doc->type == XML_DOCUMENT_NODE)
12956
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12957
0
#ifdef LIBXML_HTML_ENABLED
12958
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12959
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12960
        /*
12961
         * When parsing in context, it makes no sense to add implied
12962
         * elements like html/body/etc...
12963
         */
12964
0
        options |= HTML_PARSE_NOIMPLIED;
12965
0
    }
12966
0
#endif
12967
0
    else
12968
0
        return(XML_ERR_INTERNAL_ERROR);
12969
12970
0
    if (ctxt == NULL)
12971
0
        return(XML_ERR_NO_MEMORY);
12972
12973
    /*
12974
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12975
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12976
     * we must wait until the last moment to free the original one.
12977
     */
12978
0
    if (doc->dict != NULL) {
12979
0
        if (ctxt->dict != NULL)
12980
0
      xmlDictFree(ctxt->dict);
12981
0
  ctxt->dict = doc->dict;
12982
0
    } else
12983
0
        options |= XML_PARSE_NODICT;
12984
12985
0
    if (doc->encoding != NULL) {
12986
0
        xmlCharEncodingHandlerPtr hdlr;
12987
12988
0
        if (ctxt->encoding != NULL)
12989
0
      xmlFree((xmlChar *) ctxt->encoding);
12990
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
12991
12992
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
12993
0
        if (hdlr != NULL) {
12994
0
            xmlSwitchToEncoding(ctxt, hdlr);
12995
0
  } else {
12996
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
12997
0
        }
12998
0
    }
12999
13000
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13001
0
    xmlDetectSAX2(ctxt);
13002
0
    ctxt->myDoc = doc;
13003
    /* parsing in context, i.e. as within existing content */
13004
0
    ctxt->input_id = 2;
13005
0
    ctxt->instate = XML_PARSER_CONTENT;
13006
13007
0
    fake = xmlNewDocComment(node->doc, NULL);
13008
0
    if (fake == NULL) {
13009
0
        xmlFreeParserCtxt(ctxt);
13010
0
  return(XML_ERR_NO_MEMORY);
13011
0
    }
13012
0
    xmlAddChild(node, fake);
13013
13014
0
    if (node->type == XML_ELEMENT_NODE) {
13015
0
  nodePush(ctxt, node);
13016
  /*
13017
   * initialize the SAX2 namespaces stack
13018
   */
13019
0
  cur = node;
13020
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13021
0
      xmlNsPtr ns = cur->nsDef;
13022
0
      const xmlChar *iprefix, *ihref;
13023
13024
0
      while (ns != NULL) {
13025
0
    if (ctxt->dict) {
13026
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13027
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13028
0
    } else {
13029
0
        iprefix = ns->prefix;
13030
0
        ihref = ns->href;
13031
0
    }
13032
13033
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13034
0
        nsPush(ctxt, iprefix, ihref);
13035
0
        nsnr++;
13036
0
    }
13037
0
    ns = ns->next;
13038
0
      }
13039
0
      cur = cur->parent;
13040
0
  }
13041
0
    }
13042
13043
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13044
  /*
13045
   * ID/IDREF registration will be done in xmlValidateElement below
13046
   */
13047
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13048
0
    }
13049
13050
0
#ifdef LIBXML_HTML_ENABLED
13051
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13052
0
        __htmlParseContent(ctxt);
13053
0
    else
13054
0
#endif
13055
0
  xmlParseContent(ctxt);
13056
13057
0
    nsPop(ctxt, nsnr);
13058
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13059
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13060
0
    } else if (RAW != 0) {
13061
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13062
0
    }
13063
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13064
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13065
0
  ctxt->wellFormed = 0;
13066
0
    }
13067
13068
0
    if (!ctxt->wellFormed) {
13069
0
        if (ctxt->errNo == 0)
13070
0
      ret = XML_ERR_INTERNAL_ERROR;
13071
0
  else
13072
0
      ret = (xmlParserErrors)ctxt->errNo;
13073
0
    } else {
13074
0
        ret = XML_ERR_OK;
13075
0
    }
13076
13077
    /*
13078
     * Return the newly created nodeset after unlinking it from
13079
     * the pseudo sibling.
13080
     */
13081
13082
0
    cur = fake->next;
13083
0
    fake->next = NULL;
13084
0
    node->last = fake;
13085
13086
0
    if (cur != NULL) {
13087
0
  cur->prev = NULL;
13088
0
    }
13089
13090
0
    *lst = cur;
13091
13092
0
    while (cur != NULL) {
13093
0
  cur->parent = NULL;
13094
0
  cur = cur->next;
13095
0
    }
13096
13097
0
    xmlUnlinkNode(fake);
13098
0
    xmlFreeNode(fake);
13099
13100
13101
0
    if (ret != XML_ERR_OK) {
13102
0
        xmlFreeNodeList(*lst);
13103
0
  *lst = NULL;
13104
0
    }
13105
13106
0
    if (doc->dict != NULL)
13107
0
        ctxt->dict = NULL;
13108
0
    xmlFreeParserCtxt(ctxt);
13109
13110
0
    return(ret);
13111
#else /* !SAX2 */
13112
    return(XML_ERR_INTERNAL_ERROR);
13113
#endif
13114
0
}
13115
13116
#ifdef LIBXML_SAX1_ENABLED
13117
/**
13118
 * xmlParseBalancedChunkMemoryRecover:
13119
 * @doc:  the document the chunk pertains to (must not be NULL)
13120
 * @sax:  the SAX handler block (possibly NULL)
13121
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13122
 * @depth:  Used for loop detection, use 0
13123
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13124
 * @lst:  the return value for the set of parsed nodes
13125
 * @recover: return nodes even if the data is broken (use 0)
13126
 *
13127
 *
13128
 * Parse a well-balanced chunk of an XML document
13129
 * called by the parser
13130
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13131
 * the content production in the XML grammar:
13132
 *
13133
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13134
 *
13135
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13136
 *    the parser error code otherwise
13137
 *
13138
 * In case recover is set to 1, the nodelist will not be empty even if
13139
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13140
 * some extent.
13141
 */
13142
int
13143
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13144
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13145
     int recover) {
13146
    xmlParserCtxtPtr ctxt;
13147
    xmlDocPtr newDoc;
13148
    xmlSAXHandlerPtr oldsax = NULL;
13149
    xmlNodePtr content, newRoot;
13150
    int size;
13151
    int ret = 0;
13152
13153
    if (depth > 40) {
13154
  return(XML_ERR_ENTITY_LOOP);
13155
    }
13156
13157
13158
    if (lst != NULL)
13159
        *lst = NULL;
13160
    if (string == NULL)
13161
        return(-1);
13162
13163
    size = xmlStrlen(string);
13164
13165
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13166
    if (ctxt == NULL) return(-1);
13167
    ctxt->userData = ctxt;
13168
    if (sax != NULL) {
13169
  oldsax = ctxt->sax;
13170
        ctxt->sax = sax;
13171
  if (user_data != NULL)
13172
      ctxt->userData = user_data;
13173
    }
13174
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13175
    if (newDoc == NULL) {
13176
  xmlFreeParserCtxt(ctxt);
13177
  return(-1);
13178
    }
13179
    newDoc->properties = XML_DOC_INTERNAL;
13180
    if ((doc != NULL) && (doc->dict != NULL)) {
13181
        xmlDictFree(ctxt->dict);
13182
  ctxt->dict = doc->dict;
13183
  xmlDictReference(ctxt->dict);
13184
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13185
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13186
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13187
  ctxt->dictNames = 1;
13188
    } else {
13189
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13190
    }
13191
    /* doc == NULL is only supported for historic reasons */
13192
    if (doc != NULL) {
13193
  newDoc->intSubset = doc->intSubset;
13194
  newDoc->extSubset = doc->extSubset;
13195
    }
13196
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13197
    if (newRoot == NULL) {
13198
  if (sax != NULL)
13199
      ctxt->sax = oldsax;
13200
  xmlFreeParserCtxt(ctxt);
13201
  newDoc->intSubset = NULL;
13202
  newDoc->extSubset = NULL;
13203
        xmlFreeDoc(newDoc);
13204
  return(-1);
13205
    }
13206
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13207
    nodePush(ctxt, newRoot);
13208
    /* doc == NULL is only supported for historic reasons */
13209
    if (doc == NULL) {
13210
  ctxt->myDoc = newDoc;
13211
    } else {
13212
  ctxt->myDoc = newDoc;
13213
  newDoc->children->doc = doc;
13214
  /* Ensure that doc has XML spec namespace */
13215
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13216
  newDoc->oldNs = doc->oldNs;
13217
    }
13218
    ctxt->instate = XML_PARSER_CONTENT;
13219
    ctxt->input_id = 2;
13220
    ctxt->depth = depth;
13221
13222
    /*
13223
     * Doing validity checking on chunk doesn't make sense
13224
     */
13225
    ctxt->validate = 0;
13226
    ctxt->loadsubset = 0;
13227
    xmlDetectSAX2(ctxt);
13228
13229
    if ( doc != NULL ){
13230
        content = doc->children;
13231
        doc->children = NULL;
13232
        xmlParseContent(ctxt);
13233
        doc->children = content;
13234
    }
13235
    else {
13236
        xmlParseContent(ctxt);
13237
    }
13238
    if ((RAW == '<') && (NXT(1) == '/')) {
13239
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13240
    } else if (RAW != 0) {
13241
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13242
    }
13243
    if (ctxt->node != newDoc->children) {
13244
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13245
    }
13246
13247
    if (!ctxt->wellFormed) {
13248
        if (ctxt->errNo == 0)
13249
      ret = 1;
13250
  else
13251
      ret = ctxt->errNo;
13252
    } else {
13253
      ret = 0;
13254
    }
13255
13256
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13257
  xmlNodePtr cur;
13258
13259
  /*
13260
   * Return the newly created nodeset after unlinking it from
13261
   * they pseudo parent.
13262
   */
13263
  cur = newDoc->children->children;
13264
  *lst = cur;
13265
  while (cur != NULL) {
13266
      xmlSetTreeDoc(cur, doc);
13267
      cur->parent = NULL;
13268
      cur = cur->next;
13269
  }
13270
  newDoc->children->children = NULL;
13271
    }
13272
13273
    if (sax != NULL)
13274
  ctxt->sax = oldsax;
13275
    xmlFreeParserCtxt(ctxt);
13276
    newDoc->intSubset = NULL;
13277
    newDoc->extSubset = NULL;
13278
    /* This leaks the namespace list if doc == NULL */
13279
    newDoc->oldNs = NULL;
13280
    xmlFreeDoc(newDoc);
13281
13282
    return(ret);
13283
}
13284
13285
/**
13286
 * xmlSAXParseEntity:
13287
 * @sax:  the SAX handler block
13288
 * @filename:  the filename
13289
 *
13290
 * DEPRECATED: Don't use.
13291
 *
13292
 * parse an XML external entity out of context and build a tree.
13293
 * It use the given SAX function block to handle the parsing callback.
13294
 * If sax is NULL, fallback to the default DOM tree building routines.
13295
 *
13296
 * [78] extParsedEnt ::= TextDecl? content
13297
 *
13298
 * This correspond to a "Well Balanced" chunk
13299
 *
13300
 * Returns the resulting document tree
13301
 */
13302
13303
xmlDocPtr
13304
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13305
    xmlDocPtr ret;
13306
    xmlParserCtxtPtr ctxt;
13307
13308
    ctxt = xmlCreateFileParserCtxt(filename);
13309
    if (ctxt == NULL) {
13310
  return(NULL);
13311
    }
13312
    if (sax != NULL) {
13313
  if (ctxt->sax != NULL)
13314
      xmlFree(ctxt->sax);
13315
        ctxt->sax = sax;
13316
        ctxt->userData = NULL;
13317
    }
13318
13319
    xmlParseExtParsedEnt(ctxt);
13320
13321
    if (ctxt->wellFormed)
13322
  ret = ctxt->myDoc;
13323
    else {
13324
        ret = NULL;
13325
        xmlFreeDoc(ctxt->myDoc);
13326
        ctxt->myDoc = NULL;
13327
    }
13328
    if (sax != NULL)
13329
        ctxt->sax = NULL;
13330
    xmlFreeParserCtxt(ctxt);
13331
13332
    return(ret);
13333
}
13334
13335
/**
13336
 * xmlParseEntity:
13337
 * @filename:  the filename
13338
 *
13339
 * parse an XML external entity out of context and build a tree.
13340
 *
13341
 * [78] extParsedEnt ::= TextDecl? content
13342
 *
13343
 * This correspond to a "Well Balanced" chunk
13344
 *
13345
 * Returns the resulting document tree
13346
 */
13347
13348
xmlDocPtr
13349
xmlParseEntity(const char *filename) {
13350
    return(xmlSAXParseEntity(NULL, filename));
13351
}
13352
#endif /* LIBXML_SAX1_ENABLED */
13353
13354
/**
13355
 * xmlCreateEntityParserCtxtInternal:
13356
 * @URL:  the entity URL
13357
 * @ID:  the entity PUBLIC ID
13358
 * @base:  a possible base for the target URI
13359
 * @pctx:  parser context used to set options on new context
13360
 *
13361
 * Create a parser context for an external entity
13362
 * Automatic support for ZLIB/Compress compressed document is provided
13363
 * by default if found at compile-time.
13364
 *
13365
 * Returns the new parser context or NULL
13366
 */
13367
static xmlParserCtxtPtr
13368
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13369
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13370
11.4k
        xmlParserCtxtPtr pctx) {
13371
11.4k
    xmlParserCtxtPtr ctxt;
13372
11.4k
    xmlParserInputPtr inputStream;
13373
11.4k
    char *directory = NULL;
13374
11.4k
    xmlChar *uri;
13375
13376
11.4k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13377
11.4k
    if (ctxt == NULL) {
13378
195
  return(NULL);
13379
195
    }
13380
13381
11.2k
    if (pctx != NULL) {
13382
11.2k
        ctxt->options = pctx->options;
13383
11.2k
        ctxt->_private = pctx->_private;
13384
11.2k
  ctxt->input_id = pctx->input_id;
13385
11.2k
    }
13386
13387
    /* Don't read from stdin. */
13388
11.2k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13389
10
        URL = BAD_CAST "./-";
13390
13391
11.2k
    uri = xmlBuildURI(URL, base);
13392
13393
11.2k
    if (uri == NULL) {
13394
4.70k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13395
4.70k
  if (inputStream == NULL) {
13396
2.39k
      xmlFreeParserCtxt(ctxt);
13397
2.39k
      return(NULL);
13398
2.39k
  }
13399
13400
2.31k
  inputPush(ctxt, inputStream);
13401
13402
2.31k
  if ((ctxt->directory == NULL) && (directory == NULL))
13403
2.31k
      directory = xmlParserGetDirectory((char *)URL);
13404
2.31k
  if ((ctxt->directory == NULL) && (directory != NULL))
13405
2.31k
      ctxt->directory = directory;
13406
6.57k
    } else {
13407
6.57k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13408
6.57k
  if (inputStream == NULL) {
13409
2.93k
      xmlFree(uri);
13410
2.93k
      xmlFreeParserCtxt(ctxt);
13411
2.93k
      return(NULL);
13412
2.93k
  }
13413
13414
3.64k
  inputPush(ctxt, inputStream);
13415
13416
3.64k
  if ((ctxt->directory == NULL) && (directory == NULL))
13417
3.64k
      directory = xmlParserGetDirectory((char *)uri);
13418
3.64k
  if ((ctxt->directory == NULL) && (directory != NULL))
13419
3.63k
      ctxt->directory = directory;
13420
3.64k
  xmlFree(uri);
13421
3.64k
    }
13422
5.95k
    return(ctxt);
13423
11.2k
}
13424
13425
/**
13426
 * xmlCreateEntityParserCtxt:
13427
 * @URL:  the entity URL
13428
 * @ID:  the entity PUBLIC ID
13429
 * @base:  a possible base for the target URI
13430
 *
13431
 * Create a parser context for an external entity
13432
 * Automatic support for ZLIB/Compress compressed document is provided
13433
 * by default if found at compile-time.
13434
 *
13435
 * Returns the new parser context or NULL
13436
 */
13437
xmlParserCtxtPtr
13438
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13439
0
                    const xmlChar *base) {
13440
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13441
13442
0
}
13443
13444
/************************************************************************
13445
 *                  *
13446
 *    Front ends when parsing from a file     *
13447
 *                  *
13448
 ************************************************************************/
13449
13450
/**
13451
 * xmlCreateURLParserCtxt:
13452
 * @filename:  the filename or URL
13453
 * @options:  a combination of xmlParserOption
13454
 *
13455
 * Create a parser context for a file or URL content.
13456
 * Automatic support for ZLIB/Compress compressed document is provided
13457
 * by default if found at compile-time and for file accesses
13458
 *
13459
 * Returns the new parser context or NULL
13460
 */
13461
xmlParserCtxtPtr
13462
xmlCreateURLParserCtxt(const char *filename, int options)
13463
0
{
13464
0
    xmlParserCtxtPtr ctxt;
13465
0
    xmlParserInputPtr inputStream;
13466
0
    char *directory = NULL;
13467
13468
0
    ctxt = xmlNewParserCtxt();
13469
0
    if (ctxt == NULL) {
13470
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13471
0
  return(NULL);
13472
0
    }
13473
13474
0
    if (options)
13475
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13476
0
    ctxt->linenumbers = 1;
13477
13478
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13479
0
    if (inputStream == NULL) {
13480
0
  xmlFreeParserCtxt(ctxt);
13481
0
  return(NULL);
13482
0
    }
13483
13484
0
    inputPush(ctxt, inputStream);
13485
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13486
0
        directory = xmlParserGetDirectory(filename);
13487
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13488
0
        ctxt->directory = directory;
13489
13490
0
    return(ctxt);
13491
0
}
13492
13493
/**
13494
 * xmlCreateFileParserCtxt:
13495
 * @filename:  the filename
13496
 *
13497
 * Create a parser context for a file content.
13498
 * Automatic support for ZLIB/Compress compressed document is provided
13499
 * by default if found at compile-time.
13500
 *
13501
 * Returns the new parser context or NULL
13502
 */
13503
xmlParserCtxtPtr
13504
xmlCreateFileParserCtxt(const char *filename)
13505
0
{
13506
0
    return(xmlCreateURLParserCtxt(filename, 0));
13507
0
}
13508
13509
#ifdef LIBXML_SAX1_ENABLED
13510
/**
13511
 * xmlSAXParseFileWithData:
13512
 * @sax:  the SAX handler block
13513
 * @filename:  the filename
13514
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13515
 *             documents
13516
 * @data:  the userdata
13517
 *
13518
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13519
 *
13520
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13521
 * compressed document is provided by default if found at compile-time.
13522
 * It use the given SAX function block to handle the parsing callback.
13523
 * If sax is NULL, fallback to the default DOM tree building routines.
13524
 *
13525
 * User data (void *) is stored within the parser context in the
13526
 * context's _private member, so it is available nearly everywhere in libxml
13527
 *
13528
 * Returns the resulting document tree
13529
 */
13530
13531
xmlDocPtr
13532
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13533
                        int recovery, void *data) {
13534
    xmlDocPtr ret;
13535
    xmlParserCtxtPtr ctxt;
13536
13537
    xmlInitParser();
13538
13539
    ctxt = xmlCreateFileParserCtxt(filename);
13540
    if (ctxt == NULL) {
13541
  return(NULL);
13542
    }
13543
    if (sax != NULL) {
13544
  if (ctxt->sax != NULL)
13545
      xmlFree(ctxt->sax);
13546
        ctxt->sax = sax;
13547
    }
13548
    xmlDetectSAX2(ctxt);
13549
    if (data!=NULL) {
13550
  ctxt->_private = data;
13551
    }
13552
13553
    if (ctxt->directory == NULL)
13554
        ctxt->directory = xmlParserGetDirectory(filename);
13555
13556
    ctxt->recovery = recovery;
13557
13558
    xmlParseDocument(ctxt);
13559
13560
    if ((ctxt->wellFormed) || recovery) {
13561
        ret = ctxt->myDoc;
13562
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13563
      if (ctxt->input->buf->compressed > 0)
13564
    ret->compression = 9;
13565
      else
13566
    ret->compression = ctxt->input->buf->compressed;
13567
  }
13568
    }
13569
    else {
13570
       ret = NULL;
13571
       xmlFreeDoc(ctxt->myDoc);
13572
       ctxt->myDoc = NULL;
13573
    }
13574
    if (sax != NULL)
13575
        ctxt->sax = NULL;
13576
    xmlFreeParserCtxt(ctxt);
13577
13578
    return(ret);
13579
}
13580
13581
/**
13582
 * xmlSAXParseFile:
13583
 * @sax:  the SAX handler block
13584
 * @filename:  the filename
13585
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13586
 *             documents
13587
 *
13588
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13589
 *
13590
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13591
 * compressed document is provided by default if found at compile-time.
13592
 * It use the given SAX function block to handle the parsing callback.
13593
 * If sax is NULL, fallback to the default DOM tree building routines.
13594
 *
13595
 * Returns the resulting document tree
13596
 */
13597
13598
xmlDocPtr
13599
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13600
                          int recovery) {
13601
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13602
}
13603
13604
/**
13605
 * xmlRecoverDoc:
13606
 * @cur:  a pointer to an array of xmlChar
13607
 *
13608
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13609
 *
13610
 * parse an XML in-memory document and build a tree.
13611
 * In the case the document is not Well Formed, a attempt to build a
13612
 * tree is tried anyway
13613
 *
13614
 * Returns the resulting document tree or NULL in case of failure
13615
 */
13616
13617
xmlDocPtr
13618
xmlRecoverDoc(const xmlChar *cur) {
13619
    return(xmlSAXParseDoc(NULL, cur, 1));
13620
}
13621
13622
/**
13623
 * xmlParseFile:
13624
 * @filename:  the filename
13625
 *
13626
 * DEPRECATED: Use xmlReadFile.
13627
 *
13628
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13629
 * compressed document is provided by default if found at compile-time.
13630
 *
13631
 * Returns the resulting document tree if the file was wellformed,
13632
 * NULL otherwise.
13633
 */
13634
13635
xmlDocPtr
13636
xmlParseFile(const char *filename) {
13637
    return(xmlSAXParseFile(NULL, filename, 0));
13638
}
13639
13640
/**
13641
 * xmlRecoverFile:
13642
 * @filename:  the filename
13643
 *
13644
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13645
 *
13646
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13647
 * compressed document is provided by default if found at compile-time.
13648
 * In the case the document is not Well Formed, it attempts to build
13649
 * a tree anyway
13650
 *
13651
 * Returns the resulting document tree or NULL in case of failure
13652
 */
13653
13654
xmlDocPtr
13655
xmlRecoverFile(const char *filename) {
13656
    return(xmlSAXParseFile(NULL, filename, 1));
13657
}
13658
13659
13660
/**
13661
 * xmlSetupParserForBuffer:
13662
 * @ctxt:  an XML parser context
13663
 * @buffer:  a xmlChar * buffer
13664
 * @filename:  a file name
13665
 *
13666
 * DEPRECATED: Don't use.
13667
 *
13668
 * Setup the parser context to parse a new buffer; Clears any prior
13669
 * contents from the parser context. The buffer parameter must not be
13670
 * NULL, but the filename parameter can be
13671
 */
13672
void
13673
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13674
                             const char* filename)
13675
{
13676
    xmlParserInputPtr input;
13677
13678
    if ((ctxt == NULL) || (buffer == NULL))
13679
        return;
13680
13681
    input = xmlNewInputStream(ctxt);
13682
    if (input == NULL) {
13683
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13684
        xmlClearParserCtxt(ctxt);
13685
        return;
13686
    }
13687
13688
    xmlClearParserCtxt(ctxt);
13689
    if (filename != NULL)
13690
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13691
    input->base = buffer;
13692
    input->cur = buffer;
13693
    input->end = &buffer[xmlStrlen(buffer)];
13694
    inputPush(ctxt, input);
13695
}
13696
13697
/**
13698
 * xmlSAXUserParseFile:
13699
 * @sax:  a SAX handler
13700
 * @user_data:  The user data returned on SAX callbacks
13701
 * @filename:  a file name
13702
 *
13703
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13704
 *
13705
 * parse an XML file and call the given SAX handler routines.
13706
 * Automatic support for ZLIB/Compress compressed document is provided
13707
 *
13708
 * Returns 0 in case of success or a error number otherwise
13709
 */
13710
int
13711
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13712
                    const char *filename) {
13713
    int ret = 0;
13714
    xmlParserCtxtPtr ctxt;
13715
13716
    ctxt = xmlCreateFileParserCtxt(filename);
13717
    if (ctxt == NULL) return -1;
13718
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13719
  xmlFree(ctxt->sax);
13720
    ctxt->sax = sax;
13721
    xmlDetectSAX2(ctxt);
13722
13723
    if (user_data != NULL)
13724
  ctxt->userData = user_data;
13725
13726
    xmlParseDocument(ctxt);
13727
13728
    if (ctxt->wellFormed)
13729
  ret = 0;
13730
    else {
13731
        if (ctxt->errNo != 0)
13732
      ret = ctxt->errNo;
13733
  else
13734
      ret = -1;
13735
    }
13736
    if (sax != NULL)
13737
  ctxt->sax = NULL;
13738
    if (ctxt->myDoc != NULL) {
13739
        xmlFreeDoc(ctxt->myDoc);
13740
  ctxt->myDoc = NULL;
13741
    }
13742
    xmlFreeParserCtxt(ctxt);
13743
13744
    return ret;
13745
}
13746
#endif /* LIBXML_SAX1_ENABLED */
13747
13748
/************************************************************************
13749
 *                  *
13750
 *    Front ends when parsing from memory     *
13751
 *                  *
13752
 ************************************************************************/
13753
13754
/**
13755
 * xmlCreateMemoryParserCtxt:
13756
 * @buffer:  a pointer to a char array
13757
 * @size:  the size of the array
13758
 *
13759
 * Create a parser context for an XML in-memory document.
13760
 *
13761
 * Returns the new parser context or NULL
13762
 */
13763
xmlParserCtxtPtr
13764
62.2k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13765
62.2k
    xmlParserCtxtPtr ctxt;
13766
62.2k
    xmlParserInputPtr input;
13767
62.2k
    xmlParserInputBufferPtr buf;
13768
13769
62.2k
    if (buffer == NULL)
13770
0
  return(NULL);
13771
62.2k
    if (size <= 0)
13772
34
  return(NULL);
13773
13774
62.1k
    ctxt = xmlNewParserCtxt();
13775
62.1k
    if (ctxt == NULL)
13776
38
  return(NULL);
13777
13778
62.1k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13779
62.1k
    if (buf == NULL) {
13780
2
  xmlFreeParserCtxt(ctxt);
13781
2
  return(NULL);
13782
2
    }
13783
13784
62.1k
    input = xmlNewInputStream(ctxt);
13785
62.1k
    if (input == NULL) {
13786
1
  xmlFreeParserInputBuffer(buf);
13787
1
  xmlFreeParserCtxt(ctxt);
13788
1
  return(NULL);
13789
1
    }
13790
13791
62.1k
    input->filename = NULL;
13792
62.1k
    input->buf = buf;
13793
62.1k
    xmlBufResetInput(input->buf->buffer, input);
13794
13795
62.1k
    inputPush(ctxt, input);
13796
62.1k
    return(ctxt);
13797
62.1k
}
13798
13799
#ifdef LIBXML_SAX1_ENABLED
13800
/**
13801
 * xmlSAXParseMemoryWithData:
13802
 * @sax:  the SAX handler block
13803
 * @buffer:  an pointer to a char array
13804
 * @size:  the size of the array
13805
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13806
 *             documents
13807
 * @data:  the userdata
13808
 *
13809
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13810
 *
13811
 * parse an XML in-memory block and use the given SAX function block
13812
 * to handle the parsing callback. If sax is NULL, fallback to the default
13813
 * DOM tree building routines.
13814
 *
13815
 * User data (void *) is stored within the parser context in the
13816
 * context's _private member, so it is available nearly everywhere in libxml
13817
 *
13818
 * Returns the resulting document tree
13819
 */
13820
13821
xmlDocPtr
13822
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13823
            int size, int recovery, void *data) {
13824
    xmlDocPtr ret;
13825
    xmlParserCtxtPtr ctxt;
13826
13827
    xmlInitParser();
13828
13829
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13830
    if (ctxt == NULL) return(NULL);
13831
    if (sax != NULL) {
13832
  if (ctxt->sax != NULL)
13833
      xmlFree(ctxt->sax);
13834
        ctxt->sax = sax;
13835
    }
13836
    xmlDetectSAX2(ctxt);
13837
    if (data!=NULL) {
13838
  ctxt->_private=data;
13839
    }
13840
13841
    ctxt->recovery = recovery;
13842
13843
    xmlParseDocument(ctxt);
13844
13845
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13846
    else {
13847
       ret = NULL;
13848
       xmlFreeDoc(ctxt->myDoc);
13849
       ctxt->myDoc = NULL;
13850
    }
13851
    if (sax != NULL)
13852
  ctxt->sax = NULL;
13853
    xmlFreeParserCtxt(ctxt);
13854
13855
    return(ret);
13856
}
13857
13858
/**
13859
 * xmlSAXParseMemory:
13860
 * @sax:  the SAX handler block
13861
 * @buffer:  an pointer to a char array
13862
 * @size:  the size of the array
13863
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13864
 *             documents
13865
 *
13866
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13867
 *
13868
 * parse an XML in-memory block and use the given SAX function block
13869
 * to handle the parsing callback. If sax is NULL, fallback to the default
13870
 * DOM tree building routines.
13871
 *
13872
 * Returns the resulting document tree
13873
 */
13874
xmlDocPtr
13875
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13876
            int size, int recovery) {
13877
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13878
}
13879
13880
/**
13881
 * xmlParseMemory:
13882
 * @buffer:  an pointer to a char array
13883
 * @size:  the size of the array
13884
 *
13885
 * DEPRECATED: Use xmlReadMemory.
13886
 *
13887
 * parse an XML in-memory block and build a tree.
13888
 *
13889
 * Returns the resulting document tree
13890
 */
13891
13892
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13893
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13894
}
13895
13896
/**
13897
 * xmlRecoverMemory:
13898
 * @buffer:  an pointer to a char array
13899
 * @size:  the size of the array
13900
 *
13901
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13902
 *
13903
 * parse an XML in-memory block and build a tree.
13904
 * In the case the document is not Well Formed, an attempt to
13905
 * build a tree is tried anyway
13906
 *
13907
 * Returns the resulting document tree or NULL in case of error
13908
 */
13909
13910
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13911
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13912
}
13913
13914
/**
13915
 * xmlSAXUserParseMemory:
13916
 * @sax:  a SAX handler
13917
 * @user_data:  The user data returned on SAX callbacks
13918
 * @buffer:  an in-memory XML document input
13919
 * @size:  the length of the XML document in bytes
13920
 *
13921
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13922
 *
13923
 * parse an XML in-memory buffer and call the given SAX handler routines.
13924
 *
13925
 * Returns 0 in case of success or a error number otherwise
13926
 */
13927
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13928
        const char *buffer, int size) {
13929
    int ret = 0;
13930
    xmlParserCtxtPtr ctxt;
13931
13932
    xmlInitParser();
13933
13934
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13935
    if (ctxt == NULL) return -1;
13936
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13937
        xmlFree(ctxt->sax);
13938
    ctxt->sax = sax;
13939
    xmlDetectSAX2(ctxt);
13940
13941
    if (user_data != NULL)
13942
  ctxt->userData = user_data;
13943
13944
    xmlParseDocument(ctxt);
13945
13946
    if (ctxt->wellFormed)
13947
  ret = 0;
13948
    else {
13949
        if (ctxt->errNo != 0)
13950
      ret = ctxt->errNo;
13951
  else
13952
      ret = -1;
13953
    }
13954
    if (sax != NULL)
13955
        ctxt->sax = NULL;
13956
    if (ctxt->myDoc != NULL) {
13957
        xmlFreeDoc(ctxt->myDoc);
13958
  ctxt->myDoc = NULL;
13959
    }
13960
    xmlFreeParserCtxt(ctxt);
13961
13962
    return ret;
13963
}
13964
#endif /* LIBXML_SAX1_ENABLED */
13965
13966
/**
13967
 * xmlCreateDocParserCtxt:
13968
 * @cur:  a pointer to an array of xmlChar
13969
 *
13970
 * Creates a parser context for an XML in-memory document.
13971
 *
13972
 * Returns the new parser context or NULL
13973
 */
13974
xmlParserCtxtPtr
13975
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
13976
0
    int len;
13977
13978
0
    if (cur == NULL)
13979
0
  return(NULL);
13980
0
    len = xmlStrlen(cur);
13981
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13982
0
}
13983
13984
#ifdef LIBXML_SAX1_ENABLED
13985
/**
13986
 * xmlSAXParseDoc:
13987
 * @sax:  the SAX handler block
13988
 * @cur:  a pointer to an array of xmlChar
13989
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13990
 *             documents
13991
 *
13992
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13993
 *
13994
 * parse an XML in-memory document and build a tree.
13995
 * It use the given SAX function block to handle the parsing callback.
13996
 * If sax is NULL, fallback to the default DOM tree building routines.
13997
 *
13998
 * Returns the resulting document tree
13999
 */
14000
14001
xmlDocPtr
14002
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14003
    xmlDocPtr ret;
14004
    xmlParserCtxtPtr ctxt;
14005
    xmlSAXHandlerPtr oldsax = NULL;
14006
14007
    if (cur == NULL) return(NULL);
14008
14009
14010
    ctxt = xmlCreateDocParserCtxt(cur);
14011
    if (ctxt == NULL) return(NULL);
14012
    if (sax != NULL) {
14013
        oldsax = ctxt->sax;
14014
        ctxt->sax = sax;
14015
        ctxt->userData = NULL;
14016
    }
14017
    xmlDetectSAX2(ctxt);
14018
14019
    xmlParseDocument(ctxt);
14020
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14021
    else {
14022
       ret = NULL;
14023
       xmlFreeDoc(ctxt->myDoc);
14024
       ctxt->myDoc = NULL;
14025
    }
14026
    if (sax != NULL)
14027
  ctxt->sax = oldsax;
14028
    xmlFreeParserCtxt(ctxt);
14029
14030
    return(ret);
14031
}
14032
14033
/**
14034
 * xmlParseDoc:
14035
 * @cur:  a pointer to an array of xmlChar
14036
 *
14037
 * DEPRECATED: Use xmlReadDoc.
14038
 *
14039
 * parse an XML in-memory document and build a tree.
14040
 *
14041
 * Returns the resulting document tree
14042
 */
14043
14044
xmlDocPtr
14045
xmlParseDoc(const xmlChar *cur) {
14046
    return(xmlSAXParseDoc(NULL, cur, 0));
14047
}
14048
#endif /* LIBXML_SAX1_ENABLED */
14049
14050
#ifdef LIBXML_LEGACY_ENABLED
14051
/************************************************************************
14052
 *                  *
14053
 *  Specific function to keep track of entities references    *
14054
 *  and used by the XSLT debugger         *
14055
 *                  *
14056
 ************************************************************************/
14057
14058
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14059
14060
/**
14061
 * xmlAddEntityReference:
14062
 * @ent : A valid entity
14063
 * @firstNode : A valid first node for children of entity
14064
 * @lastNode : A valid last node of children entity
14065
 *
14066
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14067
 */
14068
static void
14069
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14070
                      xmlNodePtr lastNode)
14071
{
14072
    if (xmlEntityRefFunc != NULL) {
14073
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14074
    }
14075
}
14076
14077
14078
/**
14079
 * xmlSetEntityReferenceFunc:
14080
 * @func: A valid function
14081
 *
14082
 * Set the function to call call back when a xml reference has been made
14083
 */
14084
void
14085
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14086
{
14087
    xmlEntityRefFunc = func;
14088
}
14089
#endif /* LIBXML_LEGACY_ENABLED */
14090
14091
/************************************************************************
14092
 *                  *
14093
 *        Miscellaneous       *
14094
 *                  *
14095
 ************************************************************************/
14096
14097
static int xmlParserInitialized = 0;
14098
14099
/**
14100
 * xmlInitParser:
14101
 *
14102
 * Initialization function for the XML parser.
14103
 * This is not reentrant. Call once before processing in case of
14104
 * use in multithreaded programs.
14105
 */
14106
14107
void
14108
41.3M
xmlInitParser(void) {
14109
    /*
14110
     * Note that the initialization code must not make memory allocations.
14111
     */
14112
41.3M
    if (xmlParserInitialized != 0)
14113
41.3M
  return;
14114
14115
2
#ifdef LIBXML_THREAD_ENABLED
14116
2
    __xmlGlobalInitMutexLock();
14117
2
    if (xmlParserInitialized == 0) {
14118
2
#endif
14119
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14120
        if (xmlFree == free)
14121
            atexit(xmlCleanupParser);
14122
#endif
14123
14124
2
  xmlInitThreadsInternal();
14125
2
  xmlInitGlobalsInternal();
14126
2
  xmlInitMemoryInternal();
14127
2
        __xmlInitializeDict();
14128
2
  xmlInitEncodingInternal();
14129
2
  xmlRegisterDefaultInputCallbacks();
14130
2
#ifdef LIBXML_OUTPUT_ENABLED
14131
2
  xmlRegisterDefaultOutputCallbacks();
14132
2
#endif /* LIBXML_OUTPUT_ENABLED */
14133
2
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14134
2
  xmlInitXPathInternal();
14135
2
#endif
14136
2
  xmlParserInitialized = 1;
14137
2
#ifdef LIBXML_THREAD_ENABLED
14138
2
    }
14139
2
    __xmlGlobalInitMutexUnlock();
14140
2
#endif
14141
2
}
14142
14143
/**
14144
 * xmlCleanupParser:
14145
 *
14146
 * This function name is somewhat misleading. It does not clean up
14147
 * parser state, it cleans up memory allocated by the library itself.
14148
 * It is a cleanup function for the XML library. It tries to reclaim all
14149
 * related global memory allocated for the library processing.
14150
 * It doesn't deallocate any document related memory. One should
14151
 * call xmlCleanupParser() only when the process has finished using
14152
 * the library and all XML/HTML documents built with it.
14153
 * See also xmlInitParser() which has the opposite function of preparing
14154
 * the library for operations.
14155
 *
14156
 * WARNING: if your application is multithreaded or has plugin support
14157
 *          calling this may crash the application if another thread or
14158
 *          a plugin is still using libxml2. It's sometimes very hard to
14159
 *          guess if libxml2 is in use in the application, some libraries
14160
 *          or plugins may use it without notice. In case of doubt abstain
14161
 *          from calling this function or do it just before calling exit()
14162
 *          to avoid leak reports from valgrind !
14163
 */
14164
14165
void
14166
0
xmlCleanupParser(void) {
14167
0
    if (!xmlParserInitialized)
14168
0
  return;
14169
14170
0
    xmlCleanupCharEncodingHandlers();
14171
0
#ifdef LIBXML_CATALOG_ENABLED
14172
0
    xmlCatalogCleanup();
14173
0
#endif
14174
0
    xmlCleanupDictInternal();
14175
0
    xmlCleanupInputCallbacks();
14176
0
#ifdef LIBXML_OUTPUT_ENABLED
14177
0
    xmlCleanupOutputCallbacks();
14178
0
#endif
14179
#ifdef LIBXML_SCHEMAS_ENABLED
14180
    xmlSchemaCleanupTypes();
14181
    xmlRelaxNGCleanupTypes();
14182
#endif
14183
0
    xmlCleanupGlobalsInternal();
14184
0
    xmlCleanupThreadsInternal();
14185
0
    xmlCleanupMemoryInternal();
14186
0
    xmlParserInitialized = 0;
14187
0
}
14188
14189
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14190
    !defined(_WIN32)
14191
static void
14192
ATTRIBUTE_DESTRUCTOR
14193
0
xmlDestructor(void) {
14194
    /*
14195
     * Calling custom deallocation functions in a destructor can cause
14196
     * problems, for example with Nokogiri.
14197
     */
14198
0
    if (xmlFree == free)
14199
0
        xmlCleanupParser();
14200
0
}
14201
#endif
14202
14203
/************************************************************************
14204
 *                  *
14205
 *  New set (2.6.0) of simpler and more flexible APIs   *
14206
 *                  *
14207
 ************************************************************************/
14208
14209
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; a NULL "dict" means the string is always freed.
 * A NULL @str is ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely as the unbraced body of
 * an if/else. Invoke it with a trailing semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14220
14221
/**
14222
 * xmlCtxtReset:
14223
 * @ctxt: an XML parser context
14224
 *
14225
 * Reset a parser context
14226
 */
14227
void
14228
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14229
0
{
14230
0
    xmlParserInputPtr input;
14231
0
    xmlDictPtr dict;
14232
14233
0
    if (ctxt == NULL)
14234
0
        return;
14235
14236
0
    dict = ctxt->dict;
14237
14238
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14239
0
        xmlFreeInputStream(input);
14240
0
    }
14241
0
    ctxt->inputNr = 0;
14242
0
    ctxt->input = NULL;
14243
14244
0
    ctxt->spaceNr = 0;
14245
0
    if (ctxt->spaceTab != NULL) {
14246
0
  ctxt->spaceTab[0] = -1;
14247
0
  ctxt->space = &ctxt->spaceTab[0];
14248
0
    } else {
14249
0
        ctxt->space = NULL;
14250
0
    }
14251
14252
14253
0
    ctxt->nodeNr = 0;
14254
0
    ctxt->node = NULL;
14255
14256
0
    ctxt->nameNr = 0;
14257
0
    ctxt->name = NULL;
14258
14259
0
    ctxt->nsNr = 0;
14260
14261
0
    DICT_FREE(ctxt->version);
14262
0
    ctxt->version = NULL;
14263
0
    DICT_FREE(ctxt->encoding);
14264
0
    ctxt->encoding = NULL;
14265
0
    DICT_FREE(ctxt->directory);
14266
0
    ctxt->directory = NULL;
14267
0
    DICT_FREE(ctxt->extSubURI);
14268
0
    ctxt->extSubURI = NULL;
14269
0
    DICT_FREE(ctxt->extSubSystem);
14270
0
    ctxt->extSubSystem = NULL;
14271
0
    if (ctxt->myDoc != NULL)
14272
0
        xmlFreeDoc(ctxt->myDoc);
14273
0
    ctxt->myDoc = NULL;
14274
14275
0
    ctxt->standalone = -1;
14276
0
    ctxt->hasExternalSubset = 0;
14277
0
    ctxt->hasPErefs = 0;
14278
0
    ctxt->html = 0;
14279
0
    ctxt->external = 0;
14280
0
    ctxt->instate = XML_PARSER_START;
14281
0
    ctxt->token = 0;
14282
14283
0
    ctxt->wellFormed = 1;
14284
0
    ctxt->nsWellFormed = 1;
14285
0
    ctxt->disableSAX = 0;
14286
0
    ctxt->valid = 1;
14287
#if 0
14288
    ctxt->vctxt.userData = ctxt;
14289
    ctxt->vctxt.error = xmlParserValidityError;
14290
    ctxt->vctxt.warning = xmlParserValidityWarning;
14291
#endif
14292
0
    ctxt->record_info = 0;
14293
0
    ctxt->checkIndex = 0;
14294
0
    ctxt->endCheckState = 0;
14295
0
    ctxt->inSubset = 0;
14296
0
    ctxt->errNo = XML_ERR_OK;
14297
0
    ctxt->depth = 0;
14298
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14299
0
    ctxt->catalogs = NULL;
14300
0
    ctxt->sizeentities = 0;
14301
0
    ctxt->sizeentcopy = 0;
14302
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14303
14304
0
    if (ctxt->attsDefault != NULL) {
14305
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14306
0
        ctxt->attsDefault = NULL;
14307
0
    }
14308
0
    if (ctxt->attsSpecial != NULL) {
14309
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14310
0
        ctxt->attsSpecial = NULL;
14311
0
    }
14312
14313
0
#ifdef LIBXML_CATALOG_ENABLED
14314
0
    if (ctxt->catalogs != NULL)
14315
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14316
0
#endif
14317
0
    ctxt->nbErrors = 0;
14318
0
    ctxt->nbWarnings = 0;
14319
0
    if (ctxt->lastError.code != XML_ERR_OK)
14320
0
        xmlResetError(&ctxt->lastError);
14321
0
}
14322
14323
/**
14324
 * xmlCtxtResetPush:
14325
 * @ctxt: an XML parser context
14326
 * @chunk:  a pointer to an array of chars
14327
 * @size:  number of chars in the array
14328
 * @filename:  an optional file name or URI
14329
 * @encoding:  the document encoding, or NULL
14330
 *
14331
 * Reset a push parser context
14332
 *
14333
 * Returns 0 in case of success and 1 in case of error
14334
 */
14335
int
14336
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14337
                 int size, const char *filename, const char *encoding)
14338
0
{
14339
0
    xmlParserInputPtr inputStream;
14340
0
    xmlParserInputBufferPtr buf;
14341
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14342
14343
0
    if (ctxt == NULL)
14344
0
        return(1);
14345
14346
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14347
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14348
14349
0
    buf = xmlAllocParserInputBuffer(enc);
14350
0
    if (buf == NULL)
14351
0
        return(1);
14352
14353
0
    if (ctxt == NULL) {
14354
0
        xmlFreeParserInputBuffer(buf);
14355
0
        return(1);
14356
0
    }
14357
14358
0
    xmlCtxtReset(ctxt);
14359
14360
0
    if (filename == NULL) {
14361
0
        ctxt->directory = NULL;
14362
0
    } else {
14363
0
        ctxt->directory = xmlParserGetDirectory(filename);
14364
0
    }
14365
14366
0
    inputStream = xmlNewInputStream(ctxt);
14367
0
    if (inputStream == NULL) {
14368
0
        xmlFreeParserInputBuffer(buf);
14369
0
        return(1);
14370
0
    }
14371
14372
0
    if (filename == NULL)
14373
0
        inputStream->filename = NULL;
14374
0
    else
14375
0
        inputStream->filename = (char *)
14376
0
            xmlCanonicPath((const xmlChar *) filename);
14377
0
    inputStream->buf = buf;
14378
0
    xmlBufResetInput(buf->buffer, inputStream);
14379
14380
0
    inputPush(ctxt, inputStream);
14381
14382
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14383
0
        (ctxt->input->buf != NULL)) {
14384
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14385
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14386
0
        int res;
14387
14388
0
        res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14389
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14390
0
        if (res < 0) {
14391
0
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
14392
0
            xmlHaltParser(ctxt);
14393
0
            return(1);
14394
0
        }
14395
#ifdef DEBUG_PUSH
14396
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14397
#endif
14398
0
    }
14399
14400
0
    if (encoding != NULL) {
14401
0
        xmlCharEncodingHandlerPtr hdlr;
14402
14403
0
        if (ctxt->encoding != NULL)
14404
0
      xmlFree((xmlChar *) ctxt->encoding);
14405
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14406
14407
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14408
0
        if (hdlr != NULL) {
14409
0
            xmlSwitchToEncoding(ctxt, hdlr);
14410
0
  } else {
14411
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14412
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14413
0
        }
14414
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14415
0
        xmlSwitchEncoding(ctxt, enc);
14416
0
    }
14417
14418
0
    return(0);
14419
0
}
14420
14421
14422
/**
14423
 * xmlCtxtUseOptionsInternal:
14424
 * @ctxt: an XML parser context
14425
 * @options:  a combination of xmlParserOption
14426
 * @encoding:  the user provided encoding to use
14427
 *
14428
 * Applies the options to the parser context
14429
 *
14430
 * Returns 0 in case of success, the set of unknown or unimplemented options
14431
 *         in case of error.
14432
 */
14433
static int
14434
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14435
406k
{
14436
406k
    if (ctxt == NULL)
14437
0
        return(-1);
14438
406k
    if (encoding != NULL) {
14439
0
        if (ctxt->encoding != NULL)
14440
0
      xmlFree((xmlChar *) ctxt->encoding);
14441
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14442
0
    }
14443
406k
    if (options & XML_PARSE_RECOVER) {
14444
0
        ctxt->recovery = 1;
14445
0
        options -= XML_PARSE_RECOVER;
14446
0
  ctxt->options |= XML_PARSE_RECOVER;
14447
0
    } else
14448
406k
        ctxt->recovery = 0;
14449
406k
    if (options & XML_PARSE_DTDLOAD) {
14450
406k
        ctxt->loadsubset = XML_DETECT_IDS;
14451
406k
        options -= XML_PARSE_DTDLOAD;
14452
406k
  ctxt->options |= XML_PARSE_DTDLOAD;
14453
406k
    } else
14454
0
        ctxt->loadsubset = 0;
14455
406k
    if (options & XML_PARSE_DTDATTR) {
14456
406k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14457
406k
        options -= XML_PARSE_DTDATTR;
14458
406k
  ctxt->options |= XML_PARSE_DTDATTR;
14459
406k
    }
14460
406k
    if (options & XML_PARSE_NOENT) {
14461
406k
        ctxt->replaceEntities = 1;
14462
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14463
406k
        options -= XML_PARSE_NOENT;
14464
406k
  ctxt->options |= XML_PARSE_NOENT;
14465
406k
    } else
14466
0
        ctxt->replaceEntities = 0;
14467
406k
    if (options & XML_PARSE_PEDANTIC) {
14468
0
        ctxt->pedantic = 1;
14469
0
        options -= XML_PARSE_PEDANTIC;
14470
0
  ctxt->options |= XML_PARSE_PEDANTIC;
14471
0
    } else
14472
406k
        ctxt->pedantic = 0;
14473
406k
    if (options & XML_PARSE_NOBLANKS) {
14474
0
        ctxt->keepBlanks = 0;
14475
0
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14476
0
        options -= XML_PARSE_NOBLANKS;
14477
0
  ctxt->options |= XML_PARSE_NOBLANKS;
14478
0
    } else
14479
406k
        ctxt->keepBlanks = 1;
14480
406k
    if (options & XML_PARSE_DTDVALID) {
14481
0
        ctxt->validate = 1;
14482
0
        if (options & XML_PARSE_NOWARNING)
14483
0
            ctxt->vctxt.warning = NULL;
14484
0
        if (options & XML_PARSE_NOERROR)
14485
0
            ctxt->vctxt.error = NULL;
14486
0
        options -= XML_PARSE_DTDVALID;
14487
0
  ctxt->options |= XML_PARSE_DTDVALID;
14488
0
    } else
14489
406k
        ctxt->validate = 0;
14490
406k
    if (options & XML_PARSE_NOWARNING) {
14491
0
        ctxt->sax->warning = NULL;
14492
0
        options -= XML_PARSE_NOWARNING;
14493
0
    }
14494
406k
    if (options & XML_PARSE_NOERROR) {
14495
0
        ctxt->sax->error = NULL;
14496
0
        ctxt->sax->fatalError = NULL;
14497
0
        options -= XML_PARSE_NOERROR;
14498
0
    }
14499
#ifdef LIBXML_SAX1_ENABLED
14500
    if (options & XML_PARSE_SAX1) {
14501
        ctxt->sax->startElementNs = NULL;
14502
        ctxt->sax->endElementNs = NULL;
14503
        ctxt->sax->initialized = 1;
14504
        options -= XML_PARSE_SAX1;
14505
  ctxt->options |= XML_PARSE_SAX1;
14506
    }
14507
#endif /* LIBXML_SAX1_ENABLED */
14508
406k
    if (options & XML_PARSE_NODICT) {
14509
0
        ctxt->dictNames = 0;
14510
0
        options -= XML_PARSE_NODICT;
14511
0
  ctxt->options |= XML_PARSE_NODICT;
14512
406k
    } else {
14513
406k
        ctxt->dictNames = 1;
14514
406k
    }
14515
406k
    if (options & XML_PARSE_NOCDATA) {
14516
406k
        ctxt->sax->cdataBlock = NULL;
14517
406k
        options -= XML_PARSE_NOCDATA;
14518
406k
  ctxt->options |= XML_PARSE_NOCDATA;
14519
406k
    }
14520
406k
    if (options & XML_PARSE_NSCLEAN) {
14521
0
  ctxt->options |= XML_PARSE_NSCLEAN;
14522
0
        options -= XML_PARSE_NSCLEAN;
14523
0
    }
14524
406k
    if (options & XML_PARSE_NONET) {
14525
0
  ctxt->options |= XML_PARSE_NONET;
14526
0
        options -= XML_PARSE_NONET;
14527
0
    }
14528
406k
    if (options & XML_PARSE_COMPACT) {
14529
0
  ctxt->options |= XML_PARSE_COMPACT;
14530
0
        options -= XML_PARSE_COMPACT;
14531
0
    }
14532
406k
    if (options & XML_PARSE_OLD10) {
14533
0
  ctxt->options |= XML_PARSE_OLD10;
14534
0
        options -= XML_PARSE_OLD10;
14535
0
    }
14536
406k
    if (options & XML_PARSE_NOBASEFIX) {
14537
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
14538
0
        options -= XML_PARSE_NOBASEFIX;
14539
0
    }
14540
406k
    if (options & XML_PARSE_HUGE) {
14541
0
  ctxt->options |= XML_PARSE_HUGE;
14542
0
        options -= XML_PARSE_HUGE;
14543
0
        if (ctxt->dict != NULL)
14544
0
            xmlDictSetLimit(ctxt->dict, 0);
14545
0
    }
14546
406k
    if (options & XML_PARSE_OLDSAX) {
14547
0
  ctxt->options |= XML_PARSE_OLDSAX;
14548
0
        options -= XML_PARSE_OLDSAX;
14549
0
    }
14550
406k
    if (options & XML_PARSE_IGNORE_ENC) {
14551
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14552
0
        options -= XML_PARSE_IGNORE_ENC;
14553
0
    }
14554
406k
    if (options & XML_PARSE_BIG_LINES) {
14555
0
  ctxt->options |= XML_PARSE_BIG_LINES;
14556
0
        options -= XML_PARSE_BIG_LINES;
14557
0
    }
14558
406k
    ctxt->linenumbers = 1;
14559
406k
    return (options);
14560
406k
}
14561
14562
/**
14563
 * xmlCtxtUseOptions:
14564
 * @ctxt: an XML parser context
14565
 * @options:  a combination of xmlParserOption
14566
 *
14567
 * Applies the options to the parser context
14568
 *
14569
 * Returns 0 in case of success, the set of unknown or unimplemented options
14570
 *         in case of error.
14571
 */
14572
int
14573
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14574
347k
{
14575
347k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14576
347k
}
14577
14578
/**
14579
 * xmlDoRead:
14580
 * @ctxt:  an XML parser context
14581
 * @URL:  the base URL to use for the document
14582
 * @encoding:  the document encoding, or NULL
14583
 * @options:  a combination of xmlParserOption
14584
 * @reuse:  keep the context for reuse
14585
 *
14586
 * Common front-end for the xmlRead functions
14587
 *
14588
 * Returns the resulting document tree or NULL
14589
 */
14590
static xmlDocPtr
14591
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14592
          int options, int reuse)
14593
59.1k
{
14594
59.1k
    xmlDocPtr ret;
14595
14596
59.1k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14597
59.1k
    if (encoding != NULL) {
14598
0
        xmlCharEncodingHandlerPtr hdlr;
14599
14600
        /*
14601
         * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14602
         * caller provided an encoding. Otherwise, we might switch to
14603
         * the encoding from the XML declaration which is likely to
14604
         * break things. Also see xmlSwitchInputEncoding.
14605
         */
14606
0
  hdlr = xmlFindCharEncodingHandler(encoding);
14607
0
  if (hdlr != NULL)
14608
0
      xmlSwitchToEncoding(ctxt, hdlr);
14609
0
    }
14610
59.1k
    if ((URL != NULL) && (ctxt->input != NULL) &&
14611
59.1k
        (ctxt->input->filename == NULL))
14612
59.1k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14613
59.1k
    xmlParseDocument(ctxt);
14614
59.1k
    if ((ctxt->wellFormed) || ctxt->recovery)
14615
46.6k
        ret = ctxt->myDoc;
14616
12.5k
    else {
14617
12.5k
        ret = NULL;
14618
12.5k
  if (ctxt->myDoc != NULL) {
14619
11.2k
      xmlFreeDoc(ctxt->myDoc);
14620
11.2k
  }
14621
12.5k
    }
14622
59.1k
    ctxt->myDoc = NULL;
14623
59.1k
    if (!reuse) {
14624
59.1k
  xmlFreeParserCtxt(ctxt);
14625
59.1k
    }
14626
14627
59.1k
    return (ret);
14628
59.1k
}
14629
14630
/**
14631
 * xmlReadDoc:
14632
 * @cur:  a pointer to a zero terminated string
14633
 * @URL:  the base URL to use for the document
14634
 * @encoding:  the document encoding, or NULL
14635
 * @options:  a combination of xmlParserOption
14636
 *
14637
 * parse an XML in-memory document and build a tree.
14638
 *
14639
 * Returns the resulting document tree
14640
 */
14641
xmlDocPtr
14642
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14643
0
{
14644
0
    xmlParserCtxtPtr ctxt;
14645
14646
0
    if (cur == NULL)
14647
0
        return (NULL);
14648
0
    xmlInitParser();
14649
14650
0
    ctxt = xmlCreateDocParserCtxt(cur);
14651
0
    if (ctxt == NULL)
14652
0
        return (NULL);
14653
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14654
0
}
14655
14656
/**
14657
 * xmlReadFile:
14658
 * @filename:  a file or URL
14659
 * @encoding:  the document encoding, or NULL
14660
 * @options:  a combination of xmlParserOption
14661
 *
14662
 * parse an XML file from the filesystem or the network.
14663
 *
14664
 * Returns the resulting document tree
14665
 */
14666
xmlDocPtr
14667
xmlReadFile(const char *filename, const char *encoding, int options)
14668
0
{
14669
0
    xmlParserCtxtPtr ctxt;
14670
14671
0
    xmlInitParser();
14672
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
14673
0
    if (ctxt == NULL)
14674
0
        return (NULL);
14675
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14676
0
}
14677
14678
/**
14679
 * xmlReadMemory:
14680
 * @buffer:  a pointer to a char array
14681
 * @size:  the size of the array
14682
 * @URL:  the base URL to use for the document
14683
 * @encoding:  the document encoding, or NULL
14684
 * @options:  a combination of xmlParserOption
14685
 *
14686
 * parse an XML in-memory document and build a tree.
14687
 *
14688
 * Returns the resulting document tree
14689
 */
14690
xmlDocPtr
14691
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14692
59.2k
{
14693
59.2k
    xmlParserCtxtPtr ctxt;
14694
14695
59.2k
    xmlInitParser();
14696
59.2k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14697
59.2k
    if (ctxt == NULL)
14698
28
        return (NULL);
14699
59.1k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14700
59.2k
}
14701
14702
/**
14703
 * xmlReadFd:
14704
 * @fd:  an open file descriptor
14705
 * @URL:  the base URL to use for the document
14706
 * @encoding:  the document encoding, or NULL
14707
 * @options:  a combination of xmlParserOption
14708
 *
14709
 * parse an XML from a file descriptor and build a tree.
14710
 * NOTE that the file descriptor will not be closed when the
14711
 *      reader is closed or reset.
14712
 *
14713
 * Returns the resulting document tree
14714
 */
14715
xmlDocPtr
14716
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14717
0
{
14718
0
    xmlParserCtxtPtr ctxt;
14719
0
    xmlParserInputBufferPtr input;
14720
0
    xmlParserInputPtr stream;
14721
14722
0
    if (fd < 0)
14723
0
        return (NULL);
14724
0
    xmlInitParser();
14725
14726
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14727
0
    if (input == NULL)
14728
0
        return (NULL);
14729
0
    input->closecallback = NULL;
14730
0
    ctxt = xmlNewParserCtxt();
14731
0
    if (ctxt == NULL) {
14732
0
        xmlFreeParserInputBuffer(input);
14733
0
        return (NULL);
14734
0
    }
14735
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14736
0
    if (stream == NULL) {
14737
0
        xmlFreeParserInputBuffer(input);
14738
0
  xmlFreeParserCtxt(ctxt);
14739
0
        return (NULL);
14740
0
    }
14741
0
    inputPush(ctxt, stream);
14742
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14743
0
}
14744
14745
/**
14746
 * xmlReadIO:
14747
 * @ioread:  an I/O read function
14748
 * @ioclose:  an I/O close function
14749
 * @ioctx:  an I/O handler
14750
 * @URL:  the base URL to use for the document
14751
 * @encoding:  the document encoding, or NULL
14752
 * @options:  a combination of xmlParserOption
14753
 *
14754
 * parse an XML document from I/O functions and source and build a tree.
14755
 *
14756
 * Returns the resulting document tree
14757
 */
14758
xmlDocPtr
14759
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14760
          void *ioctx, const char *URL, const char *encoding, int options)
14761
0
{
14762
0
    xmlParserCtxtPtr ctxt;
14763
0
    xmlParserInputBufferPtr input;
14764
0
    xmlParserInputPtr stream;
14765
14766
0
    if (ioread == NULL)
14767
0
        return (NULL);
14768
0
    xmlInitParser();
14769
14770
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14771
0
                                         XML_CHAR_ENCODING_NONE);
14772
0
    if (input == NULL) {
14773
0
        if (ioclose != NULL)
14774
0
            ioclose(ioctx);
14775
0
        return (NULL);
14776
0
    }
14777
0
    ctxt = xmlNewParserCtxt();
14778
0
    if (ctxt == NULL) {
14779
0
        xmlFreeParserInputBuffer(input);
14780
0
        return (NULL);
14781
0
    }
14782
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14783
0
    if (stream == NULL) {
14784
0
        xmlFreeParserInputBuffer(input);
14785
0
  xmlFreeParserCtxt(ctxt);
14786
0
        return (NULL);
14787
0
    }
14788
0
    inputPush(ctxt, stream);
14789
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14790
0
}
14791
14792
/**
14793
 * xmlCtxtReadDoc:
14794
 * @ctxt:  an XML parser context
14795
 * @cur:  a pointer to a zero terminated string
14796
 * @URL:  the base URL to use for the document
14797
 * @encoding:  the document encoding, or NULL
14798
 * @options:  a combination of xmlParserOption
14799
 *
14800
 * parse an XML in-memory document and build a tree.
14801
 * This reuses the existing @ctxt parser context
14802
 *
14803
 * Returns the resulting document tree
14804
 */
14805
xmlDocPtr
14806
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14807
               const char *URL, const char *encoding, int options)
14808
0
{
14809
0
    if (cur == NULL)
14810
0
        return (NULL);
14811
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
14812
0
                              encoding, options));
14813
0
}
14814
14815
/**
14816
 * xmlCtxtReadFile:
14817
 * @ctxt:  an XML parser context
14818
 * @filename:  a file or URL
14819
 * @encoding:  the document encoding, or NULL
14820
 * @options:  a combination of xmlParserOption
14821
 *
14822
 * parse an XML file from the filesystem or the network.
14823
 * This reuses the existing @ctxt parser context
14824
 *
14825
 * Returns the resulting document tree
14826
 */
14827
xmlDocPtr
14828
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14829
                const char *encoding, int options)
14830
0
{
14831
0
    xmlParserInputPtr stream;
14832
14833
0
    if (filename == NULL)
14834
0
        return (NULL);
14835
0
    if (ctxt == NULL)
14836
0
        return (NULL);
14837
0
    xmlInitParser();
14838
14839
0
    xmlCtxtReset(ctxt);
14840
14841
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14842
0
    if (stream == NULL) {
14843
0
        return (NULL);
14844
0
    }
14845
0
    inputPush(ctxt, stream);
14846
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14847
0
}
14848
14849
/**
14850
 * xmlCtxtReadMemory:
14851
 * @ctxt:  an XML parser context
14852
 * @buffer:  a pointer to a char array
14853
 * @size:  the size of the array
14854
 * @URL:  the base URL to use for the document
14855
 * @encoding:  the document encoding, or NULL
14856
 * @options:  a combination of xmlParserOption
14857
 *
14858
 * parse an XML in-memory document and build a tree.
14859
 * This reuses the existing @ctxt parser context
14860
 *
14861
 * Returns the resulting document tree
14862
 */
14863
xmlDocPtr
14864
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14865
                  const char *URL, const char *encoding, int options)
14866
0
{
14867
0
    xmlParserInputBufferPtr input;
14868
0
    xmlParserInputPtr stream;
14869
14870
0
    if (ctxt == NULL)
14871
0
        return (NULL);
14872
0
    if (buffer == NULL)
14873
0
        return (NULL);
14874
0
    xmlInitParser();
14875
14876
0
    xmlCtxtReset(ctxt);
14877
14878
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14879
0
    if (input == NULL) {
14880
0
  return(NULL);
14881
0
    }
14882
14883
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14884
0
    if (stream == NULL) {
14885
0
  xmlFreeParserInputBuffer(input);
14886
0
  return(NULL);
14887
0
    }
14888
14889
0
    inputPush(ctxt, stream);
14890
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
14891
0
}
14892
14893
/**
14894
 * xmlCtxtReadFd:
14895
 * @ctxt:  an XML parser context
14896
 * @fd:  an open file descriptor
14897
 * @URL:  the base URL to use for the document
14898
 * @encoding:  the document encoding, or NULL
14899
 * @options:  a combination of xmlParserOption
14900
 *
14901
 * parse an XML from a file descriptor and build a tree.
14902
 * This reuses the existing @ctxt parser context
14903
 * NOTE that the file descriptor will not be closed when the
14904
 *      reader is closed or reset.
14905
 *
14906
 * Returns the resulting document tree
14907
 */
14908
xmlDocPtr
14909
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14910
              const char *URL, const char *encoding, int options)
14911
0
{
14912
0
    xmlParserInputBufferPtr input;
14913
0
    xmlParserInputPtr stream;
14914
14915
0
    if (fd < 0)
14916
0
        return (NULL);
14917
0
    if (ctxt == NULL)
14918
0
        return (NULL);
14919
0
    xmlInitParser();
14920
14921
0
    xmlCtxtReset(ctxt);
14922
14923
14924
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14925
0
    if (input == NULL)
14926
0
        return (NULL);
14927
0
    input->closecallback = NULL;
14928
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14929
0
    if (stream == NULL) {
14930
0
        xmlFreeParserInputBuffer(input);
14931
0
        return (NULL);
14932
0
    }
14933
0
    inputPush(ctxt, stream);
14934
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
14935
0
}
14936
14937
/**
14938
 * xmlCtxtReadIO:
14939
 * @ctxt:  an XML parser context
14940
 * @ioread:  an I/O read function
14941
 * @ioclose:  an I/O close function
14942
 * @ioctx:  an I/O handler
14943
 * @URL:  the base URL to use for the document
14944
 * @encoding:  the document encoding, or NULL
14945
 * @options:  a combination of xmlParserOption
14946
 *
14947
 * parse an XML document from I/O functions and source and build a tree.
14948
 * This reuses the existing @ctxt parser context
14949
 *
14950
 * Returns the resulting document tree
14951
 */
14952
xmlDocPtr
14953
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14954
              xmlInputCloseCallback ioclose, void *ioctx,
14955
        const char *URL,
14956
              const char *encoding, int options)
14957
0
{
14958
0
    xmlParserInputBufferPtr input;
14959
0
    xmlParserInputPtr stream;
14960
14961
0
    if (ioread == NULL)
14962
0
        return (NULL);
14963
0
    if (ctxt == NULL)
14964
0
        return (NULL);
14965
0
    xmlInitParser();
14966
14967
0
    xmlCtxtReset(ctxt);
14968
14969
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14970
0
                                         XML_CHAR_ENCODING_NONE);
14971
0
    if (input == NULL) {
14972
0
        if (ioclose != NULL)
14973
0
            ioclose(ioctx);
14974
0
        return (NULL);
14975
0
    }
14976
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14977
0
    if (stream == NULL) {
14978
0
        xmlFreeParserInputBuffer(input);
14979
0
        return (NULL);
14980
0
    }
14981
0
    inputPush(ctxt, stream);
14982
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
14983
0
}
14984