Coverage Report

Created: 2023-06-07 06:14

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
#define XML_PARSER_BIG_ENTITY 1000
116
#define XML_PARSER_LOT_ENTITY 5000
117
118
/*
119
 * Constants for protection against abusive entity expansion
120
 * ("billion laughs").
121
 */
122
123
/*
124
 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
125
 * of serialized output after entity expansion.
126
 */
127
4.57k
#define XML_PARSER_NON_LINEAR 5
128
129
/*
130
 * A certain amount is always allowed.
131
 */
132
101k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
133
134
/*
135
 * Fixed cost for each entity reference. This crudely models processing time
136
 * as well to protect, for example, against exponential expansion of empty
137
 * or very short entities.
138
 */
139
101k
#define XML_ENT_FIXED_COST 20
140
141
/**
142
 * xmlParserMaxDepth:
143
 *
144
 * arbitrary depth limit for the XML documents that we allow to
145
 * process. This is not a limitation of the parser but a safety
146
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
147
 * parser option.
148
 */
149
unsigned int xmlParserMaxDepth = 256;
150
151
152
153
#define SAX2 1
154
378M
#define XML_PARSER_BIG_BUFFER_SIZE 300
155
718M
#define XML_PARSER_BUFFER_SIZE 100
156
319k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
157
158
/**
159
 * XML_PARSER_CHUNK_SIZE
160
 *
161
 * When calling GROW that's the minimal amount of data
162
 * the parser expects to have received. It is not a hard
163
 * limit but an optimization when reading strings like Names
164
 * It is not strictly needed as long as the input's available characters
165
 * are followed by 0, which should be provided by the I/O level
166
 */
167
#define XML_PARSER_CHUNK_SIZE 100
168
169
/*
170
 * List of XML prefixed PI allowed by W3C specs
171
 */
172
173
static const char* const xmlW3CPIs[] = {
174
    "xml-stylesheet",
175
    "xml-model",
176
    NULL
177
};
178
179
180
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
182
                                              const xmlChar **str);
183
184
static xmlParserErrors
185
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
186
                xmlSAXHandlerPtr sax,
187
          void *user_data, int depth, const xmlChar *URL,
188
          const xmlChar *ID, xmlNodePtr *list);
189
190
static int
191
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
192
                          const char *encoding);
193
#ifdef LIBXML_LEGACY_ENABLED
194
static void
195
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
196
                      xmlNodePtr lastNode);
197
#endif /* LIBXML_LEGACY_ENABLED */
198
199
static xmlParserErrors
200
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
201
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
202
203
static int
204
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
205
206
/************************************************************************
207
 *                  *
208
 *    Some factorized error routines        *
209
 *                  *
210
 ************************************************************************/
211
212
/**
213
 * xmlErrAttributeDup:
214
 * @ctxt:  an XML parser context
215
 * @prefix:  the attribute prefix
216
 * @localname:  the attribute localname
217
 *
218
 * Handle a redefinition of attribute error
219
 */
220
static void
221
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
222
                   const xmlChar * localname)
223
6.84k
{
224
6.84k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
225
6.84k
        (ctxt->instate == XML_PARSER_EOF))
226
390
  return;
227
6.45k
    if (ctxt != NULL)
228
6.45k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
229
230
6.45k
    if (prefix == NULL)
231
3.30k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
232
3.30k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
233
3.30k
                        (const char *) localname, NULL, NULL, 0, 0,
234
3.30k
                        "Attribute %s redefined\n", localname);
235
3.14k
    else
236
3.14k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
237
3.14k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
238
3.14k
                        (const char *) prefix, (const char *) localname,
239
3.14k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
240
3.14k
                        localname);
241
6.45k
    if (ctxt != NULL) {
242
6.45k
  ctxt->wellFormed = 0;
243
6.45k
  if (ctxt->recovery == 0)
244
6.34k
      ctxt->disableSAX = 1;
245
6.45k
    }
246
6.45k
}
247
248
/**
249
 * xmlFatalErrMsg:
250
 * @ctxt:  an XML parser context
251
 * @error:  the error number
252
 * @msg:  the error message
253
 *
254
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
1.76M
{
260
1.76M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
261
1.76M
        (ctxt->instate == XML_PARSER_EOF))
262
4.52k
  return;
263
1.76M
    if (ctxt != NULL)
264
1.76M
  ctxt->errNo = error;
265
1.76M
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
266
1.76M
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
267
1.76M
    if (ctxt != NULL) {
268
1.76M
  ctxt->wellFormed = 0;
269
1.76M
  if (ctxt->recovery == 0)
270
352k
      ctxt->disableSAX = 1;
271
1.76M
    }
272
1.76M
}
273
274
/**
275
 * xmlWarningMsg:
276
 * @ctxt:  an XML parser context
277
 * @error:  the error number
278
 * @msg:  the error message
279
 * @str1:  extra data
280
 * @str2:  extra data
281
 *
282
 * Handle a warning.
283
 */
284
static void LIBXML_ATTR_FORMAT(3,0)
285
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
286
              const char *msg, const xmlChar *str1, const xmlChar *str2)
287
164k
{
288
164k
    xmlStructuredErrorFunc schannel = NULL;
289
290
164k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
291
164k
        (ctxt->instate == XML_PARSER_EOF))
292
0
  return;
293
164k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
294
164k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
295
164k
        schannel = ctxt->sax->serror;
296
164k
    if (ctxt != NULL) {
297
164k
        __xmlRaiseError(schannel,
298
164k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
299
164k
                    ctxt->userData,
300
164k
                    ctxt, NULL, XML_FROM_PARSER, error,
301
164k
                    XML_ERR_WARNING, NULL, 0,
302
164k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
303
164k
        msg, (const char *) str1, (const char *) str2);
304
164k
    } else {
305
0
        __xmlRaiseError(schannel, NULL, NULL,
306
0
                    ctxt, NULL, XML_FROM_PARSER, error,
307
0
                    XML_ERR_WARNING, NULL, 0,
308
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
309
0
        msg, (const char *) str1, (const char *) str2);
310
0
    }
311
164k
}
312
313
/**
314
 * xmlValidityError:
315
 * @ctxt:  an XML parser context
316
 * @error:  the error number
317
 * @msg:  the error message
318
 * @str1:  extra data
319
 *
320
 * Handle a validity error.
321
 */
322
static void LIBXML_ATTR_FORMAT(3,0)
323
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
324
              const char *msg, const xmlChar *str1, const xmlChar *str2)
325
2.07k
{
326
2.07k
    xmlStructuredErrorFunc schannel = NULL;
327
328
2.07k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
329
2.07k
        (ctxt->instate == XML_PARSER_EOF))
330
0
  return;
331
2.07k
    if (ctxt != NULL) {
332
2.07k
  ctxt->errNo = error;
333
2.07k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
334
2.07k
      schannel = ctxt->sax->serror;
335
2.07k
    }
336
2.07k
    if (ctxt != NULL) {
337
2.07k
        __xmlRaiseError(schannel,
338
2.07k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
339
2.07k
                    ctxt, NULL, XML_FROM_DTD, error,
340
2.07k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
341
2.07k
        (const char *) str2, NULL, 0, 0,
342
2.07k
        msg, (const char *) str1, (const char *) str2);
343
2.07k
  ctxt->valid = 0;
344
2.07k
    } else {
345
0
        __xmlRaiseError(schannel, NULL, NULL,
346
0
                    ctxt, NULL, XML_FROM_DTD, error,
347
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
348
0
        (const char *) str2, NULL, 0, 0,
349
0
        msg, (const char *) str1, (const char *) str2);
350
0
    }
351
2.07k
}
352
353
/**
354
 * xmlFatalErrMsgInt:
355
 * @ctxt:  an XML parser context
356
 * @error:  the error number
357
 * @msg:  the error message
358
 * @val:  an integer value
359
 *
360
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
361
 */
362
static void LIBXML_ATTR_FORMAT(3,0)
363
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
364
                  const char *msg, int val)
365
248k
{
366
248k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
367
248k
        (ctxt->instate == XML_PARSER_EOF))
368
256
  return;
369
248k
    if (ctxt != NULL)
370
248k
  ctxt->errNo = error;
371
248k
    __xmlRaiseError(NULL, NULL, NULL,
372
248k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
373
248k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
374
248k
    if (ctxt != NULL) {
375
248k
  ctxt->wellFormed = 0;
376
248k
  if (ctxt->recovery == 0)
377
153k
      ctxt->disableSAX = 1;
378
248k
    }
379
248k
}
380
381
/**
382
 * xmlFatalErrMsgStrIntStr:
383
 * @ctxt:  an XML parser context
384
 * @error:  the error number
385
 * @msg:  the error message
386
 * @str1:  an string info
387
 * @val:  an integer value
388
 * @str2:  an string info
389
 *
390
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
391
 */
392
static void LIBXML_ATTR_FORMAT(3,0)
393
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
394
                  const char *msg, const xmlChar *str1, int val,
395
      const xmlChar *str2)
396
1.47M
{
397
1.47M
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
398
1.47M
        (ctxt->instate == XML_PARSER_EOF))
399
249
  return;
400
1.47M
    if (ctxt != NULL)
401
1.47M
  ctxt->errNo = error;
402
1.47M
    __xmlRaiseError(NULL, NULL, NULL,
403
1.47M
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
404
1.47M
                    NULL, 0, (const char *) str1, (const char *) str2,
405
1.47M
        NULL, val, 0, msg, str1, val, str2);
406
1.47M
    if (ctxt != NULL) {
407
1.47M
  ctxt->wellFormed = 0;
408
1.47M
  if (ctxt->recovery == 0)
409
143k
      ctxt->disableSAX = 1;
410
1.47M
    }
411
1.47M
}
412
413
/**
414
 * xmlFatalErrMsgStr:
415
 * @ctxt:  an XML parser context
416
 * @error:  the error number
417
 * @msg:  the error message
418
 * @val:  a string value
419
 *
420
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
421
 */
422
static void LIBXML_ATTR_FORMAT(3,0)
423
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
424
                  const char *msg, const xmlChar * val)
425
189k
{
426
189k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
427
189k
        (ctxt->instate == XML_PARSER_EOF))
428
651
  return;
429
188k
    if (ctxt != NULL)
430
188k
  ctxt->errNo = error;
431
188k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
432
188k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
433
188k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
434
188k
                    val);
435
188k
    if (ctxt != NULL) {
436
188k
  ctxt->wellFormed = 0;
437
188k
  if (ctxt->recovery == 0)
438
164k
      ctxt->disableSAX = 1;
439
188k
    }
440
188k
}
441
442
/**
443
 * xmlErrMsgStr:
444
 * @ctxt:  an XML parser context
445
 * @error:  the error number
446
 * @msg:  the error message
447
 * @val:  a string value
448
 *
449
 * Handle a non fatal parser error
450
 */
451
static void LIBXML_ATTR_FORMAT(3,0)
452
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
453
                  const char *msg, const xmlChar * val)
454
30.1k
{
455
30.1k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
456
30.1k
        (ctxt->instate == XML_PARSER_EOF))
457
0
  return;
458
30.1k
    if (ctxt != NULL)
459
30.1k
  ctxt->errNo = error;
460
30.1k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
461
30.1k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
462
30.1k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
463
30.1k
                    val);
464
30.1k
}
465
466
/**
467
 * xmlNsErr:
468
 * @ctxt:  an XML parser context
469
 * @error:  the error number
470
 * @msg:  the message
471
 * @info1:  extra information string
472
 * @info2:  extra information string
473
 *
474
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
475
 */
476
static void LIBXML_ATTR_FORMAT(3,0)
477
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
478
         const char *msg,
479
         const xmlChar * info1, const xmlChar * info2,
480
         const xmlChar * info3)
481
417k
{
482
417k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
483
417k
        (ctxt->instate == XML_PARSER_EOF))
484
1.71k
  return;
485
415k
    if (ctxt != NULL)
486
415k
  ctxt->errNo = error;
487
415k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
488
415k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
489
415k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
490
415k
                    info1, info2, info3);
491
415k
    if (ctxt != NULL)
492
415k
  ctxt->nsWellFormed = 0;
493
415k
}
494
495
/**
496
 * xmlNsWarn
497
 * @ctxt:  an XML parser context
498
 * @error:  the error number
499
 * @msg:  the message
500
 * @info1:  extra information string
501
 * @info2:  extra information string
502
 *
503
 * Handle a namespace warning error
504
 */
505
static void LIBXML_ATTR_FORMAT(3,0)
506
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
507
         const char *msg,
508
         const xmlChar * info1, const xmlChar * info2,
509
         const xmlChar * info3)
510
7.18k
{
511
7.18k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
512
7.18k
        (ctxt->instate == XML_PARSER_EOF))
513
0
  return;
514
7.18k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
515
7.18k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
516
7.18k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
517
7.18k
                    info1, info2, info3);
518
7.18k
}
519
520
/*
 * Add @val to *@dst, clamping the result at ULONG_MAX instead of letting
 * the unsigned addition wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without wrapping. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
527
528
/*
 * Add the size_t quantity @val to *@dst, clamping the result at
 * ULONG_MAX.
 *
 * @val is taken as a size_t so that, on platforms where size_t is wider
 * than unsigned long, a large value is compared at full width and
 * saturates the counter instead of being silently truncated by the
 * parameter conversion before the overflow check runs.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val <= ULONG_MAX - *dst here, so the narrowing cast is exact. */
        *dst += (unsigned long) val;
}
535
536
/**
537
 * xmlParserEntityCheck:
538
 * @ctxt:  parser context
539
 * @extra:  sum of unexpanded entity sizes
540
 *
541
 * Check for non-linear entity expansion behaviour.
542
 *
543
 * In some cases like xmlStringDecodeEntities, this function is called
544
 * for each, possibly nested entity and its unexpanded content length.
545
 *
546
 * In other cases like xmlParseReference, it's only called for each
547
 * top-level entity with its unexpanded content length plus the sum of
548
 * the unexpanded content lengths (plus fixed cost) of all nested
549
 * entities.
550
 *
551
 * Summing the unexpanded lengths also adds the length of the reference.
552
 * This is by design. Taking the length of the entity name into account
553
 * discourages attacks that try to waste CPU time with abusively long
554
 * entity names. See test/recurse/lol6.xml for example. Each call also
555
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
556
 * short entities.
557
 *
558
 * Returns 1 on error, 0 on success.
559
 */
560
static int
561
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
562
101k
{
563
101k
    unsigned long consumed;
564
101k
    xmlParserInputPtr input = ctxt->input;
565
101k
    xmlEntityPtr entity = input->entity;
566
567
    /*
568
     * Compute total consumed bytes so far, including input streams of
569
     * external entities.
570
     */
571
101k
    consumed = input->parentConsumed;
572
101k
    if ((entity == NULL) ||
573
101k
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
574
70.3k
         ((entity->flags & XML_ENT_PARSED) == 0))) {
575
70.3k
        xmlSaturatedAdd(&consumed, input->consumed);
576
70.3k
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
577
70.3k
    }
578
101k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
579
580
    /*
581
     * Add extra cost and some fixed cost.
582
     */
583
101k
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
584
101k
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
585
586
    /*
587
     * It's important to always use saturation arithmetic when tracking
588
     * entity sizes to make the size checks reliable. If "sizeentcopy"
589
     * overflows, we have to abort.
590
     */
591
101k
    if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
592
101k
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
593
4.57k
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
594
272
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
595
272
                       "Maximum entity amplification factor exceeded");
596
272
        xmlHaltParser(ctxt);
597
272
        return(1);
598
272
    }
599
600
101k
    return(0);
601
101k
}
602
603
/************************************************************************
604
 *                  *
605
 *    Library wide options          *
606
 *                  *
607
 ************************************************************************/
608
609
/**
610
  * xmlHasFeature:
611
  * @feature: the feature to be examined
612
  *
613
  * Examines if the library has been compiled with a given feature.
614
  *
615
  * Returns a non-zero value if the feature exist, otherwise zero.
616
  * Returns zero (0) if the feature does not exist or an unknown
617
  * unknown feature is requested, non-zero otherwise.
618
  */
619
int
620
xmlHasFeature(xmlFeature feature)
621
0
{
622
0
    switch (feature) {
623
0
  case XML_WITH_THREAD:
624
0
#ifdef LIBXML_THREAD_ENABLED
625
0
      return(1);
626
#else
627
      return(0);
628
#endif
629
0
        case XML_WITH_TREE:
630
0
#ifdef LIBXML_TREE_ENABLED
631
0
            return(1);
632
#else
633
            return(0);
634
#endif
635
0
        case XML_WITH_OUTPUT:
636
0
#ifdef LIBXML_OUTPUT_ENABLED
637
0
            return(1);
638
#else
639
            return(0);
640
#endif
641
0
        case XML_WITH_PUSH:
642
#ifdef LIBXML_PUSH_ENABLED
643
            return(1);
644
#else
645
0
            return(0);
646
0
#endif
647
0
        case XML_WITH_READER:
648
#ifdef LIBXML_READER_ENABLED
649
            return(1);
650
#else
651
0
            return(0);
652
0
#endif
653
0
        case XML_WITH_PATTERN:
654
0
#ifdef LIBXML_PATTERN_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_WRITER:
660
#ifdef LIBXML_WRITER_ENABLED
661
            return(1);
662
#else
663
0
            return(0);
664
0
#endif
665
0
        case XML_WITH_SAX1:
666
#ifdef LIBXML_SAX1_ENABLED
667
            return(1);
668
#else
669
0
            return(0);
670
0
#endif
671
0
        case XML_WITH_FTP:
672
#ifdef LIBXML_FTP_ENABLED
673
            return(1);
674
#else
675
0
            return(0);
676
0
#endif
677
0
        case XML_WITH_HTTP:
678
0
#ifdef LIBXML_HTTP_ENABLED
679
0
            return(1);
680
#else
681
            return(0);
682
#endif
683
0
        case XML_WITH_VALID:
684
#ifdef LIBXML_VALID_ENABLED
685
            return(1);
686
#else
687
0
            return(0);
688
0
#endif
689
0
        case XML_WITH_HTML:
690
0
#ifdef LIBXML_HTML_ENABLED
691
0
            return(1);
692
#else
693
            return(0);
694
#endif
695
0
        case XML_WITH_LEGACY:
696
#ifdef LIBXML_LEGACY_ENABLED
697
            return(1);
698
#else
699
0
            return(0);
700
0
#endif
701
0
        case XML_WITH_C14N:
702
#ifdef LIBXML_C14N_ENABLED
703
            return(1);
704
#else
705
0
            return(0);
706
0
#endif
707
0
        case XML_WITH_CATALOG:
708
0
#ifdef LIBXML_CATALOG_ENABLED
709
0
            return(1);
710
#else
711
            return(0);
712
#endif
713
0
        case XML_WITH_XPATH:
714
0
#ifdef LIBXML_XPATH_ENABLED
715
0
            return(1);
716
#else
717
            return(0);
718
#endif
719
0
        case XML_WITH_XPTR:
720
0
#ifdef LIBXML_XPTR_ENABLED
721
0
            return(1);
722
#else
723
            return(0);
724
#endif
725
0
        case XML_WITH_XINCLUDE:
726
0
#ifdef LIBXML_XINCLUDE_ENABLED
727
0
            return(1);
728
#else
729
            return(0);
730
#endif
731
0
        case XML_WITH_ICONV:
732
0
#ifdef LIBXML_ICONV_ENABLED
733
0
            return(1);
734
#else
735
            return(0);
736
#endif
737
0
        case XML_WITH_ISO8859X:
738
0
#ifdef LIBXML_ISO8859X_ENABLED
739
0
            return(1);
740
#else
741
            return(0);
742
#endif
743
0
        case XML_WITH_UNICODE:
744
#ifdef LIBXML_UNICODE_ENABLED
745
            return(1);
746
#else
747
0
            return(0);
748
0
#endif
749
0
        case XML_WITH_REGEXP:
750
#ifdef LIBXML_REGEXP_ENABLED
751
            return(1);
752
#else
753
0
            return(0);
754
0
#endif
755
0
        case XML_WITH_AUTOMATA:
756
#ifdef LIBXML_AUTOMATA_ENABLED
757
            return(1);
758
#else
759
0
            return(0);
760
0
#endif
761
0
        case XML_WITH_EXPR:
762
#ifdef LIBXML_EXPR_ENABLED
763
            return(1);
764
#else
765
0
            return(0);
766
0
#endif
767
0
        case XML_WITH_SCHEMAS:
768
#ifdef LIBXML_SCHEMAS_ENABLED
769
            return(1);
770
#else
771
0
            return(0);
772
0
#endif
773
0
        case XML_WITH_SCHEMATRON:
774
#ifdef LIBXML_SCHEMATRON_ENABLED
775
            return(1);
776
#else
777
0
            return(0);
778
0
#endif
779
0
        case XML_WITH_MODULES:
780
0
#ifdef LIBXML_MODULES_ENABLED
781
0
            return(1);
782
#else
783
            return(0);
784
#endif
785
0
        case XML_WITH_DEBUG:
786
0
#ifdef LIBXML_DEBUG_ENABLED
787
0
            return(1);
788
#else
789
            return(0);
790
#endif
791
0
        case XML_WITH_DEBUG_MEM:
792
#ifdef DEBUG_MEMORY_LOCATION
793
            return(1);
794
#else
795
0
            return(0);
796
0
#endif
797
0
        case XML_WITH_DEBUG_RUN:
798
0
            return(0);
799
0
        case XML_WITH_ZLIB:
800
#ifdef LIBXML_ZLIB_ENABLED
801
            return(1);
802
#else
803
0
            return(0);
804
0
#endif
805
0
        case XML_WITH_LZMA:
806
#ifdef LIBXML_LZMA_ENABLED
807
            return(1);
808
#else
809
0
            return(0);
810
0
#endif
811
0
        case XML_WITH_ICU:
812
#ifdef LIBXML_ICU_ENABLED
813
            return(1);
814
#else
815
0
            return(0);
816
0
#endif
817
0
        default:
818
0
      break;
819
0
     }
820
0
     return(0);
821
0
}
822
823
/************************************************************************
824
 *                  *
825
 *    SAX2 defaulted attributes handling      *
826
 *                  *
827
 ************************************************************************/
828
829
/**
830
 * xmlDetectSAX2:
831
 * @ctxt:  an XML parser context
832
 *
833
 * Do the SAX2 detection and specific initialization
834
 */
835
static void
836
348k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
837
348k
    xmlSAXHandlerPtr sax;
838
839
    /* Avoid unused variable warning if features are disabled. */
840
348k
    (void) sax;
841
842
348k
    if (ctxt == NULL) return;
843
348k
    sax = ctxt->sax;
844
#ifdef LIBXML_SAX1_ENABLED
845
    if ((sax) && (sax->initialized == XML_SAX2_MAGIC))
846
        ctxt->sax2 = 1;
847
#else
848
348k
    ctxt->sax2 = 1;
849
348k
#endif /* LIBXML_SAX1_ENABLED */
850
851
348k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
852
348k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
853
348k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
854
348k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
855
348k
    (ctxt->str_xml_ns == NULL)) {
856
4
        xmlErrMemory(ctxt, NULL);
857
4
    }
858
348k
}
859
860
typedef struct _xmlDefAttrs xmlDefAttrs;
861
typedef xmlDefAttrs *xmlDefAttrsPtr;
862
struct _xmlDefAttrs {
863
    int nbAttrs;  /* number of defaulted attributes on that element */
864
    int maxAttrs;       /* the size of the array */
865
#if __STDC_VERSION__ >= 199901L
866
    /* Using a C99 flexible array member avoids UBSan errors. */
867
    const xmlChar *values[]; /* array of localname/prefix/values/external */
868
#else
869
    const xmlChar *values[5];
870
#endif
871
};
872
873
/**
874
 * xmlAttrNormalizeSpace:
875
 * @src: the source string
876
 * @dst: the target string
877
 *
878
 * Normalize the space in non CDATA attribute values:
879
 * If the attribute type is not CDATA, then the XML processor MUST further
880
 * process the normalized attribute value by discarding any leading and
881
 * trailing space (#x20) characters, and by replacing sequences of space
882
 * (#x20) characters by a single space (#x20) character.
883
 * Note that the size of dst need to be at least src, and if one doesn't need
884
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
885
 * passing src as dst is just fine.
886
 *
887
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
888
 *         is needed.
889
 */
890
static xmlChar *
891
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
892
58.5k
{
893
58.5k
    if ((src == NULL) || (dst == NULL))
894
0
        return(NULL);
895
896
79.1k
    while (*src == 0x20) src++;
897
1.04M
    while (*src != 0) {
898
984k
  if (*src == 0x20) {
899
145k
      while (*src == 0x20) src++;
900
21.4k
      if (*src != 0)
901
18.3k
    *dst++ = 0x20;
902
962k
  } else {
903
962k
      *dst++ = *src++;
904
962k
  }
905
984k
    }
906
58.5k
    *dst = 0;
907
58.5k
    if (dst == src)
908
46.7k
       return(NULL);
909
11.8k
    return(dst);
910
58.5k
}
911
912
/**
913
 * xmlAttrNormalizeSpace2:
914
 * @src: the source string
915
 *
916
 * Normalize the space in non CDATA attribute values, a slightly more complex
917
 * front end to avoid allocation problems when running on attribute values
918
 * coming from the input.
919
 *
920
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
921
 *         is needed.
922
 */
923
static const xmlChar *
924
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
925
6.09k
{
926
6.09k
    int i;
927
6.09k
    int remove_head = 0;
928
6.09k
    int need_realloc = 0;
929
6.09k
    const xmlChar *cur;
930
931
6.09k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
932
0
        return(NULL);
933
6.09k
    i = *len;
934
6.09k
    if (i <= 0)
935
2.25k
        return(NULL);
936
937
3.84k
    cur = src;
938
4.97k
    while (*cur == 0x20) {
939
1.13k
        cur++;
940
1.13k
  remove_head++;
941
1.13k
    }
942
118k
    while (*cur != 0) {
943
114k
  if (*cur == 0x20) {
944
3.41k
      cur++;
945
3.41k
      if ((*cur == 0x20) || (*cur == 0)) {
946
319
          need_realloc = 1;
947
319
    break;
948
319
      }
949
3.41k
  } else
950
111k
      cur++;
951
114k
    }
952
3.84k
    if (need_realloc) {
953
319
        xmlChar *ret;
954
955
319
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
956
319
  if (ret == NULL) {
957
2
      xmlErrMemory(ctxt, NULL);
958
2
      return(NULL);
959
2
  }
960
317
  xmlAttrNormalizeSpace(ret, ret);
961
317
  *len = strlen((const char *)ret);
962
317
        return(ret);
963
3.52k
    } else if (remove_head) {
964
814
        *len -= remove_head;
965
814
        memmove(src, src + remove_head, 1 + *len);
966
814
  return(src);
967
814
    }
968
2.71k
    return(NULL);
969
3.84k
}
970
971
/**
972
 * xmlAddDefAttrs:
973
 * @ctxt:  an XML parser context
974
 * @fullname:  the element fullname
975
 * @fullattr:  the attribute fullname
976
 * @value:  the attribute value
977
 *
978
 * Add a defaulted attribute for an element
979
 */
980
static void
981
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
982
               const xmlChar *fullname,
983
               const xmlChar *fullattr,
984
78.2k
               const xmlChar *value) {
985
78.2k
    xmlDefAttrsPtr defaults;
986
78.2k
    int len;
987
78.2k
    const xmlChar *name;
988
78.2k
    const xmlChar *prefix;
989
990
    /*
991
     * Allows to detect attribute redefinitions
992
     */
993
78.2k
    if (ctxt->attsSpecial != NULL) {
994
66.0k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
995
11.6k
      return;
996
66.0k
    }
997
998
66.6k
    if (ctxt->attsDefault == NULL) {
999
12.2k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1000
12.2k
  if (ctxt->attsDefault == NULL)
1001
17
      goto mem_error;
1002
12.2k
    }
1003
1004
    /*
1005
     * split the element name into prefix:localname , the string found
1006
     * are within the DTD and then not associated to namespace names.
1007
     */
1008
66.6k
    name = xmlSplitQName3(fullname, &len);
1009
66.6k
    if (name == NULL) {
1010
46.7k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1011
46.7k
  prefix = NULL;
1012
46.7k
    } else {
1013
19.9k
        name = xmlDictLookup(ctxt->dict, name, -1);
1014
19.9k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1015
19.9k
    }
1016
1017
    /*
1018
     * make sure there is some storage
1019
     */
1020
66.6k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1021
66.6k
    if (defaults == NULL) {
1022
25.2k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1023
25.2k
                     (4 * 5) * sizeof(const xmlChar *));
1024
25.2k
  if (defaults == NULL)
1025
9
      goto mem_error;
1026
25.2k
  defaults->nbAttrs = 0;
1027
25.2k
  defaults->maxAttrs = 4;
1028
25.2k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1029
25.2k
                          defaults, NULL) < 0) {
1030
1
      xmlFree(defaults);
1031
1
      goto mem_error;
1032
1
  }
1033
41.4k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1034
6.72k
        xmlDefAttrsPtr temp;
1035
1036
6.72k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1037
6.72k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1038
6.72k
  if (temp == NULL)
1039
3
      goto mem_error;
1040
6.72k
  defaults = temp;
1041
6.72k
  defaults->maxAttrs *= 2;
1042
6.72k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1043
6.72k
                          defaults, NULL) < 0) {
1044
0
      xmlFree(defaults);
1045
0
      goto mem_error;
1046
0
  }
1047
6.72k
    }
1048
1049
    /*
1050
     * Split the element name into prefix:localname , the string found
1051
     * are within the DTD and hen not associated to namespace names.
1052
     */
1053
66.6k
    name = xmlSplitQName3(fullattr, &len);
1054
66.6k
    if (name == NULL) {
1055
44.6k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1056
44.6k
  prefix = NULL;
1057
44.6k
    } else {
1058
21.9k
        name = xmlDictLookup(ctxt->dict, name, -1);
1059
21.9k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1060
21.9k
    }
1061
1062
66.6k
    defaults->values[5 * defaults->nbAttrs] = name;
1063
66.6k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1064
    /* intern the string and precompute the end */
1065
66.6k
    len = xmlStrlen(value);
1066
66.6k
    value = xmlDictLookup(ctxt->dict, value, len);
1067
66.6k
    if (value == NULL)
1068
1
        goto mem_error;
1069
66.6k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1070
66.6k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1071
66.6k
    if (ctxt->external)
1072
267
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1073
66.3k
    else
1074
66.3k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1075
66.6k
    defaults->nbAttrs++;
1076
1077
66.6k
    return;
1078
1079
31
mem_error:
1080
31
    xmlErrMemory(ctxt, NULL);
1081
31
    return;
1082
66.6k
}
1083
1084
/**
1085
 * xmlAddSpecialAttr:
1086
 * @ctxt:  an XML parser context
1087
 * @fullname:  the element fullname
1088
 * @fullattr:  the attribute fullname
1089
 * @type:  the attribute type
1090
 *
1091
 * Register this attribute type
1092
 */
1093
static void
1094
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1095
      const xmlChar *fullname,
1096
      const xmlChar *fullattr,
1097
      int type)
1098
79.2k
{
1099
79.2k
    if (ctxt->attsSpecial == NULL) {
1100
12.9k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1101
12.9k
  if (ctxt->attsSpecial == NULL)
1102
36
      goto mem_error;
1103
12.9k
    }
1104
1105
79.2k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1106
11.7k
        return;
1107
1108
67.5k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1109
67.5k
                     (void *) (ptrdiff_t) type);
1110
67.5k
    return;
1111
1112
36
mem_error:
1113
36
    xmlErrMemory(ctxt, NULL);
1114
36
    return;
1115
79.2k
}
1116
1117
/**
1118
 * xmlCleanSpecialAttrCallback:
1119
 *
1120
 * Removes CDATA attributes from the special attribute table
1121
 */
1122
static void
1123
xmlCleanSpecialAttrCallback(void *payload, void *data,
1124
                            const xmlChar *fullname, const xmlChar *fullattr,
1125
20.0k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1126
20.0k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1127
1128
20.0k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1129
6.73k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1130
6.73k
    }
1131
20.0k
}
1132
1133
/**
1134
 * xmlCleanSpecialAttr:
1135
 * @ctxt:  an XML parser context
1136
 *
1137
 * Trim the list of attributes defined to remove all those of type
1138
 * CDATA as they are not special. This call should be done when finishing
1139
 * to parse the DTD and before starting to parse the document root.
1140
 */
1141
static void
1142
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1143
23.1k
{
1144
23.1k
    if (ctxt->attsSpecial == NULL)
1145
18.6k
        return;
1146
1147
4.55k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1148
1149
4.55k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1150
279
        xmlHashFree(ctxt->attsSpecial, NULL);
1151
279
        ctxt->attsSpecial = NULL;
1152
279
    }
1153
4.55k
    return;
1154
23.1k
}
1155
1156
/**
1157
 * xmlCheckLanguageID:
1158
 * @lang:  pointer to the string value
1159
 *
1160
 * DEPRECATED: Internal function, do not use.
1161
 *
1162
 * Checks that the value conforms to the LanguageID production:
1163
 *
1164
 * NOTE: this is somewhat deprecated, those productions were removed from
1165
 *       the XML Second edition.
1166
 *
1167
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1168
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1169
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1170
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1171
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1172
 * [38] Subcode ::= ([a-z] | [A-Z])+
1173
 *
1174
 * The current REC reference the successors of RFC 1766, currently 5646
1175
 *
1176
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1177
 * langtag       = language
1178
 *                 ["-" script]
1179
 *                 ["-" region]
1180
 *                 *("-" variant)
1181
 *                 *("-" extension)
1182
 *                 ["-" privateuse]
1183
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1184
 *                 ["-" extlang]       ; sometimes followed by
1185
 *                                     ; extended language subtags
1186
 *               / 4ALPHA              ; or reserved for future use
1187
 *               / 5*8ALPHA            ; or registered language subtag
1188
 *
1189
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1190
 *                 *2("-" 3ALPHA)      ; permanently reserved
1191
 *
1192
 * script        = 4ALPHA              ; ISO 15924 code
1193
 *
1194
 * region        = 2ALPHA              ; ISO 3166-1 code
1195
 *               / 3DIGIT              ; UN M.49 code
1196
 *
1197
 * variant       = 5*8alphanum         ; registered variants
1198
 *               / (DIGIT 3alphanum)
1199
 *
1200
 * extension     = singleton 1*("-" (2*8alphanum))
1201
 *
1202
 *                                     ; Single alphanumerics
1203
 *                                     ; "x" reserved for private use
1204
 * singleton     = DIGIT               ; 0 - 9
1205
 *               / %x41-57             ; A - W
1206
 *               / %x59-5A             ; Y - Z
1207
 *               / %x61-77             ; a - w
1208
 *               / %x79-7A             ; y - z
1209
 *
1210
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1211
 * The parser below doesn't try to cope with extension or privateuse
1212
 * that could be added but that's not interoperable anyway
1213
 *
1214
 * Returns 1 if correct 0 otherwise
1215
 **/
1216
int
1217
xmlCheckLanguageID(const xmlChar * lang)
1218
0
{
1219
0
    const xmlChar *cur = lang, *nxt;
1220
1221
0
    if (cur == NULL)
1222
0
        return (0);
1223
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1224
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1225
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1226
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1227
        /*
1228
         * Still allow IANA code and user code which were coming
1229
         * from the previous version of the XML-1.0 specification
1230
         * it's deprecated but we should not fail
1231
         */
1232
0
        cur += 2;
1233
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1234
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1235
0
            cur++;
1236
0
        return(cur[0] == 0);
1237
0
    }
1238
0
    nxt = cur;
1239
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1240
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1241
0
           nxt++;
1242
0
    if (nxt - cur >= 4) {
1243
        /*
1244
         * Reserved
1245
         */
1246
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1247
0
            return(0);
1248
0
        return(1);
1249
0
    }
1250
0
    if (nxt - cur < 2)
1251
0
        return(0);
1252
    /* we got an ISO 639 code */
1253
0
    if (nxt[0] == 0)
1254
0
        return(1);
1255
0
    if (nxt[0] != '-')
1256
0
        return(0);
1257
1258
0
    nxt++;
1259
0
    cur = nxt;
1260
    /* now we can have extlang or script or region or variant */
1261
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1262
0
        goto region_m49;
1263
1264
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1265
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1266
0
           nxt++;
1267
0
    if (nxt - cur == 4)
1268
0
        goto script;
1269
0
    if (nxt - cur == 2)
1270
0
        goto region;
1271
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1272
0
        goto variant;
1273
0
    if (nxt - cur != 3)
1274
0
        return(0);
1275
    /* we parsed an extlang */
1276
0
    if (nxt[0] == 0)
1277
0
        return(1);
1278
0
    if (nxt[0] != '-')
1279
0
        return(0);
1280
1281
0
    nxt++;
1282
0
    cur = nxt;
1283
    /* now we can have script or region or variant */
1284
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1285
0
        goto region_m49;
1286
1287
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1288
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1289
0
           nxt++;
1290
0
    if (nxt - cur == 2)
1291
0
        goto region;
1292
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1293
0
        goto variant;
1294
0
    if (nxt - cur != 4)
1295
0
        return(0);
1296
    /* we parsed a script */
1297
0
script:
1298
0
    if (nxt[0] == 0)
1299
0
        return(1);
1300
0
    if (nxt[0] != '-')
1301
0
        return(0);
1302
1303
0
    nxt++;
1304
0
    cur = nxt;
1305
    /* now we can have region or variant */
1306
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1307
0
        goto region_m49;
1308
1309
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1310
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1311
0
           nxt++;
1312
1313
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1314
0
        goto variant;
1315
0
    if (nxt - cur != 2)
1316
0
        return(0);
1317
    /* we parsed a region */
1318
0
region:
1319
0
    if (nxt[0] == 0)
1320
0
        return(1);
1321
0
    if (nxt[0] != '-')
1322
0
        return(0);
1323
1324
0
    nxt++;
1325
0
    cur = nxt;
1326
    /* now we can just have a variant */
1327
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1328
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1329
0
           nxt++;
1330
1331
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1332
0
        return(0);
1333
1334
    /* we parsed a variant */
1335
0
variant:
1336
0
    if (nxt[0] == 0)
1337
0
        return(1);
1338
0
    if (nxt[0] != '-')
1339
0
        return(0);
1340
    /* extensions and private use subtags not checked */
1341
0
    return (1);
1342
1343
0
region_m49:
1344
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1345
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1346
0
        nxt += 3;
1347
0
        goto region;
1348
0
    }
1349
0
    return(0);
1350
0
}
1351
1352
/************************************************************************
1353
 *                  *
1354
 *    Parser stacks related functions and macros    *
1355
 *                  *
1356
 ************************************************************************/
1357
1358
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1359
                                            const xmlChar ** str);
1360
1361
#ifdef SAX2
1362
/**
1363
 * nsPush:
1364
 * @ctxt:  an XML parser context
1365
 * @prefix:  the namespace prefix or NULL
1366
 * @URL:  the namespace name
1367
 *
1368
 * Pushes a new parser namespace on top of the ns stack
1369
 *
1370
 * Returns -1 in case of error, -2 if the namespace should be discarded
1371
 *     and the index in the stack otherwise.
1372
 */
1373
static int
1374
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1375
372k
{
1376
372k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1377
0
        int i;
1378
0
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1379
0
      if (ctxt->nsTab[i] == prefix) {
1380
    /* in scope */
1381
0
          if (ctxt->nsTab[i + 1] == URL)
1382
0
        return(-2);
1383
    /* out of scope keep it */
1384
0
    break;
1385
0
      }
1386
0
  }
1387
0
    }
1388
372k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1389
194k
  ctxt->nsMax = 10;
1390
194k
  ctxt->nsNr = 0;
1391
194k
  ctxt->nsTab = (const xmlChar **)
1392
194k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1393
194k
  if (ctxt->nsTab == NULL) {
1394
101
      xmlErrMemory(ctxt, NULL);
1395
101
      ctxt->nsMax = 0;
1396
101
            return (-1);
1397
101
  }
1398
194k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1399
2.10k
        const xmlChar ** tmp;
1400
2.10k
        ctxt->nsMax *= 2;
1401
2.10k
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1402
2.10k
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1403
2.10k
        if (tmp == NULL) {
1404
15
            xmlErrMemory(ctxt, NULL);
1405
15
      ctxt->nsMax /= 2;
1406
15
            return (-1);
1407
15
        }
1408
2.09k
  ctxt->nsTab = tmp;
1409
2.09k
    }
1410
372k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1411
372k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1412
372k
    return (ctxt->nsNr);
1413
372k
}
1414
/**
1415
 * nsPop:
1416
 * @ctxt: an XML parser context
1417
 * @nr:  the number to pop
1418
 *
1419
 * Pops the top @nr parser prefix/namespace from the ns stack
1420
 *
1421
 * Returns the number of namespaces removed
1422
 */
1423
static int
1424
nsPop(xmlParserCtxtPtr ctxt, int nr)
1425
282k
{
1426
282k
    int i;
1427
1428
282k
    if (ctxt->nsTab == NULL) return(0);
1429
282k
    if (ctxt->nsNr < nr) {
1430
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1431
0
        nr = ctxt->nsNr;
1432
0
    }
1433
282k
    if (ctxt->nsNr <= 0)
1434
0
        return (0);
1435
1436
957k
    for (i = 0;i < nr;i++) {
1437
675k
         ctxt->nsNr--;
1438
675k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1439
675k
    }
1440
282k
    return(nr);
1441
282k
}
1442
#endif
1443
1444
static int
1445
208k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1446
208k
    const xmlChar **atts;
1447
208k
    int *attallocs;
1448
208k
    int maxatts;
1449
1450
208k
    if (nr + 5 > ctxt->maxatts) {
1451
208k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1452
208k
  atts = (const xmlChar **) xmlMalloc(
1453
208k
             maxatts * sizeof(const xmlChar *));
1454
208k
  if (atts == NULL) goto mem_error;
1455
208k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1456
208k
                               (maxatts / 5) * sizeof(int));
1457
208k
  if (attallocs == NULL) {
1458
13
            xmlFree(atts);
1459
13
            goto mem_error;
1460
13
        }
1461
208k
        if (ctxt->maxatts > 0)
1462
493
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1463
208k
        xmlFree(ctxt->atts);
1464
208k
  ctxt->atts = atts;
1465
208k
  ctxt->attallocs = attallocs;
1466
208k
  ctxt->maxatts = maxatts;
1467
208k
    }
1468
208k
    return(ctxt->maxatts);
1469
55
mem_error:
1470
55
    xmlErrMemory(ctxt, NULL);
1471
55
    return(-1);
1472
208k
}
1473
1474
/**
1475
 * inputPush:
1476
 * @ctxt:  an XML parser context
1477
 * @value:  the parser input
1478
 *
1479
 * Pushes a new parser input on top of the input stack
1480
 *
1481
 * Returns -1 in case of error, the index in the stack otherwise
1482
 */
1483
int
1484
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1485
383k
{
1486
383k
    if ((ctxt == NULL) || (value == NULL))
1487
0
        return(-1);
1488
383k
    if (ctxt->inputNr >= ctxt->inputMax) {
1489
0
        size_t newSize = ctxt->inputMax * 2;
1490
0
        xmlParserInputPtr *tmp;
1491
1492
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1493
0
                                               newSize * sizeof(*tmp));
1494
0
        if (tmp == NULL) {
1495
0
            xmlErrMemory(ctxt, NULL);
1496
0
            return (-1);
1497
0
        }
1498
0
        ctxt->inputTab = tmp;
1499
0
        ctxt->inputMax = newSize;
1500
0
    }
1501
383k
    ctxt->inputTab[ctxt->inputNr] = value;
1502
383k
    ctxt->input = value;
1503
383k
    return (ctxt->inputNr++);
1504
383k
}
1505
/**
1506
 * inputPop:
1507
 * @ctxt: an XML parser context
1508
 *
1509
 * Pops the top parser input from the input stack
1510
 *
1511
 * Returns the input just removed
1512
 */
1513
xmlParserInputPtr
1514
inputPop(xmlParserCtxtPtr ctxt)
1515
1.21M
{
1516
1.21M
    xmlParserInputPtr ret;
1517
1518
1.21M
    if (ctxt == NULL)
1519
0
        return(NULL);
1520
1.21M
    if (ctxt->inputNr <= 0)
1521
842k
        return (NULL);
1522
376k
    ctxt->inputNr--;
1523
376k
    if (ctxt->inputNr > 0)
1524
34.9k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1525
341k
    else
1526
341k
        ctxt->input = NULL;
1527
376k
    ret = ctxt->inputTab[ctxt->inputNr];
1528
376k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1529
376k
    return (ret);
1530
1.21M
}
1531
/**
1532
 * nodePush:
1533
 * @ctxt:  an XML parser context
1534
 * @value:  the element node
1535
 *
1536
 * DEPRECATED: Internal function, do not use.
1537
 *
1538
 * Pushes a new element node on top of the node stack
1539
 *
1540
 * Returns -1 in case of error, the index in the stack otherwise
1541
 */
1542
int
1543
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1544
3.03M
{
1545
3.03M
    if (ctxt == NULL) return(0);
1546
3.03M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1547
4.37k
        xmlNodePtr *tmp;
1548
1549
4.37k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1550
4.37k
                                      ctxt->nodeMax * 2 *
1551
4.37k
                                      sizeof(ctxt->nodeTab[0]));
1552
4.37k
        if (tmp == NULL) {
1553
2
            xmlErrMemory(ctxt, NULL);
1554
2
            return (-1);
1555
2
        }
1556
4.37k
        ctxt->nodeTab = tmp;
1557
4.37k
  ctxt->nodeMax *= 2;
1558
4.37k
    }
1559
3.03M
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1560
3.03M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1561
1
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1562
1
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1563
1
        xmlParserMaxDepth);
1564
1
  xmlHaltParser(ctxt);
1565
1
  return(-1);
1566
1
    }
1567
3.03M
    ctxt->nodeTab[ctxt->nodeNr] = value;
1568
3.03M
    ctxt->node = value;
1569
3.03M
    return (ctxt->nodeNr++);
1570
3.03M
}
1571
1572
/**
1573
 * nodePop:
1574
 * @ctxt: an XML parser context
1575
 *
1576
 * DEPRECATED: Internal function, do not use.
1577
 *
1578
 * Pops the top element node from the node stack
1579
 *
1580
 * Returns the node just removed
1581
 */
1582
xmlNodePtr
1583
nodePop(xmlParserCtxtPtr ctxt)
1584
2.97M
{
1585
2.97M
    xmlNodePtr ret;
1586
1587
2.97M
    if (ctxt == NULL) return(NULL);
1588
2.97M
    if (ctxt->nodeNr <= 0)
1589
77.2k
        return (NULL);
1590
2.90M
    ctxt->nodeNr--;
1591
2.90M
    if (ctxt->nodeNr > 0)
1592
2.69M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1593
206k
    else
1594
206k
        ctxt->node = NULL;
1595
2.90M
    ret = ctxt->nodeTab[ctxt->nodeNr];
1596
2.90M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1597
2.90M
    return (ret);
1598
2.97M
}
1599
1600
/**
1601
 * nameNsPush:
1602
 * @ctxt:  an XML parser context
1603
 * @value:  the element name
1604
 * @prefix:  the element prefix
1605
 * @URI:  the element namespace name
1606
 * @line:  the current line number for error messages
1607
 * @nsNr:  the number of namespaces pushed on the namespace table
1608
 *
1609
 * Pushes a new element name/prefix/URL on top of the name stack
1610
 *
1611
 * Returns -1 in case of error, the index in the stack otherwise
1612
 */
1613
static int
1614
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1615
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1616
3.41M
{
1617
3.41M
    xmlStartTag *tag;
1618
1619
3.41M
    if (ctxt->nameNr >= ctxt->nameMax) {
1620
8.49k
        const xmlChar * *tmp;
1621
8.49k
        xmlStartTag *tmp2;
1622
8.49k
        ctxt->nameMax *= 2;
1623
8.49k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1624
8.49k
                                    ctxt->nameMax *
1625
8.49k
                                    sizeof(ctxt->nameTab[0]));
1626
8.49k
        if (tmp == NULL) {
1627
5
      ctxt->nameMax /= 2;
1628
5
      goto mem_error;
1629
5
        }
1630
8.49k
  ctxt->nameTab = tmp;
1631
8.49k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1632
8.49k
                                    ctxt->nameMax *
1633
8.49k
                                    sizeof(ctxt->pushTab[0]));
1634
8.49k
        if (tmp2 == NULL) {
1635
2
      ctxt->nameMax /= 2;
1636
2
      goto mem_error;
1637
2
        }
1638
8.48k
  ctxt->pushTab = tmp2;
1639
3.40M
    } else if (ctxt->pushTab == NULL) {
1640
249k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1641
249k
                                            sizeof(ctxt->pushTab[0]));
1642
249k
        if (ctxt->pushTab == NULL)
1643
71
            goto mem_error;
1644
249k
    }
1645
3.41M
    ctxt->nameTab[ctxt->nameNr] = value;
1646
3.41M
    ctxt->name = value;
1647
3.41M
    tag = &ctxt->pushTab[ctxt->nameNr];
1648
3.41M
    tag->prefix = prefix;
1649
3.41M
    tag->URI = URI;
1650
3.41M
    tag->line = line;
1651
3.41M
    tag->nsNr = nsNr;
1652
3.41M
    return (ctxt->nameNr++);
1653
78
mem_error:
1654
78
    xmlErrMemory(ctxt, NULL);
1655
78
    return (-1);
1656
3.41M
}
1657
#ifdef LIBXML_PUSH_ENABLED
1658
/**
1659
 * nameNsPop:
1660
 * @ctxt: an XML parser context
1661
 *
1662
 * Pops the top element/prefix/URI name from the name stack
1663
 *
1664
 * Returns the name just removed
1665
 */
1666
static const xmlChar *
1667
nameNsPop(xmlParserCtxtPtr ctxt)
1668
{
1669
    const xmlChar *ret;
1670
1671
    if (ctxt->nameNr <= 0)
1672
        return (NULL);
1673
    ctxt->nameNr--;
1674
    if (ctxt->nameNr > 0)
1675
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1676
    else
1677
        ctxt->name = NULL;
1678
    ret = ctxt->nameTab[ctxt->nameNr];
1679
    ctxt->nameTab[ctxt->nameNr] = NULL;
1680
    return (ret);
1681
}
1682
#endif /* LIBXML_PUSH_ENABLED */
1683
1684
/**
1685
 * namePush:
1686
 * @ctxt:  an XML parser context
1687
 * @value:  the element name
1688
 *
1689
 * DEPRECATED: Internal function, do not use.
1690
 *
1691
 * Pushes a new element name on top of the name stack
1692
 *
1693
 * Returns -1 in case of error, the index in the stack otherwise
1694
 */
1695
int
1696
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1697
0
{
1698
0
    if (ctxt == NULL) return (-1);
1699
1700
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1701
0
        const xmlChar * *tmp;
1702
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1703
0
                                    ctxt->nameMax * 2 *
1704
0
                                    sizeof(ctxt->nameTab[0]));
1705
0
        if (tmp == NULL) {
1706
0
      goto mem_error;
1707
0
        }
1708
0
  ctxt->nameTab = tmp;
1709
0
        ctxt->nameMax *= 2;
1710
0
    }
1711
0
    ctxt->nameTab[ctxt->nameNr] = value;
1712
0
    ctxt->name = value;
1713
0
    return (ctxt->nameNr++);
1714
0
mem_error:
1715
0
    xmlErrMemory(ctxt, NULL);
1716
0
    return (-1);
1717
0
}
1718
1719
/**
1720
 * namePop:
1721
 * @ctxt: an XML parser context
1722
 *
1723
 * DEPRECATED: Internal function, do not use.
1724
 *
1725
 * Pops the top element name from the name stack
1726
 *
1727
 * Returns the name just removed
1728
 */
1729
const xmlChar *
1730
namePop(xmlParserCtxtPtr ctxt)
1731
3.15M
{
1732
3.15M
    const xmlChar *ret;
1733
1734
3.15M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1735
26
        return (NULL);
1736
3.15M
    ctxt->nameNr--;
1737
3.15M
    if (ctxt->nameNr > 0)
1738
2.89M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1739
267k
    else
1740
267k
        ctxt->name = NULL;
1741
3.15M
    ret = ctxt->nameTab[ctxt->nameNr];
1742
3.15M
    ctxt->nameTab[ctxt->nameNr] = NULL;
1743
3.15M
    return (ret);
1744
3.15M
}
1745
1746
3.50M
/*
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new top entry. The stack is doubled when full.
 *
 * Returns -1 in case of memory error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* double the capacity, rolled back if the reallocation fails */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                             ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1764
1765
3.24M
/*
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is moved to the
 * entry below it, or to the first entry of the array when the stack
 * becomes empty; the released slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1777
1778
/*
1779
 * Macros for accessing the content. Those should be used only by the parser,
1780
 * and not exported.
1781
 *
1782
 * Dirty macros, i.e. one often needs to make assumptions about the context to
1783
 * use them
1784
 *
1785
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1786
 *           To be used with extreme caution since operations consuming
1787
 *           characters may move the input buffer to a different location !
1788
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
1789
 *           This should be used internally by the parser
1790
 *           only to compare to ASCII values otherwise it would break when
1791
 *           running with UTF-8 encoding.
1792
 *   RAW     same as CUR but in the input buffer, bypass any token
1793
 *           extraction that may have been done
1794
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
1795
 *           to compare on ASCII based substring.
1796
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1797
 *           strings without newlines within the parser.
1798
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1799
 *           defined char within the parser.
1800
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1801
 *
1802
 *   NEXT    Skip to the next character, this does the proper decoding
1803
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
1804
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
1805
 *   CUR_CHAR(l) returns the current unicode character (int), set l
1806
 *           to the number of xmlChars used for the encoding [0-5].
1807
 *   CUR_SCHAR  same but operate on a string instead of the context
1808
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
1809
 *            the index
1810
 *   GROW, SHRINK  handling of input buffers
1811
 */
1812
1813
44.8M
/* Byte at the current position of the current input. */
#define RAW (*ctxt->input->cur)
1814
59.4M
/* Same expansion as RAW. */
#define CUR (*ctxt->input->cur)
1815
134M
/* Byte located val positions after the current one; no bounds check here. */
#define NXT(val) ctxt->input->cur[(val)]
1816
6.67M
/* The current position pointer itself (an lvalue on the input's cur field). */
#define CUR_PTR ctxt->input->cur
1817
1.93M
/* The base pointer of the current input. */
#define BASE_PTR ctxt->input->base
1818
1819
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn. Each CMPn is built on CMP(n-1), so the
 * comparison stops at the first mismatching byte and never reads past it.
 * s is evaluated once per compared byte and is not parenthesized.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
1820
18.2M
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1821
9.44M
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1822
#define CMP5( s, c1, c2, c3, c4, c5 ) \
1823
17.0M
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1824
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1825
15.9M
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1826
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1827
15.1M
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1828
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1829
14.4M
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1830
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1831
7.06M
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1832
7.06M
    ((unsigned char *) s)[ 8 ] == c9 )
1833
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1834
31.2k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
1835
31.2k
    ((unsigned char *) s)[ 9 ] == c10 )
1836
1837
4.05M
/*
 * SKIP(val): advance both the current position and the column counter by
 * val, then call xmlParserGrow() if the byte now under the cursor is 0.
 * The line counter is not touched.
 */
#define SKIP(val) do {             \
1838
4.05M
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
1839
4.05M
    if (*ctxt->input->cur == 0)           \
1840
4.05M
        xmlParserGrow(ctxt);           \
1841
4.05M
  } while (0)
1842
1843
/*
 * SKIPL(val): advance val bytes one at a time, incrementing the line
 * counter and resetting the column to 1 on each '\n' and incrementing the
 * column otherwise; then call xmlParserGrow() if the byte now under the
 * cursor is 0.
 * NOTE(review): val is used unparenthesized in the loop condition.
 */
#define SKIPL(val) do {             \
1844
    int skipl;                \
1845
    for(skipl=0; skipl<val; skipl++) {          \
1846
  if (*(ctxt->input->cur) == '\n') {        \
1847
  ctxt->input->line++; ctxt->input->col = 1;      \
1848
  } else ctxt->input->col++;          \
1849
  ctxt->input->cur++;           \
1850
    }                 \
1851
    if (*ctxt->input->cur == 0)           \
1852
        xmlParserGrow(ctxt);            \
1853
  } while (0)
1854
1855
10.7M
/*
 * SHRINK: call xmlParserShrink() when ctxt->progressive is 0, more than
 * 2 * INPUT_CHUNK bytes lie between base and cur, and fewer than
 * 2 * INPUT_CHUNK bytes remain between cur and end.
 * NOTE(review): expands to a bare if statement that already carries its
 * own terminating semicolon.
 */
#define SHRINK if ((ctxt->progressive == 0) &&       \
1856
10.7M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1857
10.7M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1858
10.7M
  xmlParserShrink(ctxt);
1859
1860
422M
/*
 * GROW: call xmlParserGrow() when ctxt->progressive is 0 and fewer than
 * INPUT_CHUNK bytes remain between cur and end.
 * NOTE(review): same bare-if expansion with trailing semicolon as SHRINK.
 */
#define GROW if ((ctxt->progressive == 0) &&       \
1861
422M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))  \
1862
422M
  xmlParserGrow(ctxt);
1863
1864
14.9M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1865
1866
54.5M
#define NEXT xmlNextChar(ctxt)
1867
1868
4.97M
#define NEXT1 {               \
1869
4.97M
  ctxt->input->col++;           \
1870
4.97M
  ctxt->input->cur++;           \
1871
4.97M
  if (*ctxt->input->cur == 0)         \
1872
4.97M
      xmlParserGrow(ctxt);           \
1873
4.97M
    }
1874
1875
991M
#define NEXTL(l) do {             \
1876
991M
    if (*(ctxt->input->cur) == '\n') {         \
1877
82.0M
  ctxt->input->line++; ctxt->input->col = 1;      \
1878
909M
    } else ctxt->input->col++;           \
1879
991M
    ctxt->input->cur += l;        \
1880
991M
  } while (0)
1881
1882
995M
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1883
579M
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1884
1885
#define COPY_BUF(l,b,i,v)           \
1886
1.49G
    if (l == 1) b[i++] = v;           \
1887
1.49G
    else i += xmlCopyCharMultiByte(&b[i],v)
1888
1889
/**
1890
 * xmlSkipBlankChars:
1891
 * @ctxt:  the XML parser context
1892
 *
1893
 * DEPRECATED: Internal function, do not use.
1894
 *
1895
 * skip all blanks character found at that point in the input streams.
1896
 * It pops up finished entities in the process if allowable at that point.
1897
 *
1898
 * Returns the number of space chars skipped
1899
 */
1900
1901
int
1902
14.9M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1903
14.9M
    int res = 0;
1904
1905
    /*
1906
     * It's Okay to use CUR/NEXT here since all the blanks are on
1907
     * the ASCII range.
1908
     */
1909
14.9M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
1910
14.9M
        (ctxt->instate == XML_PARSER_START)) {
1911
13.3M
  const xmlChar *cur;
1912
  /*
1913
   * if we are in the document content, go really fast
1914
   */
1915
13.3M
  cur = ctxt->input->cur;
1916
13.3M
  while (IS_BLANK_CH(*cur)) {
1917
5.74M
      if (*cur == '\n') {
1918
1.34M
    ctxt->input->line++; ctxt->input->col = 1;
1919
4.40M
      } else {
1920
4.40M
    ctxt->input->col++;
1921
4.40M
      }
1922
5.74M
      cur++;
1923
5.74M
      if (res < INT_MAX)
1924
5.74M
    res++;
1925
5.74M
      if (*cur == 0) {
1926
193k
    ctxt->input->cur = cur;
1927
193k
    xmlParserGrow(ctxt);
1928
193k
    cur = ctxt->input->cur;
1929
193k
      }
1930
5.74M
  }
1931
13.3M
  ctxt->input->cur = cur;
1932
13.3M
    } else {
1933
1.52M
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
1934
1935
2.82M
  while (ctxt->instate != XML_PARSER_EOF) {
1936
2.82M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
1937
1.18M
    NEXT;
1938
1.63M
      } else if (CUR == '%') {
1939
                /*
1940
                 * Need to handle support of entities branching here
1941
                 */
1942
149k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
1943
73.9k
                    break;
1944
75.0k
          xmlParsePEReference(ctxt);
1945
1.48M
            } else if (CUR == 0) {
1946
41.6k
                unsigned long consumed;
1947
41.6k
                xmlEntityPtr ent;
1948
1949
41.6k
                if (ctxt->inputNr <= 1)
1950
10.5k
                    break;
1951
1952
31.1k
                consumed = ctxt->input->consumed;
1953
31.1k
                xmlSaturatedAddSizeT(&consumed,
1954
31.1k
                                     ctxt->input->cur - ctxt->input->base);
1955
1956
                /*
1957
                 * Add to sizeentities when parsing an external entity
1958
                 * for the first time.
1959
                 */
1960
31.1k
                ent = ctxt->input->entity;
1961
31.1k
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1962
31.1k
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
1963
991
                    ent->flags |= XML_ENT_PARSED;
1964
1965
991
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
1966
991
                }
1967
1968
31.1k
                xmlParserEntityCheck(ctxt, consumed);
1969
1970
31.1k
                xmlPopInput(ctxt);
1971
1.44M
            } else {
1972
1.44M
                break;
1973
1.44M
            }
1974
1975
            /*
1976
             * Also increase the counter when entering or exiting a PERef.
1977
             * The spec says: "When a parameter-entity reference is recognized
1978
             * in the DTD and included, its replacement text MUST be enlarged
1979
             * by the attachment of one leading and one following space (#x20)
1980
             * character."
1981
             */
1982
1.29M
      if (res < INT_MAX)
1983
1.29M
    res++;
1984
1.29M
        }
1985
1.52M
    }
1986
14.9M
    return(res);
1987
14.9M
}
1988
1989
/************************************************************************
1990
 *                  *
1991
 *    Commodity functions to handle entities      *
1992
 *                  *
1993
 ************************************************************************/
1994
1995
/**
1996
 * xmlPopInput:
1997
 * @ctxt:  an XML parser context
1998
 *
1999
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2000
 *          pop it and return the next char.
2001
 *
2002
 * Returns the current xmlChar in the parser context
2003
 */
2004
xmlChar
2005
31.1k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2006
31.1k
    xmlParserInputPtr input;
2007
2008
31.1k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2009
31.1k
    if (xmlParserDebugEntities)
2010
0
  xmlGenericError(xmlGenericErrorContext,
2011
0
    "Popping input %d\n", ctxt->inputNr);
2012
31.1k
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2013
31.1k
        (ctxt->instate != XML_PARSER_EOF))
2014
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2015
0
                    "Unfinished entity outside the DTD");
2016
31.1k
    input = inputPop(ctxt);
2017
31.1k
    if (input->entity != NULL)
2018
31.1k
        input->entity->flags &= ~XML_ENT_EXPANDING;
2019
31.1k
    xmlFreeInputStream(input);
2020
31.1k
    if (*ctxt->input->cur == 0)
2021
382
        xmlParserGrow(ctxt);
2022
31.1k
    return(CUR);
2023
31.1k
}
2024
2025
/**
2026
 * xmlPushInput:
2027
 * @ctxt:  an XML parser context
2028
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2029
 *
2030
 * xmlPushInput: switch to a new input stream which is stacked on top
2031
 *               of the previous one(s).
2032
 * Returns -1 in case of error or the index in the input stack
2033
 */
2034
int
2035
48.8k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2036
48.8k
    int ret;
2037
48.8k
    if (input == NULL) return(-1);
2038
2039
41.7k
    if (xmlParserDebugEntities) {
2040
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2041
0
      xmlGenericError(xmlGenericErrorContext,
2042
0
        "%s(%d): ", ctxt->input->filename,
2043
0
        ctxt->input->line);
2044
0
  xmlGenericError(xmlGenericErrorContext,
2045
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2046
0
    }
2047
41.7k
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2048
41.7k
        (ctxt->inputNr > 100)) {
2049
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2050
0
        while (ctxt->inputNr > 1)
2051
0
            xmlFreeInputStream(inputPop(ctxt));
2052
0
  return(-1);
2053
0
    }
2054
41.7k
    ret = inputPush(ctxt, input);
2055
41.7k
    if (ctxt->instate == XML_PARSER_EOF)
2056
0
        return(-1);
2057
41.7k
    GROW;
2058
41.7k
    return(ret);
2059
41.7k
}
2060
2061
/**
2062
 * xmlParseCharRef:
2063
 * @ctxt:  an XML parser context
2064
 *
2065
 * DEPRECATED: Internal function, don't use.
2066
 *
2067
 * Parse a numeric character reference. Always consumes '&'.
2068
 *
2069
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2070
 *                  '&#x' [0-9a-fA-F]+ ';'
2071
 *
2072
 * [ WFC: Legal Character ]
2073
 * Characters referred to using character references must match the
2074
 * production for Char.
2075
 *
2076
 * Returns the value parsed (as an int), 0 in case of error
2077
 */
2078
int
2079
261k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2080
261k
    int val = 0;
2081
261k
    int count = 0;
2082
2083
    /*
2084
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2085
     */
2086
261k
    if ((RAW == '&') && (NXT(1) == '#') &&
2087
261k
        (NXT(2) == 'x')) {
2088
87.2k
  SKIP(3);
2089
87.2k
  GROW;
2090
304k
  while (RAW != ';') { /* loop blocked by count */
2091
227k
      if (count++ > 20) {
2092
4.76k
    count = 0;
2093
4.76k
    GROW;
2094
4.76k
                if (ctxt->instate == XML_PARSER_EOF)
2095
220
                    return(0);
2096
4.76k
      }
2097
227k
      if ((RAW >= '0') && (RAW <= '9'))
2098
85.4k
          val = val * 16 + (CUR - '0');
2099
141k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2100
66.2k
          val = val * 16 + (CUR - 'a') + 10;
2101
75.5k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2102
65.7k
          val = val * 16 + (CUR - 'A') + 10;
2103
9.78k
      else {
2104
9.78k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2105
9.78k
    val = 0;
2106
9.78k
    break;
2107
9.78k
      }
2108
217k
      if (val > 0x110000)
2109
54.5k
          val = 0x110000;
2110
2111
217k
      NEXT;
2112
217k
      count++;
2113
217k
  }
2114
86.9k
  if (RAW == ';') {
2115
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2116
77.2k
      ctxt->input->col++;
2117
77.2k
      ctxt->input->cur++;
2118
77.2k
  }
2119
174k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2120
174k
  SKIP(2);
2121
174k
  GROW;
2122
500k
  while (RAW != ';') { /* loop blocked by count */
2123
334k
      if (count++ > 20) {
2124
2.13k
    count = 0;
2125
2.13k
    GROW;
2126
2.13k
                if (ctxt->instate == XML_PARSER_EOF)
2127
221
                    return(0);
2128
2.13k
      }
2129
334k
      if ((RAW >= '0') && (RAW <= '9'))
2130
325k
          val = val * 10 + (CUR - '0');
2131
8.71k
      else {
2132
8.71k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2133
8.71k
    val = 0;
2134
8.71k
    break;
2135
8.71k
      }
2136
325k
      if (val > 0x110000)
2137
12.5k
          val = 0x110000;
2138
2139
325k
      NEXT;
2140
325k
      count++;
2141
325k
  }
2142
173k
  if (RAW == ';') {
2143
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2144
165k
      ctxt->input->col++;
2145
165k
      ctxt->input->cur++;
2146
165k
  }
2147
173k
    } else {
2148
0
        if (RAW == '&')
2149
0
            SKIP(1);
2150
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2151
0
    }
2152
2153
    /*
2154
     * [ WFC: Legal Character ]
2155
     * Characters referred to using character references must match the
2156
     * production for Char.
2157
     */
2158
260k
    if (val >= 0x110000) {
2159
1.70k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2160
1.70k
                "xmlParseCharRef: character reference out of bounds\n",
2161
1.70k
          val);
2162
259k
    } else if (IS_CHAR(val)) {
2163
233k
        return(val);
2164
233k
    } else {
2165
26.0k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2166
26.0k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2167
26.0k
                    val);
2168
26.0k
    }
2169
27.7k
    return(0);
2170
260k
}
2171
2172
/**
2173
 * xmlParseStringCharRef:
2174
 * @ctxt:  an XML parser context
2175
 * @str:  a pointer to an index in the string
2176
 *
2177
 * parse Reference declarations, variant parsing from a string rather
2178
 * than an an input flow.
2179
 *
2180
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2181
 *                  '&#x' [0-9a-fA-F]+ ';'
2182
 *
2183
 * [ WFC: Legal Character ]
2184
 * Characters referred to using character references must match the
2185
 * production for Char.
2186
 *
2187
 * Returns the value parsed (as an int), 0 in case of error, str will be
2188
 *         updated to the current value of the index
2189
 */
2190
static int
2191
58.1k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2192
58.1k
    const xmlChar *ptr;
2193
58.1k
    xmlChar cur;
2194
58.1k
    int val = 0;
2195
2196
58.1k
    if ((str == NULL) || (*str == NULL)) return(0);
2197
58.1k
    ptr = *str;
2198
58.1k
    cur = *ptr;
2199
58.1k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2200
28.8k
  ptr += 3;
2201
28.8k
  cur = *ptr;
2202
151k
  while (cur != ';') { /* Non input consuming loop */
2203
126k
      if ((cur >= '0') && (cur <= '9'))
2204
4.82k
          val = val * 16 + (cur - '0');
2205
121k
      else if ((cur >= 'a') && (cur <= 'f'))
2206
106k
          val = val * 16 + (cur - 'a') + 10;
2207
14.9k
      else if ((cur >= 'A') && (cur <= 'F'))
2208
10.9k
          val = val * 16 + (cur - 'A') + 10;
2209
4.01k
      else {
2210
4.01k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2211
4.01k
    val = 0;
2212
4.01k
    break;
2213
4.01k
      }
2214
122k
      if (val > 0x110000)
2215
69.0k
          val = 0x110000;
2216
2217
122k
      ptr++;
2218
122k
      cur = *ptr;
2219
122k
  }
2220
28.8k
  if (cur == ';')
2221
24.8k
      ptr++;
2222
29.2k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2223
29.2k
  ptr += 2;
2224
29.2k
  cur = *ptr;
2225
102k
  while (cur != ';') { /* Non input consuming loops */
2226
77.4k
      if ((cur >= '0') && (cur <= '9'))
2227
73.6k
          val = val * 10 + (cur - '0');
2228
3.78k
      else {
2229
3.78k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2230
3.78k
    val = 0;
2231
3.78k
    break;
2232
3.78k
      }
2233
73.6k
      if (val > 0x110000)
2234
1.17k
          val = 0x110000;
2235
2236
73.6k
      ptr++;
2237
73.6k
      cur = *ptr;
2238
73.6k
  }
2239
29.2k
  if (cur == ';')
2240
25.5k
      ptr++;
2241
29.2k
    } else {
2242
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2243
0
  return(0);
2244
0
    }
2245
58.1k
    *str = ptr;
2246
2247
    /*
2248
     * [ WFC: Legal Character ]
2249
     * Characters referred to using character references must match the
2250
     * production for Char.
2251
     */
2252
58.1k
    if (val >= 0x110000) {
2253
576
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2254
576
                "xmlParseStringCharRef: character reference out of bounds\n",
2255
576
                val);
2256
57.6k
    } else if (IS_CHAR(val)) {
2257
47.3k
        return(val);
2258
47.3k
    } else {
2259
10.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2260
10.2k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2261
10.2k
        val);
2262
10.2k
    }
2263
10.8k
    return(0);
2264
58.1k
}
2265
2266
/**
2267
 * xmlParserHandlePEReference:
2268
 * @ctxt:  the parser context
2269
 *
2270
 * DEPRECATED: Internal function, do not use.
2271
 *
2272
 * [69] PEReference ::= '%' Name ';'
2273
 *
2274
 * [ WFC: No Recursion ]
2275
 * A parsed entity must not contain a recursive
2276
 * reference to itself, either directly or indirectly.
2277
 *
2278
 * [ WFC: Entity Declared ]
2279
 * In a document without any DTD, a document with only an internal DTD
2280
 * subset which contains no parameter entity references, or a document
2281
 * with "standalone='yes'", ...  ... The declaration of a parameter
2282
 * entity must precede any reference to it...
2283
 *
2284
 * [ VC: Entity Declared ]
2285
 * In a document with an external subset or external parameter entities
2286
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2287
 * must precede any reference to it...
2288
 *
2289
 * [ WFC: In DTD ]
2290
 * Parameter-entity references may only appear in the DTD.
2291
 * NOTE: misleading but this is handled.
2292
 *
2293
 * A PEReference may have been detected in the current input stream
2294
 * the handling is done accordingly to
2295
 *      http://www.w3.org/TR/REC-xml#entproc
2296
 * i.e.
2297
 *   - Included in literal in entity values
2298
 *   - Included as Parameter Entity reference within DTDs
2299
 */
2300
void
2301
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2302
0
    switch(ctxt->instate) {
2303
0
  case XML_PARSER_CDATA_SECTION:
2304
0
      return;
2305
0
        case XML_PARSER_COMMENT:
2306
0
      return;
2307
0
  case XML_PARSER_START_TAG:
2308
0
      return;
2309
0
  case XML_PARSER_END_TAG:
2310
0
      return;
2311
0
        case XML_PARSER_EOF:
2312
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2313
0
      return;
2314
0
        case XML_PARSER_PROLOG:
2315
0
  case XML_PARSER_START:
2316
0
  case XML_PARSER_MISC:
2317
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2318
0
      return;
2319
0
  case XML_PARSER_ENTITY_DECL:
2320
0
        case XML_PARSER_CONTENT:
2321
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2322
0
        case XML_PARSER_PI:
2323
0
  case XML_PARSER_SYSTEM_LITERAL:
2324
0
  case XML_PARSER_PUBLIC_LITERAL:
2325
      /* we just ignore it there */
2326
0
      return;
2327
0
        case XML_PARSER_EPILOG:
2328
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2329
0
      return;
2330
0
  case XML_PARSER_ENTITY_VALUE:
2331
      /*
2332
       * NOTE: in the case of entity values, we don't do the
2333
       *       substitution here since we need the literal
2334
       *       entity value to be able to save the internal
2335
       *       subset of the document.
2336
       *       This will be handled by xmlStringDecodeEntities
2337
       */
2338
0
      return;
2339
0
        case XML_PARSER_DTD:
2340
      /*
2341
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2342
       * In the internal DTD subset, parameter-entity references
2343
       * can occur only where markup declarations can occur, not
2344
       * within markup declarations.
2345
       * In that case this is handled in xmlParseMarkupDecl
2346
       */
2347
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2348
0
    return;
2349
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2350
0
    return;
2351
0
            break;
2352
0
        case XML_PARSER_IGNORE:
2353
0
            return;
2354
0
    }
2355
2356
0
    xmlParsePEReference(ctxt);
2357
0
}
2358
2359
/*
 * Macro used to grow the current buffer: the new capacity is twice the
 * old one plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 * jumped to when the new size wraps around or when xmlRealloc() fails,
 * in which case buffer and buffer##_size are left unchanged.
 * Expands to a brace block, not a do/while statement.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2373
2374
/**
2375
 * xmlStringDecodeEntitiesInt:
2376
 * @ctxt:  the parser context
2377
 * @str:  the input string
2378
 * @len: the string length
2379
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2380
 * @end:  an end marker xmlChar, 0 if none
2381
 * @end2:  an end marker xmlChar, 0 if none
2382
 * @end3:  an end marker xmlChar, 0 if none
2383
 * @check:  whether to perform entity checks
2384
 */
2385
static xmlChar *
2386
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2387
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2388
104k
                           int check) {
2389
104k
    xmlChar *buffer = NULL;
2390
104k
    size_t buffer_size = 0;
2391
104k
    size_t nbchars = 0;
2392
2393
104k
    xmlChar *current = NULL;
2394
104k
    xmlChar *rep = NULL;
2395
104k
    const xmlChar *last;
2396
104k
    xmlEntityPtr ent;
2397
104k
    int c,l;
2398
2399
104k
    if (str == NULL)
2400
4.15k
        return(NULL);
2401
100k
    last = str + len;
2402
2403
100k
    if (((ctxt->depth > 40) &&
2404
100k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2405
100k
  (ctxt->depth > 100)) {
2406
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2407
0
                       "Maximum entity nesting depth exceeded");
2408
0
  return(NULL);
2409
0
    }
2410
2411
    /*
2412
     * allocate a translation buffer.
2413
     */
2414
100k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2415
100k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2416
100k
    if (buffer == NULL) goto mem_error;
2417
2418
    /*
2419
     * OK loop until we reach one of the ending char or a size limit.
2420
     * we are operating on already parsed values.
2421
     */
2422
100k
    if (str < last)
2423
96.1k
  c = CUR_SCHAR(str, l);
2424
3.98k
    else
2425
3.98k
        c = 0;
2426
575M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2427
575M
           (c != end2) && (c != end3) &&
2428
575M
           (ctxt->instate != XML_PARSER_EOF)) {
2429
2430
575M
  if (c == 0) break;
2431
575M
        if ((c == '&') && (str[1] == '#')) {
2432
58.1k
      int val = xmlParseStringCharRef(ctxt, &str);
2433
58.1k
      if (val == 0)
2434
10.8k
                goto int_error;
2435
47.3k
      COPY_BUF(0,buffer,nbchars,val);
2436
47.3k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2437
5.62k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2438
5.62k
      }
2439
575M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2440
91.2k
      if (xmlParserDebugEntities)
2441
0
    xmlGenericError(xmlGenericErrorContext,
2442
0
      "String decoding Entity Reference: %.30s\n",
2443
0
      str);
2444
91.2k
      ent = xmlParseStringEntityRef(ctxt, &str);
2445
91.2k
      if ((ent != NULL) &&
2446
91.2k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2447
34.6k
    if (ent->content != NULL) {
2448
34.6k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2449
34.6k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2450
970
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2451
970
        }
2452
34.6k
    } else {
2453
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2454
0
          "predefined entity has no content\n");
2455
0
                    goto int_error;
2456
0
    }
2457
56.5k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2458
17.6k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2459
100
                    goto int_error;
2460
2461
17.5k
                if (ent->flags & XML_ENT_EXPANDING) {
2462
230
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2463
230
                    xmlHaltParser(ctxt);
2464
230
                    ent->content[0] = 0;
2465
230
                    goto int_error;
2466
230
                }
2467
2468
17.3k
                ent->flags |= XML_ENT_EXPANDING;
2469
17.3k
    ctxt->depth++;
2470
17.3k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2471
17.3k
                        ent->length, what, 0, 0, 0, check);
2472
17.3k
    ctxt->depth--;
2473
17.3k
                ent->flags &= ~XML_ENT_EXPANDING;
2474
2475
17.3k
    if (rep == NULL) {
2476
407
                    ent->content[0] = 0;
2477
407
                    goto int_error;
2478
407
                }
2479
2480
16.9k
                current = rep;
2481
140M
                while (*current != 0) { /* non input consuming loop */
2482
140M
                    buffer[nbchars++] = *current++;
2483
140M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2484
16.4k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2485
16.4k
                    }
2486
140M
                }
2487
16.9k
                xmlFree(rep);
2488
16.9k
                rep = NULL;
2489
38.9k
      } else if (ent != NULL) {
2490
3.14k
    int i = xmlStrlen(ent->name);
2491
3.14k
    const xmlChar *cur = ent->name;
2492
2493
3.14k
    buffer[nbchars++] = '&';
2494
3.14k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2495
569
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2496
569
    }
2497
7.21k
    for (;i > 0;i--)
2498
4.07k
        buffer[nbchars++] = *cur++;
2499
3.14k
    buffer[nbchars++] = ';';
2500
3.14k
      }
2501
575M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2502
4.09k
      if (xmlParserDebugEntities)
2503
0
    xmlGenericError(xmlGenericErrorContext,
2504
0
      "String decoding PE Reference: %.30s\n", str);
2505
4.09k
      ent = xmlParseStringPEReference(ctxt, &str);
2506
4.09k
      if (ent != NULL) {
2507
2.02k
                if (ent->content == NULL) {
2508
        /*
2509
         * Note: external parsed entities will not be loaded,
2510
         * it is not required for a non-validating parser to
2511
         * complete external PEReferences coming from the
2512
         * internal subset
2513
         */
2514
82
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2515
82
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2516
82
      (ctxt->validate != 0)) {
2517
82
      xmlLoadEntityContent(ctxt, ent);
2518
82
        } else {
2519
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2520
0
      "not validating will not read content for PE entity %s\n",
2521
0
                          ent->name, NULL);
2522
0
        }
2523
82
    }
2524
2525
2.02k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2526
2
                    goto int_error;
2527
2528
2.01k
                if (ent->flags & XML_ENT_EXPANDING) {
2529
59
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2530
59
                    xmlHaltParser(ctxt);
2531
59
                    if (ent->content != NULL)
2532
33
                        ent->content[0] = 0;
2533
59
                    goto int_error;
2534
59
                }
2535
2536
1.96k
                ent->flags |= XML_ENT_EXPANDING;
2537
1.96k
    ctxt->depth++;
2538
1.96k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2539
1.96k
                        ent->length, what, 0, 0, 0, check);
2540
1.96k
    ctxt->depth--;
2541
1.96k
                ent->flags &= ~XML_ENT_EXPANDING;
2542
2543
1.96k
    if (rep == NULL) {
2544
56
                    if (ent->content != NULL)
2545
56
                        ent->content[0] = 0;
2546
56
                    goto int_error;
2547
56
                }
2548
1.90k
                current = rep;
2549
787k
                while (*current != 0) { /* non input consuming loop */
2550
785k
                    buffer[nbchars++] = *current++;
2551
785k
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2552
2.15k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2553
2.15k
                    }
2554
785k
                }
2555
1.90k
                xmlFree(rep);
2556
1.90k
                rep = NULL;
2557
1.90k
      }
2558
575M
  } else {
2559
575M
      COPY_BUF(l,buffer,nbchars,c);
2560
575M
      str += l;
2561
575M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2562
274k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2563
274k
      }
2564
575M
  }
2565
575M
  if (str < last)
2566
575M
      c = CUR_SCHAR(str, l);
2567
83.9k
  else
2568
83.9k
      c = 0;
2569
575M
    }
2570
88.4k
    buffer[nbchars] = 0;
2571
88.4k
    return(buffer);
2572
2573
26
mem_error:
2574
26
    xmlErrMemory(ctxt, NULL);
2575
11.7k
int_error:
2576
11.7k
    if (rep != NULL)
2577
1
        xmlFree(rep);
2578
11.7k
    if (buffer != NULL)
2579
11.7k
        xmlFree(buffer);
2580
11.7k
    return(NULL);
2581
26
}
2582
2583
/**
2584
 * xmlStringLenDecodeEntities:
2585
 * @ctxt:  the parser context
2586
 * @str:  the input string
2587
 * @len: the string length
2588
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2589
 * @end:  an end marker xmlChar, 0 if none
2590
 * @end2:  an end marker xmlChar, 0 if none
2591
 * @end3:  an end marker xmlChar, 0 if none
2592
 *
2593
 * DEPRECATED: Internal function, don't use.
2594
 *
2595
 * Takes a entity string content and process to do the adequate substitutions.
2596
 *
2597
 * [67] Reference ::= EntityRef | CharRef
2598
 *
2599
 * [69] PEReference ::= '%' Name ';'
2600
 *
2601
 * Returns A newly allocated string with the substitution done. The caller
2602
 *      must deallocate it !
2603
 */
2604
xmlChar *
2605
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2606
                           int what, xmlChar end, xmlChar  end2,
2607
0
                           xmlChar end3) {
2608
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2609
0
        return(NULL);
2610
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2611
0
                                      end, end2, end3, 0));
2612
0
}
2613
2614
/**
2615
 * xmlStringDecodeEntities:
2616
 * @ctxt:  the parser context
2617
 * @str:  the input string
2618
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2619
 * @end:  an end marker xmlChar, 0 if none
2620
 * @end2:  an end marker xmlChar, 0 if none
2621
 * @end3:  an end marker xmlChar, 0 if none
2622
 *
2623
 * DEPRECATED: Internal function, don't use.
2624
 *
2625
 * Takes a entity string content and process to do the adequate substitutions.
2626
 *
2627
 * [67] Reference ::= EntityRef | CharRef
2628
 *
2629
 * [69] PEReference ::= '%' Name ';'
2630
 *
2631
 * Returns A newly allocated string with the substitution done. The caller
2632
 *      must deallocate it !
2633
 */
2634
xmlChar *
2635
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2636
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2637
0
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2638
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2639
0
                                      end, end2, end3, 0));
2640
0
}
2641
2642
/************************************************************************
2643
 *                  *
2644
 *    Commodity functions, cleanup needed ?     *
2645
 *                  *
2646
 ************************************************************************/
2647
2648
/**
2649
 * areBlanks:
2650
 * @ctxt:  an XML parser context
2651
 * @str:  a xmlChar *
2652
 * @len:  the size of @str
2653
 * @blank_chars: we know the chars are blanks
2654
 *
2655
 * Is this a sequence of blank chars that one can ignore ?
2656
 *
2657
 * Returns 1 if ignorable 0 otherwise.
2658
 */
2659
2660
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2661
1.81M
                     int blank_chars) {
2662
1.81M
    int i, ret;
2663
1.81M
    xmlNodePtr lastChild;
2664
2665
    /*
2666
     * Don't spend time trying to differentiate them, the same callback is
2667
     * used !
2668
     */
2669
1.81M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2670
1.81M
  return(0);
2671
2672
    /*
2673
     * Check for xml:space value.
2674
     */
2675
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2676
0
        (*(ctxt->space) == -2))
2677
0
  return(0);
2678
2679
    /*
2680
     * Check that the string is made of blanks
2681
     */
2682
0
    if (blank_chars == 0) {
2683
0
  for (i = 0;i < len;i++)
2684
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2685
0
    }
2686
2687
    /*
2688
     * Look if the element is mixed content in the DTD if available
2689
     */
2690
0
    if (ctxt->node == NULL) return(0);
2691
0
    if (ctxt->myDoc != NULL) {
2692
0
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2693
0
        if (ret == 0) return(1);
2694
0
        if (ret == 1) return(0);
2695
0
    }
2696
2697
    /*
2698
     * Otherwise, heuristic :-\
2699
     */
2700
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2701
0
    if ((ctxt->node->children == NULL) &&
2702
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2703
2704
0
    lastChild = xmlGetLastChild(ctxt->node);
2705
0
    if (lastChild == NULL) {
2706
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2707
0
            (ctxt->node->content != NULL)) return(0);
2708
0
    } else if (xmlNodeIsText(lastChild))
2709
0
        return(0);
2710
0
    else if ((ctxt->node->children != NULL) &&
2711
0
             (xmlNodeIsText(ctxt->node->children)))
2712
0
        return(0);
2713
0
    return(1);
2714
0
}
2715
2716
/************************************************************************
2717
 *                  *
2718
 *    Extra stuff for namespace support     *
2719
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2720
 *                  *
2721
 ************************************************************************/
2722
2723
/**
2724
 * xmlSplitQName:
2725
 * @ctxt:  an XML parser context
2726
 * @name:  an XML parser context
2727
 * @prefix:  a xmlChar **
2728
 *
2729
 * parse an UTF8 encoded XML qualified name string
2730
 *
2731
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2732
 *
2733
 * [NS 6] Prefix ::= NCName
2734
 *
2735
 * [NS 7] LocalPart ::= NCName
2736
 *
2737
 * Returns the local part, and prefix is updated
2738
 *   to get the Prefix if any.
2739
 */
2740
2741
xmlChar *
2742
39.0k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2743
39.0k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2744
39.0k
    xmlChar *buffer = NULL;
2745
39.0k
    int len = 0;
2746
39.0k
    int max = XML_MAX_NAMELEN;
2747
39.0k
    xmlChar *ret = NULL;
2748
39.0k
    const xmlChar *cur = name;
2749
39.0k
    int c;
2750
2751
39.0k
    if (prefix == NULL) return(NULL);
2752
39.0k
    *prefix = NULL;
2753
2754
39.0k
    if (cur == NULL) return(NULL);
2755
2756
#ifndef XML_XML_NAMESPACE
2757
    /* xml: prefix is not really a namespace */
2758
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2759
        (cur[2] == 'l') && (cur[3] == ':'))
2760
  return(xmlStrdup(name));
2761
#endif
2762
2763
    /* nasty but well=formed */
2764
39.0k
    if (cur[0] == ':')
2765
4.08k
  return(xmlStrdup(name));
2766
2767
34.9k
    c = *cur++;
2768
504k
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2769
469k
  buf[len++] = c;
2770
469k
  c = *cur++;
2771
469k
    }
2772
34.9k
    if (len >= max) {
2773
  /*
2774
   * Okay someone managed to make a huge name, so he's ready to pay
2775
   * for the processing speed.
2776
   */
2777
2.33k
  max = len * 2;
2778
2779
2.33k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2780
2.33k
  if (buffer == NULL) {
2781
1
      xmlErrMemory(ctxt, NULL);
2782
1
      return(NULL);
2783
1
  }
2784
2.33k
  memcpy(buffer, buf, len);
2785
797k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2786
795k
      if (len + 10 > max) {
2787
3.31k
          xmlChar *tmp;
2788
2789
3.31k
    max *= 2;
2790
3.31k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
2791
3.31k
    if (tmp == NULL) {
2792
1
        xmlFree(buffer);
2793
1
        xmlErrMemory(ctxt, NULL);
2794
1
        return(NULL);
2795
1
    }
2796
3.30k
    buffer = tmp;
2797
3.30k
      }
2798
795k
      buffer[len++] = c;
2799
795k
      c = *cur++;
2800
795k
  }
2801
2.33k
  buffer[len] = 0;
2802
2.33k
    }
2803
2804
34.9k
    if ((c == ':') && (*cur == 0)) {
2805
764
        if (buffer != NULL)
2806
116
      xmlFree(buffer);
2807
764
  *prefix = NULL;
2808
764
  return(xmlStrdup(name));
2809
764
    }
2810
2811
34.1k
    if (buffer == NULL)
2812
31.9k
  ret = xmlStrndup(buf, len);
2813
2.21k
    else {
2814
2.21k
  ret = buffer;
2815
2.21k
  buffer = NULL;
2816
2.21k
  max = XML_MAX_NAMELEN;
2817
2.21k
    }
2818
2819
2820
34.1k
    if (c == ':') {
2821
14.1k
  c = *cur;
2822
14.1k
        *prefix = ret;
2823
14.1k
  if (c == 0) {
2824
0
      return(xmlStrndup(BAD_CAST "", 0));
2825
0
  }
2826
14.1k
  len = 0;
2827
2828
  /*
2829
   * Check that the first character is proper to start
2830
   * a new name
2831
   */
2832
14.1k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
2833
14.1k
        ((c >= 0x41) && (c <= 0x5A)) ||
2834
14.1k
        (c == '_') || (c == ':'))) {
2835
4.98k
      int l;
2836
4.98k
      int first = CUR_SCHAR(cur, l);
2837
2838
4.98k
      if (!IS_LETTER(first) && (first != '_')) {
2839
939
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2840
939
          "Name %s is not XML Namespace compliant\n",
2841
939
          name);
2842
939
      }
2843
4.98k
  }
2844
14.1k
  cur++;
2845
2846
312k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2847
298k
      buf[len++] = c;
2848
298k
      c = *cur++;
2849
298k
  }
2850
14.1k
  if (len >= max) {
2851
      /*
2852
       * Okay someone managed to make a huge name, so he's ready to pay
2853
       * for the processing speed.
2854
       */
2855
1.97k
      max = len * 2;
2856
2857
1.97k
      buffer = (xmlChar *) xmlMallocAtomic(max);
2858
1.97k
      if (buffer == NULL) {
2859
1
          xmlErrMemory(ctxt, NULL);
2860
1
    return(NULL);
2861
1
      }
2862
1.97k
      memcpy(buffer, buf, len);
2863
652k
      while (c != 0) { /* tested bigname2.xml */
2864
650k
    if (len + 10 > max) {
2865
2.45k
        xmlChar *tmp;
2866
2867
2.45k
        max *= 2;
2868
2.45k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
2869
2.45k
        if (tmp == NULL) {
2870
1
      xmlErrMemory(ctxt, NULL);
2871
1
      xmlFree(buffer);
2872
1
      return(NULL);
2873
1
        }
2874
2.45k
        buffer = tmp;
2875
2.45k
    }
2876
650k
    buffer[len++] = c;
2877
650k
    c = *cur++;
2878
650k
      }
2879
1.97k
      buffer[len] = 0;
2880
1.97k
  }
2881
2882
14.1k
  if (buffer == NULL)
2883
12.1k
      ret = xmlStrndup(buf, len);
2884
1.97k
  else {
2885
1.97k
      ret = buffer;
2886
1.97k
  }
2887
14.1k
    }
2888
2889
34.1k
    return(ret);
2890
34.1k
}
2891
2892
/************************************************************************
2893
 *                  *
2894
 *      The parser itself       *
2895
 *  Relates to http://www.w3.org/TR/REC-xml       *
2896
 *                  *
2897
 ************************************************************************/
2898
2899
/************************************************************************
2900
 *                  *
2901
 *  Routines to parse Name, NCName and NmToken      *
2902
 *                  *
2903
 ************************************************************************/
2904
#ifdef DEBUG
2905
static unsigned long nbParseName = 0;
2906
static unsigned long nbParseNmToken = 0;
2907
static unsigned long nbParseNCName = 0;
2908
static unsigned long nbParseNCNameComplex = 0;
2909
static unsigned long nbParseNameComplex = 0;
2910
static unsigned long nbParseStringName = 0;
2911
#endif
2912
2913
/*
2914
 * The two following functions are related to the change of accepted
2915
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2916
 * They correspond to the modified production [4] and the new production [4a]
2917
 * changes in that revision. Also note that the macros used for the
2918
 * productions Letter, Digit, CombiningChar and Extender are not needed
2919
 * anymore.
2920
 * We still keep compatibility to pre-revision5 parsing semantic if the
2921
 * new XML_PARSE_OLD10 option is given to the parser.
2922
 */
2923
static int
2924
1.81M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2925
1.81M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2926
        /*
2927
   * Use the new checks of production [4] [4a] amd [5] of the
2928
   * Update 5 of XML-1.0
2929
   */
2930
1.81M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2931
1.81M
      (((c >= 'a') && (c <= 'z')) ||
2932
1.81M
       ((c >= 'A') && (c <= 'Z')) ||
2933
1.81M
       (c == '_') || (c == ':') ||
2934
1.81M
       ((c >= 0xC0) && (c <= 0xD6)) ||
2935
1.81M
       ((c >= 0xD8) && (c <= 0xF6)) ||
2936
1.81M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
2937
1.81M
       ((c >= 0x370) && (c <= 0x37D)) ||
2938
1.81M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
2939
1.81M
       ((c >= 0x200C) && (c <= 0x200D)) ||
2940
1.81M
       ((c >= 0x2070) && (c <= 0x218F)) ||
2941
1.81M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2942
1.81M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
2943
1.81M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
2944
1.81M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2945
1.81M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
2946
377k
      return(1);
2947
1.81M
    } else {
2948
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
2949
0
      return(1);
2950
0
    }
2951
1.43M
    return(0);
2952
1.81M
}
2953
2954
static int
2955
85.2M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2956
85.2M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2957
        /*
2958
   * Use the new checks of production [4] [4a] amd [5] of the
2959
   * Update 5 of XML-1.0
2960
   */
2961
85.2M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2962
85.2M
      (((c >= 'a') && (c <= 'z')) ||
2963
85.2M
       ((c >= 'A') && (c <= 'Z')) ||
2964
85.2M
       ((c >= '0') && (c <= '9')) || /* !start */
2965
85.2M
       (c == '_') || (c == ':') ||
2966
85.2M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2967
85.2M
       ((c >= 0xC0) && (c <= 0xD6)) ||
2968
85.2M
       ((c >= 0xD8) && (c <= 0xF6)) ||
2969
85.2M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
2970
85.2M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2971
85.2M
       ((c >= 0x370) && (c <= 0x37D)) ||
2972
85.2M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
2973
85.2M
       ((c >= 0x200C) && (c <= 0x200D)) ||
2974
85.2M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2975
85.2M
       ((c >= 0x2070) && (c <= 0x218F)) ||
2976
85.2M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2977
85.2M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
2978
85.2M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
2979
85.2M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2980
85.2M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
2981
84.9M
       return(1);
2982
85.2M
    } else {
2983
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2984
0
            (c == '.') || (c == '-') ||
2985
0
      (c == '_') || (c == ':') ||
2986
0
      (IS_COMBINING(c)) ||
2987
0
      (IS_EXTENDER(c)))
2988
0
      return(1);
2989
0
    }
2990
315k
    return(0);
2991
85.2M
}
2992
2993
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2994
                                          int *len, int *alloc, int normalize);
2995
2996
static const xmlChar *
2997
296k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2998
296k
    int len = 0, l;
2999
296k
    int c;
3000
296k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3001
0
                    XML_MAX_TEXT_LENGTH :
3002
296k
                    XML_MAX_NAME_LENGTH;
3003
3004
#ifdef DEBUG
3005
    nbParseNameComplex++;
3006
#endif
3007
3008
    /*
3009
     * Handler for more complex cases
3010
     */
3011
296k
    c = CUR_CHAR(l);
3012
296k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3013
        /*
3014
   * Use the new checks of production [4] [4a] amd [5] of the
3015
   * Update 5 of XML-1.0
3016
   */
3017
296k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3018
296k
      (!(((c >= 'a') && (c <= 'z')) ||
3019
290k
         ((c >= 'A') && (c <= 'Z')) ||
3020
290k
         (c == '_') || (c == ':') ||
3021
290k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3022
290k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3023
290k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3024
290k
         ((c >= 0x370) && (c <= 0x37D)) ||
3025
290k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3026
290k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3027
290k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3028
290k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3029
290k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3030
290k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3031
290k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3032
290k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3033
135k
      return(NULL);
3034
135k
  }
3035
160k
  len += l;
3036
160k
  NEXTL(l);
3037
160k
  c = CUR_CHAR(l);
3038
38.9M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3039
38.9M
         (((c >= 'a') && (c <= 'z')) ||
3040
38.8M
          ((c >= 'A') && (c <= 'Z')) ||
3041
38.8M
          ((c >= '0') && (c <= '9')) || /* !start */
3042
38.8M
          (c == '_') || (c == ':') ||
3043
38.8M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3044
38.8M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3045
38.8M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3046
38.8M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3047
38.8M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3048
38.8M
          ((c >= 0x370) && (c <= 0x37D)) ||
3049
38.8M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3050
38.8M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3051
38.8M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3052
38.8M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3053
38.8M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3054
38.8M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3055
38.8M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3056
38.8M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3057
38.8M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3058
38.8M
    )) {
3059
38.7M
            if (len <= INT_MAX - l)
3060
38.7M
          len += l;
3061
38.7M
      NEXTL(l);
3062
38.7M
      c = CUR_CHAR(l);
3063
38.7M
  }
3064
160k
    } else {
3065
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3066
0
      (!IS_LETTER(c) && (c != '_') &&
3067
0
       (c != ':'))) {
3068
0
      return(NULL);
3069
0
  }
3070
0
  len += l;
3071
0
  NEXTL(l);
3072
0
  c = CUR_CHAR(l);
3073
3074
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3075
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3076
0
    (c == '.') || (c == '-') ||
3077
0
    (c == '_') || (c == ':') ||
3078
0
    (IS_COMBINING(c)) ||
3079
0
    (IS_EXTENDER(c)))) {
3080
0
            if (len <= INT_MAX - l)
3081
0
          len += l;
3082
0
      NEXTL(l);
3083
0
      c = CUR_CHAR(l);
3084
0
  }
3085
0
    }
3086
160k
    if (ctxt->instate == XML_PARSER_EOF)
3087
1.80k
        return(NULL);
3088
158k
    if (len > maxLength) {
3089
419
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3090
419
        return(NULL);
3091
419
    }
3092
158k
    if (ctxt->input->cur - ctxt->input->base < len) {
3093
        /*
3094
         * There were a couple of bugs where PERefs lead to to a change
3095
         * of the buffer. Check the buffer size to avoid passing an invalid
3096
         * pointer to xmlDictLookup.
3097
         */
3098
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3099
0
                    "unexpected change of input buffer");
3100
0
        return (NULL);
3101
0
    }
3102
158k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3103
594
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3104
157k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3105
158k
}
3106
3107
/**
3108
 * xmlParseName:
3109
 * @ctxt:  an XML parser context
3110
 *
3111
 * DEPRECATED: Internal function, don't use.
3112
 *
3113
 * parse an XML name.
3114
 *
3115
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3116
 *                  CombiningChar | Extender
3117
 *
3118
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3119
 *
3120
 * [6] Names ::= Name (#x20 Name)*
3121
 *
3122
 * Returns the Name parsed or NULL
3123
 */
3124
3125
const xmlChar *
3126
964k
xmlParseName(xmlParserCtxtPtr ctxt) {
3127
964k
    const xmlChar *in;
3128
964k
    const xmlChar *ret;
3129
964k
    size_t count = 0;
3130
964k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3131
0
                       XML_MAX_TEXT_LENGTH :
3132
964k
                       XML_MAX_NAME_LENGTH;
3133
3134
964k
    GROW;
3135
964k
    if (ctxt->instate == XML_PARSER_EOF)
3136
509
        return(NULL);
3137
3138
#ifdef DEBUG
3139
    nbParseName++;
3140
#endif
3141
3142
    /*
3143
     * Accelerator for simple ASCII names
3144
     */
3145
964k
    in = ctxt->input->cur;
3146
964k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3147
964k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3148
964k
  (*in == '_') || (*in == ':')) {
3149
739k
  in++;
3150
88.8M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3151
88.8M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3152
88.8M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3153
88.8M
         (*in == '_') || (*in == '-') ||
3154
88.8M
         (*in == ':') || (*in == '.'))
3155
88.0M
      in++;
3156
739k
  if ((*in > 0) && (*in < 0x80)) {
3157
667k
      count = in - ctxt->input->cur;
3158
667k
            if (count > maxLength) {
3159
775
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3160
775
                return(NULL);
3161
775
            }
3162
666k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3163
666k
      ctxt->input->cur = in;
3164
666k
      ctxt->input->col += count;
3165
666k
      if (ret == NULL)
3166
13
          xmlErrMemory(ctxt, NULL);
3167
666k
      return(ret);
3168
667k
  }
3169
739k
    }
3170
    /* accelerator for special cases */
3171
296k
    return(xmlParseNameComplex(ctxt));
3172
964k
}
3173
3174
static const xmlChar *
3175
1.69M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3176
1.69M
    int len = 0, l;
3177
1.69M
    int c;
3178
1.69M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3179
0
                    XML_MAX_TEXT_LENGTH :
3180
1.69M
                    XML_MAX_NAME_LENGTH;
3181
1.69M
    size_t startPosition = 0;
3182
3183
#ifdef DEBUG
3184
    nbParseNCNameComplex++;
3185
#endif
3186
3187
    /*
3188
     * Handler for more complex cases
3189
     */
3190
1.69M
    startPosition = CUR_PTR - BASE_PTR;
3191
1.69M
    c = CUR_CHAR(l);
3192
1.69M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3193
1.69M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3194
1.46M
  return(NULL);
3195
1.46M
    }
3196
3197
32.7M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3198
32.7M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3199
32.5M
        if (len <= INT_MAX - l)
3200
32.5M
      len += l;
3201
32.5M
  NEXTL(l);
3202
32.5M
  c = CUR_CHAR(l);
3203
32.5M
    }
3204
234k
    if (ctxt->instate == XML_PARSER_EOF)
3205
268
        return(NULL);
3206
233k
    if (len > maxLength) {
3207
540
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3208
540
        return(NULL);
3209
540
    }
3210
233k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3211
233k
}
3212
3213
/**
3214
 * xmlParseNCName:
3215
 * @ctxt:  an XML parser context
3216
 * @len:  length of the string parsed
3217
 *
3218
 * parse an XML name.
3219
 *
3220
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3221
 *                      CombiningChar | Extender
3222
 *
3223
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3224
 *
3225
 * Returns the Name parsed or NULL
3226
 */
3227
3228
static const xmlChar *
3229
9.66M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3230
9.66M
    const xmlChar *in, *e;
3231
9.66M
    const xmlChar *ret;
3232
9.66M
    size_t count = 0;
3233
9.66M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3234
0
                       XML_MAX_TEXT_LENGTH :
3235
9.66M
                       XML_MAX_NAME_LENGTH;
3236
3237
#ifdef DEBUG
3238
    nbParseNCName++;
3239
#endif
3240
3241
    /*
3242
     * Accelerator for simple ASCII names
3243
     */
3244
9.66M
    in = ctxt->input->cur;
3245
9.66M
    e = ctxt->input->end;
3246
9.66M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3247
9.66M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3248
9.66M
   (*in == '_')) && (in < e)) {
3249
8.10M
  in++;
3250
106M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3251
106M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3252
106M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3253
106M
          (*in == '_') || (*in == '-') ||
3254
106M
          (*in == '.')) && (in < e))
3255
98.2M
      in++;
3256
8.10M
  if (in >= e)
3257
1.47k
      goto complex;
3258
8.10M
  if ((*in > 0) && (*in < 0x80)) {
3259
7.96M
      count = in - ctxt->input->cur;
3260
7.96M
            if (count > maxLength) {
3261
829
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3262
829
                return(NULL);
3263
829
            }
3264
7.96M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3265
7.96M
      ctxt->input->cur = in;
3266
7.96M
      ctxt->input->col += count;
3267
7.96M
      if (ret == NULL) {
3268
15
          xmlErrMemory(ctxt, NULL);
3269
15
      }
3270
7.96M
      return(ret);
3271
7.96M
  }
3272
8.10M
    }
3273
1.69M
complex:
3274
1.69M
    return(xmlParseNCNameComplex(ctxt));
3275
9.66M
}
3276
3277
/**
3278
 * xmlParseNameAndCompare:
3279
 * @ctxt:  an XML parser context
3280
 *
3281
 * parse an XML name and compares for match
3282
 * (specialized for endtag parsing)
3283
 *
3284
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3285
 * and the name for mismatch
3286
 */
3287
3288
static const xmlChar *
3289
161k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3290
161k
    register const xmlChar *cmp = other;
3291
161k
    register const xmlChar *in;
3292
161k
    const xmlChar *ret;
3293
3294
161k
    GROW;
3295
161k
    if (ctxt->instate == XML_PARSER_EOF)
3296
253
        return(NULL);
3297
3298
161k
    in = ctxt->input->cur;
3299
730k
    while (*in != 0 && *in == *cmp) {
3300
569k
  ++in;
3301
569k
  ++cmp;
3302
569k
    }
3303
161k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3304
  /* success */
3305
150k
  ctxt->input->col += in - ctxt->input->cur;
3306
150k
  ctxt->input->cur = in;
3307
150k
  return (const xmlChar*) 1;
3308
150k
    }
3309
    /* failure (or end of input buffer), check with full function */
3310
10.7k
    ret = xmlParseName (ctxt);
3311
    /* strings coming from the dictionary direct compare possible */
3312
10.7k
    if (ret == other) {
3313
825
  return (const xmlChar*) 1;
3314
825
    }
3315
9.97k
    return ret;
3316
10.7k
}
3317
3318
/**
3319
 * xmlParseStringName:
3320
 * @ctxt:  an XML parser context
3321
 * @str:  a pointer to the string pointer (IN/OUT)
3322
 *
3323
 * parse an XML name.
3324
 *
3325
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3326
 *                  CombiningChar | Extender
3327
 *
3328
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3329
 *
3330
 * [6] Names ::= Name (#x20 Name)*
3331
 *
3332
 * Returns the Name parsed or NULL. The @str pointer
3333
 * is updated to the current location in the string.
3334
 */
3335
3336
static xmlChar *
3337
131k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3338
131k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3339
131k
    const xmlChar *cur = *str;
3340
131k
    int len = 0, l;
3341
131k
    int c;
3342
131k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3343
0
                    XML_MAX_TEXT_LENGTH :
3344
131k
                    XML_MAX_NAME_LENGTH;
3345
3346
#ifdef DEBUG
3347
    nbParseStringName++;
3348
#endif
3349
3350
131k
    c = CUR_SCHAR(cur, l);
3351
131k
    if (!xmlIsNameStartChar(ctxt, c)) {
3352
9.34k
  return(NULL);
3353
9.34k
    }
3354
3355
122k
    COPY_BUF(l,buf,len,c);
3356
122k
    cur += l;
3357
122k
    c = CUR_SCHAR(cur, l);
3358
519k
    while (xmlIsNameChar(ctxt, c)) {
3359
400k
  COPY_BUF(l,buf,len,c);
3360
400k
  cur += l;
3361
400k
  c = CUR_SCHAR(cur, l);
3362
400k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3363
      /*
3364
       * Okay someone managed to make a huge name, so he's ready to pay
3365
       * for the processing speed.
3366
       */
3367
3.80k
      xmlChar *buffer;
3368
3.80k
      int max = len * 2;
3369
3370
3.80k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3371
3.80k
      if (buffer == NULL) {
3372
2
          xmlErrMemory(ctxt, NULL);
3373
2
    return(NULL);
3374
2
      }
3375
3.80k
      memcpy(buffer, buf, len);
3376
3.02M
      while (xmlIsNameChar(ctxt, c)) {
3377
3.01M
    if (len + 10 > max) {
3378
10.9k
        xmlChar *tmp;
3379
3380
10.9k
        max *= 2;
3381
10.9k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3382
10.9k
        if (tmp == NULL) {
3383
1
      xmlErrMemory(ctxt, NULL);
3384
1
      xmlFree(buffer);
3385
1
      return(NULL);
3386
1
        }
3387
10.9k
        buffer = tmp;
3388
10.9k
    }
3389
3.01M
    COPY_BUF(l,buffer,len,c);
3390
3.01M
    cur += l;
3391
3.01M
    c = CUR_SCHAR(cur, l);
3392
3.01M
                if (len > maxLength) {
3393
87
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3394
87
                    xmlFree(buffer);
3395
87
                    return(NULL);
3396
87
                }
3397
3.01M
      }
3398
3.71k
      buffer[len] = 0;
3399
3.71k
      *str = cur;
3400
3.71k
      return(buffer);
3401
3.80k
  }
3402
400k
    }
3403
118k
    if (len > maxLength) {
3404
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3405
0
        return(NULL);
3406
0
    }
3407
118k
    *str = cur;
3408
118k
    return(xmlStrndup(buf, len));
3409
118k
}
3410
3411
/**
3412
 * xmlParseNmtoken:
3413
 * @ctxt:  an XML parser context
3414
 *
3415
 * DEPRECATED: Internal function, don't use.
3416
 *
3417
 * parse an XML Nmtoken.
3418
 *
3419
 * [7] Nmtoken ::= (NameChar)+
3420
 *
3421
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3422
 *
3423
 * Returns the Nmtoken parsed or NULL
3424
 */
3425
3426
xmlChar *
3427
29.5k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3428
29.5k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3429
29.5k
    int len = 0, l;
3430
29.5k
    int c;
3431
29.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3432
0
                    XML_MAX_TEXT_LENGTH :
3433
29.5k
                    XML_MAX_NAME_LENGTH;
3434
3435
#ifdef DEBUG
3436
    nbParseNmToken++;
3437
#endif
3438
3439
29.5k
    c = CUR_CHAR(l);
3440
3441
278k
    while (xmlIsNameChar(ctxt, c)) {
3442
251k
  COPY_BUF(l,buf,len,c);
3443
251k
  NEXTL(l);
3444
251k
  c = CUR_CHAR(l);
3445
251k
  if (len >= XML_MAX_NAMELEN) {
3446
      /*
3447
       * Okay someone managed to make a huge token, so he's ready to pay
3448
       * for the processing speed.
3449
       */
3450
2.50k
      xmlChar *buffer;
3451
2.50k
      int max = len * 2;
3452
3453
2.50k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3454
2.50k
      if (buffer == NULL) {
3455
11
          xmlErrMemory(ctxt, NULL);
3456
11
    return(NULL);
3457
11
      }
3458
2.49k
      memcpy(buffer, buf, len);
3459
48.7M
      while (xmlIsNameChar(ctxt, c)) {
3460
48.7M
    if (len + 10 > max) {
3461
10.1k
        xmlChar *tmp;
3462
3463
10.1k
        max *= 2;
3464
10.1k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3465
10.1k
        if (tmp == NULL) {
3466
1
      xmlErrMemory(ctxt, NULL);
3467
1
      xmlFree(buffer);
3468
1
      return(NULL);
3469
1
        }
3470
10.1k
        buffer = tmp;
3471
10.1k
    }
3472
48.7M
    COPY_BUF(l,buffer,len,c);
3473
48.7M
                if (len > maxLength) {
3474
999
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3475
999
                    xmlFree(buffer);
3476
999
                    return(NULL);
3477
999
                }
3478
48.7M
    NEXTL(l);
3479
48.7M
    c = CUR_CHAR(l);
3480
48.7M
      }
3481
1.49k
      buffer[len] = 0;
3482
1.49k
            if (ctxt->instate == XML_PARSER_EOF) {
3483
418
                xmlFree(buffer);
3484
418
                return(NULL);
3485
418
            }
3486
1.07k
      return(buffer);
3487
1.49k
  }
3488
251k
    }
3489
27.0k
    if (ctxt->instate == XML_PARSER_EOF)
3490
616
        return(NULL);
3491
26.4k
    if (len == 0)
3492
11.4k
        return(NULL);
3493
14.9k
    if (len > maxLength) {
3494
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3495
0
        return(NULL);
3496
0
    }
3497
14.9k
    return(xmlStrndup(buf, len));
3498
14.9k
}
3499
3500
/**
3501
 * xmlParseEntityValue:
3502
 * @ctxt:  an XML parser context
3503
 * @orig:  if non-NULL store a copy of the original entity value
3504
 *
3505
 * DEPRECATED: Internal function, don't use.
3506
 *
3507
 * parse a value for ENTITY declarations
3508
 *
3509
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3510
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3511
 *
3512
 * Returns the EntityValue parsed with reference substituted or NULL
3513
 */
3514
3515
xmlChar *
3516
65.9k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3517
65.9k
    xmlChar *buf = NULL;
3518
65.9k
    int len = 0;
3519
65.9k
    int size = XML_PARSER_BUFFER_SIZE;
3520
65.9k
    int c, l;
3521
65.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3522
0
                    XML_MAX_HUGE_LENGTH :
3523
65.9k
                    XML_MAX_TEXT_LENGTH;
3524
65.9k
    xmlChar stop;
3525
65.9k
    xmlChar *ret = NULL;
3526
65.9k
    const xmlChar *cur = NULL;
3527
65.9k
    xmlParserInputPtr input;
3528
3529
65.9k
    if (RAW == '"') stop = '"';
3530
26.4k
    else if (RAW == '\'') stop = '\'';
3531
0
    else {
3532
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3533
0
  return(NULL);
3534
0
    }
3535
65.9k
    buf = (xmlChar *) xmlMallocAtomic(size);
3536
65.9k
    if (buf == NULL) {
3537
21
  xmlErrMemory(ctxt, NULL);
3538
21
  return(NULL);
3539
21
    }
3540
3541
    /*
3542
     * The content of the entity definition is copied in a buffer.
3543
     */
3544
3545
65.9k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3546
65.9k
    input = ctxt->input;
3547
65.9k
    GROW;
3548
65.9k
    if (ctxt->instate == XML_PARSER_EOF)
3549
202
        goto error;
3550
65.7k
    NEXT;
3551
65.7k
    c = CUR_CHAR(l);
3552
    /*
3553
     * NOTE: 4.4.5 Included in Literal
3554
     * When a parameter entity reference appears in a literal entity
3555
     * value, ... a single or double quote character in the replacement
3556
     * text is always treated as a normal data character and will not
3557
     * terminate the literal.
3558
     * In practice it means we stop the loop only when back at parsing
3559
     * the initial entity and the quote is found
3560
     */
3561
266M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3562
266M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3563
266M
  if (len + 5 >= size) {
3564
46.8k
      xmlChar *tmp;
3565
3566
46.8k
      size *= 2;
3567
46.8k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3568
46.8k
      if (tmp == NULL) {
3569
2
    xmlErrMemory(ctxt, NULL);
3570
2
                goto error;
3571
2
      }
3572
46.8k
      buf = tmp;
3573
46.8k
  }
3574
266M
  COPY_BUF(l,buf,len,c);
3575
266M
  NEXTL(l);
3576
3577
266M
  GROW;
3578
266M
  c = CUR_CHAR(l);
3579
266M
  if (c == 0) {
3580
1.20k
      GROW;
3581
1.20k
      c = CUR_CHAR(l);
3582
1.20k
  }
3583
3584
266M
        if (len > maxLength) {
3585
1
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3586
1
                           "entity value too long\n");
3587
1
            goto error;
3588
1
        }
3589
266M
    }
3590
65.7k
    buf[len] = 0;
3591
65.7k
    if (ctxt->instate == XML_PARSER_EOF)
3592
572
        goto error;
3593
65.1k
    if (c != stop) {
3594
1.14k
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3595
1.14k
        goto error;
3596
1.14k
    }
3597
64.0k
    NEXT;
3598
3599
    /*
3600
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3601
     * reference constructs. Note Charref will be handled in
3602
     * xmlStringDecodeEntities()
3603
     */
3604
64.0k
    cur = buf;
3605
691M
    while (*cur != 0) { /* non input consuming */
3606
691M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3607
36.4k
      xmlChar *name;
3608
36.4k
      xmlChar tmp = *cur;
3609
36.4k
            int nameOk = 0;
3610
3611
36.4k
      cur++;
3612
36.4k
      name = xmlParseStringName(ctxt, &cur);
3613
36.4k
            if (name != NULL) {
3614
34.8k
                nameOk = 1;
3615
34.8k
                xmlFree(name);
3616
34.8k
            }
3617
36.4k
            if ((nameOk == 0) || (*cur != ';')) {
3618
4.38k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3619
4.38k
      "EntityValue: '%c' forbidden except for entities references\n",
3620
4.38k
                            tmp);
3621
4.38k
                goto error;
3622
4.38k
      }
3623
32.0k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3624
32.0k
    (ctxt->inputNr == 1)) {
3625
239
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3626
239
                goto error;
3627
239
      }
3628
31.8k
      if (*cur == 0)
3629
0
          break;
3630
31.8k
  }
3631
691M
  cur++;
3632
691M
    }
3633
3634
    /*
3635
     * Then PEReference entities are substituted.
3636
     *
3637
     * NOTE: 4.4.7 Bypassed
3638
     * When a general entity reference appears in the EntityValue in
3639
     * an entity declaration, it is bypassed and left as is.
3640
     * so XML_SUBSTITUTE_REF is not set here.
3641
     */
3642
59.3k
    ++ctxt->depth;
3643
59.3k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3644
59.3k
                                     0, 0, 0, /* check */ 1);
3645
59.3k
    --ctxt->depth;
3646
3647
59.3k
    if (orig != NULL) {
3648
59.3k
        *orig = buf;
3649
59.3k
        buf = NULL;
3650
59.3k
    }
3651
3652
65.9k
error:
3653
65.9k
    if (buf != NULL)
3654
6.54k
        xmlFree(buf);
3655
65.9k
    return(ret);
3656
59.3k
}
3657
3658
/**
3659
 * xmlParseAttValueComplex:
3660
 * @ctxt:  an XML parser context
3661
 * @len:   the resulting attribute len
3662
 * @normalize:  whether to apply the inner normalization
3663
 *
3664
 * parse a value for an attribute, this is the fallback function
3665
 * of xmlParseAttValue() when the attribute parsing requires handling
3666
 * of non-ASCII characters, or normalization compaction.
3667
 *
3668
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3669
 */
3670
static xmlChar *
3671
446k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3672
446k
    xmlChar limit = 0;
3673
446k
    xmlChar *buf = NULL;
3674
446k
    xmlChar *rep = NULL;
3675
446k
    size_t len = 0;
3676
446k
    size_t buf_size = 0;
3677
446k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3678
0
                       XML_MAX_HUGE_LENGTH :
3679
446k
                       XML_MAX_TEXT_LENGTH;
3680
446k
    int c, l, in_space = 0;
3681
446k
    xmlChar *current = NULL;
3682
446k
    xmlEntityPtr ent;
3683
3684
446k
    if (NXT(0) == '"') {
3685
425k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3686
425k
  limit = '"';
3687
425k
        NEXT;
3688
425k
    } else if (NXT(0) == '\'') {
3689
21.5k
  limit = '\'';
3690
21.5k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3691
21.5k
        NEXT;
3692
21.5k
    } else {
3693
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3694
0
  return(NULL);
3695
0
    }
3696
3697
    /*
3698
     * allocate a translation buffer.
3699
     */
3700
446k
    buf_size = XML_PARSER_BUFFER_SIZE;
3701
446k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3702
446k
    if (buf == NULL) goto mem_error;
3703
3704
    /*
3705
     * OK loop until we reach one of the ending char or a size limit.
3706
     */
3707
446k
    c = CUR_CHAR(l);
3708
117M
    while (((NXT(0) != limit) && /* checked */
3709
117M
            (IS_CHAR(c)) && (c != '<')) &&
3710
117M
            (ctxt->instate != XML_PARSER_EOF)) {
3711
117M
  if (c == '&') {
3712
283k
      in_space = 0;
3713
283k
      if (NXT(1) == '#') {
3714
169k
    int val = xmlParseCharRef(ctxt);
3715
3716
169k
    if (val == '&') {
3717
8.38k
        if (ctxt->replaceEntities) {
3718
8.38k
      if (len + 10 > buf_size) {
3719
225
          growBuffer(buf, 10);
3720
225
      }
3721
8.38k
      buf[len++] = '&';
3722
8.38k
        } else {
3723
      /*
3724
       * The reparsing will be done in xmlStringGetNodeList()
3725
       * called by the attribute() function in SAX.c
3726
       */
3727
0
      if (len + 10 > buf_size) {
3728
0
          growBuffer(buf, 10);
3729
0
      }
3730
0
      buf[len++] = '&';
3731
0
      buf[len++] = '#';
3732
0
      buf[len++] = '3';
3733
0
      buf[len++] = '8';
3734
0
      buf[len++] = ';';
3735
0
        }
3736
160k
    } else if (val != 0) {
3737
147k
        if (len + 10 > buf_size) {
3738
477
      growBuffer(buf, 10);
3739
477
        }
3740
147k
        len += xmlCopyChar(0, &buf[len], val);
3741
147k
    }
3742
169k
      } else {
3743
114k
    ent = xmlParseEntityRef(ctxt);
3744
114k
    if ((ent != NULL) &&
3745
114k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3746
10.4k
        if (len + 10 > buf_size) {
3747
163
      growBuffer(buf, 10);
3748
163
        }
3749
10.4k
        if ((ctxt->replaceEntities == 0) &&
3750
10.4k
            (ent->content[0] == '&')) {
3751
9.53k
      buf[len++] = '&';
3752
9.53k
      buf[len++] = '#';
3753
9.53k
      buf[len++] = '3';
3754
9.53k
      buf[len++] = '8';
3755
9.53k
      buf[len++] = ';';
3756
9.53k
        } else {
3757
910
      buf[len++] = ent->content[0];
3758
910
        }
3759
103k
    } else if ((ent != NULL) &&
3760
103k
               (ctxt->replaceEntities != 0)) {
3761
25.7k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3762
25.7k
                        if (xmlParserEntityCheck(ctxt, ent->length))
3763
115
                            goto error;
3764
3765
25.6k
      ++ctxt->depth;
3766
25.6k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
3767
25.6k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
3768
25.6k
                                /* check */ 1);
3769
25.6k
      --ctxt->depth;
3770
25.6k
      if (rep != NULL) {
3771
20.0k
          current = rep;
3772
899M
          while (*current != 0) { /* non input consuming */
3773
899M
                                if ((*current == 0xD) || (*current == 0xA) ||
3774
899M
                                    (*current == 0x9)) {
3775
27.6k
                                    buf[len++] = 0x20;
3776
27.6k
                                    current++;
3777
27.6k
                                } else
3778
899M
                                    buf[len++] = *current++;
3779
899M
        if (len + 10 > buf_size) {
3780
43.0k
            growBuffer(buf, 10);
3781
43.0k
        }
3782
899M
          }
3783
20.0k
          xmlFree(rep);
3784
20.0k
          rep = NULL;
3785
20.0k
      }
3786
25.6k
        } else {
3787
0
      if (len + 10 > buf_size) {
3788
0
          growBuffer(buf, 10);
3789
0
      }
3790
0
      if (ent->content != NULL)
3791
0
          buf[len++] = ent->content[0];
3792
0
        }
3793
77.9k
    } else if (ent != NULL) {
3794
7.80k
        int i = xmlStrlen(ent->name);
3795
7.80k
        const xmlChar *cur = ent->name;
3796
3797
        /*
3798
                     * We also check for recursion and amplification
3799
                     * when entities are not substituted. They're
3800
                     * often expanded later.
3801
         */
3802
7.80k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3803
7.80k
      (ent->content != NULL)) {
3804
1.11k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
3805
36
                            unsigned long oldCopy = ctxt->sizeentcopy;
3806
3807
36
                            ctxt->sizeentcopy = ent->length;
3808
3809
36
                            ++ctxt->depth;
3810
36
                            rep = xmlStringDecodeEntitiesInt(ctxt,
3811
36
                                    ent->content, ent->length,
3812
36
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
3813
36
                                    /* check */ 1);
3814
36
                            --ctxt->depth;
3815
3816
                            /*
3817
                             * If we're parsing DTD content, the entity
3818
                             * might reference other entities which
3819
                             * weren't defined yet, so the check isn't
3820
                             * reliable.
3821
                             */
3822
36
                            if (ctxt->inSubset == 0) {
3823
36
                                ent->flags |= XML_ENT_CHECKED;
3824
36
                                ent->expandedSize = ctxt->sizeentcopy;
3825
36
                            }
3826
3827
36
                            if (rep != NULL) {
3828
36
                                xmlFree(rep);
3829
36
                                rep = NULL;
3830
36
                            } else {
3831
0
                                ent->content[0] = 0;
3832
0
                            }
3833
3834
36
                            if (xmlParserEntityCheck(ctxt, oldCopy))
3835
0
                                goto error;
3836
1.08k
                        } else {
3837
1.08k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
3838
23
                                goto error;
3839
1.08k
                        }
3840
1.11k
        }
3841
3842
        /*
3843
         * Just output the reference
3844
         */
3845
7.78k
        buf[len++] = '&';
3846
7.91k
        while (len + i + 10 > buf_size) {
3847
266
      growBuffer(buf, i + 10);
3848
266
        }
3849
15.5k
        for (;i > 0;i--)
3850
7.78k
      buf[len++] = *cur++;
3851
7.78k
        buf[len++] = ';';
3852
7.78k
    }
3853
114k
      }
3854
116M
  } else {
3855
116M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3856
29.3M
          if ((len != 0) || (!normalize)) {
3857
29.2M
        if ((!normalize) || (!in_space)) {
3858
29.2M
      COPY_BUF(l,buf,len,0x20);
3859
29.3M
      while (len + 10 > buf_size) {
3860
93.3k
          growBuffer(buf, 10);
3861
93.3k
      }
3862
29.2M
        }
3863
29.2M
        in_space = 1;
3864
29.2M
    }
3865
87.5M
      } else {
3866
87.5M
          in_space = 0;
3867
87.5M
    COPY_BUF(l,buf,len,c);
3868
87.5M
    if (len + 10 > buf_size) {
3869
340k
        growBuffer(buf, 10);
3870
340k
    }
3871
87.5M
      }
3872
116M
      NEXTL(l);
3873
116M
  }
3874
117M
  GROW;
3875
117M
  c = CUR_CHAR(l);
3876
117M
        if (len > maxLength) {
3877
3
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3878
3
                           "AttValue length too long\n");
3879
3
            goto mem_error;
3880
3
        }
3881
117M
    }
3882
446k
    if (ctxt->instate == XML_PARSER_EOF)
3883
676
        goto error;
3884
3885
445k
    if ((in_space) && (normalize)) {
3886
7.18k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
3887
2.82k
    }
3888
445k
    buf[len] = 0;
3889
445k
    if (RAW == '<') {
3890
25.1k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3891
420k
    } else if (RAW != limit) {
3892
11.9k
  if ((c != 0) && (!IS_CHAR(c))) {
3893
6.03k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3894
6.03k
         "invalid character in attribute value\n");
3895
6.03k
  } else {
3896
5.93k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3897
5.93k
         "AttValue: ' expected\n");
3898
5.93k
        }
3899
11.9k
    } else
3900
408k
  NEXT;
3901
3902
445k
    if (attlen != NULL) *attlen = len;
3903
445k
    return(buf);
3904
3905
135
mem_error:
3906
135
    xmlErrMemory(ctxt, NULL);
3907
949
error:
3908
949
    if (buf != NULL)
3909
836
        xmlFree(buf);
3910
949
    if (rep != NULL)
3911
5
        xmlFree(rep);
3912
949
    return(NULL);
3913
135
}
3914
3915
/**
3916
 * xmlParseAttValue:
3917
 * @ctxt:  an XML parser context
3918
 *
3919
 * DEPRECATED: Internal function, don't use.
3920
 *
3921
 * parse a value for an attribute
3922
 * Note: the parser won't do substitution of entities here, this
3923
 * will be handled later in xmlStringGetNodeList
3924
 *
3925
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3926
 *                   "'" ([^<&'] | Reference)* "'"
3927
 *
3928
 * 3.3.3 Attribute-Value Normalization:
3929
 * Before the value of an attribute is passed to the application or
3930
 * checked for validity, the XML processor must normalize it as follows:
3931
 * - a character reference is processed by appending the referenced
3932
 *   character to the attribute value
3933
 * - an entity reference is processed by recursively processing the
3934
 *   replacement text of the entity
3935
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3936
 *   appending #x20 to the normalized value, except that only a single
3937
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
3938
 *   parsed entity or the literal entity value of an internal parsed entity
3939
 * - other characters are processed by appending them to the normalized value
3940
 * If the declared value is not CDATA, then the XML processor must further
3941
 * process the normalized attribute value by discarding any leading and
3942
 * trailing space (#x20) characters, and by replacing sequences of space
3943
 * (#x20) characters by a single space (#x20) character.
3944
 * All attributes for which no declaration has been read should be treated
3945
 * by a non-validating parser as if declared CDATA.
3946
 *
3947
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3948
 */
3949
3950
3951
xmlChar *
3952
90.4k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3953
90.4k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3954
90.4k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3955
90.4k
}
3956
3957
/**
3958
 * xmlParseSystemLiteral:
3959
 * @ctxt:  an XML parser context
3960
 *
3961
 * DEPRECATED: Internal function, don't use.
3962
 *
3963
 * parse an XML Literal
3964
 *
3965
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3966
 *
3967
 * Returns the SystemLiteral parsed or NULL
3968
 */
3969
3970
xmlChar *
3971
39.1k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3972
39.1k
    xmlChar *buf = NULL;
3973
39.1k
    int len = 0;
3974
39.1k
    int size = XML_PARSER_BUFFER_SIZE;
3975
39.1k
    int cur, l;
3976
39.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3977
0
                    XML_MAX_TEXT_LENGTH :
3978
39.1k
                    XML_MAX_NAME_LENGTH;
3979
39.1k
    xmlChar stop;
3980
39.1k
    int state = ctxt->instate;
3981
3982
39.1k
    if (RAW == '"') {
3983
19.2k
        NEXT;
3984
19.2k
  stop = '"';
3985
19.8k
    } else if (RAW == '\'') {
3986
18.1k
        NEXT;
3987
18.1k
  stop = '\'';
3988
18.1k
    } else {
3989
1.69k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3990
1.69k
  return(NULL);
3991
1.69k
    }
3992
3993
37.4k
    buf = (xmlChar *) xmlMallocAtomic(size);
3994
37.4k
    if (buf == NULL) {
3995
13
        xmlErrMemory(ctxt, NULL);
3996
13
  return(NULL);
3997
13
    }
3998
37.4k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3999
37.4k
    cur = CUR_CHAR(l);
4000
6.85M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4001
6.81M
  if (len + 5 >= size) {
4002
15.1k
      xmlChar *tmp;
4003
4004
15.1k
      size *= 2;
4005
15.1k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4006
15.1k
      if (tmp == NULL) {
4007
3
          xmlFree(buf);
4008
3
    xmlErrMemory(ctxt, NULL);
4009
3
    ctxt->instate = (xmlParserInputState) state;
4010
3
    return(NULL);
4011
3
      }
4012
15.1k
      buf = tmp;
4013
15.1k
  }
4014
6.81M
  COPY_BUF(l,buf,len,cur);
4015
6.81M
        if (len > maxLength) {
4016
101
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4017
101
            xmlFree(buf);
4018
101
            ctxt->instate = (xmlParserInputState) state;
4019
101
            return(NULL);
4020
101
        }
4021
6.81M
  NEXTL(l);
4022
6.81M
  cur = CUR_CHAR(l);
4023
6.81M
    }
4024
37.3k
    buf[len] = 0;
4025
37.3k
    if (ctxt->instate == XML_PARSER_EOF) {
4026
280
        xmlFree(buf);
4027
280
        return(NULL);
4028
280
    }
4029
37.0k
    ctxt->instate = (xmlParserInputState) state;
4030
37.0k
    if (!IS_CHAR(cur)) {
4031
748
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4032
36.3k
    } else {
4033
36.3k
  NEXT;
4034
36.3k
    }
4035
37.0k
    return(buf);
4036
37.3k
}
4037
4038
/**
4039
 * xmlParsePubidLiteral:
4040
 * @ctxt:  an XML parser context
4041
 *
4042
 * DEPRECATED: Internal function, don't use.
4043
 *
4044
 * parse an XML public literal
4045
 *
4046
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4047
 *
4048
 * Returns the PubidLiteral parsed or NULL.
4049
 */
4050
4051
xmlChar *
4052
26.9k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4053
26.9k
    xmlChar *buf = NULL;
4054
26.9k
    int len = 0;
4055
26.9k
    int size = XML_PARSER_BUFFER_SIZE;
4056
26.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4057
0
                    XML_MAX_TEXT_LENGTH :
4058
26.9k
                    XML_MAX_NAME_LENGTH;
4059
26.9k
    xmlChar cur;
4060
26.9k
    xmlChar stop;
4061
26.9k
    xmlParserInputState oldstate = ctxt->instate;
4062
4063
26.9k
    if (RAW == '"') {
4064
10.8k
        NEXT;
4065
10.8k
  stop = '"';
4066
16.0k
    } else if (RAW == '\'') {
4067
13.4k
        NEXT;
4068
13.4k
  stop = '\'';
4069
13.4k
    } else {
4070
2.67k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4071
2.67k
  return(NULL);
4072
2.67k
    }
4073
24.3k
    buf = (xmlChar *) xmlMallocAtomic(size);
4074
24.3k
    if (buf == NULL) {
4075
6
  xmlErrMemory(ctxt, NULL);
4076
6
  return(NULL);
4077
6
    }
4078
24.2k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4079
24.2k
    cur = CUR;
4080
980k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4081
955k
  if (len + 1 >= size) {
4082
3.33k
      xmlChar *tmp;
4083
4084
3.33k
      size *= 2;
4085
3.33k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4086
3.33k
      if (tmp == NULL) {
4087
1
    xmlErrMemory(ctxt, NULL);
4088
1
    xmlFree(buf);
4089
1
    return(NULL);
4090
1
      }
4091
3.32k
      buf = tmp;
4092
3.32k
  }
4093
955k
  buf[len++] = cur;
4094
955k
        if (len > maxLength) {
4095
4
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4096
4
            xmlFree(buf);
4097
4
            return(NULL);
4098
4
        }
4099
955k
  NEXT;
4100
955k
  cur = CUR;
4101
955k
    }
4102
24.2k
    buf[len] = 0;
4103
24.2k
    if (ctxt->instate == XML_PARSER_EOF) {
4104
298
        xmlFree(buf);
4105
298
        return(NULL);
4106
298
    }
4107
23.9k
    if (cur != stop) {
4108
3.65k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4109
20.3k
    } else {
4110
20.3k
  NEXTL(1);
4111
20.3k
    }
4112
23.9k
    ctxt->instate = oldstate;
4113
23.9k
    return(buf);
4114
24.2k
}
4115
4116
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4117
4118
/*
4119
 * used for the test in the inner loop of the char data testing
4120
 */
4121
static const unsigned char test_char_data[256] = {
4122
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4123
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4124
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4125
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4126
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4127
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4128
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4129
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4130
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4131
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4132
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4133
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4134
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4135
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4136
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4137
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4138
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4139
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4140
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4141
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4142
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4143
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4144
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4145
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4146
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4147
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4148
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4149
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4150
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4151
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4152
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4153
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4154
};
4155
4156
/**
4157
 * xmlParseCharDataInternal:
4158
 * @ctxt:  an XML parser context
4159
 * @partial:  buffer may contain partial UTF-8 sequences
4160
 *
4161
 * Parse character data. Always makes progress if the first char isn't
4162
 * '<' or '&'.
4163
 *
4164
 * The right angle bracket (>) may be represented using the string "&gt;",
4165
 * and must, for compatibility, be escaped using "&gt;" or a character
4166
 * reference when it appears in the string "]]>" in content, when that
4167
 * string is not marking the end of a CDATA section.
4168
 *
4169
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4170
 */
4171
static void
4172
2.36M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4173
2.36M
    const xmlChar *in;
4174
2.36M
    int nbchar = 0;
4175
2.36M
    int line = ctxt->input->line;
4176
2.36M
    int col = ctxt->input->col;
4177
2.36M
    int ccol;
4178
4179
2.36M
    GROW;
4180
    /*
4181
     * Accelerated common case where the input doesn't need to be
4182
     * modified before passing it to the handler.
4183
     */
4184
2.36M
    in = ctxt->input->cur;
4185
2.50M
    do {
4186
3.25M
get_more_space:
4187
5.02M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4188
3.25M
        if (*in == 0xA) {
4189
3.22M
            do {
4190
3.22M
                ctxt->input->line++; ctxt->input->col = 1;
4191
3.22M
                in++;
4192
3.22M
            } while (*in == 0xA);
4193
756k
            goto get_more_space;
4194
756k
        }
4195
2.50M
        if (*in == '<') {
4196
655k
            nbchar = in - ctxt->input->cur;
4197
655k
            if (nbchar > 0) {
4198
654k
                const xmlChar *tmp = ctxt->input->cur;
4199
654k
                ctxt->input->cur = in;
4200
4201
654k
                if ((ctxt->sax != NULL) &&
4202
654k
                    (ctxt->sax->ignorableWhitespace !=
4203
654k
                     ctxt->sax->characters)) {
4204
0
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4205
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4206
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4207
0
                                                   tmp, nbchar);
4208
0
                    } else {
4209
0
                        if (ctxt->sax->characters != NULL)
4210
0
                            ctxt->sax->characters(ctxt->userData,
4211
0
                                                  tmp, nbchar);
4212
0
                        if (*ctxt->space == -1)
4213
0
                            *ctxt->space = -2;
4214
0
                    }
4215
654k
                } else if ((ctxt->sax != NULL) &&
4216
654k
                           (ctxt->sax->characters != NULL)) {
4217
654k
                    ctxt->sax->characters(ctxt->userData,
4218
654k
                                          tmp, nbchar);
4219
654k
                }
4220
654k
            }
4221
655k
            return;
4222
655k
        }
4223
4224
2.34M
get_more:
4225
2.34M
        ccol = ctxt->input->col;
4226
64.4M
        while (test_char_data[*in]) {
4227
62.0M
            in++;
4228
62.0M
            ccol++;
4229
62.0M
        }
4230
2.34M
        ctxt->input->col = ccol;
4231
2.34M
        if (*in == 0xA) {
4232
11.0M
            do {
4233
11.0M
                ctxt->input->line++; ctxt->input->col = 1;
4234
11.0M
                in++;
4235
11.0M
            } while (*in == 0xA);
4236
286k
            goto get_more;
4237
286k
        }
4238
2.05M
        if (*in == ']') {
4239
219k
            if ((in[1] == ']') && (in[2] == '>')) {
4240
5.95k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4241
5.95k
                if (ctxt->instate != XML_PARSER_EOF)
4242
5.95k
                    ctxt->input->cur = in + 1;
4243
5.95k
                return;
4244
5.95k
            }
4245
214k
            in++;
4246
214k
            ctxt->input->col++;
4247
214k
            goto get_more;
4248
219k
        }
4249
1.83M
        nbchar = in - ctxt->input->cur;
4250
1.83M
        if (nbchar > 0) {
4251
1.37M
            if ((ctxt->sax != NULL) &&
4252
1.37M
                (ctxt->sax->ignorableWhitespace !=
4253
1.37M
                 ctxt->sax->characters) &&
4254
1.37M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4255
0
                const xmlChar *tmp = ctxt->input->cur;
4256
0
                ctxt->input->cur = in;
4257
4258
0
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4259
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4260
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4261
0
                                                       tmp, nbchar);
4262
0
                } else {
4263
0
                    if (ctxt->sax->characters != NULL)
4264
0
                        ctxt->sax->characters(ctxt->userData,
4265
0
                                              tmp, nbchar);
4266
0
                    if (*ctxt->space == -1)
4267
0
                        *ctxt->space = -2;
4268
0
                }
4269
0
                line = ctxt->input->line;
4270
0
                col = ctxt->input->col;
4271
1.37M
            } else if (ctxt->sax != NULL) {
4272
1.37M
                if (ctxt->sax->characters != NULL)
4273
1.37M
                    ctxt->sax->characters(ctxt->userData,
4274
1.37M
                                          ctxt->input->cur, nbchar);
4275
1.37M
                line = ctxt->input->line;
4276
1.37M
                col = ctxt->input->col;
4277
1.37M
            }
4278
1.37M
        }
4279
1.83M
        ctxt->input->cur = in;
4280
1.83M
        if (*in == 0xD) {
4281
159k
            in++;
4282
159k
            if (*in == 0xA) {
4283
142k
                ctxt->input->cur = in;
4284
142k
                in++;
4285
142k
                ctxt->input->line++; ctxt->input->col = 1;
4286
142k
                continue; /* while */
4287
142k
            }
4288
16.9k
            in--;
4289
16.9k
        }
4290
1.69M
        if (*in == '<') {
4291
651k
            return;
4292
651k
        }
4293
1.04M
        if (*in == '&') {
4294
62.4k
            return;
4295
62.4k
        }
4296
983k
        SHRINK;
4297
983k
        GROW;
4298
983k
        if (ctxt->instate == XML_PARSER_EOF)
4299
389
            return;
4300
983k
        in = ctxt->input->cur;
4301
1.12M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4302
1.12M
             (*in == 0x09) || (*in == 0x0a));
4303
989k
    ctxt->input->line = line;
4304
989k
    ctxt->input->col = col;
4305
989k
    xmlParseCharDataComplex(ctxt, partial);
4306
989k
}
4307
4308
/**
4309
 * xmlParseCharDataComplex:
4310
 * @ctxt:  an XML parser context
4311
 * @partial:  buffer may contain partial UTF-8 sequences
4312
 *
4313
 * Always makes progress if the first char isn't '<' or '&'.
4314
 *
4315
 * Parse a CharData section. This is the fallback function
4316
 * of xmlParseCharData() when the parsing requires handling
4317
 * of non-ASCII characters.
4318
 */
4319
static void
4320
989k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4321
989k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4322
989k
    int nbchar = 0;
4323
989k
    int cur, l;
4324
4325
989k
    cur = CUR_CHAR(l);
4326
379M
    while ((cur != '<') && /* checked */
4327
379M
           (cur != '&') &&
4328
379M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4329
378M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4330
1.62k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4331
1.62k
  }
4332
378M
  COPY_BUF(l,buf,nbchar,cur);
4333
  /* move current position before possibly calling ctxt->sax->characters */
4334
378M
  NEXTL(l);
4335
378M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4336
2.80M
      buf[nbchar] = 0;
4337
4338
      /*
4339
       * OK the segment is to be consumed as chars.
4340
       */
4341
2.80M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4342
1.19M
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4343
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4344
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4345
0
                                     buf, nbchar);
4346
1.19M
    } else {
4347
1.19M
        if (ctxt->sax->characters != NULL)
4348
1.19M
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4349
1.19M
        if ((ctxt->sax->characters !=
4350
1.19M
             ctxt->sax->ignorableWhitespace) &&
4351
1.19M
      (*ctxt->space == -1))
4352
0
      *ctxt->space = -2;
4353
1.19M
    }
4354
1.19M
      }
4355
2.80M
      nbchar = 0;
4356
            /* something really bad happened in the SAX callback */
4357
2.80M
            if (ctxt->instate != XML_PARSER_CONTENT)
4358
8
                return;
4359
2.80M
            SHRINK;
4360
2.80M
  }
4361
378M
  cur = CUR_CHAR(l);
4362
378M
    }
4363
989k
    if (ctxt->instate == XML_PARSER_EOF)
4364
1.63k
        return;
4365
988k
    if (nbchar != 0) {
4366
820k
        buf[nbchar] = 0;
4367
  /*
4368
   * OK the segment is to be consumed as chars.
4369
   */
4370
820k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4371
617k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4372
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4373
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4374
617k
      } else {
4375
617k
    if (ctxt->sax->characters != NULL)
4376
617k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4377
617k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4378
617k
        (*ctxt->space == -1))
4379
0
        *ctxt->space = -2;
4380
617k
      }
4381
617k
  }
4382
820k
    }
4383
    /*
4384
     * cur == 0 can mean
4385
     *
4386
     * - XML_PARSER_EOF or memory error. This is checked above.
4387
     * - An actual 0 character.
4388
     * - End of buffer.
4389
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4390
     */
4391
988k
    if (ctxt->input->cur < ctxt->input->end) {
4392
978k
        if ((cur == 0) && (CUR != 0)) {
4393
566
            if (partial == 0) {
4394
566
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4395
566
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4396
566
                NEXTL(1);
4397
566
            }
4398
978k
        } else if ((cur != '<') && (cur != '&')) {
4399
            /* Generate the error and skip the offending character */
4400
188k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4401
188k
                              "PCDATA invalid Char value %d\n", cur);
4402
188k
            NEXTL(l);
4403
188k
        }
4404
978k
    }
4405
988k
}
4406
4407
/**
4408
 * xmlParseCharData:
4409
 * @ctxt:  an XML parser context
4410
 * @cdata:  unused
4411
 *
4412
 * DEPRECATED: Internal function, don't use.
4413
 */
4414
void
4415
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4416
0
    xmlParseCharDataInternal(ctxt, 0);
4417
0
}
4418
4419
/**
4420
 * xmlParseExternalID:
4421
 * @ctxt:  an XML parser context
4422
 * @publicID:  a xmlChar** receiving PubidLiteral
4423
 * @strict: indicate whether we should restrict parsing to only
4424
 *          production [75], see NOTE below
4425
 *
4426
 * DEPRECATED: Internal function, don't use.
4427
 *
4428
 * Parse an External ID or a Public ID
4429
 *
4430
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4431
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4432
 *
4433
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4434
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4435
 *
4436
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4437
 *
4438
 * Returns the SystemLiteral; in the second
4439
 *                case publicID receives PubidLiteral; if strict is off
4440
 *                it is possible to return NULL and have publicID set.
4441
 */
4442
4443
xmlChar *
4444
111k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4445
111k
    xmlChar *URI = NULL;
4446
4447
111k
    *publicID = NULL;
4448
111k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4449
18.8k
        SKIP(6);
4450
18.8k
  if (SKIP_BLANKS == 0) {
4451
280
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4452
280
                     "Space required after 'SYSTEM'\n");
4453
280
  }
4454
18.8k
  URI = xmlParseSystemLiteral(ctxt);
4455
18.8k
  if (URI == NULL) {
4456
288
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4457
288
        }
4458
93.1k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4459
26.9k
        SKIP(6);
4460
26.9k
  if (SKIP_BLANKS == 0) {
4461
2.49k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4462
2.49k
        "Space required after 'PUBLIC'\n");
4463
2.49k
  }
4464
26.9k
  *publicID = xmlParsePubidLiteral(ctxt);
4465
26.9k
  if (*publicID == NULL) {
4466
2.98k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4467
2.98k
  }
4468
26.9k
  if (strict) {
4469
      /*
4470
       * We don't handle [83] so "S SystemLiteral" is required.
4471
       */
4472
18.7k
      if (SKIP_BLANKS == 0) {
4473
2.02k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4474
2.02k
      "Space required after the Public Identifier\n");
4475
2.02k
      }
4476
18.7k
  } else {
4477
      /*
4478
       * We handle [83] so we return immediately, if
4479
       * "S SystemLiteral" is not detected. We skip blanks if no
4480
             * system literal was found, but this is harmless since we must
4481
             * be at the end of a NotationDecl.
4482
       */
4483
8.18k
      if (SKIP_BLANKS == 0) return(NULL);
4484
4.43k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4485
4.43k
  }
4486
20.3k
  URI = xmlParseSystemLiteral(ctxt);
4487
20.3k
  if (URI == NULL) {
4488
1.80k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4489
1.80k
        }
4490
20.3k
    }
4491
105k
    return(URI);
4492
111k
}
4493
4494
/**
4495
 * xmlParseCommentComplex:
4496
 * @ctxt:  an XML parser context
4497
 * @buf:  the already parsed part of the buffer
4498
 * @len:  number of bytes in the buffer
4499
 * @size:  allocated size of the buffer
4500
 *
4501
 * Skip an XML (SGML) comment <!-- .... -->
4502
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4503
 *  must not occur within comments. "
4504
 * This is the slow routine in case the accelerator for ascii didn't work
4505
 *
4506
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4507
 */
4508
static void
4509
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4510
35.0k
                       size_t len, size_t size) {
4511
35.0k
    int q, ql;
4512
35.0k
    int r, rl;
4513
35.0k
    int cur, l;
4514
35.0k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4515
0
                       XML_MAX_HUGE_LENGTH :
4516
35.0k
                       XML_MAX_TEXT_LENGTH;
4517
35.0k
    int inputid;
4518
4519
35.0k
    inputid = ctxt->input->id;
4520
4521
35.0k
    if (buf == NULL) {
4522
4.75k
        len = 0;
4523
4.75k
  size = XML_PARSER_BUFFER_SIZE;
4524
4.75k
  buf = (xmlChar *) xmlMallocAtomic(size);
4525
4.75k
  if (buf == NULL) {
4526
96
      xmlErrMemory(ctxt, NULL);
4527
96
      return;
4528
96
  }
4529
4.75k
    }
4530
34.9k
    q = CUR_CHAR(ql);
4531
34.9k
    if (q == 0)
4532
1.51k
        goto not_terminated;
4533
33.4k
    if (!IS_CHAR(q)) {
4534
1.43k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4535
1.43k
                          "xmlParseComment: invalid xmlChar value %d\n",
4536
1.43k
                    q);
4537
1.43k
  xmlFree (buf);
4538
1.43k
  return;
4539
1.43k
    }
4540
32.0k
    NEXTL(ql);
4541
32.0k
    r = CUR_CHAR(rl);
4542
32.0k
    if (r == 0)
4543
265
        goto not_terminated;
4544
31.7k
    if (!IS_CHAR(r)) {
4545
8.09k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4546
8.09k
                          "xmlParseComment: invalid xmlChar value %d\n",
4547
8.09k
                    r);
4548
8.09k
  xmlFree (buf);
4549
8.09k
  return;
4550
8.09k
    }
4551
23.6k
    NEXTL(rl);
4552
23.6k
    cur = CUR_CHAR(l);
4553
23.6k
    if (cur == 0)
4554
492
        goto not_terminated;
4555
10.9M
    while (IS_CHAR(cur) && /* checked */
4556
10.9M
           ((cur != '>') ||
4557
10.9M
      (r != '-') || (q != '-'))) {
4558
10.9M
  if ((r == '-') && (q == '-')) {
4559
14.8k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4560
14.8k
  }
4561
10.9M
  if (len + 5 >= size) {
4562
9.04k
      xmlChar *new_buf;
4563
9.04k
            size_t new_size;
4564
4565
9.04k
      new_size = size * 2;
4566
9.04k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4567
9.04k
      if (new_buf == NULL) {
4568
3
    xmlFree (buf);
4569
3
    xmlErrMemory(ctxt, NULL);
4570
3
    return;
4571
3
      }
4572
9.04k
      buf = new_buf;
4573
9.04k
            size = new_size;
4574
9.04k
  }
4575
10.9M
  COPY_BUF(ql,buf,len,q);
4576
10.9M
        if (len > maxLength) {
4577
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4578
0
                         "Comment too big found", NULL);
4579
0
            xmlFree (buf);
4580
0
            return;
4581
0
        }
4582
4583
10.9M
  q = r;
4584
10.9M
  ql = rl;
4585
10.9M
  r = cur;
4586
10.9M
  rl = l;
4587
4588
10.9M
  NEXTL(l);
4589
10.9M
  cur = CUR_CHAR(l);
4590
4591
10.9M
    }
4592
23.1k
    buf[len] = 0;
4593
23.1k
    if (ctxt->instate == XML_PARSER_EOF) {
4594
234
        xmlFree(buf);
4595
234
        return;
4596
234
    }
4597
22.9k
    if (cur == 0) {
4598
1.82k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4599
1.82k
                       "Comment not terminated \n<!--%.50s\n", buf);
4600
21.0k
    } else if (!IS_CHAR(cur)) {
4601
6.33k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4602
6.33k
                          "xmlParseComment: invalid xmlChar value %d\n",
4603
6.33k
                    cur);
4604
14.7k
    } else {
4605
14.7k
  if (inputid != ctxt->input->id) {
4606
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4607
0
               "Comment doesn't start and stop in the same"
4608
0
                           " entity\n");
4609
0
  }
4610
14.7k
        NEXT;
4611
14.7k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4612
14.7k
      (!ctxt->disableSAX))
4613
5.97k
      ctxt->sax->comment(ctxt->userData, buf);
4614
14.7k
    }
4615
22.9k
    xmlFree(buf);
4616
22.9k
    return;
4617
2.27k
not_terminated:
4618
2.27k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4619
2.27k
       "Comment not terminated\n", NULL);
4620
2.27k
    xmlFree(buf);
4621
2.27k
    return;
4622
23.1k
}
4623
4624
/**
4625
 * xmlParseComment:
4626
 * @ctxt:  an XML parser context
4627
 *
4628
 * DEPRECATED: Internal function, don't use.
4629
 *
4630
 * Parse an XML (SGML) comment. Always consumes '<!'.
4631
 *
4632
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4633
 *  must not occur within comments. "
4634
 *
4635
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4636
 */
4637
void
4638
145k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4639
145k
    xmlChar *buf = NULL;
4640
145k
    size_t size = XML_PARSER_BUFFER_SIZE;
4641
145k
    size_t len = 0;
4642
145k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4643
0
                       XML_MAX_HUGE_LENGTH :
4644
145k
                       XML_MAX_TEXT_LENGTH;
4645
145k
    xmlParserInputState state;
4646
145k
    const xmlChar *in;
4647
145k
    size_t nbchar = 0;
4648
145k
    int ccol;
4649
145k
    int inputid;
4650
4651
    /*
4652
     * Check that there is a comment right here.
4653
     */
4654
145k
    if ((RAW != '<') || (NXT(1) != '!'))
4655
0
        return;
4656
145k
    SKIP(2);
4657
145k
    if ((RAW != '-') || (NXT(1) != '-'))
4658
225
        return;
4659
145k
    state = ctxt->instate;
4660
145k
    ctxt->instate = XML_PARSER_COMMENT;
4661
145k
    inputid = ctxt->input->id;
4662
145k
    SKIP(2);
4663
145k
    GROW;
4664
4665
    /*
4666
     * Accelerated common case where input don't need to be
4667
     * modified before passing it to the handler.
4668
     */
4669
145k
    in = ctxt->input->cur;
4670
146k
    do {
4671
146k
  if (*in == 0xA) {
4672
3.33k
      do {
4673
3.33k
    ctxt->input->line++; ctxt->input->col = 1;
4674
3.33k
    in++;
4675
3.33k
      } while (*in == 0xA);
4676
3.03k
  }
4677
200k
get_more:
4678
200k
        ccol = ctxt->input->col;
4679
3.80M
  while (((*in > '-') && (*in <= 0x7F)) ||
4680
3.80M
         ((*in >= 0x20) && (*in < '-')) ||
4681
3.80M
         (*in == 0x09)) {
4682
3.60M
        in++;
4683
3.60M
        ccol++;
4684
3.60M
  }
4685
200k
  ctxt->input->col = ccol;
4686
200k
  if (*in == 0xA) {
4687
21.7k
      do {
4688
21.7k
    ctxt->input->line++; ctxt->input->col = 1;
4689
21.7k
    in++;
4690
21.7k
      } while (*in == 0xA);
4691
7.53k
      goto get_more;
4692
7.53k
  }
4693
192k
  nbchar = in - ctxt->input->cur;
4694
  /*
4695
   * save current set of data
4696
   */
4697
192k
  if (nbchar > 0) {
4698
183k
      if ((ctxt->sax != NULL) &&
4699
183k
    (ctxt->sax->comment != NULL)) {
4700
183k
    if (buf == NULL) {
4701
139k
        if ((*in == '-') && (in[1] == '-'))
4702
105k
            size = nbchar + 1;
4703
33.5k
        else
4704
33.5k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4705
139k
        buf = (xmlChar *) xmlMallocAtomic(size);
4706
139k
        if (buf == NULL) {
4707
281
            xmlErrMemory(ctxt, NULL);
4708
281
      ctxt->instate = state;
4709
281
      return;
4710
281
        }
4711
138k
        len = 0;
4712
138k
    } else if (len + nbchar + 1 >= size) {
4713
3.79k
        xmlChar *new_buf;
4714
3.79k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4715
3.79k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4716
3.79k
        if (new_buf == NULL) {
4717
1
            xmlFree (buf);
4718
1
      xmlErrMemory(ctxt, NULL);
4719
1
      ctxt->instate = state;
4720
1
      return;
4721
1
        }
4722
3.79k
        buf = new_buf;
4723
3.79k
    }
4724
183k
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4725
183k
    len += nbchar;
4726
183k
    buf[len] = 0;
4727
183k
      }
4728
183k
  }
4729
192k
        if (len > maxLength) {
4730
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4731
0
                         "Comment too big found", NULL);
4732
0
            xmlFree (buf);
4733
0
            return;
4734
0
        }
4735
192k
  ctxt->input->cur = in;
4736
192k
  if (*in == 0xA) {
4737
0
      in++;
4738
0
      ctxt->input->line++; ctxt->input->col = 1;
4739
0
  }
4740
192k
  if (*in == 0xD) {
4741
2.82k
      in++;
4742
2.82k
      if (*in == 0xA) {
4743
561
    ctxt->input->cur = in;
4744
561
    in++;
4745
561
    ctxt->input->line++; ctxt->input->col = 1;
4746
561
    goto get_more;
4747
561
      }
4748
2.26k
      in--;
4749
2.26k
  }
4750
191k
  SHRINK;
4751
191k
  GROW;
4752
191k
        if (ctxt->instate == XML_PARSER_EOF) {
4753
236
            xmlFree(buf);
4754
236
            return;
4755
236
        }
4756
191k
  in = ctxt->input->cur;
4757
191k
  if (*in == '-') {
4758
154k
      if (in[1] == '-') {
4759
139k
          if (in[2] == '>') {
4760
109k
        if (ctxt->input->id != inputid) {
4761
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4762
0
                     "comment doesn't start and stop in the"
4763
0
                                       " same entity\n");
4764
0
        }
4765
109k
        SKIP(3);
4766
109k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4767
109k
            (!ctxt->disableSAX)) {
4768
68.9k
      if (buf != NULL)
4769
68.6k
          ctxt->sax->comment(ctxt->userData, buf);
4770
306
      else
4771
306
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4772
68.9k
        }
4773
109k
        if (buf != NULL)
4774
108k
            xmlFree(buf);
4775
109k
        if (ctxt->instate != XML_PARSER_EOF)
4776
109k
      ctxt->instate = state;
4777
109k
        return;
4778
109k
    }
4779
30.0k
    if (buf != NULL) {
4780
28.9k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4781
28.9k
                          "Double hyphen within comment: "
4782
28.9k
                                      "<!--%.50s\n",
4783
28.9k
              buf);
4784
28.9k
    } else
4785
1.12k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4786
1.12k
                          "Double hyphen within comment\n", NULL);
4787
30.0k
                if (ctxt->instate == XML_PARSER_EOF) {
4788
0
                    xmlFree(buf);
4789
0
                    return;
4790
0
                }
4791
30.0k
    in++;
4792
30.0k
    ctxt->input->col++;
4793
30.0k
      }
4794
45.3k
      in++;
4795
45.3k
      ctxt->input->col++;
4796
45.3k
      goto get_more;
4797
154k
  }
4798
191k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4799
35.0k
    xmlParseCommentComplex(ctxt, buf, len, size);
4800
35.0k
    ctxt->instate = state;
4801
35.0k
    return;
4802
145k
}
4803
4804
4805
/**
4806
 * xmlParsePITarget:
4807
 * @ctxt:  an XML parser context
4808
 *
4809
 * DEPRECATED: Internal function, don't use.
4810
 *
4811
 * parse the name of a PI
4812
 *
4813
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4814
 *
4815
 * Returns the PITarget name or NULL
4816
 */
4817
4818
const xmlChar *
4819
125k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4820
125k
    const xmlChar *name;
4821
4822
125k
    name = xmlParseName(ctxt);
4823
125k
    if ((name != NULL) &&
4824
125k
        ((name[0] == 'x') || (name[0] == 'X')) &&
4825
125k
        ((name[1] == 'm') || (name[1] == 'M')) &&
4826
125k
        ((name[2] == 'l') || (name[2] == 'L'))) {
4827
14.5k
  int i;
4828
14.5k
  if ((name[0] == 'x') && (name[1] == 'm') &&
4829
14.5k
      (name[2] == 'l') && (name[3] == 0)) {
4830
3.76k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4831
3.76k
     "XML declaration allowed only at the start of the document\n");
4832
3.76k
      return(name);
4833
10.7k
  } else if (name[3] == 0) {
4834
1.99k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4835
1.99k
      return(name);
4836
1.99k
  }
4837
16.9k
  for (i = 0;;i++) {
4838
16.9k
      if (xmlW3CPIs[i] == NULL) break;
4839
12.9k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4840
4.74k
          return(name);
4841
12.9k
  }
4842
4.01k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4843
4.01k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
4844
4.01k
          NULL, NULL);
4845
4.01k
    }
4846
114k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4847
5.12k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
4848
5.12k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
4849
5.12k
    }
4850
114k
    return(name);
4851
125k
}
4852
4853
#ifdef LIBXML_CATALOG_ENABLED
4854
/**
4855
 * xmlParseCatalogPI:
4856
 * @ctxt:  an XML parser context
4857
 * @catalog:  the PI value string
4858
 *
4859
 * parse an XML Catalog Processing Instruction.
4860
 *
4861
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4862
 *
4863
 * Occurs only if allowed by the user and if happening in the Misc
4864
 * part of the document before any doctype information
4865
 * This will add the given catalog to the parsing context in order
4866
 * to be used if there is a resolution need further down in the document
4867
 */
4868
4869
static void
4870
5.11k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4871
5.11k
    xmlChar *URL = NULL;
4872
5.11k
    const xmlChar *tmp, *base;
4873
5.11k
    xmlChar marker;
4874
4875
5.11k
    tmp = catalog;
4876
5.11k
    while (IS_BLANK_CH(*tmp)) tmp++;
4877
5.11k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4878
1.07k
  goto error;
4879
4.04k
    tmp += 7;
4880
4.04k
    while (IS_BLANK_CH(*tmp)) tmp++;
4881
4.04k
    if (*tmp != '=') {
4882
641
  return;
4883
641
    }
4884
3.40k
    tmp++;
4885
3.40k
    while (IS_BLANK_CH(*tmp)) tmp++;
4886
3.40k
    marker = *tmp;
4887
3.40k
    if ((marker != '\'') && (marker != '"'))
4888
683
  goto error;
4889
2.72k
    tmp++;
4890
2.72k
    base = tmp;
4891
33.2k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
4892
2.72k
    if (*tmp == 0)
4893
235
  goto error;
4894
2.48k
    URL = xmlStrndup(base, tmp - base);
4895
2.48k
    tmp++;
4896
2.48k
    while (IS_BLANK_CH(*tmp)) tmp++;
4897
2.48k
    if (*tmp != 0)
4898
1.30k
  goto error;
4899
4900
1.18k
    if (URL != NULL) {
4901
1.18k
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4902
1.18k
  xmlFree(URL);
4903
1.18k
    }
4904
1.18k
    return;
4905
4906
3.29k
error:
4907
3.29k
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4908
3.29k
            "Catalog PI syntax error: %s\n",
4909
3.29k
      catalog, NULL);
4910
3.29k
    if (URL != NULL)
4911
1.30k
  xmlFree(URL);
4912
3.29k
}
4913
#endif
4914
4915
/**
4916
 * xmlParsePI:
4917
 * @ctxt:  an XML parser context
4918
 *
4919
 * DEPRECATED: Internal function, don't use.
4920
 *
4921
 * parse an XML Processing Instruction.
4922
 *
4923
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4924
 *
4925
 * The processing is transferred to SAX once parsed.
4926
 */
4927
4928
void
4929
125k
xmlParsePI(xmlParserCtxtPtr ctxt) {
4930
125k
    xmlChar *buf = NULL;
4931
125k
    size_t len = 0;
4932
125k
    size_t size = XML_PARSER_BUFFER_SIZE;
4933
125k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4934
0
                       XML_MAX_HUGE_LENGTH :
4935
125k
                       XML_MAX_TEXT_LENGTH;
4936
125k
    int cur, l;
4937
125k
    const xmlChar *target;
4938
125k
    xmlParserInputState state;
4939
4940
125k
    if ((RAW == '<') && (NXT(1) == '?')) {
4941
125k
  int inputid = ctxt->input->id;
4942
125k
  state = ctxt->instate;
4943
125k
        ctxt->instate = XML_PARSER_PI;
4944
  /*
4945
   * this is a Processing Instruction.
4946
   */
4947
125k
  SKIP(2);
4948
4949
  /*
4950
   * Parse the target name and check for special support like
4951
   * namespace.
4952
   */
4953
125k
        target = xmlParsePITarget(ctxt);
4954
125k
  if (target != NULL) {
4955
122k
      if ((RAW == '?') && (NXT(1) == '>')) {
4956
54.9k
    if (inputid != ctxt->input->id) {
4957
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4958
0
                             "PI declaration doesn't start and stop in"
4959
0
                                   " the same entity\n");
4960
0
    }
4961
54.9k
    SKIP(2);
4962
4963
    /*
4964
     * SAX: PI detected.
4965
     */
4966
54.9k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
4967
54.9k
        (ctxt->sax->processingInstruction != NULL))
4968
20.0k
        ctxt->sax->processingInstruction(ctxt->userData,
4969
20.0k
                                         target, NULL);
4970
54.9k
    if (ctxt->instate != XML_PARSER_EOF)
4971
54.9k
        ctxt->instate = state;
4972
54.9k
    return;
4973
54.9k
      }
4974
67.9k
      buf = (xmlChar *) xmlMallocAtomic(size);
4975
67.9k
      if (buf == NULL) {
4976
325
    xmlErrMemory(ctxt, NULL);
4977
325
    ctxt->instate = state;
4978
325
    return;
4979
325
      }
4980
67.5k
      if (SKIP_BLANKS == 0) {
4981
18.3k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4982
18.3k
        "ParsePI: PI %s space expected\n", target);
4983
18.3k
      }
4984
67.5k
      cur = CUR_CHAR(l);
4985
52.4M
      while (IS_CHAR(cur) && /* checked */
4986
52.4M
       ((cur != '?') || (NXT(1) != '>'))) {
4987
52.3M
    if (len + 5 >= size) {
4988
10.0k
        xmlChar *tmp;
4989
10.0k
                    size_t new_size = size * 2;
4990
10.0k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
4991
10.0k
        if (tmp == NULL) {
4992
2
      xmlErrMemory(ctxt, NULL);
4993
2
      xmlFree(buf);
4994
2
      ctxt->instate = state;
4995
2
      return;
4996
2
        }
4997
10.0k
        buf = tmp;
4998
10.0k
                    size = new_size;
4999
10.0k
    }
5000
52.3M
    COPY_BUF(l,buf,len,cur);
5001
52.3M
                if (len > maxLength) {
5002
2
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5003
2
                                      "PI %s too big found", target);
5004
2
                    xmlFree(buf);
5005
2
                    ctxt->instate = state;
5006
2
                    return;
5007
2
                }
5008
52.3M
    NEXTL(l);
5009
52.3M
    cur = CUR_CHAR(l);
5010
52.3M
      }
5011
67.5k
      buf[len] = 0;
5012
67.5k
            if (ctxt->instate == XML_PARSER_EOF) {
5013
295
                xmlFree(buf);
5014
295
                return;
5015
295
            }
5016
67.3k
      if (cur != '?') {
5017
14.0k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5018
14.0k
          "ParsePI: PI %s never end ...\n", target);
5019
53.2k
      } else {
5020
53.2k
    if (inputid != ctxt->input->id) {
5021
293
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5022
293
                             "PI declaration doesn't start and stop in"
5023
293
                                   " the same entity\n");
5024
293
    }
5025
53.2k
    SKIP(2);
5026
5027
53.2k
#ifdef LIBXML_CATALOG_ENABLED
5028
53.2k
    if (((state == XML_PARSER_MISC) ||
5029
53.2k
               (state == XML_PARSER_START)) &&
5030
53.2k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5031
5.11k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5032
5.11k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5033
5.11k
      (allow == XML_CATA_ALLOW_ALL))
5034
5.11k
      xmlParseCatalogPI(ctxt, buf);
5035
5.11k
    }
5036
53.2k
#endif
5037
5038
5039
    /*
5040
     * SAX: PI detected.
5041
     */
5042
53.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5043
53.2k
        (ctxt->sax->processingInstruction != NULL))
5044
26.8k
        ctxt->sax->processingInstruction(ctxt->userData,
5045
26.8k
                                         target, buf);
5046
53.2k
      }
5047
67.3k
      xmlFree(buf);
5048
67.3k
  } else {
5049
2.38k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5050
2.38k
  }
5051
69.6k
  if (ctxt->instate != XML_PARSER_EOF)
5052
69.4k
      ctxt->instate = state;
5053
69.6k
    }
5054
125k
}
5055
5056
/**
5057
 * xmlParseNotationDecl:
5058
 * @ctxt:  an XML parser context
5059
 *
5060
 * DEPRECATED: Internal function, don't use.
5061
 *
5062
 * Parse a notation declaration. Always consumes '<!'.
5063
 *
5064
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5065
 *
5066
 * Hence there is actually 3 choices:
5067
 *     'PUBLIC' S PubidLiteral
5068
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5069
 * and 'SYSTEM' S SystemLiteral
5070
 *
5071
 * See the NOTE on xmlParseExternalID().
5072
 */
5073
5074
void
5075
13.0k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5076
13.0k
    const xmlChar *name;
5077
13.0k
    xmlChar *Pubid;
5078
13.0k
    xmlChar *Systemid;
5079
5080
13.0k
    if ((CUR != '<') || (NXT(1) != '!'))
5081
0
        return;
5082
13.0k
    SKIP(2);
5083
5084
13.0k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5085
11.2k
  int inputid = ctxt->input->id;
5086
11.2k
  SKIP(8);
5087
11.2k
  if (SKIP_BLANKS == 0) {
5088
299
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5089
299
         "Space required after '<!NOTATION'\n");
5090
299
      return;
5091
299
  }
5092
5093
10.9k
        name = xmlParseName(ctxt);
5094
10.9k
  if (name == NULL) {
5095
281
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5096
281
      return;
5097
281
  }
5098
10.6k
  if (xmlStrchr(name, ':') != NULL) {
5099
3.17k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5100
3.17k
         "colons are forbidden from notation names '%s'\n",
5101
3.17k
         name, NULL, NULL);
5102
3.17k
  }
5103
10.6k
  if (SKIP_BLANKS == 0) {
5104
383
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5105
383
         "Space required after the NOTATION name'\n");
5106
383
      return;
5107
383
  }
5108
5109
  /*
5110
   * Parse the IDs.
5111
   */
5112
10.2k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5113
10.2k
  SKIP_BLANKS;
5114
5115
10.2k
  if (RAW == '>') {
5116
7.08k
      if (inputid != ctxt->input->id) {
5117
2.17k
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5118
2.17k
                         "Notation declaration doesn't start and stop"
5119
2.17k
                               " in the same entity\n");
5120
2.17k
      }
5121
7.08k
      NEXT;
5122
7.08k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5123
7.08k
    (ctxt->sax->notationDecl != NULL))
5124
4.54k
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5125
7.08k
  } else {
5126
3.15k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5127
3.15k
  }
5128
10.2k
  if (Systemid != NULL) xmlFree(Systemid);
5129
10.2k
  if (Pubid != NULL) xmlFree(Pubid);
5130
10.2k
    }
5131
13.0k
}
5132
5133
/**
5134
 * xmlParseEntityDecl:
5135
 * @ctxt:  an XML parser context
5136
 *
5137
 * DEPRECATED: Internal function, don't use.
5138
 *
5139
 * Parse an entity declaration. Always consumes '<!'.
5140
 *
5141
 * [70] EntityDecl ::= GEDecl | PEDecl
5142
 *
5143
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5144
 *
5145
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5146
 *
5147
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5148
 *
5149
 * [74] PEDef ::= EntityValue | ExternalID
5150
 *
5151
 * [76] NDataDecl ::= S 'NDATA' S Name
5152
 *
5153
 * [ VC: Notation Declared ]
5154
 * The Name must match the declared name of a notation.
5155
 */
5156
5157
void
5158
93.2k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5159
93.2k
    const xmlChar *name = NULL;
5160
93.2k
    xmlChar *value = NULL;
5161
93.2k
    xmlChar *URI = NULL, *literal = NULL;
5162
93.2k
    const xmlChar *ndata = NULL;
5163
93.2k
    int isParameter = 0;
5164
93.2k
    xmlChar *orig = NULL;
5165
5166
93.2k
    if ((CUR != '<') || (NXT(1) != '!'))
5167
0
        return;
5168
93.2k
    SKIP(2);
5169
5170
    /* GROW; done in the caller */
5171
93.2k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5172
92.3k
  int inputid = ctxt->input->id;
5173
92.3k
  SKIP(6);
5174
92.3k
  if (SKIP_BLANKS == 0) {
5175
6.94k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176
6.94k
         "Space required after '<!ENTITY'\n");
5177
6.94k
  }
5178
5179
92.3k
  if (RAW == '%') {
5180
20.0k
      NEXT;
5181
20.0k
      if (SKIP_BLANKS == 0) {
5182
4.50k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5183
4.50k
             "Space required after '%%'\n");
5184
4.50k
      }
5185
20.0k
      isParameter = 1;
5186
20.0k
  }
5187
5188
92.3k
        name = xmlParseName(ctxt);
5189
92.3k
  if (name == NULL) {
5190
1.67k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5191
1.67k
                     "xmlParseEntityDecl: no name\n");
5192
1.67k
            return;
5193
1.67k
  }
5194
90.6k
  if (xmlStrchr(name, ':') != NULL) {
5195
14.5k
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5196
14.5k
         "colons are forbidden from entities names '%s'\n",
5197
14.5k
         name, NULL, NULL);
5198
14.5k
  }
5199
90.6k
  if (SKIP_BLANKS == 0) {
5200
9.36k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5201
9.36k
         "Space required after the entity name\n");
5202
9.36k
  }
5203
5204
90.6k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5205
  /*
5206
   * handle the various case of definitions...
5207
   */
5208
90.6k
  if (isParameter) {
5209
19.8k
      if ((RAW == '"') || (RAW == '\'')) {
5210
9.25k
          value = xmlParseEntityValue(ctxt, &orig);
5211
9.25k
    if (value) {
5212
7.68k
        if ((ctxt->sax != NULL) &&
5213
7.68k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5214
5.20k
      ctxt->sax->entityDecl(ctxt->userData, name,
5215
5.20k
                        XML_INTERNAL_PARAMETER_ENTITY,
5216
5.20k
            NULL, NULL, value);
5217
7.68k
    }
5218
10.6k
      } else {
5219
10.6k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5220
10.6k
    if ((URI == NULL) && (literal == NULL)) {
5221
1.26k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5222
1.26k
    }
5223
10.6k
    if (URI) {
5224
9.08k
        xmlURIPtr uri;
5225
5226
9.08k
        uri = xmlParseURI((const char *) URI);
5227
9.08k
        if (uri == NULL) {
5228
3.36k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5229
3.36k
             "Invalid URI: %s\n", URI);
5230
      /*
5231
       * This really ought to be a well formedness error
5232
       * but the XML Core WG decided otherwise c.f. issue
5233
       * E26 of the XML erratas.
5234
       */
5235
5.71k
        } else {
5236
5.71k
      if (uri->fragment != NULL) {
5237
          /*
5238
           * Okay this is foolish to block those but not
5239
           * invalid URIs.
5240
           */
5241
1.15k
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5242
4.56k
      } else {
5243
4.56k
          if ((ctxt->sax != NULL) &&
5244
4.56k
        (!ctxt->disableSAX) &&
5245
4.56k
        (ctxt->sax->entityDecl != NULL))
5246
3.60k
        ctxt->sax->entityDecl(ctxt->userData, name,
5247
3.60k
              XML_EXTERNAL_PARAMETER_ENTITY,
5248
3.60k
              literal, URI, NULL);
5249
4.56k
      }
5250
5.71k
      xmlFreeURI(uri);
5251
5.71k
        }
5252
9.08k
    }
5253
10.6k
      }
5254
70.7k
  } else {
5255
70.7k
      if ((RAW == '"') || (RAW == '\'')) {
5256
56.6k
          value = xmlParseEntityValue(ctxt, &orig);
5257
56.6k
    if ((ctxt->sax != NULL) &&
5258
56.6k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5259
38.1k
        ctxt->sax->entityDecl(ctxt->userData, name,
5260
38.1k
        XML_INTERNAL_GENERAL_ENTITY,
5261
38.1k
        NULL, NULL, value);
5262
    /*
5263
     * For expat compatibility in SAX mode.
5264
     */
5265
56.6k
    if ((ctxt->myDoc == NULL) ||
5266
56.6k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5267
13.6k
        if (ctxt->myDoc == NULL) {
5268
3.93k
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5269
3.93k
      if (ctxt->myDoc == NULL) {
5270
7
          xmlErrMemory(ctxt, "New Doc failed");
5271
7
          goto done;
5272
7
      }
5273
3.93k
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5274
3.93k
        }
5275
13.6k
        if (ctxt->myDoc->intSubset == NULL)
5276
3.93k
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5277
3.93k
              BAD_CAST "fake", NULL, NULL);
5278
5279
13.6k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5280
13.6k
                    NULL, NULL, value);
5281
13.6k
    }
5282
56.6k
      } else {
5283
14.0k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5284
14.0k
    if ((URI == NULL) && (literal == NULL)) {
5285
1.42k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5286
1.42k
    }
5287
14.0k
    if (URI) {
5288
12.2k
        xmlURIPtr uri;
5289
5290
12.2k
        uri = xmlParseURI((const char *)URI);
5291
12.2k
        if (uri == NULL) {
5292
1.24k
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5293
1.24k
             "Invalid URI: %s\n", URI);
5294
      /*
5295
       * This really ought to be a well formedness error
5296
       * but the XML Core WG decided otherwise c.f. issue
5297
       * E26 of the XML erratas.
5298
       */
5299
10.9k
        } else {
5300
10.9k
      if (uri->fragment != NULL) {
5301
          /*
5302
           * Okay this is foolish to block those but not
5303
           * invalid URIs.
5304
           */
5305
67
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5306
67
      }
5307
10.9k
      xmlFreeURI(uri);
5308
10.9k
        }
5309
12.2k
    }
5310
14.0k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5311
2.08k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5312
2.08k
           "Space required before 'NDATA'\n");
5313
2.08k
    }
5314
14.0k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5315
3.69k
        SKIP(5);
5316
3.69k
        if (SKIP_BLANKS == 0) {
5317
281
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5318
281
               "Space required after 'NDATA'\n");
5319
281
        }
5320
3.69k
        ndata = xmlParseName(ctxt);
5321
3.69k
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5322
3.69k
            (ctxt->sax->unparsedEntityDecl != NULL))
5323
2.51k
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5324
2.51k
            literal, URI, ndata);
5325
10.3k
    } else {
5326
10.3k
        if ((ctxt->sax != NULL) &&
5327
10.3k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5328
6.89k
      ctxt->sax->entityDecl(ctxt->userData, name,
5329
6.89k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5330
6.89k
            literal, URI, NULL);
5331
        /*
5332
         * For expat compatibility in SAX mode.
5333
         * assuming the entity replacement was asked for
5334
         */
5335
10.3k
        if ((ctxt->replaceEntities != 0) &&
5336
10.3k
      ((ctxt->myDoc == NULL) ||
5337
10.2k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5338
1.38k
      if (ctxt->myDoc == NULL) {
5339
1.05k
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5340
1.05k
          if (ctxt->myDoc == NULL) {
5341
4
              xmlErrMemory(ctxt, "New Doc failed");
5342
4
        goto done;
5343
4
          }
5344
1.05k
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5345
1.05k
      }
5346
5347
1.38k
      if (ctxt->myDoc->intSubset == NULL)
5348
1.05k
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5349
1.05k
            BAD_CAST "fake", NULL, NULL);
5350
1.38k
      xmlSAX2EntityDecl(ctxt, name,
5351
1.38k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5352
1.38k
                  literal, URI, NULL);
5353
1.38k
        }
5354
10.3k
    }
5355
14.0k
      }
5356
70.7k
  }
5357
90.6k
  if (ctxt->instate == XML_PARSER_EOF)
5358
922
      goto done;
5359
89.6k
  SKIP_BLANKS;
5360
89.6k
  if (RAW != '>') {
5361
7.59k
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5362
7.59k
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5363
7.59k
      xmlHaltParser(ctxt);
5364
82.0k
  } else {
5365
82.0k
      if (inputid != ctxt->input->id) {
5366
233
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5367
233
                         "Entity declaration doesn't start and stop in"
5368
233
                               " the same entity\n");
5369
233
      }
5370
82.0k
      NEXT;
5371
82.0k
  }
5372
89.6k
  if (orig != NULL) {
5373
      /*
5374
       * Ugly mechanism to save the raw entity value.
5375
       */
5376
59.3k
      xmlEntityPtr cur = NULL;
5377
5378
59.3k
      if (isParameter) {
5379
7.76k
          if ((ctxt->sax != NULL) &&
5380
7.76k
        (ctxt->sax->getParameterEntity != NULL))
5381
7.76k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5382
51.5k
      } else {
5383
51.5k
          if ((ctxt->sax != NULL) &&
5384
51.5k
        (ctxt->sax->getEntity != NULL))
5385
51.5k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5386
51.5k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5387
2.55k
        cur = xmlSAX2GetEntity(ctxt, name);
5388
2.55k
    }
5389
51.5k
      }
5390
59.3k
            if ((cur != NULL) && (cur->orig == NULL)) {
5391
27.7k
    cur->orig = orig;
5392
27.7k
                orig = NULL;
5393
27.7k
      }
5394
59.3k
  }
5395
5396
90.6k
done:
5397
90.6k
  if (value != NULL) xmlFree(value);
5398
90.6k
  if (URI != NULL) xmlFree(URI);
5399
90.6k
  if (literal != NULL) xmlFree(literal);
5400
90.6k
        if (orig != NULL) xmlFree(orig);
5401
90.6k
    }
5402
93.2k
}
5403
5404
/**
5405
 * xmlParseDefaultDecl:
5406
 * @ctxt:  an XML parser context
5407
 * @value:  Receive a possible fixed default value for the attribute
5408
 *
5409
 * DEPRECATED: Internal function, don't use.
5410
 *
5411
 * Parse an attribute default declaration
5412
 *
5413
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5414
 *
5415
 * [ VC: Required Attribute ]
5416
 * if the default declaration is the keyword #REQUIRED, then the
5417
 * attribute must be specified for all elements of the type in the
5418
 * attribute-list declaration.
5419
 *
5420
 * [ VC: Attribute Default Legal ]
5421
 * The declared default value must meet the lexical constraints of
5422
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5423
 *
5424
 * [ VC: Fixed Attribute Default ]
5425
 * if an attribute has a default value declared with the #FIXED
5426
 * keyword, instances of that attribute must match the default value.
5427
 *
5428
 * [ WFC: No < in Attribute Values ]
5429
 * handled in xmlParseAttValue()
5430
 *
5431
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5432
 *          or XML_ATTRIBUTE_FIXED.
5433
 */
5434
5435
int
5436
91.2k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5437
91.2k
    int val;
5438
91.2k
    xmlChar *ret;
5439
5440
91.2k
    *value = NULL;
5441
91.2k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5442
258
  SKIP(9);
5443
258
  return(XML_ATTRIBUTE_REQUIRED);
5444
258
    }
5445
90.9k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5446
478
  SKIP(8);
5447
478
  return(XML_ATTRIBUTE_IMPLIED);
5448
478
    }
5449
90.4k
    val = XML_ATTRIBUTE_NONE;
5450
90.4k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5451
1.65k
  SKIP(6);
5452
1.65k
  val = XML_ATTRIBUTE_FIXED;
5453
1.65k
  if (SKIP_BLANKS == 0) {
5454
234
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455
234
         "Space required after '#FIXED'\n");
5456
234
  }
5457
1.65k
    }
5458
90.4k
    ret = xmlParseAttValue(ctxt);
5459
90.4k
    ctxt->instate = XML_PARSER_DTD;
5460
90.4k
    if (ret == NULL) {
5461
7.04k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5462
7.04k
           "Attribute default value declaration error\n");
5463
7.04k
    } else
5464
83.4k
        *value = ret;
5465
90.4k
    return(val);
5466
90.9k
}
5467
5468
/**
5469
 * xmlParseNotationType:
5470
 * @ctxt:  an XML parser context
5471
 *
5472
 * DEPRECATED: Internal function, don't use.
5473
 *
5474
 * parse an Notation attribute type.
5475
 *
5476
 * Note: the leading 'NOTATION' S part has already being parsed...
5477
 *
5478
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5479
 *
5480
 * [ VC: Notation Attributes ]
5481
 * Values of this type must match one of the notation names included
5482
 * in the declaration; all notation names in the declaration must be declared.
5483
 *
5484
 * Returns: the notation attribute tree built while parsing
5485
 */
5486
5487
xmlEnumerationPtr
5488
2.41k
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5489
2.41k
    const xmlChar *name;
5490
2.41k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5491
5492
2.41k
    if (RAW != '(') {
5493
512
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5494
512
  return(NULL);
5495
512
    }
5496
3.75k
    do {
5497
3.75k
        NEXT;
5498
3.75k
  SKIP_BLANKS;
5499
3.75k
        name = xmlParseName(ctxt);
5500
3.75k
  if (name == NULL) {
5501
627
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5502
627
         "Name expected in NOTATION declaration\n");
5503
627
            xmlFreeEnumeration(ret);
5504
627
      return(NULL);
5505
627
  }
5506
3.12k
  tmp = ret;
5507
6.89k
  while (tmp != NULL) {
5508
4.39k
      if (xmlStrEqual(name, tmp->name)) {
5509
630
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5510
630
    "standalone: attribute notation value token %s duplicated\n",
5511
630
         name, NULL);
5512
630
    if (!xmlDictOwns(ctxt->dict, name))
5513
0
        xmlFree((xmlChar *) name);
5514
630
    break;
5515
630
      }
5516
3.76k
      tmp = tmp->next;
5517
3.76k
  }
5518
3.12k
  if (tmp == NULL) {
5519
2.49k
      cur = xmlCreateEnumeration(name);
5520
2.49k
      if (cur == NULL) {
5521
4
                xmlFreeEnumeration(ret);
5522
4
                return(NULL);
5523
4
            }
5524
2.49k
      if (last == NULL) ret = last = cur;
5525
876
      else {
5526
876
    last->next = cur;
5527
876
    last = cur;
5528
876
      }
5529
2.49k
  }
5530
3.12k
  SKIP_BLANKS;
5531
3.12k
    } while (RAW == '|');
5532
1.27k
    if (RAW != ')') {
5533
883
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5534
883
        xmlFreeEnumeration(ret);
5535
883
  return(NULL);
5536
883
    }
5537
390
    NEXT;
5538
390
    return(ret);
5539
1.27k
}
5540
5541
/**
5542
 * xmlParseEnumerationType:
5543
 * @ctxt:  an XML parser context
5544
 *
5545
 * DEPRECATED: Internal function, don't use.
5546
 *
5547
 * parse an Enumeration attribute type.
5548
 *
5549
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5550
 *
5551
 * [ VC: Enumeration ]
5552
 * Values of this type must match one of the Nmtoken tokens in
5553
 * the declaration
5554
 *
5555
 * Returns: the enumeration attribute tree built while parsing
5556
 */
5557
5558
xmlEnumerationPtr
5559
16.2k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5560
16.2k
    xmlChar *name;
5561
16.2k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5562
5563
16.2k
    if (RAW != '(') {
5564
9.29k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5565
9.29k
  return(NULL);
5566
9.29k
    }
5567
11.8k
    do {
5568
11.8k
        NEXT;
5569
11.8k
  SKIP_BLANKS;
5570
11.8k
        name = xmlParseNmtoken(ctxt);
5571
11.8k
  if (name == NULL) {
5572
1.29k
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5573
1.29k
      return(ret);
5574
1.29k
  }
5575
10.5k
  tmp = ret;
5576
16.2k
  while (tmp != NULL) {
5577
7.11k
      if (xmlStrEqual(name, tmp->name)) {
5578
1.44k
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5579
1.44k
    "standalone: attribute enumeration value token %s duplicated\n",
5580
1.44k
         name, NULL);
5581
1.44k
    if (!xmlDictOwns(ctxt->dict, name))
5582
1.44k
        xmlFree(name);
5583
1.44k
    break;
5584
1.44k
      }
5585
5.66k
      tmp = tmp->next;
5586
5.66k
  }
5587
10.5k
  if (tmp == NULL) {
5588
9.10k
      cur = xmlCreateEnumeration(name);
5589
9.10k
      if (!xmlDictOwns(ctxt->dict, name))
5590
9.10k
    xmlFree(name);
5591
9.10k
      if (cur == NULL) {
5592
2
                xmlFreeEnumeration(ret);
5593
2
                return(NULL);
5594
2
            }
5595
9.10k
      if (last == NULL) ret = last = cur;
5596
3.46k
      else {
5597
3.46k
    last->next = cur;
5598
3.46k
    last = cur;
5599
3.46k
      }
5600
9.10k
  }
5601
10.5k
  SKIP_BLANKS;
5602
10.5k
    } while (RAW == '|');
5603
5.62k
    if (RAW != ')') {
5604
1.44k
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5605
1.44k
  return(ret);
5606
1.44k
    }
5607
4.17k
    NEXT;
5608
4.17k
    return(ret);
5609
5.62k
}
5610
5611
/**
5612
 * xmlParseEnumeratedType:
5613
 * @ctxt:  an XML parser context
5614
 * @tree:  the enumeration tree built while parsing
5615
 *
5616
 * DEPRECATED: Internal function, don't use.
5617
 *
5618
 * parse an Enumerated attribute type.
5619
 *
5620
 * [57] EnumeratedType ::= NotationType | Enumeration
5621
 *
5622
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5623
 *
5624
 *
5625
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5626
 */
5627
5628
int
5629
18.9k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5630
18.9k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5631
2.69k
  SKIP(8);
5632
2.69k
  if (SKIP_BLANKS == 0) {
5633
275
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5634
275
         "Space required after 'NOTATION'\n");
5635
275
      return(0);
5636
275
  }
5637
2.41k
  *tree = xmlParseNotationType(ctxt);
5638
2.41k
  if (*tree == NULL) return(0);
5639
390
  return(XML_ATTRIBUTE_NOTATION);
5640
2.41k
    }
5641
16.2k
    *tree = xmlParseEnumerationType(ctxt);
5642
16.2k
    if (*tree == NULL) return(0);
5643
5.64k
    return(XML_ATTRIBUTE_ENUMERATION);
5644
16.2k
}
5645
5646
/**
5647
 * xmlParseAttributeType:
5648
 * @ctxt:  an XML parser context
5649
 * @tree:  the enumeration tree built while parsing
5650
 *
5651
 * DEPRECATED: Internal function, don't use.
5652
 *
5653
 * parse the Attribute list def for an element
5654
 *
5655
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5656
 *
5657
 * [55] StringType ::= 'CDATA'
5658
 *
5659
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5660
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5661
 *
5662
 * Validity constraints for attribute values syntax are checked in
5663
 * xmlValidateAttributeValue()
5664
 *
5665
 * [ VC: ID ]
5666
 * Values of type ID must match the Name production. A name must not
5667
 * appear more than once in an XML document as a value of this type;
5668
 * i.e., ID values must uniquely identify the elements which bear them.
5669
 *
5670
 * [ VC: One ID per Element Type ]
5671
 * No element type may have more than one ID attribute specified.
5672
 *
5673
 * [ VC: ID Attribute Default ]
5674
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5675
 *
5676
 * [ VC: IDREF ]
5677
 * Values of type IDREF must match the Name production, and values
5678
 * of type IDREFS must match Names; each IDREF Name must match the value
5679
 * of an ID attribute on some element in the XML document; i.e. IDREF
5680
 * values must match the value of some ID attribute.
5681
 *
5682
 * [ VC: Entity Name ]
5683
 * Values of type ENTITY must match the Name production, values
5684
 * of type ENTITIES must match Names; each Entity Name must match the
5685
 * name of an unparsed entity declared in the DTD.
5686
 *
5687
 * [ VC: Name Token ]
5688
 * Values of type NMTOKEN must match the Nmtoken production; values
5689
 * of type NMTOKENS must match Nmtokens.
5690
 *
5691
 * Returns the attribute type
5692
 */
5693
int
5694
108k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5695
108k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5696
25.5k
  SKIP(5);
5697
25.5k
  return(XML_ATTRIBUTE_CDATA);
5698
82.5k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5699
4.89k
  SKIP(6);
5700
4.89k
  return(XML_ATTRIBUTE_IDREFS);
5701
77.7k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5702
10.6k
  SKIP(5);
5703
10.6k
  return(XML_ATTRIBUTE_IDREF);
5704
67.0k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5705
45.4k
        SKIP(2);
5706
45.4k
  return(XML_ATTRIBUTE_ID);
5707
45.4k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5708
397
  SKIP(6);
5709
397
  return(XML_ATTRIBUTE_ENTITY);
5710
21.2k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5711
806
  SKIP(8);
5712
806
  return(XML_ATTRIBUTE_ENTITIES);
5713
20.4k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5714
536
  SKIP(8);
5715
536
  return(XML_ATTRIBUTE_NMTOKENS);
5716
19.8k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5717
956
  SKIP(7);
5718
956
  return(XML_ATTRIBUTE_NMTOKEN);
5719
956
     }
5720
18.9k
     return(xmlParseEnumeratedType(ctxt, tree));
5721
108k
}
5722
5723
/**
5724
 * xmlParseAttributeListDecl:
5725
 * @ctxt:  an XML parser context
5726
 *
5727
 * DEPRECATED: Internal function, don't use.
5728
 *
5729
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5730
 *
5731
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5732
 *
5733
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5734
 *
5735
 */
5736
void
5737
56.6k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5738
56.6k
    const xmlChar *elemName;
5739
56.6k
    const xmlChar *attrName;
5740
56.6k
    xmlEnumerationPtr tree;
5741
5742
56.6k
    if ((CUR != '<') || (NXT(1) != '!'))
5743
0
        return;
5744
56.6k
    SKIP(2);
5745
5746
56.6k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5747
55.0k
  int inputid = ctxt->input->id;
5748
5749
55.0k
  SKIP(7);
5750
55.0k
  if (SKIP_BLANKS == 0) {
5751
11.1k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5752
11.1k
                     "Space required after '<!ATTLIST'\n");
5753
11.1k
  }
5754
55.0k
        elemName = xmlParseName(ctxt);
5755
55.0k
  if (elemName == NULL) {
5756
1.33k
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5757
1.33k
         "ATTLIST: no name for Element\n");
5758
1.33k
      return;
5759
1.33k
  }
5760
53.6k
  SKIP_BLANKS;
5761
53.6k
  GROW;
5762
132k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5763
117k
      int type;
5764
117k
      int def;
5765
117k
      xmlChar *defaultValue = NULL;
5766
5767
117k
      GROW;
5768
117k
            tree = NULL;
5769
117k
      attrName = xmlParseName(ctxt);
5770
117k
      if (attrName == NULL) {
5771
5.91k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5772
5.91k
             "ATTLIST: no name for Attribute\n");
5773
5.91k
    break;
5774
5.91k
      }
5775
111k
      GROW;
5776
111k
      if (SKIP_BLANKS == 0) {
5777
2.94k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5778
2.94k
            "Space required after the attribute name\n");
5779
2.94k
    break;
5780
2.94k
      }
5781
5782
108k
      type = xmlParseAttributeType(ctxt, &tree);
5783
108k
      if (type <= 0) {
5784
12.8k
          break;
5785
12.8k
      }
5786
5787
95.2k
      GROW;
5788
95.2k
      if (SKIP_BLANKS == 0) {
5789
4.05k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5790
4.05k
             "Space required after the attribute type\n");
5791
4.05k
          if (tree != NULL)
5792
1.46k
        xmlFreeEnumeration(tree);
5793
4.05k
    break;
5794
4.05k
      }
5795
5796
91.2k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
5797
91.2k
      if (def <= 0) {
5798
0
                if (defaultValue != NULL)
5799
0
        xmlFree(defaultValue);
5800
0
          if (tree != NULL)
5801
0
        xmlFreeEnumeration(tree);
5802
0
          break;
5803
0
      }
5804
91.2k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5805
58.2k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
5806
5807
91.2k
      GROW;
5808
91.2k
            if (RAW != '>') {
5809
75.3k
    if (SKIP_BLANKS == 0) {
5810
11.9k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5811
11.9k
      "Space required after the attribute default value\n");
5812
11.9k
        if (defaultValue != NULL)
5813
5.15k
      xmlFree(defaultValue);
5814
11.9k
        if (tree != NULL)
5815
1.74k
      xmlFreeEnumeration(tree);
5816
11.9k
        break;
5817
11.9k
    }
5818
75.3k
      }
5819
79.2k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5820
79.2k
    (ctxt->sax->attributeDecl != NULL))
5821
39.0k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5822
39.0k
                          type, def, defaultValue, tree);
5823
40.2k
      else if (tree != NULL)
5824
389
    xmlFreeEnumeration(tree);
5825
5826
79.2k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
5827
79.2k
          (def != XML_ATTRIBUTE_IMPLIED) &&
5828
79.2k
    (def != XML_ATTRIBUTE_REQUIRED)) {
5829
78.2k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5830
78.2k
      }
5831
79.2k
      if (ctxt->sax2) {
5832
79.2k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5833
79.2k
      }
5834
79.2k
      if (defaultValue != NULL)
5835
78.2k
          xmlFree(defaultValue);
5836
79.2k
      GROW;
5837
79.2k
  }
5838
53.6k
  if (RAW == '>') {
5839
16.5k
      if (inputid != ctxt->input->id) {
5840
133
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5841
133
                               "Attribute list declaration doesn't start and"
5842
133
                               " stop in the same entity\n");
5843
133
      }
5844
16.5k
      NEXT;
5845
16.5k
  }
5846
53.6k
    }
5847
56.6k
}
5848
5849
/**
5850
 * xmlParseElementMixedContentDecl:
5851
 * @ctxt:  an XML parser context
5852
 * @inputchk:  the input used for the current entity, needed for boundary checks
5853
 *
5854
 * DEPRECATED: Internal function, don't use.
5855
 *
5856
 * parse the declaration for a Mixed Element content
5857
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5858
 *
5859
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5860
 *                '(' S? '#PCDATA' S? ')'
5861
 *
5862
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5863
 *
5864
 * [ VC: No Duplicate Types ]
5865
 * The same name must not appear more than once in a single
5866
 * mixed-content declaration.
5867
 *
5868
 * returns: the list of the xmlElementContentPtr describing the element choices
5869
 */
5870
xmlElementContentPtr
5871
4.50k
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5872
4.50k
    xmlElementContentPtr ret = NULL, cur = NULL, n;
5873
4.50k
    const xmlChar *elem = NULL;
5874
5875
4.50k
    GROW;
5876
4.50k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5877
4.50k
  SKIP(7);
5878
4.50k
  SKIP_BLANKS;
5879
4.50k
  if (RAW == ')') {
5880
2.33k
      if (ctxt->input->id != inputchk) {
5881
83
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5882
83
                               "Element content declaration doesn't start and"
5883
83
                               " stop in the same entity\n");
5884
83
      }
5885
2.33k
      NEXT;
5886
2.33k
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5887
2.33k
      if (ret == NULL)
5888
5
          return(NULL);
5889
2.33k
      if (RAW == '*') {
5890
11
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
5891
11
    NEXT;
5892
11
      }
5893
2.33k
      return(ret);
5894
2.33k
  }
5895
2.16k
  if ((RAW == '(') || (RAW == '|')) {
5896
946
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5897
946
      if (ret == NULL) return(NULL);
5898
946
  }
5899
3.41k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
5900
1.50k
      NEXT;
5901
1.50k
      if (elem == NULL) {
5902
726
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5903
726
    if (ret == NULL) {
5904
1
        xmlFreeDocElementContent(ctxt->myDoc, cur);
5905
1
                    return(NULL);
5906
1
                }
5907
725
    ret->c1 = cur;
5908
725
    if (cur != NULL)
5909
725
        cur->parent = ret;
5910
725
    cur = ret;
5911
775
      } else {
5912
775
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5913
775
    if (n == NULL) {
5914
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
5915
0
                    return(NULL);
5916
0
                }
5917
775
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5918
775
    if (n->c1 != NULL)
5919
775
        n->c1->parent = n;
5920
775
          cur->c2 = n;
5921
775
    if (n != NULL)
5922
775
        n->parent = cur;
5923
775
    cur = n;
5924
775
      }
5925
1.50k
      SKIP_BLANKS;
5926
1.50k
      elem = xmlParseName(ctxt);
5927
1.50k
      if (elem == NULL) {
5928
249
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5929
249
      "xmlParseElementMixedContentDecl : Name expected\n");
5930
249
    xmlFreeDocElementContent(ctxt->myDoc, ret);
5931
249
    return(NULL);
5932
249
      }
5933
1.25k
      SKIP_BLANKS;
5934
1.25k
      GROW;
5935
1.25k
  }
5936
1.91k
  if ((RAW == ')') && (NXT(1) == '*')) {
5937
220
      if (elem != NULL) {
5938
220
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5939
220
                                   XML_ELEMENT_CONTENT_ELEMENT);
5940
220
    if (cur->c2 != NULL)
5941
220
        cur->c2->parent = cur;
5942
220
            }
5943
220
            if (ret != NULL)
5944
220
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
5945
220
      if (ctxt->input->id != inputchk) {
5946
13
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5947
13
                               "Element content declaration doesn't start and"
5948
13
                               " stop in the same entity\n");
5949
13
      }
5950
220
      SKIP(2);
5951
1.69k
  } else {
5952
1.69k
      xmlFreeDocElementContent(ctxt->myDoc, ret);
5953
1.69k
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5954
1.69k
      return(NULL);
5955
1.69k
  }
5956
5957
1.91k
    } else {
5958
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5959
0
    }
5960
220
    return(ret);
5961
4.50k
}
5962
5963
/**
5964
 * xmlParseElementChildrenContentDeclPriv:
5965
 * @ctxt:  an XML parser context
5966
 * @inputchk:  the input used for the current entity, needed for boundary checks
5967
 * @depth: the level of recursion
5968
 *
5969
 * parse the declaration for a Mixed Element content
5970
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5971
 *
5972
 *
5973
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5974
 *
5975
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5976
 *
5977
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5978
 *
5979
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5980
 *
5981
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5982
 * TODO Parameter-entity replacement text must be properly nested
5983
 *  with parenthesized groups. That is to say, if either of the
5984
 *  opening or closing parentheses in a choice, seq, or Mixed
5985
 *  construct is contained in the replacement text for a parameter
5986
 *  entity, both must be contained in the same replacement text. For
5987
 *  interoperability, if a parameter-entity reference appears in a
5988
 *  choice, seq, or Mixed construct, its replacement text should not
5989
 *  be empty, and neither the first nor last non-blank character of
5990
 *  the replacement text should be a connector (| or ,).
5991
 *
5992
 * Returns the tree of xmlElementContentPtr describing the element
5993
 *          hierarchy.
5994
 */
5995
static xmlElementContentPtr
5996
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5997
92.7k
                                       int depth) {
5998
92.7k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5999
92.7k
    const xmlChar *elem;
6000
92.7k
    xmlChar type = 0;
6001
6002
92.7k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6003
92.7k
        (depth >  2048)) {
6004
219
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6005
219
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6006
219
                          depth);
6007
219
  return(NULL);
6008
219
    }
6009
92.5k
    SKIP_BLANKS;
6010
92.5k
    GROW;
6011
92.5k
    if (RAW == '(') {
6012
55.5k
  int inputid = ctxt->input->id;
6013
6014
        /* Recurse on first child */
6015
55.5k
  NEXT;
6016
55.5k
  SKIP_BLANKS;
6017
55.5k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6018
55.5k
                                                           depth + 1);
6019
55.5k
        if (cur == NULL)
6020
34.0k
            return(NULL);
6021
21.5k
  SKIP_BLANKS;
6022
21.5k
  GROW;
6023
36.9k
    } else {
6024
36.9k
  elem = xmlParseName(ctxt);
6025
36.9k
  if (elem == NULL) {
6026
2.64k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6027
2.64k
      return(NULL);
6028
2.64k
  }
6029
34.3k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6030
34.3k
  if (cur == NULL) {
6031
10
      xmlErrMemory(ctxt, NULL);
6032
10
      return(NULL);
6033
10
  }
6034
34.3k
  GROW;
6035
34.3k
  if (RAW == '?') {
6036
3.54k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6037
3.54k
      NEXT;
6038
30.7k
  } else if (RAW == '*') {
6039
3.77k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6040
3.77k
      NEXT;
6041
26.9k
  } else if (RAW == '+') {
6042
4.75k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6043
4.75k
      NEXT;
6044
22.2k
  } else {
6045
22.2k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6046
22.2k
  }
6047
34.3k
  GROW;
6048
34.3k
    }
6049
55.8k
    SKIP_BLANKS;
6050
89.9k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6051
        /*
6052
   * Each loop we parse one separator and one element.
6053
   */
6054
47.4k
        if (RAW == ',') {
6055
3.28k
      if (type == 0) type = CUR;
6056
6057
      /*
6058
       * Detect "Name | Name , Name" error
6059
       */
6060
765
      else if (type != CUR) {
6061
315
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6062
315
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6063
315
                      type);
6064
315
    if ((last != NULL) && (last != ret))
6065
315
        xmlFreeDocElementContent(ctxt->myDoc, last);
6066
315
    if (ret != NULL)
6067
315
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6068
315
    return(NULL);
6069
315
      }
6070
2.96k
      NEXT;
6071
6072
2.96k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6073
2.96k
      if (op == NULL) {
6074
5
    if ((last != NULL) && (last != ret))
6075
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6076
5
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6077
5
    return(NULL);
6078
5
      }
6079
2.96k
      if (last == NULL) {
6080
2.51k
    op->c1 = ret;
6081
2.51k
    if (ret != NULL)
6082
2.51k
        ret->parent = op;
6083
2.51k
    ret = cur = op;
6084
2.51k
      } else {
6085
449
          cur->c2 = op;
6086
449
    if (op != NULL)
6087
449
        op->parent = cur;
6088
449
    op->c1 = last;
6089
449
    if (last != NULL)
6090
449
        last->parent = op;
6091
449
    cur =op;
6092
449
    last = NULL;
6093
449
      }
6094
44.1k
  } else if (RAW == '|') {
6095
41.6k
      if (type == 0) type = CUR;
6096
6097
      /*
6098
       * Detect "Name , Name | Name" error
6099
       */
6100
16.9k
      else if (type != CUR) {
6101
204
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6102
204
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6103
204
          type);
6104
204
    if ((last != NULL) && (last != ret))
6105
204
        xmlFreeDocElementContent(ctxt->myDoc, last);
6106
204
    if (ret != NULL)
6107
204
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6108
204
    return(NULL);
6109
204
      }
6110
41.4k
      NEXT;
6111
6112
41.4k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6113
41.4k
      if (op == NULL) {
6114
5
    if ((last != NULL) && (last != ret))
6115
3
        xmlFreeDocElementContent(ctxt->myDoc, last);
6116
5
    if (ret != NULL)
6117
5
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6118
5
    return(NULL);
6119
5
      }
6120
41.4k
      if (last == NULL) {
6121
24.7k
    op->c1 = ret;
6122
24.7k
    if (ret != NULL)
6123
24.7k
        ret->parent = op;
6124
24.7k
    ret = cur = op;
6125
24.7k
      } else {
6126
16.7k
          cur->c2 = op;
6127
16.7k
    if (op != NULL)
6128
16.7k
        op->parent = cur;
6129
16.7k
    op->c1 = last;
6130
16.7k
    if (last != NULL)
6131
16.7k
        last->parent = op;
6132
16.7k
    cur =op;
6133
16.7k
    last = NULL;
6134
16.7k
      }
6135
41.4k
  } else {
6136
2.50k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6137
2.50k
      if ((last != NULL) && (last != ret))
6138
1.10k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6139
2.50k
      if (ret != NULL)
6140
2.50k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6141
2.50k
      return(NULL);
6142
2.50k
  }
6143
44.3k
  GROW;
6144
44.3k
  SKIP_BLANKS;
6145
44.3k
  GROW;
6146
44.3k
  if (RAW == '(') {
6147
26.9k
      int inputid = ctxt->input->id;
6148
      /* Recurse on second child */
6149
26.9k
      NEXT;
6150
26.9k
      SKIP_BLANKS;
6151
26.9k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6152
26.9k
                                                          depth + 1);
6153
26.9k
            if (last == NULL) {
6154
9.82k
    if (ret != NULL)
6155
9.82k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6156
9.82k
    return(NULL);
6157
9.82k
            }
6158
17.1k
      SKIP_BLANKS;
6159
17.4k
  } else {
6160
17.4k
      elem = xmlParseName(ctxt);
6161
17.4k
      if (elem == NULL) {
6162
407
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6163
407
    if (ret != NULL)
6164
407
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6165
407
    return(NULL);
6166
407
      }
6167
17.0k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6168
17.0k
      if (last == NULL) {
6169
3
    if (ret != NULL)
6170
3
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6171
3
    return(NULL);
6172
3
      }
6173
17.0k
      if (RAW == '?') {
6174
8.67k
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6175
8.67k
    NEXT;
6176
8.67k
      } else if (RAW == '*') {
6177
1.62k
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6178
1.62k
    NEXT;
6179
6.72k
      } else if (RAW == '+') {
6180
240
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6181
240
    NEXT;
6182
6.48k
      } else {
6183
6.48k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6184
6.48k
      }
6185
17.0k
  }
6186
34.1k
  SKIP_BLANKS;
6187
34.1k
  GROW;
6188
34.1k
    }
6189
42.5k
    if ((cur != NULL) && (last != NULL)) {
6190
15.3k
        cur->c2 = last;
6191
15.3k
  if (last != NULL)
6192
15.3k
      last->parent = cur;
6193
15.3k
    }
6194
42.5k
    if (ctxt->input->id != inputchk) {
6195
77
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6196
77
                       "Element content declaration doesn't start and stop in"
6197
77
                       " the same entity\n");
6198
77
    }
6199
42.5k
    NEXT;
6200
42.5k
    if (RAW == '?') {
6201
2.18k
  if (ret != NULL) {
6202
2.18k
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6203
2.18k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6204
1.35k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6205
833
      else
6206
833
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6207
2.18k
  }
6208
2.18k
  NEXT;
6209
40.3k
    } else if (RAW == '*') {
6210
4.36k
  if (ret != NULL) {
6211
4.36k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6212
4.36k
      cur = ret;
6213
      /*
6214
       * Some normalization:
6215
       * (a | b* | c?)* == (a | b | c)*
6216
       */
6217
6.50k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6218
2.13k
    if ((cur->c1 != NULL) &&
6219
2.13k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6220
2.13k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6221
1.17k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6222
2.13k
    if ((cur->c2 != NULL) &&
6223
2.13k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6224
2.13k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6225
1.05k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6226
2.13k
    cur = cur->c2;
6227
2.13k
      }
6228
4.36k
  }
6229
4.36k
  NEXT;
6230
36.0k
    } else if (RAW == '+') {
6231
19.2k
  if (ret != NULL) {
6232
19.2k
      int found = 0;
6233
6234
19.2k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6235
19.2k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6236
4.55k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6237
14.6k
      else
6238
14.6k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6239
      /*
6240
       * Some normalization:
6241
       * (a | b*)+ == (a | b)*
6242
       * (a | b?)+ == (a | b)*
6243
       */
6244
46.5k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6245
27.3k
    if ((cur->c1 != NULL) &&
6246
27.3k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6247
27.3k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6248
7.98k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6249
7.98k
        found = 1;
6250
7.98k
    }
6251
27.3k
    if ((cur->c2 != NULL) &&
6252
27.3k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6253
27.3k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6254
11.0k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6255
11.0k
        found = 1;
6256
11.0k
    }
6257
27.3k
    cur = cur->c2;
6258
27.3k
      }
6259
19.2k
      if (found)
6260
11.7k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6261
19.2k
  }
6262
19.2k
  NEXT;
6263
19.2k
    }
6264
42.5k
    return(ret);
6265
55.8k
}
6266
6267
/**
6268
 * xmlParseElementChildrenContentDecl:
6269
 * @ctxt:  an XML parser context
6270
 * @inputchk:  the input used for the current entity, needed for boundary checks
6271
 *
6272
 * DEPRECATED: Internal function, don't use.
6273
 *
6274
 * Parse the declaration for element children content (production [47]
 * below). This public entry point only forwards to
 * xmlParseElementChildrenContentDeclPriv.
6275
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6276
 *
6277
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6278
 *
6279
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6280
 *
6281
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6282
 *
6283
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6284
 *
6285
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6286
 * TODO Parameter-entity replacement text must be properly nested
6287
 *  with parenthesized groups. That is to say, if either of the
6288
 *  opening or closing parentheses in a choice, seq, or Mixed
6289
 *  construct is contained in the replacement text for a parameter
6290
 *  entity, both must be contained in the same replacement text. For
6291
 *  interoperability, if a parameter-entity reference appears in a
6292
 *  choice, seq, or Mixed construct, its replacement text should not
6293
 *  be empty, and neither the first nor last non-blank character of
6294
 *  the replacement text should be a connector (| or ,).
6295
 *
6296
 * Returns the tree of xmlElementContentPtr describing the element
6297
 *          hierarchy.
6298
 */
6299
xmlElementContentPtr
6300
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6301
    /*
     * Stub left for API/ABI compat: all the work is done by the private
     * implementation.
     * NOTE(review): the literal 1 is presumably the initial nesting depth;
     * confirm against xmlParseElementChildrenContentDeclPriv.
     */
6302
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6303
0
}
6304
6305
/**
6306
 * xmlParseElementContentDecl:
6307
 * @ctxt:  an XML parser context
6308
 * @name:  the name of the element being defined.
6309
 * @result:  the Element Content pointer will be stored here if any
6310
 *
6311
 * DEPRECATED: Internal function, don't use.
6312
 *
6313
 * parse the declaration for an Element content either Mixed or Children,
6314
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6315
 *
6316
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6317
 *
 * @result is reset to NULL on entry. On the non-error path it receives
 * the tree returned by the Mixed or children sub-parser without any
 * further check, so it may still be NULL when a type is returned.
 *
6318
 * Returns the type of element content (XML_ELEMENT_TYPE_MIXED or
 * XML_ELEMENT_TYPE_ELEMENT), or -1 if the current character is not '('
 * or the parser reached the EOF state.
6319
 */
6320
6321
int
6322
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6323
15.0k
                           xmlElementContentPtr *result) {
6324
6325
15.0k
    xmlElementContentPtr tree = NULL;
6326
15.0k
    /* id of the input at entry; handed to the sub-parsers as their
     * entity-boundary reference */
    int inputid = ctxt->input->id;
6327
15.0k
    int res;
6328
6329
15.0k
    /* make sure the caller sees NULL on every early-return path */
    *result = NULL;
6330
6331
15.0k
    if (RAW != '(') {
6332
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6333
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6334
0
  return(-1);
6335
0
    }
6336
15.0k
    /* consume the opening '(' */
    NEXT;
6337
15.0k
    GROW;
6338
15.0k
    if (ctxt->instate == XML_PARSER_EOF)
6339
336
        return(-1);
6340
14.7k
    SKIP_BLANKS;
6341
14.7k
    /* '#PCDATA' right after the parenthesis selects the Mixed production,
     * anything else is parsed as children content */
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6342
4.50k
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6343
4.50k
  res = XML_ELEMENT_TYPE_MIXED;
6344
10.2k
    } else {
6345
10.2k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6346
10.2k
  res = XML_ELEMENT_TYPE_ELEMENT;
6347
10.2k
    }
6348
14.7k
    SKIP_BLANKS;
6349
14.7k
    *result = tree;
6350
14.7k
    return(res);
6351
15.0k
}
6352
6353
/**
6354
 * xmlParseElementDecl:
6355
 * @ctxt:  an XML parser context
6356
 *
6357
 * DEPRECATED: Internal function, don't use.
6358
 *
6359
 * Parse an element declaration. Always consumes '<!'.
6360
 *
6361
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6362
 *
6363
 * [ VC: Unique Element Type Declaration ]
6364
 * No element type may be declared more than once
6365
 *
 * When the declaration is terminated by '>' and SAX is enabled with an
 * elementDecl callback, the parsed content tree is handed to that callback;
 * in every other case where a tree was built it is freed here.
 *
6366
 * Returns the type of the element, or -1 in case of error
6367
 */
6368
int
6369
22.5k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6370
22.5k
    const xmlChar *name;
6371
22.5k
    /* stays -1 unless a content specification is recognized below */
    int ret = -1;
6372
22.5k
    xmlElementContentPtr content  = NULL;
6373
6374
22.5k
    if ((CUR != '<') || (NXT(1) != '!'))
6375
0
        return(ret);
6376
22.5k
    SKIP(2);
6377
6378
    /* GROW; done in the caller */
6379
22.5k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6380
21.3k
  /* input the declaration started in, compared again at the closing '>' */
  int inputid = ctxt->input->id;
6381
6382
21.3k
  SKIP(7);
6383
21.3k
  if (SKIP_BLANKS == 0) {
6384
239
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6385
239
               "Space required after 'ELEMENT'\n");
6386
239
      return(-1);
6387
239
  }
6388
21.1k
        name = xmlParseName(ctxt);
6389
21.1k
  if (name == NULL) {
6390
247
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6391
247
         "xmlParseElementDecl: no name for Element\n");
6392
247
      return(-1);
6393
247
  }
6394
20.8k
  /* a missing blank after the name is reported but, unlike the two
   * errors above, does not abort the declaration */
  if (SKIP_BLANKS == 0) {
6395
4.34k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6396
4.34k
         "Space required after the element name\n");
6397
4.34k
  }
6398
20.8k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6399
2.42k
      SKIP(5);
6400
      /*
6401
       * Element must always be empty.
6402
       */
6403
2.42k
      ret = XML_ELEMENT_TYPE_EMPTY;
6404
18.4k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6405
18.4k
             (NXT(2) == 'Y')) {
6406
1.34k
      SKIP(3);
6407
      /*
6408
       * Element is a generic container.
6409
       */
6410
1.34k
      ret = XML_ELEMENT_TYPE_ANY;
6411
17.0k
  } else if (RAW == '(') {
6412
15.0k
      /* Mixed or children content: ret receives the type (or -1) and
       * content the parsed tree, which may be NULL */
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6413
15.0k
  } else {
6414
      /*
6415
       * [ WFC: PEs in Internal Subset ] error handling.
6416
       */
6417
2.03k
      if ((RAW == '%') && (ctxt->external == 0) &&
6418
2.03k
          (ctxt->inputNr == 1)) {
6419
216
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6420
216
    "PEReference: forbidden within markup decl in internal subset\n");
6421
1.81k
      } else {
6422
1.81k
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6423
1.81k
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6424
1.81k
            }
6425
2.03k
      return(-1);
6426
2.03k
  }
6427
6428
18.8k
  SKIP_BLANKS;
6429
6430
18.8k
  if (RAW != '>') {
6431
8.57k
      /* unterminated declaration: report it and drop the content tree;
       * ret is left untouched and still returned at the end */
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6432
8.57k
      if (content != NULL) {
6433
524
    xmlFreeDocElementContent(ctxt->myDoc, content);
6434
524
      }
6435
10.2k
  } else {
6436
10.2k
      if (inputid != ctxt->input->id) {
6437
580
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6438
580
                               "Element declaration doesn't start and stop in"
6439
580
                               " the same entity\n");
6440
580
      }
6441
6442
10.2k
      /* consume the closing '>' */
      NEXT;
6443
10.2k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6444
10.2k
    (ctxt->sax->elementDecl != NULL)) {
6445
5.84k
    /* clear parent before the callback so that the check after it
     * can tell whether the callback plugged the tree somewhere */
    if (content != NULL)
6446
3.16k
        content->parent = NULL;
6447
5.84k
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6448
5.84k
                           content);
6449
5.84k
    if ((content != NULL) && (content->parent == NULL)) {
6450
        /*
6451
         * this is a trick: if xmlAddElementDecl is called,
6452
         * instead of copying the full tree it is plugged directly
6453
         * if called from the parser. Avoid duplicating the
6454
         * interfaces or change the API/ABI
6455
         */
6456
1.04k
        xmlFreeDocElementContent(ctxt->myDoc, content);
6457
1.04k
    }
6458
5.84k
      } else if (content != NULL) {
6459
2.76k
    /* no callback will receive the tree, so release it here */
    xmlFreeDocElementContent(ctxt->myDoc, content);
6460
2.76k
      }
6461
10.2k
  }
6462
18.8k
    }
6463
20.0k
    /* if the 'ELEMENT' keyword did not match, ret is still -1 here */
    return(ret);
6464
22.5k
}
6465
6466
/**
6467
 * xmlParseConditionalSections
6468
 * @ctxt:  an XML parser context
6469
 *
6470
 * Parse a conditional section. Always consumes '<!['.
6471
 *
 * Nested INCLUDE sections are handled in the loop of this function: the
 * input id seen at each opening '<![' is pushed on a heap-allocated array
 * and compared again at the matching ']]>'. The content of an IGNORE
 * section is skipped with a simple nesting counter. The loop ends when the
 * parser enters the EOF state, when every INCLUDE section has been closed,
 * or on the first error.
 *
6472
 * [61] conditionalSect ::= includeSect | ignoreSect
6473
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6474
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6475
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6476
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6477
 */
6478
6479
static void
6480
3.95k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6481
3.95k
    /* stack of input ids, one entry per currently open INCLUDE section */
    int *inputIds = NULL;
6482
3.95k
    /* number of entries allocated in inputIds */
    size_t inputIdsSize = 0;
6483
3.95k
    /* number of currently open INCLUDE sections */
    size_t depth = 0;
6484
6485
14.2k
    while (ctxt->instate != XML_PARSER_EOF) {
6486
14.1k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6487
5.82k
            /* input the section starts in, for the boundary checks below */
            int id = ctxt->input->id;
6488
6489
5.82k
            SKIP(3);
6490
5.82k
            SKIP_BLANKS;
6491
6492
5.82k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6493
2.92k
                SKIP(7);
6494
2.92k
                SKIP_BLANKS;
6495
2.92k
                if (RAW != '[') {
6496
251
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6497
251
                    xmlHaltParser(ctxt);
6498
251
                    goto error;
6499
251
                }
6500
2.67k
                /* boundary violation is reported but parsing goes on */
                if (ctxt->input->id != id) {
6501
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6502
0
                                   "All markup of the conditional section is"
6503
0
                                   " not in the same entity\n");
6504
0
                }
6505
2.67k
                NEXT;
6506
6507
2.67k
                /* grow the id stack when full: 4 entries at first, then
                 * twice the previous size */
                if (inputIdsSize <= depth) {
6508
1.34k
                    int *tmp;
6509
6510
1.34k
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6511
1.34k
                    tmp = (int *) xmlRealloc(inputIds,
6512
1.34k
                            inputIdsSize * sizeof(int));
6513
1.34k
                    if (tmp == NULL) {
6514
3
                        /* inputIds still holds the old buffer, which is
                         * released at the error label */
                        xmlErrMemory(ctxt, NULL);
6515
3
                        goto error;
6516
3
                    }
6517
1.33k
                    inputIds = tmp;
6518
1.33k
                }
6519
2.67k
                inputIds[depth] = id;
6520
2.67k
                depth++;
6521
2.89k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6522
850
                /* nesting level of '<![' inside the ignored content */
                size_t ignoreDepth = 0;
6523
6524
850
                SKIP(6);
6525
850
                SKIP_BLANKS;
6526
850
                if (RAW != '[') {
6527
4
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6528
4
                    xmlHaltParser(ctxt);
6529
4
                    goto error;
6530
4
                }
6531
846
                if (ctxt->input->id != id) {
6532
33
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6533
33
                                   "All markup of the conditional section is"
6534
33
                                   " not in the same entity\n");
6535
33
                }
6536
846
                NEXT;
6537
6538
26.0k
                /* skip the ignored content: stop on a zero character or on
                 * the ']]>' that closes this section, which is left
                 * unconsumed for the checks after the loop */
                while (RAW != 0) {
6539
25.4k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6540
742
                        SKIP(3);
6541
742
                        ignoreDepth++;
6542
                        /* Check for integer overflow */
6543
742
                        if (ignoreDepth == 0) {
6544
0
                            xmlErrMemory(ctxt, NULL);
6545
0
                            goto error;
6546
0
                        }
6547
24.6k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6548
24.6k
                               (NXT(2) == '>')) {
6549
689
                        if (ignoreDepth == 0)
6550
241
                            break;
6551
448
                        SKIP(3);
6552
448
                        ignoreDepth--;
6553
24.0k
                    } else {
6554
24.0k
                        NEXT;
6555
24.0k
                    }
6556
25.4k
                }
6557
6558
846
    /* the loop above ended without finding the closing ']]>' */
    if (RAW == 0) {
6559
605
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6560
605
                    goto error;
6561
605
    }
6562
241
                if (ctxt->input->id != id) {
6563
28
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6564
28
                                   "All markup of the conditional section is"
6565
28
                                   " not in the same entity\n");
6566
28
                }
6567
241
                /* consume the closing ']]>' */
                SKIP(3);
6568
2.04k
            } else {
6569
2.04k
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6570
2.04k
                xmlHaltParser(ctxt);
6571
2.04k
                goto error;
6572
2.04k
            }
6573
8.35k
        } else if ((depth > 0) &&
6574
8.35k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6575
114
            /* end of the innermost open INCLUDE section: pop its input id
             * and compare it with the current input */
            depth--;
6576
114
            if (ctxt->input->id != inputIds[depth]) {
6577
3
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6578
3
                               "All markup of the conditional section is not"
6579
3
                               " in the same entity\n");
6580
3
            }
6581
114
            SKIP(3);
6582
8.23k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6583
7.60k
            xmlParseMarkupDecl(ctxt);
6584
7.60k
        } else {
6585
629
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6586
629
            xmlHaltParser(ctxt);
6587
629
            goto error;
6588
629
        }
6589
6590
10.6k
        /* no INCLUDE section left open: the conditional section is done */
        if (depth == 0)
6591
355
            break;
6592
6593
10.2k
        SKIP_BLANKS;
6594
10.2k
        SHRINK;
6595
10.2k
        GROW;
6596
10.2k
    }
6597
6598
3.95k
/* also reached on normal completion: releases the id stack */
error:
6599
3.95k
    xmlFree(inputIds);
6600
3.95k
}
6601
6602
/**
6603
 * xmlParseMarkupDecl:
6604
 * @ctxt:  an XML parser context
6605
 *
6606
 * DEPRECATED: Internal function, don't use.
6607
 *
6608
 * Parse markup declarations. Always consumes '<!' or '<?'.
6609
 *
 * The declaration parser is selected from the one or two characters that
 * follow '<!'. Unless the parser is in the EOF state afterwards, the
 * parser state is set to XML_PARSER_DTD before returning.
 *
6610
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6611
 *                     NotationDecl | PI | Comment
6612
 *
6613
 * [ VC: Proper Declaration/PE Nesting ]
6614
 * Parameter-entity replacement text must be properly nested with
6615
 * markup declarations. That is to say, if either the first character
6616
 * or the last character of a markup declaration (markupdecl above) is
6617
 * contained in the replacement text for a parameter-entity reference,
6618
 * both must be contained in the same replacement text.
6619
 *
6620
 * [ WFC: PEs in Internal Subset ]
6621
 * In the internal DTD subset, parameter-entity references can occur
6622
 * only where markup declarations can occur, not within markup declarations.
6623
 * (This does not apply to references that occur in external parameter
6624
 * entities or to the external subset.)
6625
 */
6626
void
6627
227k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6628
227k
    GROW;
6629
227k
    if (CUR == '<') {
6630
227k
        if (NXT(1) == '!') {
6631
204k
      /*
       * Only NXT(2), plus NXT(3) for 'E', is looked at here.
       * NOTE(review): the selected parser is presumably responsible for
       * checking the full keyword, as xmlParseElementDecl does for
       * 'ELEMENT'; confirm for the other declaration parsers.
       */
      switch (NXT(2)) {
6632
116k
          case 'E':
6633
116k
        if (NXT(3) == 'L')
6634
22.5k
      xmlParseElementDecl(ctxt);
6635
93.6k
        else if (NXT(3) == 'N')
6636
93.2k
      xmlParseEntityDecl(ctxt);
6637
443
                    else
6638
443
                        SKIP(2);
6639
116k
        break;
6640
56.6k
          case 'A':
6641
56.6k
        xmlParseAttributeListDecl(ctxt);
6642
56.6k
        break;
6643
13.0k
          case 'N':
6644
13.0k
        xmlParseNotationDecl(ctxt);
6645
13.0k
        break;
6646
14.4k
          case '-':
6647
14.4k
        xmlParseComment(ctxt);
6648
14.4k
        break;
6649
3.90k
    default:
6650
        /* there is an error but it will be detected later */
6651
3.90k
                    SKIP(2);
6652
3.90k
        break;
6653
204k
      }
6654
204k
  } else if (NXT(1) == '?') {
6655
23.2k
      xmlParsePI(ctxt);
6656
23.2k
  }
6657
227k
    }
6658
6659
    /*
6660
     * detect requirement to exit there and act accordingly
6661
     * and avoid having instate overridden later on
6662
     */
6663
227k
    if (ctxt->instate == XML_PARSER_EOF)
6664
10.5k
        return;
6665
6666
217k
    ctxt->instate = XML_PARSER_DTD;
6667
217k
}
6668
6669
/**
6670
 * xmlParseTextDecl:
6671
 * @ctxt:  an XML parser context
6672
 *
6673
 * DEPRECATED: Internal function, don't use.
6674
 *
6675
 * parse an XML declaration header for external entities
6676
 *
 * The parser state is temporarily switched to XML_PARSER_START and put
 * back before returning, except when the parser is found in the EOF state
 * after the encoding declaration. ctxt->input->version is set to the
 * parsed version, or to a copy of XML_DEFAULT_VERSION when no VersionInfo
 * is present. Errors are reported through @ctxt; nothing is returned.
 *
6677
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6678
 */
6679
6680
void
6681
10.8k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6682
10.8k
    xmlChar *version;
6683
10.8k
    const xmlChar *encoding;
6684
10.8k
    int oldstate;
6685
6686
    /*
6687
     * We know that '<?xml' is here.
6688
     */
6689
10.8k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6690
10.3k
  SKIP(5);
6691
10.3k
    } else {
6692
461
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6693
461
  return;
6694
461
    }
6695
6696
    /* Avoid expansion of parameter entities when skipping blanks. */
6697
10.3k
    oldstate = ctxt->instate;
6698
10.3k
    ctxt->instate = XML_PARSER_START;
6699
6700
10.3k
    /* NOTE(review): a blank at NXT(5) was required above, so this branch
     * looks unreachable; confirm before relying on it */
    if (SKIP_BLANKS == 0) {
6701
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6702
0
           "Space needed after '<?xml'\n");
6703
0
    }
6704
6705
    /*
6706
     * We may have the VersionInfo here.
6707
     */
6708
10.3k
    version = xmlParseVersionInfo(ctxt);
6709
10.3k
    if (version == NULL)
6710
5.96k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6711
4.37k
    else {
6712
4.37k
  if (SKIP_BLANKS == 0) {
6713
589
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6714
589
               "Space needed here\n");
6715
589
  }
6716
4.37k
    }
6717
10.3k
    /* NOTE(review): any value already stored in input->version is
     * overwritten without being freed here; confirm it is always NULL
     * at this point */
    ctxt->input->version = version;
6718
6719
    /*
6720
     * We must have the encoding declaration
6721
     */
6722
10.3k
    encoding = xmlParseEncodingDecl(ctxt);
6723
10.3k
    /* the saved state is deliberately not restored on this path */
    if (ctxt->instate == XML_PARSER_EOF)
6724
45
        return;
6725
10.2k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6726
  /*
6727
   * The XML REC instructs us to stop parsing right here
6728
   */
6729
370
        ctxt->instate = oldstate;
6730
370
        return;
6731
370
    }
6732
9.92k
    /* a missing encoding is only reported when no other error has been
     * recorded in ctxt->errNo */
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6733
266
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6734
266
           "Missing encoding in text declaration\n");
6735
266
    }
6736
6737
9.92k
    SKIP_BLANKS;
6738
9.92k
    if ((RAW == '?') && (NXT(1) == '>')) {
6739
2.57k
        SKIP(2);
6740
7.35k
    } else if (RAW == '>') {
6741
        /* Deprecated old WD ... */
6742
660
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6743
660
  NEXT;
6744
6.69k
    } else {
6745
6.69k
        int c;
6746
6747
6.69k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6748
34.2M
        /* error recovery: consume characters up to and including the
         * next '>', or until a zero character is read */
        while ((c = CUR) != 0) {
6749
34.2M
            NEXT;
6750
34.2M
            if (c == '>')
6751
2.11k
                break;
6752
34.2M
        }
6753
6.69k
    }
6754
6755
9.92k
    ctxt->instate = oldstate;
6756
9.92k
}
6757
6758
/**
6759
 * xmlParseExternalSubset:
6760
 * @ctxt:  an XML parser context
6761
 * @ExternalID: the external identifier
6762
 * @SystemID: the system identifier (or URL)
6763
 *
6764
 * parse Markup declarations from an external subset
6765
 *
 * @ExternalID and @SystemID are only used here when an internal subset
 * has to be created for the document. The parser is halted when an
 * unsupported encoding is reported by the text declaration or when
 * something other than a markup declaration or conditional section is
 * found.
 *
6766
 * [30] extSubset ::= textDecl? extSubsetDecl
6767
 *
6768
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6769
 */
6770
void
6771
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6772
6.82k
                       const xmlChar *SystemID) {
6773
6.82k
    xmlDetectSAX2(ctxt);
6774
6.82k
    GROW;
6775
6776
6.82k
    /* no encoding known yet and at least 4 bytes available: try to
     * detect the encoding from the first 4 bytes of the input */
    if ((ctxt->encoding == NULL) &&
6777
6.82k
        (ctxt->input->end - ctxt->input->cur >= 4)) {
6778
6.60k
        xmlChar start[4];
6779
6.60k
  xmlCharEncoding enc;
6780
6781
6.60k
  start[0] = RAW;
6782
6.60k
  start[1] = NXT(1);
6783
6.60k
  start[2] = NXT(2);
6784
6.60k
  start[3] = NXT(3);
6785
6.60k
  enc = xmlDetectCharEncoding(start, 4);
6786
6.60k
  if (enc != XML_CHAR_ENCODING_NONE)
6787
2.15k
      xmlSwitchEncoding(ctxt, enc);
6788
6.60k
    }
6789
6790
6.82k
    /* optional text declaration */
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6791
1.91k
  xmlParseTextDecl(ctxt);
6792
1.91k
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6793
      /*
6794
       * The XML REC instructs us to stop parsing right here
6795
       */
6796
243
      xmlHaltParser(ctxt);
6797
243
      return;
6798
243
  }
6799
1.91k
    }
6800
6.58k
    /* make sure a document and an internal subset exist before the
     * declarations are parsed */
    if (ctxt->myDoc == NULL) {
6801
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6802
0
  if (ctxt->myDoc == NULL) {
6803
0
      xmlErrMemory(ctxt, "New Doc failed");
6804
0
      return;
6805
0
  }
6806
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
6807
0
    }
6808
6.58k
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6809
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6810
6811
6.58k
    ctxt->instate = XML_PARSER_DTD;
6812
6.58k
    ctxt->external = 1;
6813
6.58k
    SKIP_BLANKS;
6814
19.9k
    /* main loop: conditional sections and markup declarations until a
     * zero character is read or the parser enters the EOF state */
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
6815
15.6k
  GROW;
6816
15.6k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6817
2.97k
            xmlParseConditionalSections(ctxt);
6818
12.6k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6819
10.4k
            xmlParseMarkupDecl(ctxt);
6820
10.4k
        } else {
6821
2.27k
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6822
2.27k
            xmlHaltParser(ctxt);
6823
2.27k
            return;
6824
2.27k
        }
6825
13.3k
        SKIP_BLANKS;
6826
13.3k
        SHRINK;
6827
13.3k
    }
6828
6829
4.30k
    /* the loop can only leave a non-zero character behind when it
     * stopped because of the EOF state */
    if (RAW != 0) {
6830
9
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6831
9
    }
6832
6833
4.30k
}
6834
6835
/**
6836
 * xmlParseReference:
6837
 * @ctxt:  an XML parser context
6838
 *
6839
 * DEPRECATED: Internal function, don't use.
6840
 *
6841
 * parse and handle entity references in content, depending on the SAX
6842
 * interface, this may end-up in a call to character() if this is a
6843
 * CharRef, a predefined entity, if there is no reference() callback.
6844
 * or if the parser was asked to switch to that mode.
6845
 *
6846
 * Always consumes '&'.
6847
 *
6848
 * [67] Reference ::= EntityRef | CharRef
6849
 */
6850
void
6851
201k
xmlParseReference(xmlParserCtxtPtr ctxt) {
6852
201k
    xmlEntityPtr ent;
6853
201k
    xmlChar *val;
6854
201k
    int was_checked;
6855
201k
    xmlNodePtr list = NULL;
6856
201k
    xmlParserErrors ret = XML_ERR_OK;
6857
6858
6859
201k
    if (RAW != '&')
6860
0
        return;
6861
6862
    /*
6863
     * Simple case of a CharRef
6864
     */
6865
201k
    if (NXT(1) == '#') {
6866
92.2k
  int i = 0;
6867
92.2k
  xmlChar out[16];
6868
92.2k
  int hex = NXT(2);
6869
92.2k
  int value = xmlParseCharRef(ctxt);
6870
6871
92.2k
  if (value == 0)
6872
15.2k
      return;
6873
77.0k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6874
      /*
6875
       * So we are using non-UTF-8 buffers
6876
       * Check that the char fit on 8bits, if not
6877
       * generate a CharRef.
6878
       */
6879
30.1k
      if (value <= 0xFF) {
6880
16.2k
    out[0] = value;
6881
16.2k
    out[1] = 0;
6882
16.2k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6883
16.2k
        (!ctxt->disableSAX))
6884
3.28k
        ctxt->sax->characters(ctxt->userData, out, 1);
6885
16.2k
      } else {
6886
13.8k
    if ((hex == 'x') || (hex == 'X'))
6887
11.5k
        snprintf((char *)out, sizeof(out), "#x%X", value);
6888
2.30k
    else
6889
2.30k
        snprintf((char *)out, sizeof(out), "#%d", value);
6890
13.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6891
13.8k
        (!ctxt->disableSAX))
6892
633
        ctxt->sax->reference(ctxt->userData, out);
6893
13.8k
      }
6894
46.8k
  } else {
6895
      /*
6896
       * Just encode the value in UTF-8
6897
       */
6898
46.8k
      COPY_BUF(0 ,out, i, value);
6899
46.8k
      out[i] = 0;
6900
46.8k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6901
46.8k
    (!ctxt->disableSAX))
6902
22.7k
    ctxt->sax->characters(ctxt->userData, out, i);
6903
46.8k
  }
6904
77.0k
  return;
6905
92.2k
    }
6906
6907
    /*
6908
     * We are seeing an entity reference
6909
     */
6910
109k
    ent = xmlParseEntityRef(ctxt);
6911
109k
    if (ent == NULL) return;
6912
59.2k
    if (!ctxt->wellFormed)
6913
21.1k
  return;
6914
38.0k
    was_checked = ent->flags & XML_ENT_PARSED;
6915
6916
    /* special case of predefined entities */
6917
38.0k
    if ((ent->name == NULL) ||
6918
38.0k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6919
13.2k
  val = ent->content;
6920
13.2k
  if (val == NULL) return;
6921
  /*
6922
   * inline the entity.
6923
   */
6924
13.2k
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6925
13.2k
      (!ctxt->disableSAX))
6926
13.2k
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6927
13.2k
  return;
6928
13.2k
    }
6929
6930
    /*
6931
     * The first reference to the entity trigger a parsing phase
6932
     * where the ent->children is filled with the result from
6933
     * the parsing.
6934
     * Note: external parsed entities will not be loaded, it is not
6935
     * required for a non-validating parser, unless the parsing option
6936
     * of validating, or substituting entities were given. Doing so is
6937
     * far more secure as the parser will only process data coming from
6938
     * the document entity by default.
6939
     */
6940
24.7k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
6941
24.7k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
6942
8.38k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
6943
8.38k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
6944
6945
  /*
6946
   * This is a bit hackish but this seems the best
6947
   * way to make sure both SAX and DOM entity support
6948
   * behaves okay.
6949
   */
6950
8.38k
  void *user_data;
6951
8.38k
  if (ctxt->userData == ctxt)
6952
8.38k
      user_data = NULL;
6953
0
  else
6954
0
      user_data = ctxt->userData;
6955
6956
        /* Avoid overflow as much as possible */
6957
8.38k
        ctxt->sizeentcopy = 0;
6958
6959
8.38k
        if (ent->flags & XML_ENT_EXPANDING) {
6960
294
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6961
294
            xmlHaltParser(ctxt);
6962
294
            return;
6963
294
        }
6964
6965
8.08k
        ent->flags |= XML_ENT_EXPANDING;
6966
6967
  /*
6968
   * Check that this entity is well formed
6969
   * 4.3.2: An internal general parsed entity is well-formed
6970
   * if its replacement text matches the production labeled
6971
   * content.
6972
   */
6973
8.08k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6974
2.96k
      ctxt->depth++;
6975
2.96k
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6976
2.96k
                                                user_data, &list);
6977
2.96k
      ctxt->depth--;
6978
6979
5.12k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6980
5.12k
      ctxt->depth++;
6981
5.12k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6982
5.12k
                                     user_data, ctxt->depth, ent->URI,
6983
5.12k
             ent->ExternalID, &list);
6984
5.12k
      ctxt->depth--;
6985
5.12k
  } else {
6986
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
6987
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6988
0
       "invalid entity type found\n", NULL);
6989
0
  }
6990
6991
8.08k
        ent->flags &= ~XML_ENT_EXPANDING;
6992
8.08k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
6993
8.08k
        ent->expandedSize = ctxt->sizeentcopy;
6994
8.08k
  if (ret == XML_ERR_ENTITY_LOOP) {
6995
591
            xmlHaltParser(ctxt);
6996
591
      xmlFreeNodeList(list);
6997
591
      return;
6998
591
  }
6999
7.49k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7000
0
      xmlFreeNodeList(list);
7001
0
      return;
7002
0
  }
7003
7004
7.49k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7005
1.95k
            ent->children = list;
7006
            /*
7007
             * Prune it directly in the generated document
7008
             * except for single text nodes.
7009
             */
7010
1.95k
            if ((ctxt->replaceEntities == 0) ||
7011
1.95k
                (ctxt->parseMode == XML_PARSE_READER) ||
7012
1.95k
                ((list->type == XML_TEXT_NODE) &&
7013
1.95k
                 (list->next == NULL))) {
7014
601
                ent->owner = 1;
7015
1.20k
                while (list != NULL) {
7016
602
                    list->parent = (xmlNodePtr) ent;
7017
602
                    if (list->doc != ent->doc)
7018
0
                        xmlSetTreeDoc(list, ent->doc);
7019
602
                    if (list->next == NULL)
7020
601
                        ent->last = list;
7021
602
                    list = list->next;
7022
602
                }
7023
601
                list = NULL;
7024
1.35k
            } else {
7025
1.35k
                ent->owner = 0;
7026
8.79k
                while (list != NULL) {
7027
7.44k
                    list->parent = (xmlNodePtr) ctxt->node;
7028
7.44k
                    list->doc = ctxt->myDoc;
7029
7.44k
                    if (list->next == NULL)
7030
1.35k
                        ent->last = list;
7031
7.44k
                    list = list->next;
7032
7.44k
                }
7033
1.35k
                list = ent->children;
7034
#ifdef LIBXML_LEGACY_ENABLED
7035
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7036
                    xmlAddEntityReference(ent, list, NULL);
7037
#endif /* LIBXML_LEGACY_ENABLED */
7038
1.35k
            }
7039
5.54k
  } else if ((ret != XML_ERR_OK) &&
7040
5.54k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7041
4.87k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7042
4.87k
         "Entity '%s' failed to parse\n", ent->name);
7043
4.87k
            if (ent->content != NULL)
7044
889
                ent->content[0] = 0;
7045
4.87k
  } else if (list != NULL) {
7046
0
      xmlFreeNodeList(list);
7047
0
      list = NULL;
7048
0
  }
7049
7050
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7051
7.49k
        was_checked = 0;
7052
7.49k
    }
7053
7054
    /*
7055
     * Now that the entity content has been gathered
7056
     * provide it to the application, this can take different forms based
7057
     * on the parsing modes.
7058
     */
7059
23.8k
    if (ent->children == NULL) {
7060
  /*
7061
   * Probably running in SAX mode and the callbacks don't
7062
   * build the entity content. So unless we already went
7063
   * though parsing for first checking go though the entity
7064
   * content to generate callbacks associated to the entity
7065
   */
7066
11.9k
  if (was_checked != 0) {
7067
6.39k
      void *user_data;
7068
      /*
7069
       * This is a bit hackish but this seems the best
7070
       * way to make sure both SAX and DOM entity support
7071
       * behaves okay.
7072
       */
7073
6.39k
      if (ctxt->userData == ctxt)
7074
6.39k
    user_data = NULL;
7075
0
      else
7076
0
    user_data = ctxt->userData;
7077
7078
6.39k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7079
31
    ctxt->depth++;
7080
31
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7081
31
           ent->content, user_data, NULL);
7082
31
    ctxt->depth--;
7083
6.36k
      } else if (ent->etype ==
7084
6.36k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7085
6.36k
          unsigned long oldsizeentities = ctxt->sizeentities;
7086
7087
6.36k
    ctxt->depth++;
7088
6.36k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7089
6.36k
         ctxt->sax, user_data, ctxt->depth,
7090
6.36k
         ent->URI, ent->ExternalID, NULL);
7091
6.36k
    ctxt->depth--;
7092
7093
                /* Undo the change to sizeentities */
7094
6.36k
                ctxt->sizeentities = oldsizeentities;
7095
6.36k
      } else {
7096
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7097
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7098
0
           "invalid entity type found\n", NULL);
7099
0
      }
7100
6.39k
      if (ret == XML_ERR_ENTITY_LOOP) {
7101
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7102
0
    return;
7103
0
      }
7104
6.39k
            if (xmlParserEntityCheck(ctxt, 0))
7105
0
                return;
7106
6.39k
  }
7107
11.9k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7108
11.9k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7109
      /*
7110
       * Entity reference callback comes second, it's somewhat
7111
       * superfluous but a compatibility to historical behaviour
7112
       */
7113
3
      ctxt->sax->reference(ctxt->userData, ent->name);
7114
3
  }
7115
11.9k
  return;
7116
11.9k
    }
7117
7118
    /*
7119
     * We also check for amplification if entities aren't substituted.
7120
     * They might be expanded later.
7121
     */
7122
11.9k
    if ((was_checked != 0) &&
7123
11.9k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7124
28
        return;
7125
7126
    /*
7127
     * If we didn't get any children for the entity being built
7128
     */
7129
11.8k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7130
11.8k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7131
  /*
7132
   * Create a node.
7133
   */
7134
1
  ctxt->sax->reference(ctxt->userData, ent->name);
7135
1
  return;
7136
1
    }
7137
7138
11.8k
    if (ctxt->replaceEntities)  {
7139
  /*
7140
   * There is a problem on the handling of _private for entities
7141
   * (bug 155816): Should we copy the content of the field from
7142
   * the entity (possibly overwriting some value set by the user
7143
   * when a copy is created), should we leave it alone, or should
7144
   * we try to take care of different situations?  The problem
7145
   * is exacerbated by the usage of this field by the xmlReader.
7146
   * To fix this bug, we look at _private on the created node
7147
   * and, if it's NULL, we copy in whatever was in the entity.
7148
   * If it's not NULL we leave it alone.  This is somewhat of a
7149
   * hack - maybe we should have further tests to determine
7150
   * what to do.
7151
   */
7152
11.8k
  if (ctxt->node != NULL) {
7153
      /*
7154
       * Seems we are generating the DOM content, do
7155
       * a simple tree copy for all references except the first
7156
       * In the first occurrence list contains the replacement.
7157
       */
7158
11.8k
      if (((list == NULL) && (ent->owner == 0)) ||
7159
11.8k
    (ctxt->parseMode == XML_PARSE_READER)) {
7160
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7161
7162
    /*
7163
     * when operating on a reader, the entities definitions
7164
     * are always owning the entities subtree.
7165
    if (ctxt->parseMode == XML_PARSE_READER)
7166
        ent->owner = 1;
7167
     */
7168
7169
0
    cur = ent->children;
7170
0
    while (cur != NULL) {
7171
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7172
0
        if (nw != NULL) {
7173
0
      if (nw->_private == NULL)
7174
0
          nw->_private = cur->_private;
7175
0
      if (firstChild == NULL){
7176
0
          firstChild = nw;
7177
0
      }
7178
0
      nw = xmlAddChild(ctxt->node, nw);
7179
0
        }
7180
0
        if (cur == ent->last) {
7181
      /*
7182
       * needed to detect some strange empty
7183
       * node cases in the reader tests
7184
       */
7185
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7186
0
          (nw != NULL) &&
7187
0
          (nw->type == XML_ELEMENT_NODE) &&
7188
0
          (nw->children == NULL))
7189
0
          nw->extra = 1;
7190
7191
0
      break;
7192
0
        }
7193
0
        cur = cur->next;
7194
0
    }
7195
#ifdef LIBXML_LEGACY_ENABLED
7196
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7197
      xmlAddEntityReference(ent, firstChild, nw);
7198
#endif /* LIBXML_LEGACY_ENABLED */
7199
11.8k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7200
11.8k
    xmlNodePtr nw = NULL, cur, next, last,
7201
11.8k
         firstChild = NULL;
7202
7203
    /*
7204
     * Copy the entity child list and make it the new
7205
     * entity child list. The goal is to make sure any
7206
     * ID or REF referenced will be the one from the
7207
     * document content and not the entity copy.
7208
     */
7209
11.8k
    cur = ent->children;
7210
11.8k
    ent->children = NULL;
7211
11.8k
    last = ent->last;
7212
11.8k
    ent->last = NULL;
7213
33.0k
    while (cur != NULL) {
7214
33.0k
        next = cur->next;
7215
33.0k
        cur->next = NULL;
7216
33.0k
        cur->parent = NULL;
7217
33.0k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7218
33.0k
        if (nw != NULL) {
7219
32.8k
      if (nw->_private == NULL)
7220
32.8k
          nw->_private = cur->_private;
7221
32.8k
      if (firstChild == NULL){
7222
11.8k
          firstChild = cur;
7223
11.8k
      }
7224
32.8k
      xmlAddChild((xmlNodePtr) ent, nw);
7225
32.8k
        }
7226
33.0k
        xmlAddChild(ctxt->node, cur);
7227
33.0k
        if (cur == last)
7228
11.8k
      break;
7229
21.1k
        cur = next;
7230
21.1k
    }
7231
11.8k
    if (ent->owner == 0)
7232
1.35k
        ent->owner = 1;
7233
#ifdef LIBXML_LEGACY_ENABLED
7234
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7235
      xmlAddEntityReference(ent, firstChild, nw);
7236
#endif /* LIBXML_LEGACY_ENABLED */
7237
11.8k
      } else {
7238
0
    const xmlChar *nbktext;
7239
7240
    /*
7241
     * the name change is to avoid coalescing of the
7242
     * node with a possible previous text one which
7243
     * would make ent->children a dangling pointer
7244
     */
7245
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7246
0
          -1);
7247
0
    if (ent->children->type == XML_TEXT_NODE)
7248
0
        ent->children->name = nbktext;
7249
0
    if ((ent->last != ent->children) &&
7250
0
        (ent->last->type == XML_TEXT_NODE))
7251
0
        ent->last->name = nbktext;
7252
0
    xmlAddChildList(ctxt->node, ent->children);
7253
0
      }
7254
7255
      /*
7256
       * This is to avoid a nasty side effect, see
7257
       * characters() in SAX.c
7258
       */
7259
11.8k
      ctxt->nodemem = 0;
7260
11.8k
      ctxt->nodelen = 0;
7261
11.8k
      return;
7262
11.8k
  }
7263
11.8k
    }
7264
11.8k
}
7265
7266
/**
7267
 * xmlParseEntityRef:
7268
 * @ctxt:  an XML parser context
7269
 *
7270
 * DEPRECATED: Internal function, don't use.
7271
 *
7272
 * Parse an entity reference. Always consumes '&'.
7273
 *
7274
 * [68] EntityRef ::= '&' Name ';'
7275
 *
7276
 * [ WFC: Entity Declared ]
7277
 * In a document without any DTD, a document with only an internal DTD
7278
 * subset which contains no parameter entity references, or a document
7279
 * with "standalone='yes'", the Name given in the entity reference
7280
 * must match that in an entity declaration, except that well-formed
7281
 * documents need not declare any of the following entities: amp, lt,
7282
 * gt, apos, quot.  The declaration of a parameter entity must precede
7283
 * any reference to it.  Similarly, the declaration of a general entity
7284
 * must precede any reference to it which appears in a default value in an
7285
 * attribute-list declaration. Note that if entities are declared in the
7286
 * external subset or in external parameter entities, a non-validating
7287
 * processor is not obligated to read and process their declarations;
7288
 * for such documents, the rule that an entity must be declared is a
7289
 * well-formedness constraint only if standalone='yes'.
7290
 *
7291
 * [ WFC: Parsed Entity ]
7292
 * An entity reference must not contain the name of an unparsed entity
7293
 *
7294
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7295
 */
7296
xmlEntityPtr
7297
223k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7298
223k
    const xmlChar *name;
7299
223k
    xmlEntityPtr ent = NULL;
7300
7301
223k
    GROW;
7302
223k
    if (ctxt->instate == XML_PARSER_EOF)
7303
220
        return(NULL);
7304
7305
222k
    if (RAW != '&')
7306
0
        return(NULL);
7307
222k
    /* Consume the '&'; every return below happens after this point. */
    NEXT;
7308
222k
    name = xmlParseName(ctxt);
7309
222k
    if (name == NULL) {
7310
63.4k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7311
63.4k
           "xmlParseEntityRef: no name\n");
7312
63.4k
        return(NULL);
7313
63.4k
    }
7314
159k
    if (RAW != ';') {
7315
19.2k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7316
19.2k
  return(NULL);
7317
19.2k
    }
7318
140k
    /* Consume the terminating ';'. */
    NEXT;
7319
7320
    /*
7321
     * Predefined entities override any extra definition
7322
     */
7323
140k
    /* Skipped when XML_PARSE_OLDSAX is set; see the fallback lookup below. */
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7324
140k
        ent = xmlGetPredefinedEntity(name);
7325
140k
        if (ent != NULL)
7326
28.0k
            return(ent);
7327
140k
    }
7328
7329
    /*
7330
     * Ask first SAX for entity resolution, otherwise try the
7331
     * entities which may have been stored in the parser context.
7332
     */
7333
112k
    if (ctxt->sax != NULL) {
7334
112k
  if (ctxt->sax->getEntity != NULL)
7335
112k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7336
112k
  /*
   * With XML_PARSE_OLDSAX the predefined entities are only a fallback,
   * tried after getEntity and only while wellFormed is still 1.
   */
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7337
112k
      (ctxt->options & XML_PARSE_OLDSAX))
7338
0
      ent = xmlGetPredefinedEntity(name);
7339
112k
  /*
   * Last lookup: xmlSAX2GetEntity, used only when userData is the
   * parser context itself and wellFormed is still 1.
   */
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7340
112k
      (ctxt->userData==ctxt)) {
7341
22.9k
      ent = xmlSAX2GetEntity(ctxt, name);
7342
22.9k
  }
7343
112k
    }
7344
112k
    /* NOTE(review): presumably a callback above can stop the parser - confirm. */
    if (ctxt->instate == XML_PARSER_EOF)
7345
279
  return(NULL);
7346
    /*
7347
     * [ WFC: Entity Declared ]
7348
     * In a document without any DTD, a document with only an
7349
     * internal DTD subset which contains no parameter entity
7350
     * references, or a document with "standalone='yes'", the
7351
     * Name given in the entity reference must match that in an
7352
     * entity declaration, except that well-formed documents
7353
     * need not declare any of the following entities: amp, lt,
7354
     * gt, apos, quot.
7355
     * The declaration of a parameter entity must precede any
7356
     * reference to it.
7357
     * Similarly, the declaration of a general entity must
7358
     * precede any reference to it which appears in a default
7359
     * value in an attribute-list declaration. Note that if
7360
     * entities are declared in the external subset or in
7361
     * external parameter entities, a non-validating processor
7362
     * is not obligated to read and process their declarations;
7363
     * for such documents, the rule that an entity must be
7364
     * declared is a well-formedness constraint only if
7365
     * standalone='yes'.
7366
     */
7367
112k
    if (ent == NULL) {
7368
36.9k
  if ((ctxt->standalone == 1) ||
7369
36.9k
      ((ctxt->hasExternalSubset == 0) &&
7370
35.3k
       (ctxt->hasPErefs == 0))) {
7371
11.7k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7372
11.7k
         "Entity '%s' not defined\n", name);
7373
25.1k
  } else {
7374
25.1k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7375
25.1k
         "Entity '%s' not defined\n", name);
7376
25.1k
      /*
       * Outside of the DTD subsets, still hand the undeclared name
       * to the SAX reference callback.
       */
      if ((ctxt->inSubset == 0) &&
7377
25.1k
    (ctxt->sax != NULL) &&
7378
25.1k
    (ctxt->sax->reference != NULL)) {
7379
24.5k
    ctxt->sax->reference(ctxt->userData, name);
7380
24.5k
      }
7381
25.1k
  }
7382
36.9k
  /* Reached for both the fatal-error and the warning case. */
  ctxt->valid = 0;
7383
36.9k
    }
7384
7385
    /*
7386
     * [ WFC: Parsed Entity ]
7387
     * An entity reference must not contain the name of an
7388
     * unparsed entity
7389
     */
7390
75.1k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7391
301
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7392
301
     "Entity reference to unparsed entity %s\n", name);
7393
301
    }
7394
7395
    /*
7396
     * [ WFC: No External Entity References ]
7397
     * Attribute values cannot contain direct or indirect
7398
     * entity references to external entities.
7399
     */
7400
74.8k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7401
74.8k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7402
10.3k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7403
10.3k
       "Attribute references external entity '%s'\n", name);
7404
10.3k
    }
7405
    /*
7406
     * [ WFC: No < in Attribute Values ]
7407
     * The replacement text of any entity referred to directly or
7408
     * indirectly in an attribute value (other than "&lt;") must
7409
     * not contain a <.
7410
     */
7411
64.5k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7412
64.5k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7413
23.2k
  /*
   * The content is scanned for '<' only while XML_ENT_CHECKED_LT is
   * unset; the outcome is cached in ent->flags for later references.
   */
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7414
8.54k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7415
1.16k
                ent->flags |= XML_ENT_CONTAINS_LT;
7416
8.54k
            ent->flags |= XML_ENT_CHECKED_LT;
7417
8.54k
        }
7418
23.2k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7419
13.7k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7420
13.7k
                    "'<' in entity '%s' is not allowed in attributes "
7421
13.7k
                    "values\n", name);
7422
23.2k
    }
7423
7424
    /*
7425
     * Internal check, no parameter entities here ...
7426
     */
7427
41.3k
    else {
7428
41.3k
  switch (ent->etype) {
7429
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7430
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7431
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7432
0
       "Attempt to reference the parameter entity '%s'\n",
7433
0
            name);
7434
0
      break;
7435
41.3k
      default:
7436
41.3k
      break;
7437
41.3k
  }
7438
41.3k
    }
7439
7440
    /*
7441
     * [ WFC: No Recursion ]
7442
     * A parsed entity must not contain a recursive reference
7443
     * to itself, either directly or indirectly.
7444
     * Done somewhere else
7445
     */
7446
112k
    /*
     * The checks above only report problems: ent (possibly NULL) is
     * returned as it was looked up, even after a fatal error.
     */
    return(ent);
7447
112k
}
7448
7449
/**
7450
 * xmlParseStringEntityRef:
7451
 * @ctxt:  an XML parser context
7452
 * @str:  a pointer to an index in the string
7453
 *
7454
 * Parse an entity reference, but this version parses it from
7455
 * a string value.
7456
 *
7457
 * [68] EntityRef ::= '&' Name ';'
7458
 *
7459
 * [ WFC: Entity Declared ]
7460
 * In a document without any DTD, a document with only an internal DTD
7461
 * subset which contains no parameter entity references, or a document
7462
 * with "standalone='yes'", the Name given in the entity reference
7463
 * must match that in an entity declaration, except that well-formed
7464
 * documents need not declare any of the following entities: amp, lt,
7465
 * gt, apos, quot.  The declaration of a parameter entity must precede
7466
 * any reference to it.  Similarly, the declaration of a general entity
7467
 * must precede any reference to it which appears in a default value in an
7468
 * attribute-list declaration. Note that if entities are declared in the
7469
 * external subset or in external parameter entities, a non-validating
7470
 * processor is not obligated to read and process their declarations;
7471
 * for such documents, the rule that an entity must be declared is a
7472
 * well-formedness constraint only if standalone='yes'.
7473
 *
7474
 * [ WFC: Parsed Entity ]
7475
 * An entity reference must not contain the name of an unparsed entity
7476
 *
7477
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7478
 * is updated to the current location in the string.
7479
 */
7480
static xmlEntityPtr
7481
91.2k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7482
91.2k
    xmlChar *name;
7483
91.2k
    const xmlChar *ptr;
7484
91.2k
    xmlChar cur;
7485
91.2k
    xmlEntityPtr ent = NULL;
7486
7487
91.2k
    if ((str == NULL) || (*str == NULL))
7488
0
        return(NULL);
7489
91.2k
    ptr = *str;
7490
91.2k
    cur = *ptr;
7491
91.2k
    /* Not positioned on '&': return without touching *str. */
    if (cur != '&')
7492
0
  return(NULL);
7493
7494
91.2k
    /* Skip the '&'. */
    ptr++;
7495
91.2k
    name = xmlParseStringName(ctxt, &ptr);
7496
91.2k
    if (name == NULL) {
7497
7.52k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7498
7.52k
           "xmlParseStringEntityRef: no name\n");
7499
7.52k
  *str = ptr;
7500
7.52k
  return(NULL);
7501
7.52k
    }
7502
83.7k
    /* From here on, name is released with xmlFree on each return path. */
    if (*ptr != ';') {
7503
7.93k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7504
7.93k
        xmlFree(name);
7505
7.93k
  *str = ptr;
7506
7.93k
  return(NULL);
7507
7.93k
    }
7508
75.7k
    /* Skip the terminating ';'. */
    ptr++;
7509
7510
7511
    /*
7512
     * Predefined entities override any extra definition
7513
     */
7514
75.7k
    /* Skipped when XML_PARSE_OLDSAX is set; see the fallback lookup below. */
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7515
75.7k
        ent = xmlGetPredefinedEntity(name);
7516
75.7k
        if (ent != NULL) {
7517
34.6k
            xmlFree(name);
7518
34.6k
            *str = ptr;
7519
34.6k
            return(ent);
7520
34.6k
        }
7521
75.7k
    }
7522
7523
    /*
7524
     * Ask first SAX for entity resolution, otherwise try the
7525
     * entities which may have been stored in the parser context.
7526
     */
7527
41.1k
    if (ctxt->sax != NULL) {
7528
41.1k
  if (ctxt->sax->getEntity != NULL)
7529
41.1k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7530
41.1k
  /* With XML_PARSE_OLDSAX the predefined entities are only a fallback. */
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7531
0
      ent = xmlGetPredefinedEntity(name);
7532
41.1k
  /* Last lookup, only when userData is the parser context itself. */
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7533
20.3k
      ent = xmlSAX2GetEntity(ctxt, name);
7534
20.3k
  }
7535
41.1k
    }
7536
41.1k
    /*
     * NOTE(review): unlike the other return paths, *str is not updated
     * here - confirm that callers do not rely on it after EOF.
     */
    if (ctxt->instate == XML_PARSER_EOF) {
7537
0
  xmlFree(name);
7538
0
  return(NULL);
7539
0
    }
7540
7541
    /*
7542
     * [ WFC: Entity Declared ]
7543
     * In a document without any DTD, a document with only an
7544
     * internal DTD subset which contains no parameter entity
7545
     * references, or a document with "standalone='yes'", the
7546
     * Name given in the entity reference must match that in an
7547
     * entity declaration, except that well-formed documents
7548
     * need not declare any of the following entities: amp, lt,
7549
     * gt, apos, quot.
7550
     * The declaration of a parameter entity must precede any
7551
     * reference to it.
7552
     * Similarly, the declaration of a general entity must
7553
     * precede any reference to it which appears in a default
7554
     * value in an attribute-list declaration. Note that if
7555
     * entities are declared in the external subset or in
7556
     * external parameter entities, a non-validating processor
7557
     * is not obligated to read and process their declarations;
7558
     * for such documents, the rule that an entity must be
7559
     * declared is a well-formedness constraint only if
7560
     * standalone='yes'.
7561
     */
7562
41.1k
    if (ent == NULL) {
7563
20.3k
  if ((ctxt->standalone == 1) ||
7564
20.3k
      ((ctxt->hasExternalSubset == 0) &&
7565
20.0k
       (ctxt->hasPErefs == 0))) {
7566
19.9k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7567
19.9k
         "Entity '%s' not defined\n", name);
7568
19.9k
  } else {
7569
380
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7570
380
        "Entity '%s' not defined\n",
7571
380
        name);
7572
380
  }
7573
  /* TODO ? check regressions ctxt->valid = 0; */
7574
20.3k
    }
7575
7576
    /*
7577
     * [ WFC: Parsed Entity ]
7578
     * An entity reference must not contain the name of an
7579
     * unparsed entity
7580
     */
7581
20.7k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7582
198
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7583
198
     "Entity reference to unparsed entity %s\n", name);
7584
198
    }
7585
7586
    /*
7587
     * [ WFC: No External Entity References ]
7588
     * Attribute values cannot contain direct or indirect
7589
     * entity references to external entities.
7590
     */
7591
20.5k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7592
20.5k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7593
289
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7594
289
   "Attribute references external entity '%s'\n", name);
7595
289
    }
7596
    /*
7597
     * [ WFC: No < in Attribute Values ]
7598
     * The replacement text of any entity referred to directly or
7599
     * indirectly in an attribute value (other than "&lt;") must
7600
     * not contain a <.
7601
     */
7602
20.3k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7603
20.3k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7604
20.3k
  /*
   * The content is scanned for '<' only while XML_ENT_CHECKED_LT is
   * unset; the outcome is cached in ent->flags for later references.
   */
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7605
1.47k
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7606
425
                ent->flags |= XML_ENT_CONTAINS_LT;
7607
1.47k
            ent->flags |= XML_ENT_CHECKED_LT;
7608
1.47k
        }
7609
20.3k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7610
13.0k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7611
13.0k
                    "'<' in entity '%s' is not allowed in attributes "
7612
13.0k
                    "values\n", name);
7613
20.3k
    }
7614
7615
    /*
7616
     * Internal check, no parameter entities here ...
7617
     */
7618
0
    else {
7619
0
  switch (ent->etype) {
7620
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7621
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7622
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7623
0
       "Attempt to reference the parameter entity '%s'\n",
7624
0
          name);
7625
0
      break;
7626
0
      default:
7627
0
      break;
7628
0
  }
7629
0
    }
7630
7631
    /*
7632
     * [ WFC: No Recursion ]
7633
     * A parsed entity must not contain a recursive reference
7634
     * to itself, either directly or indirectly.
7635
     * Done somewhere else
7636
     */
7637
7638
41.1k
    /*
     * The checks above only report problems: ent (possibly NULL) is
     * returned as it was looked up, and *str is moved past the ';'.
     */
    xmlFree(name);
7639
41.1k
    *str = ptr;
7640
41.1k
    return(ent);
7641
41.1k
}
7642
7643
/**
7644
 * xmlParsePEReference:
7645
 * @ctxt:  an XML parser context
7646
 *
7647
 * DEPRECATED: Internal function, don't use.
7648
 *
7649
 * Parse a parameter entity reference. Always consumes '%'.
7650
 *
7651
 * The entity content is handled directly by pushing its content as
7652
 * a new input stream.
7653
 *
7654
 * [69] PEReference ::= '%' Name ';'
7655
 *
7656
 * [ WFC: No Recursion ]
7657
 * A parsed entity must not contain a recursive
7658
 * reference to itself, either directly or indirectly.
7659
 *
7660
 * [ WFC: Entity Declared ]
7661
 * In a document without any DTD, a document with only an internal DTD
7662
 * subset which contains no parameter entity references, or a document
7663
 * with "standalone='yes'", ...  ... The declaration of a parameter
7664
 * entity must precede any reference to it...
7665
 *
7666
 * [ VC: Entity Declared ]
7667
 * In a document with an external subset or external parameter entities
7668
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7669
 * must precede any reference to it...
7670
 *
7671
 * [ WFC: In DTD ]
7672
 * Parameter-entity references may only appear in the DTD.
7673
 * NOTE: misleading but this is handled.
7674
 */
7675
void
7676
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7677
125k
{
7678
125k
    const xmlChar *name;
7679
125k
    xmlEntityPtr entity = NULL;
7680
125k
    xmlParserInputPtr input;
7681
7682
125k
    if (RAW != '%')
7683
0
        return;
7684
125k
    /* Consume the '%'; every return below happens after this point. */
    NEXT;
7685
125k
    name = xmlParseName(ctxt);
7686
125k
    if (name == NULL) {
7687
48.0k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7688
48.0k
  return;
7689
48.0k
    }
7690
77.0k
    if (xmlParserDebugEntities)
7691
0
  xmlGenericError(xmlGenericErrorContext,
7692
0
    "PEReference: %s\n", name);
7693
77.0k
    if (RAW != ';') {
7694
21.0k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7695
21.0k
        return;
7696
21.0k
    }
7697
7698
55.9k
    /* Consume the terminating ';'. */
    NEXT;
7699
7700
    /*
7701
     * Request the entity from SAX
7702
     */
7703
55.9k
    if ((ctxt->sax != NULL) &&
7704
55.9k
  (ctxt->sax->getParameterEntity != NULL))
7705
55.9k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7706
55.9k
    /* NOTE(review): presumably the callback above can stop the parser - confirm. */
    if (ctxt->instate == XML_PARSER_EOF)
7707
296
  return;
7708
55.6k
    if (entity == NULL) {
7709
  /*
7710
   * [ WFC: Entity Declared ]
7711
   * In a document without any DTD, a document with only an
7712
   * internal DTD subset which contains no parameter entity
7713
   * references, or a document with "standalone='yes'", ...
7714
   * ... The declaration of a parameter entity must precede
7715
   * any reference to it...
7716
   */
7717
13.5k
  if ((ctxt->standalone == 1) ||
7718
13.5k
      ((ctxt->hasExternalSubset == 0) &&
7719
13.3k
       (ctxt->hasPErefs == 0))) {
7720
1.36k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7721
1.36k
            "PEReference: %%%s; not found\n",
7722
1.36k
            name);
7723
12.1k
  } else {
7724
      /*
7725
       * [ VC: Entity Declared ]
7726
       * In a document with an external subset or external
7727
       * parameter entities with "standalone='no'", ...
7728
       * ... The declaration of a parameter entity must
7729
       * precede any reference to it...
7730
       */
7731
12.1k
            /*
             * Reported as a validity error only when validating with an
             * error handler installed; otherwise as a plain warning.
             */
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7732
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7733
0
                                 "PEReference: %%%s; not found\n",
7734
0
                                 name, NULL);
7735
0
            } else
7736
12.1k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7737
12.1k
                              "PEReference: %%%s; not found\n",
7738
12.1k
                              name, NULL);
7739
12.1k
            ctxt->valid = 0;
7740
12.1k
  }
7741
42.1k
    } else {
7742
  /*
7743
   * Internal checking in case the entity quest barfed
7744
   */
7745
42.1k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7746
42.1k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7747
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7748
0
      "Internal: %%%s; is not a parameter entity\n",
7749
0
        name, NULL);
7750
42.1k
  } else {
7751
42.1k
            xmlChar start[4];
7752
42.1k
            xmlCharEncoding enc;
7753
42.1k
            unsigned long parentConsumed;
7754
42.1k
            xmlEntityPtr oldEnt;
7755
7756
42.1k
      /*
       * An external parameter entity is not expanded unless one of
       * these options or modes is enabled. Note that this return
       * happens before hasPErefs is set at the end of the function.
       */
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7757
42.1k
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7758
42.1k
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7759
42.1k
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7760
42.1k
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7761
42.1k
    (ctxt->replaceEntities == 0) &&
7762
42.1k
    (ctxt->validate == 0))
7763
0
    return;
7764
7765
42.1k
            /*
             * XML_ENT_EXPANDING is set below once the entity input has
             * been pushed; finding it already set is treated as an
             * entity loop and halts the parser.
             * NOTE(review): the flag is not cleared in this function -
             * presumably done when the input is popped; confirm.
             */
            if (entity->flags & XML_ENT_EXPANDING) {
7766
214
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7767
214
                xmlHaltParser(ctxt);
7768
214
                return;
7769
214
            }
7770
7771
            /* Must be computed from old input before pushing new input. */
7772
41.9k
            parentConsumed = ctxt->input->parentConsumed;
7773
41.9k
            oldEnt = ctxt->input->entity;
7774
41.9k
            /*
             * The current input's own progress (consumed plus the offset
             * of cur from base) is added only when that input has no
             * entity or is an external parameter entity without
             * XML_ENT_PARSED set.
             */
            if ((oldEnt == NULL) ||
7775
41.9k
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7776
41.9k
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
7777
41.9k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
7778
41.9k
                xmlSaturatedAddSizeT(&parentConsumed,
7779
41.9k
                                     ctxt->input->cur - ctxt->input->base);
7780
41.9k
            }
7781
7782
41.9k
      input = xmlNewEntityInputStream(ctxt, entity);
7783
41.9k
      /* On push failure the stream is freed here and hasPErefs stays untouched. */
      if (xmlPushInput(ctxt, input) < 0) {
7784
7.05k
                xmlFreeInputStream(input);
7785
7.05k
    return;
7786
7.05k
            }
7787
7788
34.8k
            entity->flags |= XML_ENT_EXPANDING;
7789
7790
34.8k
            input->parentConsumed = parentConsumed;
7791
7792
34.8k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7793
                /*
7794
                 * Get the 4 first bytes and decode the charset
7795
                 * if enc != XML_CHAR_ENCODING_NONE
7796
                 * plug some encoding conversion routines.
7797
                 * Note that, since we may have some non-UTF8
7798
                 * encoding (like UTF16, bug 135229), the 'length'
7799
                 * is not known, but we can calculate based upon
7800
                 * the amount of data in the buffer.
7801
                 */
7802
17.4k
                GROW
7803
17.4k
                if (ctxt->instate == XML_PARSER_EOF)
7804
0
                    return;
7805
17.4k
                /* Encoding detection needs at least 4 bytes of input. */
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
7806
17.1k
                    start[0] = RAW;
7807
17.1k
                    start[1] = NXT(1);
7808
17.1k
                    start[2] = NXT(2);
7809
17.1k
                    start[3] = NXT(3);
7810
17.1k
                    enc = xmlDetectCharEncoding(start, 4);
7811
17.1k
                    if (enc != XML_CHAR_ENCODING_NONE) {
7812
13.6k
                        xmlSwitchEncoding(ctxt, enc);
7813
13.6k
                    }
7814
17.1k
                }
7815
7816
17.4k
                /* Parse a leading text declaration ("<?xml" followed by a blank). */
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7817
17.4k
                    (IS_BLANK_CH(NXT(5)))) {
7818
6.64k
                    xmlParseTextDecl(ctxt);
7819
6.64k
                }
7820
17.4k
            }
7821
34.8k
  }
7822
42.1k
    }
7823
48.4k
    /* Reached whenever none of the early returns above was taken. */
    ctxt->hasPErefs = 1;
7824
48.4k
}
7825
7826
/**
7827
 * xmlLoadEntityContent:
7828
 * @ctxt:  an XML parser context
7829
 * @entity: an unloaded system entity
7830
 *
7831
 * Load the original content of the given system entity from the
7832
 * ExternalID/SystemID given. This is to be used for Included in Literal
7833
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7834
 *
7835
 * Returns 0 in case of success and -1 in case of failure
7836
 */
7837
static int
7838
82
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7839
82
    xmlParserInputPtr input;
7840
82
    xmlBufferPtr buf;
7841
82
    int l, c;
7842
7843
82
    if ((ctxt == NULL) || (entity == NULL) ||
7844
82
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7845
82
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7846
82
  (entity->content != NULL)) {
7847
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7848
0
              "xmlLoadEntityContent parameter error");
7849
0
        return(-1);
7850
0
    }
7851
7852
82
    if (xmlParserDebugEntities)
7853
0
  xmlGenericError(xmlGenericErrorContext,
7854
0
    "Reading %s entity content input\n", entity->name);
7855
7856
82
    buf = xmlBufferCreate();
7857
82
    if (buf == NULL) {
7858
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7859
0
              "xmlLoadEntityContent parameter error");
7860
0
        return(-1);
7861
0
    }
7862
82
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
7863
7864
82
    input = xmlNewEntityInputStream(ctxt, entity);
7865
82
    if (input == NULL) {
7866
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7867
0
              "xmlLoadEntityContent input error");
7868
0
  xmlBufferFree(buf);
7869
0
        return(-1);
7870
0
    }
7871
7872
    /*
7873
     * Push the entity as the current input, read char by char
7874
     * saving to the buffer until the end of the entity or an error
7875
     */
7876
82
    if (xmlPushInput(ctxt, input) < 0) {
7877
0
        xmlBufferFree(buf);
7878
0
  xmlFreeInputStream(input);
7879
0
  return(-1);
7880
0
    }
7881
7882
82
    GROW;
7883
82
    c = CUR_CHAR(l);
7884
300k
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7885
300k
           (IS_CHAR(c))) {
7886
300k
        xmlBufferAdd(buf, ctxt->input->cur, l);
7887
300k
  NEXTL(l);
7888
300k
  c = CUR_CHAR(l);
7889
300k
    }
7890
82
    if (ctxt->instate == XML_PARSER_EOF) {
7891
0
  xmlBufferFree(buf);
7892
0
  return(-1);
7893
0
    }
7894
7895
82
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7896
56
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
7897
56
        xmlPopInput(ctxt);
7898
56
    } else if (!IS_CHAR(c)) {
7899
26
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7900
26
                          "xmlLoadEntityContent: invalid char value %d\n",
7901
26
                    c);
7902
26
  xmlBufferFree(buf);
7903
26
  return(-1);
7904
26
    }
7905
56
    entity->content = buf->content;
7906
56
    entity->length = buf->use;
7907
56
    buf->content = NULL;
7908
56
    xmlBufferFree(buf);
7909
7910
56
    return(0);
7911
82
}
7912
7913
/**
7914
 * xmlParseStringPEReference:
7915
 * @ctxt:  an XML parser context
7916
 * @str:  a pointer to an index in the string
7917
 *
7918
 * parse PEReference declarations
7919
 *
7920
 * [69] PEReference ::= '%' Name ';'
7921
 *
7922
 * [ WFC: No Recursion ]
7923
 * A parsed entity must not contain a recursive
7924
 * reference to itself, either directly or indirectly.
7925
 *
7926
 * [ WFC: Entity Declared ]
7927
 * In a document without any DTD, a document with only an internal DTD
7928
 * subset which contains no parameter entity references, or a document
7929
 * with "standalone='yes'", ...  ... The declaration of a parameter
7930
 * entity must precede any reference to it...
7931
 *
7932
 * [ VC: Entity Declared ]
7933
 * In a document with an external subset or external parameter entities
7934
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7935
 * must precede any reference to it...
7936
 *
7937
 * [ WFC: In DTD ]
7938
 * Parameter-entity references may only appear in the DTD.
7939
 * NOTE: misleading but this is handled.
7940
 *
7941
 * Returns the parameter entity found, or NULL if it is unknown or on error.
7942
 *         str is updated to the current value of the index
7943
 */
7944
static xmlEntityPtr
7945
4.09k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7946
4.09k
    const xmlChar *ptr;
7947
4.09k
    xmlChar cur;
7948
4.09k
    xmlChar *name;
7949
4.09k
    xmlEntityPtr entity = NULL;
7950
7951
4.09k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7952
4.09k
    ptr = *str;
7953
4.09k
    cur = *ptr;
7954
4.09k
    if (cur != '%')
7955
0
        return(NULL);
7956
4.09k
    ptr++;
7957
4.09k
    name = xmlParseStringName(ctxt, &ptr);
7958
4.09k
    if (name == NULL) {
7959
315
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7960
315
           "xmlParseStringPEReference: no name\n");
7961
315
  *str = ptr;
7962
315
  return(NULL);
7963
315
    }
7964
3.77k
    cur = *ptr;
7965
3.77k
    if (cur != ';') {
7966
32
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7967
32
  xmlFree(name);
7968
32
  *str = ptr;
7969
32
  return(NULL);
7970
32
    }
7971
3.74k
    ptr++;
7972
7973
    /*
7974
     * Request the entity from SAX
7975
     */
7976
3.74k
    if ((ctxt->sax != NULL) &&
7977
3.74k
  (ctxt->sax->getParameterEntity != NULL))
7978
3.74k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7979
3.74k
    if (ctxt->instate == XML_PARSER_EOF) {
7980
0
  xmlFree(name);
7981
0
  *str = ptr;
7982
0
  return(NULL);
7983
0
    }
7984
3.74k
    if (entity == NULL) {
7985
  /*
7986
   * [ WFC: Entity Declared ]
7987
   * In a document without any DTD, a document with only an
7988
   * internal DTD subset which contains no parameter entity
7989
   * references, or a document with "standalone='yes'", ...
7990
   * ... The declaration of a parameter entity must precede
7991
   * any reference to it...
7992
   */
7993
1.72k
  if ((ctxt->standalone == 1) ||
7994
1.72k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7995
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7996
0
     "PEReference: %%%s; not found\n", name);
7997
1.72k
  } else {
7998
      /*
7999
       * [ VC: Entity Declared ]
8000
       * In a document with an external subset or external
8001
       * parameter entities with "standalone='no'", ...
8002
       * ... The declaration of a parameter entity must
8003
       * precede any reference to it...
8004
       */
8005
1.72k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8006
1.72k
        "PEReference: %%%s; not found\n",
8007
1.72k
        name, NULL);
8008
1.72k
      ctxt->valid = 0;
8009
1.72k
  }
8010
2.02k
    } else {
8011
  /*
8012
   * Internal checking in case the entity quest barfed
8013
   */
8014
2.02k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8015
2.02k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8016
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8017
0
        "%%%s; is not a parameter entity\n",
8018
0
        name, NULL);
8019
0
  }
8020
2.02k
    }
8021
3.74k
    ctxt->hasPErefs = 1;
8022
3.74k
    xmlFree(name);
8023
3.74k
    *str = ptr;
8024
3.74k
    return(entity);
8025
3.74k
}
8026
8027
/**
8028
 * xmlParseDocTypeDecl:
8029
 * @ctxt:  an XML parser context
8030
 *
8031
 * DEPRECATED: Internal function, don't use.
8032
 *
8033
 * parse a DOCTYPE declaration
8034
 *
8035
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8036
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8037
 *
8038
 * [ VC: Root Element Type ]
8039
 * The Name in the document type declaration must match the element
8040
 * type of the root element.
8041
 */
8042
8043
void
8044
76.9k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8045
76.9k
    const xmlChar *name = NULL;
8046
76.9k
    xmlChar *ExternalID = NULL;
8047
76.9k
    xmlChar *URI = NULL;
8048
8049
    /*
8050
     * We know that '<!DOCTYPE' has been detected.
8051
     */
8052
76.9k
    SKIP(9);
8053
8054
76.9k
    SKIP_BLANKS;
8055
8056
    /*
8057
     * Parse the DOCTYPE name.
8058
     */
8059
76.9k
    name = xmlParseName(ctxt);
8060
76.9k
    if (name == NULL) {
8061
3.14k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8062
3.14k
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8063
3.14k
    }
8064
76.9k
    ctxt->intSubName = name;
8065
8066
76.9k
    SKIP_BLANKS;
8067
8068
    /*
8069
     * Check for SystemID and ExternalID
8070
     */
8071
76.9k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8072
8073
76.9k
    if ((URI != NULL) || (ExternalID != NULL)) {
8074
13.5k
        ctxt->hasExternalSubset = 1;
8075
13.5k
    }
8076
76.9k
    ctxt->extSubURI = URI;
8077
76.9k
    ctxt->extSubSystem = ExternalID;
8078
8079
76.9k
    SKIP_BLANKS;
8080
8081
    /*
8082
     * Create and update the internal subset.
8083
     */
8084
76.9k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8085
76.9k
  (!ctxt->disableSAX))
8086
59.1k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8087
76.9k
    if (ctxt->instate == XML_PARSER_EOF)
8088
762
  return;
8089
8090
    /*
8091
     * Is there any internal subset declarations ?
8092
     * they are handled separately in xmlParseInternalSubset()
8093
     */
8094
76.2k
    if (RAW == '[')
8095
61.6k
  return;
8096
8097
    /*
8098
     * We should be at the end of the DOCTYPE declaration.
8099
     */
8100
14.6k
    if (RAW != '>') {
8101
5.91k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8102
5.91k
    }
8103
14.6k
    NEXT;
8104
14.6k
}
8105
8106
/**
8107
 * xmlParseInternalSubset:
8108
 * @ctxt:  an XML parser context
8109
 *
8110
 * parse the internal subset declaration
8111
 *
8112
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8113
 */
8114
8115
static void
8116
62.8k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8117
    /*
8118
     * Is there any DTD definition ?
8119
     */
8120
62.8k
    if (RAW == '[') {
8121
62.8k
        int baseInputNr = ctxt->inputNr;
8122
62.8k
        ctxt->instate = XML_PARSER_DTD;
8123
62.8k
        NEXT;
8124
  /*
8125
   * Parse the succession of Markup declarations and
8126
   * PEReferences.
8127
   * Subsequence (markupdecl | PEReference | S)*
8128
   */
8129
62.8k
  SKIP_BLANKS;
8130
323k
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8131
323k
               (ctxt->instate != XML_PARSER_EOF)) {
8132
8133
            /*
8134
             * Conditional sections are allowed from external entities included
8135
             * by PE References in the internal subset.
8136
             */
8137
297k
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8138
297k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8139
977
                xmlParseConditionalSections(ctxt);
8140
296k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8141
209k
          xmlParseMarkupDecl(ctxt);
8142
209k
            } else if (RAW == '%') {
8143
50.0k
          xmlParsePEReference(ctxt);
8144
50.0k
            } else {
8145
36.2k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
36.2k
                        "xmlParseInternalSubset: error detected in"
8147
36.2k
                        " Markup declaration\n");
8148
36.2k
                xmlHaltParser(ctxt);
8149
36.2k
                return;
8150
36.2k
            }
8151
260k
      SKIP_BLANKS;
8152
260k
            SHRINK;
8153
260k
            GROW;
8154
260k
  }
8155
26.5k
  if (RAW == ']') {
8156
16.1k
      NEXT;
8157
16.1k
      SKIP_BLANKS;
8158
16.1k
  }
8159
26.5k
    }
8160
8161
    /*
8162
     * We should be at the end of the DOCTYPE declaration.
8163
     */
8164
26.5k
    if (RAW != '>') {
8165
11.3k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8166
11.3k
  return;
8167
11.3k
    }
8168
15.2k
    NEXT;
8169
15.2k
}
8170
8171
#ifdef LIBXML_SAX1_ENABLED
8172
/**
8173
 * xmlParseAttribute:
8174
 * @ctxt:  an XML parser context
8175
 * @value:  a xmlChar ** used to store the value of the attribute
8176
 *
8177
 * DEPRECATED: Internal function, don't use.
8178
 *
8179
 * parse an attribute
8180
 *
8181
 * [41] Attribute ::= Name Eq AttValue
8182
 *
8183
 * [ WFC: No External Entity References ]
8184
 * Attribute values cannot contain direct or indirect entity references
8185
 * to external entities.
8186
 *
8187
 * [ WFC: No < in Attribute Values ]
8188
 * The replacement text of any entity referred to directly or indirectly in
8189
 * an attribute value (other than "&lt;") must not contain a <.
8190
 *
8191
 * [ VC: Attribute Value Type ]
8192
 * The attribute must have been declared; the value must be of the type
8193
 * declared for it.
8194
 *
8195
 * [25] Eq ::= S? '=' S?
8196
 *
8197
 * With namespace:
8198
 *
8199
 * [NS 11] Attribute ::= QName Eq AttValue
8200
 *
8201
 * Also the case QName == xmlns:??? is handled independently as a namespace
8202
 * definition.
8203
 *
8204
 * Returns the attribute name, and the value in *value.
8205
 */
8206
8207
const xmlChar *
8208
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8209
    const xmlChar *name;
8210
    xmlChar *val;
8211
8212
    *value = NULL;
8213
    GROW;
8214
    name = xmlParseName(ctxt);
8215
    if (name == NULL) {
8216
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8217
                 "error parsing attribute name\n");
8218
        return(NULL);
8219
    }
8220
8221
    /*
8222
     * read the value
8223
     */
8224
    SKIP_BLANKS;
8225
    if (RAW == '=') {
8226
        NEXT;
8227
  SKIP_BLANKS;
8228
  val = xmlParseAttValue(ctxt);
8229
  ctxt->instate = XML_PARSER_CONTENT;
8230
    } else {
8231
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8232
         "Specification mandates value for attribute %s\n", name);
8233
  return(name);
8234
    }
8235
8236
    /*
8237
     * Check that xml:lang conforms to the specification
8238
     * No more registered as an error, just generate a warning now
8239
     * since this was deprecated in XML second edition
8240
     */
8241
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8242
  if (!xmlCheckLanguageID(val)) {
8243
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8244
              "Malformed value for xml:lang : %s\n",
8245
        val, NULL);
8246
  }
8247
    }
8248
8249
    /*
8250
     * Check that xml:space conforms to the specification
8251
     */
8252
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8253
  if (xmlStrEqual(val, BAD_CAST "default"))
8254
      *(ctxt->space) = 0;
8255
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8256
      *(ctxt->space) = 1;
8257
  else {
8258
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8259
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8260
                                 val, NULL);
8261
  }
8262
    }
8263
8264
    *value = val;
8265
    return(name);
8266
}
8267
8268
/**
8269
 * xmlParseStartTag:
8270
 * @ctxt:  an XML parser context
8271
 *
8272
 * DEPRECATED: Internal function, don't use.
8273
 *
8274
 * Parse a start tag. Always consumes '<'.
8275
 *
8276
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8277
 *
8278
 * [ WFC: Unique Att Spec ]
8279
 * No attribute name may appear more than once in the same start-tag or
8280
 * empty-element tag.
8281
 *
8282
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8283
 *
8284
 * [ WFC: Unique Att Spec ]
8285
 * No attribute name may appear more than once in the same start-tag or
8286
 * empty-element tag.
8287
 *
8288
 * With namespace:
8289
 *
8290
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8291
 *
8292
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8293
 *
8294
 * Returns the element name parsed
8295
 */
8296
8297
const xmlChar *
8298
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8299
    const xmlChar *name;
8300
    const xmlChar *attname;
8301
    xmlChar *attvalue;
8302
    const xmlChar **atts = ctxt->atts;
8303
    int nbatts = 0;
8304
    int maxatts = ctxt->maxatts;
8305
    int i;
8306
8307
    if (RAW != '<') return(NULL);
8308
    NEXT1;
8309
8310
    name = xmlParseName(ctxt);
8311
    if (name == NULL) {
8312
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8313
       "xmlParseStartTag: invalid element name\n");
8314
        return(NULL);
8315
    }
8316
8317
    /*
8318
     * Now parse the attributes, it ends up with the ending
8319
     *
8320
     * (S Attribute)* S?
8321
     */
8322
    SKIP_BLANKS;
8323
    GROW;
8324
8325
    while (((RAW != '>') &&
8326
     ((RAW != '/') || (NXT(1) != '>')) &&
8327
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8328
  attname = xmlParseAttribute(ctxt, &attvalue);
8329
        if (attname == NULL) {
8330
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8331
         "xmlParseStartTag: problem parsing attributes\n");
8332
      break;
8333
  }
8334
        if (attvalue != NULL) {
8335
      /*
8336
       * [ WFC: Unique Att Spec ]
8337
       * No attribute name may appear more than once in the same
8338
       * start-tag or empty-element tag.
8339
       */
8340
      for (i = 0; i < nbatts;i += 2) {
8341
          if (xmlStrEqual(atts[i], attname)) {
8342
        xmlErrAttributeDup(ctxt, NULL, attname);
8343
        xmlFree(attvalue);
8344
        goto failed;
8345
    }
8346
      }
8347
      /*
8348
       * Add the pair to atts
8349
       */
8350
      if (atts == NULL) {
8351
          maxatts = 22; /* allow for 10 attrs by default */
8352
          atts = (const xmlChar **)
8353
           xmlMalloc(maxatts * sizeof(xmlChar *));
8354
    if (atts == NULL) {
8355
        xmlErrMemory(ctxt, NULL);
8356
        if (attvalue != NULL)
8357
      xmlFree(attvalue);
8358
        goto failed;
8359
    }
8360
    ctxt->atts = atts;
8361
    ctxt->maxatts = maxatts;
8362
      } else if (nbatts + 4 > maxatts) {
8363
          const xmlChar **n;
8364
8365
          maxatts *= 2;
8366
          n = (const xmlChar **) xmlRealloc((void *) atts,
8367
               maxatts * sizeof(const xmlChar *));
8368
    if (n == NULL) {
8369
        xmlErrMemory(ctxt, NULL);
8370
        if (attvalue != NULL)
8371
      xmlFree(attvalue);
8372
        goto failed;
8373
    }
8374
    atts = n;
8375
    ctxt->atts = atts;
8376
    ctxt->maxatts = maxatts;
8377
      }
8378
      atts[nbatts++] = attname;
8379
      atts[nbatts++] = attvalue;
8380
      atts[nbatts] = NULL;
8381
      atts[nbatts + 1] = NULL;
8382
  } else {
8383
      if (attvalue != NULL)
8384
    xmlFree(attvalue);
8385
  }
8386
8387
failed:
8388
8389
  GROW
8390
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8391
      break;
8392
  if (SKIP_BLANKS == 0) {
8393
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8394
         "attributes construct error\n");
8395
  }
8396
  SHRINK;
8397
        GROW;
8398
    }
8399
8400
    /*
8401
     * SAX: Start of Element !
8402
     */
8403
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8404
  (!ctxt->disableSAX)) {
8405
  if (nbatts > 0)
8406
      ctxt->sax->startElement(ctxt->userData, name, atts);
8407
  else
8408
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8409
    }
8410
8411
    if (atts != NULL) {
8412
        /* Free only the content strings */
8413
        for (i = 1;i < nbatts;i+=2)
8414
      if (atts[i] != NULL)
8415
         xmlFree((xmlChar *) atts[i]);
8416
    }
8417
    return(name);
8418
}
8419
8420
/**
8421
 * xmlParseEndTag1:
8422
 * @ctxt:  an XML parser context
8423
 * @line:  line of the start tag
8424
 * @nsNr:  number of namespaces on the start tag
8425
 *
8426
 * Parse an end tag. Always consumes '</'.
8427
 *
8428
 * [42] ETag ::= '</' Name S? '>'
8429
 *
8430
 * With namespace
8431
 *
8432
 * [NS 9] ETag ::= '</' QName S? '>'
8433
 */
8434
8435
static void
8436
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8437
    const xmlChar *name;
8438
8439
    GROW;
8440
    if ((RAW != '<') || (NXT(1) != '/')) {
8441
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8442
           "xmlParseEndTag: '</' not found\n");
8443
  return;
8444
    }
8445
    SKIP(2);
8446
8447
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8448
8449
    /*
8450
     * We should definitely be at the ending "S? '>'" part
8451
     */
8452
    GROW;
8453
    SKIP_BLANKS;
8454
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8455
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8456
    } else
8457
  NEXT1;
8458
8459
    /*
8460
     * [ WFC: Element Type Match ]
8461
     * The Name in an element's end-tag must match the element type in the
8462
     * start-tag.
8463
     *
8464
     */
8465
    if (name != (xmlChar*)1) {
8466
        if (name == NULL) name = BAD_CAST "unparsable";
8467
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8468
         "Opening and ending tag mismatch: %s line %d and %s\n",
8469
                    ctxt->name, line, name);
8470
    }
8471
8472
    /*
8473
     * SAX: End of Tag
8474
     */
8475
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8476
  (!ctxt->disableSAX))
8477
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8478
8479
    namePop(ctxt);
8480
    spacePop(ctxt);
8481
    return;
8482
}
8483
8484
/**
8485
 * xmlParseEndTag:
8486
 * @ctxt:  an XML parser context
8487
 *
8488
 * DEPRECATED: Internal function, don't use.
8489
 *
8490
 * parse an end of tag
8491
 *
8492
 * [42] ETag ::= '</' Name S? '>'
8493
 *
8494
 * With namespace
8495
 *
8496
 * [NS 9] ETag ::= '</' QName S? '>'
8497
 */
8498
8499
void
8500
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8501
    /*
     * Thin non-static wrapper: delegate to xmlParseEndTag1(), passing 0
     * as the line of the start tag.
     */
    xmlParseEndTag1(ctxt, 0);
8502
}
8503
#endif /* LIBXML_SAX1_ENABLED */
8504
8505
/************************************************************************
8506
 *                  *
8507
 *          SAX 2 specific operations       *
8508
 *                  *
8509
 ************************************************************************/
8510
8511
/*
8512
 * xmlGetNamespace:
8513
 * @ctxt:  an XML parser context
8514
 * @prefix:  the prefix to lookup
8515
 *
8516
 * Lookup the namespace name for the @prefix (which can be NULL)
8517
 * The prefix must come from the @ctxt->dict dictionary
8518
 *
8519
 * Returns the namespace name or NULL if not bound
8520
 */
8521
static const xmlChar *
8522
3.85M
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8523
3.85M
    int i;
8524
8525
3.85M
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8526
4.76M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8527
3.33M
        if (ctxt->nsTab[i] == prefix) {
8528
2.13M
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8529
872k
          return(NULL);
8530
1.26M
      return(ctxt->nsTab[i + 1]);
8531
2.13M
  }
8532
1.43M
    return(NULL);
8533
3.56M
}
8534
8535
/**
8536
 * xmlParseQName:
8537
 * @ctxt:  an XML parser context
8538
 * @prefix:  pointer to store the prefix part
8539
 *
8540
 * parse an XML Namespace QName
8541
 *
8542
 * [6]  QName  ::= (Prefix ':')? LocalPart
8543
 * [7]  Prefix  ::= NCName
8544
 * [8]  LocalPart  ::= NCName
8545
 *
8546
 * Returns the Name parsed or NULL
8547
 */
8548
8549
static const xmlChar *
8550
7.52M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8551
7.52M
    const xmlChar *l, *p;
8552
8553
7.52M
    GROW;
8554
7.52M
    if (ctxt->instate == XML_PARSER_EOF)
8555
251
        return(NULL);
8556
8557
7.52M
    l = xmlParseNCName(ctxt);
8558
7.52M
    if (l == NULL) {
8559
1.44M
        if (CUR == ':') {
8560
16.4k
      l = xmlParseName(ctxt);
8561
16.4k
      if (l != NULL) {
8562
16.2k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8563
16.2k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8564
16.2k
    *prefix = NULL;
8565
16.2k
    return(l);
8566
16.2k
      }
8567
16.4k
  }
8568
1.43M
        return(NULL);
8569
1.44M
    }
8570
6.07M
    if (CUR == ':') {
8571
2.14M
        NEXT;
8572
2.14M
  p = l;
8573
2.14M
  l = xmlParseNCName(ctxt);
8574
2.14M
  if (l == NULL) {
8575
18.1k
      xmlChar *tmp;
8576
8577
18.1k
            if (ctxt->instate == XML_PARSER_EOF)
8578
467
                return(NULL);
8579
17.7k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8580
17.7k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8581
17.7k
      l = xmlParseNmtoken(ctxt);
8582
17.7k
      if (l == NULL) {
8583
12.2k
                if (ctxt->instate == XML_PARSER_EOF)
8584
599
                    return(NULL);
8585
11.6k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8586
11.6k
            } else {
8587
5.45k
    tmp = xmlBuildQName(l, p, NULL, 0);
8588
5.45k
    xmlFree((char *)l);
8589
5.45k
      }
8590
17.1k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8591
17.1k
      if (tmp != NULL) xmlFree(tmp);
8592
17.1k
      *prefix = NULL;
8593
17.1k
      return(p);
8594
17.7k
  }
8595
2.12M
  if (CUR == ':') {
8596
27.3k
      xmlChar *tmp;
8597
8598
27.3k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8599
27.3k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8600
27.3k
      NEXT;
8601
27.3k
      tmp = (xmlChar *) xmlParseName(ctxt);
8602
27.3k
      if (tmp != NULL) {
8603
19.8k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8604
19.8k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8605
19.8k
    if (tmp != NULL) xmlFree(tmp);
8606
19.8k
    *prefix = p;
8607
19.8k
    return(l);
8608
19.8k
      }
8609
7.53k
            if (ctxt->instate == XML_PARSER_EOF)
8610
229
                return(NULL);
8611
7.30k
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8612
7.30k
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8613
7.30k
      if (tmp != NULL) xmlFree(tmp);
8614
7.30k
      *prefix = p;
8615
7.30k
      return(l);
8616
7.53k
  }
8617
2.10M
  *prefix = p;
8618
2.10M
    } else
8619
3.92M
        *prefix = NULL;
8620
6.02M
    return(l);
8621
6.07M
}
8622
8623
/**
8624
 * xmlParseQNameAndCompare:
8625
 * @ctxt:  an XML parser context
8626
 * @name:  the localname
8627
 * @prefix:  the prefix, if any.
8628
 *
8629
 * parse an XML name and compares for match
8630
 * (specialized for endtag parsing)
8631
 *
8632
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8633
 * and the name for mismatch
8634
 */
8635
8636
static const xmlChar *
8637
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8638
448k
                        xmlChar const *prefix) {
8639
448k
    const xmlChar *cmp;
8640
448k
    const xmlChar *in;
8641
448k
    const xmlChar *ret;
8642
448k
    const xmlChar *prefix2;
8643
8644
448k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8645
8646
448k
    GROW;
8647
448k
    in = ctxt->input->cur;
8648
8649
448k
    cmp = prefix;
8650
1.24M
    while (*in != 0 && *in == *cmp) {
8651
800k
  ++in;
8652
800k
  ++cmp;
8653
800k
    }
8654
448k
    if ((*cmp == 0) && (*in == ':')) {
8655
442k
        in++;
8656
442k
  cmp = name;
8657
4.14M
  while (*in != 0 && *in == *cmp) {
8658
3.70M
      ++in;
8659
3.70M
      ++cmp;
8660
3.70M
  }
8661
442k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8662
      /* success */
8663
436k
            ctxt->input->col += in - ctxt->input->cur;
8664
436k
      ctxt->input->cur = in;
8665
436k
      return((const xmlChar*) 1);
8666
436k
  }
8667
442k
    }
8668
    /*
8669
     * all strings coms from the dictionary, equality can be done directly
8670
     */
8671
12.7k
    ret = xmlParseQName (ctxt, &prefix2);
8672
12.7k
    if ((ret == name) && (prefix == prefix2))
8673
477
  return((const xmlChar*) 1);
8674
12.2k
    return ret;
8675
12.7k
}
8676
8677
/**
8678
 * xmlParseAttValueInternal:
8679
 * @ctxt:  an XML parser context
8680
 * @len:  attribute len result
8681
 * @alloc:  whether the attribute was reallocated as a new string
8682
 * @normalize:  if 1 then further non-CDATA normalization must be done
8683
 *
8684
 * parse a value for an attribute.
8685
 * NOTE: if no normalization is needed, the routine will return pointers
8686
 *       directly from the data buffer.
8687
 *
8688
 * 3.3.3 Attribute-Value Normalization:
8689
 * Before the value of an attribute is passed to the application or
8690
 * checked for validity, the XML processor must normalize it as follows:
8691
 * - a character reference is processed by appending the referenced
8692
 *   character to the attribute value
8693
 * - an entity reference is processed by recursively processing the
8694
 *   replacement text of the entity
8695
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8696
 *   appending #x20 to the normalized value, except that only a single
8697
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8698
 *   parsed entity or the literal entity value of an internal parsed entity
8699
 * - other characters are processed by appending them to the normalized value
8700
 * If the declared value is not CDATA, then the XML processor must further
8701
 * process the normalized attribute value by discarding any leading and
8702
 * trailing space (#x20) characters, and by replacing sequences of space
8703
 * (#x20) characters by a single space (#x20) character.
8704
 * All attributes for which no declaration has been read should be treated
8705
 * by a non-validating parser as if declared CDATA.
8706
 *
8707
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8708
 *     caller if it was copied, this can be detected by val[*len] == 0.
8709
 */
8710
8711
/*
 * GROW_PARSE_ATT_VALUE_INTERNAL:
 *
 * Refill the input while an attribute value is being scanned through raw
 * pointers. If GROW changed ctxt->input->base, @start and @in are shifted
 * by the same offset so they keep designating the same bytes; @end is
 * always reloaded from ctxt->input->end.
 *
 * The expansion declares a local (oldbase) and is not wrapped in
 * do { } while (0), so it has to be used as the first thing in a block of
 * its own. It also makes the enclosing function return NULL when the
 * parser state is XML_PARSER_EOF after growing.
 */
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8712
1.65k
    const xmlChar *oldbase = ctxt->input->base;\
8713
1.65k
    GROW;\
8714
1.65k
    if (ctxt->instate == XML_PARSER_EOF)\
8715
1.65k
        return(NULL);\
8716
1.65k
    if (oldbase != ctxt->input->base) {\
8717
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
8718
0
        start = start + delta;\
8719
0
        in = in + delta;\
8720
0
    }\
8721
1.65k
    end = ctxt->input->end;
8722
8723
static xmlChar *
8724
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8725
                         int normalize)
8726
2.71M
{
8727
2.71M
    xmlChar limit = 0;
8728
2.71M
    const xmlChar *in = NULL, *start, *end, *last;
8729
2.71M
    xmlChar *ret = NULL;
8730
2.71M
    int line, col;
8731
2.71M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8732
0
                    XML_MAX_HUGE_LENGTH :
8733
2.71M
                    XML_MAX_TEXT_LENGTH;
8734
8735
2.71M
    GROW;
8736
2.71M
    in = (xmlChar *) CUR_PTR;
8737
2.71M
    line = ctxt->input->line;
8738
2.71M
    col = ctxt->input->col;
8739
2.71M
    if (*in != '"' && *in != '\'') {
8740
10.8k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8741
10.8k
        return (NULL);
8742
10.8k
    }
8743
2.70M
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8744
8745
    /*
8746
     * try to handle in this routine the most common case where no
8747
     * allocation of a new string is required and where content is
8748
     * pure ASCII.
8749
     */
8750
2.70M
    limit = *in++;
8751
2.70M
    col++;
8752
2.70M
    end = ctxt->input->end;
8753
2.70M
    start = in;
8754
2.70M
    if (in >= end) {
8755
265
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8756
265
    }
8757
2.70M
    if (normalize) {
8758
        /*
8759
   * Skip any leading spaces
8760
   */
8761
23.8k
  while ((in < end) && (*in != limit) &&
8762
23.8k
         ((*in == 0x20) || (*in == 0x9) ||
8763
22.6k
          (*in == 0xA) || (*in == 0xD))) {
8764
15.0k
      if (*in == 0xA) {
8765
13.4k
          line++; col = 1;
8766
13.4k
      } else {
8767
1.60k
          col++;
8768
1.60k
      }
8769
15.0k
      in++;
8770
15.0k
      start = in;
8771
15.0k
      if (in >= end) {
8772
253
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8773
253
                if ((in - start) > maxLength) {
8774
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8775
0
                                   "AttValue length too long\n");
8776
0
                    return(NULL);
8777
0
                }
8778
253
      }
8779
15.0k
  }
8780
48.2k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8781
48.2k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8782
40.3k
      col++;
8783
40.3k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
8784
39.5k
      if (in >= end) {
8785
282
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8786
282
                if ((in - start) > maxLength) {
8787
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8788
0
                                   "AttValue length too long\n");
8789
0
                    return(NULL);
8790
0
                }
8791
282
      }
8792
39.5k
  }
8793
8.74k
  last = in;
8794
  /*
8795
   * skip the trailing blanks
8796
   */
8797
9.60k
  while ((last[-1] == 0x20) && (last > start)) last--;
8798
18.5k
  while ((in < end) && (*in != limit) &&
8799
18.5k
         ((*in == 0x20) || (*in == 0x9) ||
8800
15.1k
          (*in == 0xA) || (*in == 0xD))) {
8801
9.82k
      if (*in == 0xA) {
8802
5.43k
          line++, col = 1;
8803
5.43k
      } else {
8804
4.39k
          col++;
8805
4.39k
      }
8806
9.82k
      in++;
8807
9.82k
      if (in >= end) {
8808
239
    const xmlChar *oldbase = ctxt->input->base;
8809
239
    GROW;
8810
239
                if (ctxt->instate == XML_PARSER_EOF)
8811
0
                    return(NULL);
8812
239
    if (oldbase != ctxt->input->base) {
8813
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
8814
0
        start = start + delta;
8815
0
        in = in + delta;
8816
0
        last = last + delta;
8817
0
    }
8818
239
    end = ctxt->input->end;
8819
239
                if ((in - start) > maxLength) {
8820
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8821
0
                                   "AttValue length too long\n");
8822
0
                    return(NULL);
8823
0
                }
8824
239
      }
8825
9.82k
  }
8826
8.74k
        if ((in - start) > maxLength) {
8827
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8828
0
                           "AttValue length too long\n");
8829
0
            return(NULL);
8830
0
        }
8831
8.74k
  if (*in != limit) goto need_complex;
8832
2.69M
    } else {
8833
37.0M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8834
37.0M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8835
34.3M
      in++;
8836
34.3M
      col++;
8837
34.3M
      if (in >= end) {
8838
857
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8839
857
                if ((in - start) > maxLength) {
8840
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8841
0
                                   "AttValue length too long\n");
8842
0
                    return(NULL);
8843
0
                }
8844
857
      }
8845
34.3M
  }
8846
2.69M
  last = in;
8847
2.69M
        if ((in - start) > maxLength) {
8848
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8849
0
                           "AttValue length too long\n");
8850
0
            return(NULL);
8851
0
        }
8852
2.69M
  if (*in != limit) goto need_complex;
8853
2.69M
    }
8854
2.26M
    in++;
8855
2.26M
    col++;
8856
2.26M
    if (len != NULL) {
8857
2.19M
        if (alloc) *alloc = 0;
8858
2.19M
        *len = last - start;
8859
2.19M
        ret = (xmlChar *) start;
8860
2.19M
    } else {
8861
68.4k
        if (alloc) *alloc = 1;
8862
68.4k
        ret = xmlStrndup(start, last - start);
8863
68.4k
    }
8864
2.26M
    CUR_PTR = in;
8865
2.26M
    ctxt->input->line = line;
8866
2.26M
    ctxt->input->col = col;
8867
2.26M
    return ret;
8868
446k
need_complex:
8869
446k
    if (alloc) *alloc = 1;
8870
446k
    return xmlParseAttValueComplex(ctxt, len, normalize);
8871
2.70M
}
8872
8873
/**
8874
 * xmlParseAttribute2:
8875
 * @ctxt:  an XML parser context
8876
 * @pref:  the element prefix
8877
 * @elem:  the element name
8878
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8879
 * @value:  a xmlChar ** used to store the value of the attribute
8880
 * @len:  an int * to save the length of the attribute
8881
 * @alloc:  an int * to indicate if the attribute was allocated
8882
 *
8883
 * parse an attribute in the new SAX2 framework.
8884
 *
8885
 * Returns the attribute name, and the value in *value, .
8886
 */
8887
8888
static const xmlChar *
8889
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8890
                   const xmlChar * pref, const xmlChar * elem,
8891
                   const xmlChar ** prefix, xmlChar ** value,
8892
                   int *len, int *alloc)
8893
4.00M
{
8894
4.00M
    const xmlChar *name;
8895
4.00M
    xmlChar *val, *internal_val = NULL;
8896
4.00M
    int normalize = 0;
8897
8898
4.00M
    *value = NULL;
8899
4.00M
    GROW;
8900
4.00M
    name = xmlParseQName(ctxt, prefix);
8901
4.00M
    if (name == NULL) {
8902
1.34M
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8903
1.34M
                       "error parsing attribute name\n");
8904
1.34M
        return (NULL);
8905
1.34M
    }
8906
8907
    /*
8908
     * get the type if needed
8909
     */
8910
2.65M
    if (ctxt->attsSpecial != NULL) {
8911
18.5k
        int type;
8912
8913
18.5k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8914
18.5k
                                                 pref, elem, *prefix, name);
8915
18.5k
        if (type != 0)
8916
9.63k
            normalize = 1;
8917
18.5k
    }
8918
8919
    /*
8920
     * read the value
8921
     */
8922
2.65M
    SKIP_BLANKS;
8923
2.65M
    if (RAW == '=') {
8924
2.62M
        NEXT;
8925
2.62M
        SKIP_BLANKS;
8926
2.62M
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8927
2.62M
        if (val == NULL)
8928
4.94k
            return (NULL);
8929
2.62M
  if (normalize) {
8930
      /*
8931
       * Sometimes a second normalisation pass for spaces is needed
8932
       * but that only happens if charrefs or entities references
8933
       * have been used in the attribute value, i.e. the attribute
8934
       * value have been extracted in an allocated string already.
8935
       */
8936
8.73k
      if (*alloc) {
8937
6.09k
          const xmlChar *val2;
8938
8939
6.09k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8940
6.09k
    if ((val2 != NULL) && (val2 != val)) {
8941
317
        xmlFree(val);
8942
317
        val = (xmlChar *) val2;
8943
317
    }
8944
6.09k
      }
8945
8.73k
  }
8946
2.62M
        ctxt->instate = XML_PARSER_CONTENT;
8947
2.62M
    } else {
8948
24.7k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8949
24.7k
                          "Specification mandates value for attribute %s\n",
8950
24.7k
                          name);
8951
24.7k
        return (name);
8952
24.7k
    }
8953
8954
2.62M
    if (*prefix == ctxt->str_xml) {
8955
        /*
8956
         * Check that xml:lang conforms to the specification
8957
         * No more registered as an error, just generate a warning now
8958
         * since this was deprecated in XML second edition
8959
         */
8960
272k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8961
0
            internal_val = xmlStrndup(val, *len);
8962
0
            if (!xmlCheckLanguageID(internal_val)) {
8963
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8964
0
                              "Malformed value for xml:lang : %s\n",
8965
0
                              internal_val, NULL);
8966
0
            }
8967
0
        }
8968
8969
        /*
8970
         * Check that xml:space conforms to the specification
8971
         */
8972
272k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8973
1.65k
            internal_val = xmlStrndup(val, *len);
8974
1.65k
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8975
284
                *(ctxt->space) = 0;
8976
1.37k
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8977
729
                *(ctxt->space) = 1;
8978
643
            else {
8979
643
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8980
643
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8981
643
                              internal_val, NULL);
8982
643
            }
8983
1.65k
        }
8984
272k
        if (internal_val) {
8985
1.63k
            xmlFree(internal_val);
8986
1.63k
        }
8987
272k
    }
8988
8989
2.62M
    *value = val;
8990
2.62M
    return (name);
8991
2.65M
}
8992
/**
8993
 * xmlParseStartTag2:
8994
 * @ctxt:  an XML parser context
8995
 *
8996
 * Parse a start tag. Always consumes '<'.
8997
 *
8998
 * This routine is called when running SAX2 parsing
8999
 *
9000
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9001
 *
9002
 * [ WFC: Unique Att Spec ]
9003
 * No attribute name may appear more than once in the same start-tag or
9004
 * empty-element tag.
9005
 *
9006
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9007
 *
9008
 * [ WFC: Unique Att Spec ]
9009
 * No attribute name may appear more than once in the same start-tag or
9010
 * empty-element tag.
9011
 *
9012
 * With namespace:
9013
 *
9014
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9015
 *
9016
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9017
 *
9018
 * Returns the element name parsed
9019
 */
9020
9021
static const xmlChar *
9022
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9023
3.50M
                  const xmlChar **URI, int *tlen) {
9024
3.50M
    const xmlChar *localname;
9025
3.50M
    const xmlChar *prefix;
9026
3.50M
    const xmlChar *attname;
9027
3.50M
    const xmlChar *aprefix;
9028
3.50M
    const xmlChar *nsname;
9029
3.50M
    xmlChar *attvalue;
9030
3.50M
    const xmlChar **atts = ctxt->atts;
9031
3.50M
    int maxatts = ctxt->maxatts;
9032
3.50M
    int nratts, nbatts, nbdef, inputid;
9033
3.50M
    int i, j, nbNs, attval;
9034
3.50M
    size_t cur;
9035
3.50M
    int nsNr = ctxt->nsNr;
9036
9037
3.50M
    if (RAW != '<') return(NULL);
9038
3.50M
    NEXT1;
9039
9040
3.50M
    cur = ctxt->input->cur - ctxt->input->base;
9041
3.50M
    inputid = ctxt->input->id;
9042
3.50M
    nbatts = 0;
9043
3.50M
    nratts = 0;
9044
3.50M
    nbdef = 0;
9045
3.50M
    nbNs = 0;
9046
3.50M
    attval = 0;
9047
    /* Forget any namespaces added during an earlier parse of this element. */
9048
3.50M
    ctxt->nsNr = nsNr;
9049
9050
3.50M
    localname = xmlParseQName(ctxt, &prefix);
9051
3.50M
    if (localname == NULL) {
9052
84.8k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9053
84.8k
           "StartTag: invalid element name\n");
9054
84.8k
        return(NULL);
9055
84.8k
    }
9056
3.42M
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9057
9058
    /*
9059
     * Now parse the attributes, it ends up with the ending
9060
     *
9061
     * (S Attribute)* S?
9062
     */
9063
3.42M
    SKIP_BLANKS;
9064
3.42M
    GROW;
9065
9066
4.87M
    while (((RAW != '>') &&
9067
4.87M
     ((RAW != '/') || (NXT(1) != '>')) &&
9068
4.87M
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9069
4.00M
  int len = -1, alloc = 0;
9070
9071
4.00M
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9072
4.00M
                               &aprefix, &attvalue, &len, &alloc);
9073
4.00M
        if (attname == NULL) {
9074
1.35M
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9075
1.35M
           "xmlParseStartTag: problem parsing attributes\n");
9076
1.35M
      break;
9077
1.35M
  }
9078
2.64M
        if (attvalue == NULL)
9079
24.7k
            goto next_attr;
9080
2.62M
  if (len < 0) len = xmlStrlen(attvalue);
9081
9082
2.62M
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9083
25.8k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9084
25.8k
            xmlURIPtr uri;
9085
9086
25.8k
            if (URL == NULL) {
9087
4
                xmlErrMemory(ctxt, "dictionary allocation failure");
9088
4
                if ((attvalue != NULL) && (alloc != 0))
9089
1
                    xmlFree(attvalue);
9090
4
                localname = NULL;
9091
4
                goto done;
9092
4
            }
9093
25.8k
            if (*URL != 0) {
9094
24.6k
                uri = xmlParseURI((const char *) URL);
9095
24.6k
                if (uri == NULL) {
9096
12.3k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9097
12.3k
                             "xmlns: '%s' is not a valid URI\n",
9098
12.3k
                                       URL, NULL, NULL);
9099
12.3k
                } else {
9100
12.2k
                    if (uri->scheme == NULL) {
9101
7.18k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9102
7.18k
                                  "xmlns: URI %s is not absolute\n",
9103
7.18k
                                  URL, NULL, NULL);
9104
7.18k
                    }
9105
12.2k
                    xmlFreeURI(uri);
9106
12.2k
                }
9107
24.6k
                if (URL == ctxt->str_xml_ns) {
9108
203
                    if (attname != ctxt->str_xml) {
9109
203
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9110
203
                     "xml namespace URI cannot be the default namespace\n",
9111
203
                                 NULL, NULL, NULL);
9112
203
                    }
9113
203
                    goto next_attr;
9114
203
                }
9115
24.4k
                if ((len == 29) &&
9116
24.4k
                    (xmlStrEqual(URL,
9117
673
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9118
233
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9119
233
                         "reuse of the xmlns namespace name is forbidden\n",
9120
233
                             NULL, NULL, NULL);
9121
233
                    goto next_attr;
9122
233
                }
9123
24.4k
            }
9124
            /*
9125
             * check that it's not a defined namespace
9126
             */
9127
28.7k
            for (j = 1;j <= nbNs;j++)
9128
4.29k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9129
926
                    break;
9130
25.3k
            if (j <= nbNs)
9131
926
                xmlErrAttributeDup(ctxt, NULL, attname);
9132
24.4k
            else
9133
24.4k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9134
9135
2.59M
        } else if (aprefix == ctxt->str_xmlns) {
9136
347k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9137
347k
            xmlURIPtr uri;
9138
9139
347k
            if (attname == ctxt->str_xml) {
9140
1.01k
                if (URL != ctxt->str_xml_ns) {
9141
942
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9142
942
                             "xml namespace prefix mapped to wrong URI\n",
9143
942
                             NULL, NULL, NULL);
9144
942
                }
9145
                /*
9146
                 * Do not keep a namespace definition node
9147
                 */
9148
1.01k
                goto next_attr;
9149
1.01k
            }
9150
346k
            if (URL == ctxt->str_xml_ns) {
9151
334
                if (attname != ctxt->str_xml) {
9152
334
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9153
334
                             "xml namespace URI mapped to wrong prefix\n",
9154
334
                             NULL, NULL, NULL);
9155
334
                }
9156
334
                goto next_attr;
9157
334
            }
9158
346k
            if (attname == ctxt->str_xmlns) {
9159
463
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9160
463
                         "redefinition of the xmlns prefix is forbidden\n",
9161
463
                         NULL, NULL, NULL);
9162
463
                goto next_attr;
9163
463
            }
9164
345k
            if ((len == 29) &&
9165
345k
                (xmlStrEqual(URL,
9166
1.68k
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9167
446
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9168
446
                         "reuse of the xmlns namespace name is forbidden\n",
9169
446
                         NULL, NULL, NULL);
9170
446
                goto next_attr;
9171
446
            }
9172
345k
            if ((URL == NULL) || (URL[0] == 0)) {
9173
1.83k
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9174
1.83k
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9175
1.83k
                              attname, NULL, NULL);
9176
1.83k
                goto next_attr;
9177
343k
            } else {
9178
343k
                uri = xmlParseURI((const char *) URL);
9179
343k
                if (uri == NULL) {
9180
16.8k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9181
16.8k
                         "xmlns:%s: '%s' is not a valid URI\n",
9182
16.8k
                                       attname, URL, NULL);
9183
326k
                } else {
9184
326k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9185
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9186
0
                                  "xmlns:%s: URI %s is not absolute\n",
9187
0
                                  attname, URL, NULL);
9188
0
                    }
9189
326k
                    xmlFreeURI(uri);
9190
326k
                }
9191
343k
            }
9192
9193
            /*
9194
             * check that it's not a defined namespace
9195
             */
9196
416k
            for (j = 1;j <= nbNs;j++)
9197
75.6k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9198
2.44k
                    break;
9199
343k
            if (j <= nbNs)
9200
2.44k
                xmlErrAttributeDup(ctxt, aprefix, attname);
9201
341k
            else
9202
341k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9203
9204
2.25M
        } else {
9205
            /*
9206
             * Add the pair to atts
9207
             */
9208
2.25M
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9209
207k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9210
49
                    goto next_attr;
9211
49
                }
9212
207k
                maxatts = ctxt->maxatts;
9213
207k
                atts = ctxt->atts;
9214
207k
            }
9215
2.25M
            ctxt->attallocs[nratts++] = alloc;
9216
2.25M
            atts[nbatts++] = attname;
9217
2.25M
            atts[nbatts++] = aprefix;
9218
            /*
9219
             * The namespace URI field is used temporarily to point at the
9220
             * base of the current input buffer for non-alloced attributes.
9221
             * When the input buffer is reallocated, all the pointers become
9222
             * invalid, but they can be reconstructed later.
9223
             */
9224
2.25M
            if (alloc)
9225
395k
                atts[nbatts++] = NULL;
9226
1.85M
            else
9227
1.85M
                atts[nbatts++] = ctxt->input->base;
9228
2.25M
            atts[nbatts++] = attvalue;
9229
2.25M
            attvalue += len;
9230
2.25M
            atts[nbatts++] = attvalue;
9231
            /*
9232
             * tag if some deallocation is needed
9233
             */
9234
2.25M
            if (alloc != 0) attval = 1;
9235
2.25M
            attvalue = NULL; /* moved into atts */
9236
2.25M
        }
9237
9238
2.64M
next_attr:
9239
2.64M
        if ((attvalue != NULL) && (alloc != 0)) {
9240
35.5k
            xmlFree(attvalue);
9241
35.5k
            attvalue = NULL;
9242
35.5k
        }
9243
9244
2.64M
  GROW
9245
2.64M
        if (ctxt->instate == XML_PARSER_EOF)
9246
584
            break;
9247
2.64M
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9248
1.13M
      break;
9249
1.51M
  if (SKIP_BLANKS == 0) {
9250
63.7k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9251
63.7k
         "attributes construct error\n");
9252
63.7k
      break;
9253
63.7k
  }
9254
1.45M
        GROW;
9255
1.45M
    }
9256
9257
3.42M
    if (ctxt->input->id != inputid) {
9258
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9259
0
                    "Unexpected change of input\n");
9260
0
        localname = NULL;
9261
0
        goto done;
9262
0
    }
9263
9264
    /* Reconstruct attribute value pointers. */
9265
5.67M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9266
2.25M
        if (atts[i+2] != NULL) {
9267
            /*
9268
             * Arithmetic on dangling pointers is technically undefined
9269
             * behavior, but well...
9270
             */
9271
1.85M
            const xmlChar *old = atts[i+2];
9272
1.85M
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9273
1.85M
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9274
1.85M
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9275
1.85M
        }
9276
2.25M
    }
9277
9278
    /*
9279
     * The attributes defaulting
9280
     */
9281
3.42M
    if (ctxt->attsDefault != NULL) {
9282
34.3k
        xmlDefAttrsPtr defaults;
9283
9284
34.3k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9285
34.3k
  if (defaults != NULL) {
9286
66.8k
      for (i = 0;i < defaults->nbAttrs;i++) {
9287
44.8k
          attname = defaults->values[5 * i];
9288
44.8k
    aprefix = defaults->values[5 * i + 1];
9289
9290
                /*
9291
     * special work for namespaces defaulted defs
9292
     */
9293
44.8k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9294
        /*
9295
         * check that it's not a defined namespace
9296
         */
9297
5.87k
        for (j = 1;j <= nbNs;j++)
9298
2.79k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9299
1.33k
          break;
9300
4.41k
              if (j <= nbNs) continue;
9301
9302
3.07k
        nsname = xmlGetNamespace(ctxt, NULL);
9303
3.07k
        if (nsname != defaults->values[5 * i + 2]) {
9304
2.16k
      if (nsPush(ctxt, NULL,
9305
2.16k
                 defaults->values[5 * i + 2]) > 0)
9306
2.16k
          nbNs++;
9307
2.16k
        }
9308
40.4k
    } else if (aprefix == ctxt->str_xmlns) {
9309
        /*
9310
         * check that it's not a defined namespace
9311
         */
9312
4.90k
        for (j = 1;j <= nbNs;j++)
9313
1.76k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9314
359
          break;
9315
3.50k
              if (j <= nbNs) continue;
9316
9317
3.14k
        nsname = xmlGetNamespace(ctxt, attname);
9318
3.14k
        if (nsname != defaults->values[5 * i + 2]) {
9319
2.51k
      if (nsPush(ctxt, attname,
9320
2.51k
                 defaults->values[5 * i + 2]) > 0)
9321
2.51k
          nbNs++;
9322
2.51k
        }
9323
36.9k
    } else {
9324
        /*
9325
         * check that it's not a defined attribute
9326
         */
9327
97.6k
        for (j = 0;j < nbatts;j+=5) {
9328
66.9k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9329
6.24k
          break;
9330
66.9k
        }
9331
36.9k
        if (j < nbatts) continue;
9332
9333
30.7k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9334
1.29k
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9335
6
                            localname = NULL;
9336
6
                            goto done;
9337
6
      }
9338
1.29k
      maxatts = ctxt->maxatts;
9339
1.29k
      atts = ctxt->atts;
9340
1.29k
        }
9341
30.7k
        atts[nbatts++] = attname;
9342
30.7k
        atts[nbatts++] = aprefix;
9343
30.7k
        if (aprefix == NULL)
9344
9.25k
      atts[nbatts++] = NULL;
9345
21.4k
        else
9346
21.4k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9347
30.7k
        atts[nbatts++] = defaults->values[5 * i + 2];
9348
30.7k
        atts[nbatts++] = defaults->values[5 * i + 3];
9349
30.7k
        if ((ctxt->standalone == 1) &&
9350
30.7k
            (defaults->values[5 * i + 4] != NULL)) {
9351
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9352
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9353
0
                                   attname, localname);
9354
0
        }
9355
30.7k
        nbdef++;
9356
30.7k
    }
9357
44.8k
      }
9358
21.9k
  }
9359
34.3k
    }
9360
9361
    /*
9362
     * The attributes checkings
9363
     */
9364
5.70M
    for (i = 0; i < nbatts;i += 5) {
9365
        /*
9366
  * The default namespace does not apply to attribute names.
9367
  */
9368
2.28M
  if (atts[i + 1] != NULL) {
9369
400k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9370
400k
      if (nsname == NULL) {
9371
121k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9372
121k
        "Namespace prefix %s for %s on %s is not defined\n",
9373
121k
        atts[i + 1], atts[i], localname);
9374
121k
      }
9375
400k
      atts[i + 2] = nsname;
9376
400k
  } else
9377
1.87M
      nsname = NULL;
9378
  /*
9379
   * [ WFC: Unique Att Spec ]
9380
   * No attribute name may appear more than once in the same
9381
   * start-tag or empty-element tag.
9382
   * As extended by the Namespace in XML REC.
9383
   */
9384
3.75M
        for (j = 0; j < i;j += 5) {
9385
1.47M
      if (atts[i] == atts[j]) {
9386
9.59k
          if (atts[i+1] == atts[j+1]) {
9387
3.46k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9388
3.46k
        break;
9389
3.46k
    }
9390
6.13k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9391
627
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9392
627
           "Namespaced Attribute %s in '%s' redefined\n",
9393
627
           atts[i], nsname, NULL);
9394
627
        break;
9395
627
    }
9396
6.13k
      }
9397
1.47M
  }
9398
2.28M
    }
9399
9400
3.42M
    nsname = xmlGetNamespace(ctxt, prefix);
9401
3.42M
    if ((prefix != NULL) && (nsname == NULL)) {
9402
177k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9403
177k
           "Namespace prefix %s on %s is not defined\n",
9404
177k
     prefix, localname, NULL);
9405
177k
    }
9406
3.42M
    *pref = prefix;
9407
3.42M
    *URI = nsname;
9408
9409
    /*
9410
     * SAX: Start of Element !
9411
     */
9412
3.42M
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9413
3.42M
  (!ctxt->disableSAX)) {
9414
3.02M
  if (nbNs > 0)
9415
252k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9416
252k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9417
252k
        nbatts / 5, nbdef, atts);
9418
2.77M
  else
9419
2.77M
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9420
2.77M
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9421
3.02M
    }
9422
9423
3.42M
done:
9424
    /*
9425
     * Free up attribute allocated strings if needed
9426
     */
9427
3.42M
    if (attval != 0) {
9428
1.07M
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9429
741k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9430
395k
          xmlFree((xmlChar *) atts[i]);
9431
328k
    }
9432
9433
3.42M
    return(localname);
9434
3.42M
}
9435
9436
/**
9437
 * xmlParseEndTag2:
9438
 * @ctxt:  an XML parser context
9439
 * @line:  line of the start tag
9440
 * @nsNr:  number of namespaces on the start tag
9441
 *
9442
 * Parse an end tag. Always consumes '</'.
9443
 *
9444
 * [42] ETag ::= '</' Name S? '>'
9445
 *
9446
 * With namespace
9447
 *
9448
 * [NS 9] ETag ::= '</' QName S? '>'
9449
 */
9450
9451
static void
9452
611k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9453
611k
    const xmlChar *name;
9454
9455
611k
    GROW;
9456
611k
    if ((RAW != '<') || (NXT(1) != '/')) {
9457
229
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9458
229
  return;
9459
229
    }
9460
610k
    SKIP(2);
9461
9462
610k
    if (tag->prefix == NULL)
9463
161k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9464
448k
    else
9465
448k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9466
9467
    /*
9468
     * We should definitely be at the ending "S? '>'" part
9469
     */
9470
610k
    GROW;
9471
610k
    if (ctxt->instate == XML_PARSER_EOF)
9472
486
        return;
9473
610k
    SKIP_BLANKS;
9474
610k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9475
10.8k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9476
10.8k
    } else
9477
599k
  NEXT1;
9478
9479
    /*
9480
     * [ WFC: Element Type Match ]
9481
     * The Name in an element's end-tag must match the element type in the
9482
     * start-tag.
9483
     *
9484
     */
9485
610k
    if (name != (xmlChar*)1) {
9486
22.1k
        if (name == NULL) name = BAD_CAST "unparsable";
9487
22.1k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9488
22.1k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9489
22.1k
                    ctxt->name, tag->line, name);
9490
22.1k
    }
9491
9492
    /*
9493
     * SAX: End of Tag
9494
     */
9495
610k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9496
610k
  (!ctxt->disableSAX))
9497
549k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9498
549k
                                tag->URI);
9499
9500
610k
    spacePop(ctxt);
9501
610k
    if (tag->nsNr != 0)
9502
177k
  nsPop(ctxt, tag->nsNr);
9503
610k
}
9504
9505
/**
9506
 * xmlParseCDSect:
9507
 * @ctxt:  an XML parser context
9508
 *
9509
 * DEPRECATED: Internal function, don't use.
9510
 *
9511
 * Parse escaped pure raw content. Always consumes '<!['.
9512
 *
9513
 * [18] CDSect ::= CDStart CData CDEnd
9514
 *
9515
 * [19] CDStart ::= '<![CDATA['
9516
 *
9517
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9518
 *
9519
 * [21] CDEnd ::= ']]>'
9520
 */
9521
void
9522
18.1k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9523
18.1k
    xmlChar *buf = NULL;
9524
18.1k
    int len = 0;
9525
18.1k
    int size = XML_PARSER_BUFFER_SIZE;
9526
18.1k
    int r, rl;
9527
18.1k
    int s, sl;
9528
18.1k
    int cur, l;
9529
18.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9530
0
                    XML_MAX_HUGE_LENGTH :
9531
18.1k
                    XML_MAX_TEXT_LENGTH;
9532
9533
18.1k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9534
0
        return;
9535
18.1k
    SKIP(3);
9536
9537
18.1k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9538
0
        return;
9539
18.1k
    SKIP(6);
9540
9541
18.1k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9542
18.1k
    r = CUR_CHAR(rl);
9543
18.1k
    if (!IS_CHAR(r)) {
9544
1.39k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9545
1.39k
        goto out;
9546
1.39k
    }
9547
16.7k
    NEXTL(rl);
9548
16.7k
    s = CUR_CHAR(sl);
9549
16.7k
    if (!IS_CHAR(s)) {
9550
2.22k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9551
2.22k
        goto out;
9552
2.22k
    }
9553
14.5k
    NEXTL(sl);
9554
14.5k
    cur = CUR_CHAR(l);
9555
14.5k
    buf = (xmlChar *) xmlMallocAtomic(size);
9556
14.5k
    if (buf == NULL) {
9557
13
  xmlErrMemory(ctxt, NULL);
9558
13
        goto out;
9559
13
    }
9560
39.4M
    while (IS_CHAR(cur) &&
9561
39.4M
           ((r != ']') || (s != ']') || (cur != '>'))) {
9562
39.4M
  if (len + 5 >= size) {
9563
3.91k
      xmlChar *tmp;
9564
9565
3.91k
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9566
3.91k
      if (tmp == NULL) {
9567
1
    xmlErrMemory(ctxt, NULL);
9568
1
                goto out;
9569
1
      }
9570
3.91k
      buf = tmp;
9571
3.91k
      size *= 2;
9572
3.91k
  }
9573
39.4M
  COPY_BUF(rl,buf,len,r);
9574
39.4M
        if (len > maxLength) {
9575
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9576
0
                           "CData section too big found\n");
9577
0
            goto out;
9578
0
        }
9579
39.4M
  r = s;
9580
39.4M
  rl = sl;
9581
39.4M
  s = cur;
9582
39.4M
  sl = l;
9583
39.4M
  NEXTL(l);
9584
39.4M
  cur = CUR_CHAR(l);
9585
39.4M
    }
9586
14.5k
    buf[len] = 0;
9587
14.5k
    if (ctxt->instate == XML_PARSER_EOF) {
9588
288
        xmlFree(buf);
9589
288
        return;
9590
288
    }
9591
14.2k
    if (cur != '>') {
9592
4.85k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9593
4.85k
                       "CData section not finished\n%.50s\n", buf);
9594
4.85k
        goto out;
9595
4.85k
    }
9596
9.41k
    NEXTL(l);
9597
9598
    /*
9599
     * OK the buffer is to be consumed as cdata.
9600
     */
9601
9.41k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9602
8.03k
  if (ctxt->sax->cdataBlock != NULL)
9603
927
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9604
7.11k
  else if (ctxt->sax->characters != NULL)
9605
7.11k
      ctxt->sax->characters(ctxt->userData, buf, len);
9606
8.03k
    }
9607
9608
17.8k
out:
9609
17.8k
    if (ctxt->instate != XML_PARSER_EOF)
9610
17.6k
        ctxt->instate = XML_PARSER_CONTENT;
9611
17.8k
    xmlFree(buf);
9612
17.8k
}
9613
9614
/**
9615
 * xmlParseContentInternal:
9616
 * @ctxt:  an XML parser context
9617
 *
9618
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9619
 * unexpected EOF to the caller.
9620
 */
9621
9622
static void
9623
232k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9624
232k
    int nameNr = ctxt->nameNr;
9625
9626
232k
    GROW;
9627
6.68M
    while ((RAW != 0) &&
9628
6.68M
     (ctxt->instate != XML_PARSER_EOF)) {
9629
6.64M
  const xmlChar *cur = ctxt->input->cur;
9630
9631
  /*
9632
   * First case : a Processing Instruction.
9633
   */
9634
6.64M
  if ((*cur == '<') && (cur[1] == '?')) {
9635
63.1k
      xmlParsePI(ctxt);
9636
63.1k
  }
9637
9638
  /*
9639
   * Second case : a CDSection
9640
   */
9641
  /* 2.6.0 test was *cur not RAW */
9642
6.57M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9643
18.1k
      xmlParseCDSect(ctxt);
9644
18.1k
  }
9645
9646
  /*
9647
   * Third case :  a comment
9648
   */
9649
6.56M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9650
6.56M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9651
124k
      xmlParseComment(ctxt);
9652
124k
      ctxt->instate = XML_PARSER_CONTENT;
9653
124k
  }
9654
9655
  /*
9656
   * Fourth case :  a sub-element.
9657
   */
9658
6.43M
  else if (*cur == '<') {
9659
3.86M
            if (NXT(1) == '/') {
9660
611k
                if (ctxt->nameNr <= nameNr)
9661
191k
                    break;
9662
419k
          xmlParseElementEnd(ctxt);
9663
3.25M
            } else {
9664
3.25M
          xmlParseElementStart(ctxt);
9665
3.25M
            }
9666
3.86M
  }
9667
9668
  /*
9669
   * Fifth case : a reference. If if has not been resolved,
9670
   *    parsing returns it's Name, create the node
9671
   */
9672
9673
2.56M
  else if (*cur == '&') {
9674
201k
      xmlParseReference(ctxt);
9675
201k
  }
9676
9677
  /*
9678
   * Last case, text. Note that References are handled directly.
9679
   */
9680
2.36M
  else {
9681
2.36M
      xmlParseCharDataInternal(ctxt, 0);
9682
2.36M
  }
9683
9684
6.45M
  SHRINK;
9685
6.45M
  GROW;
9686
6.45M
    }
9687
232k
}
9688
9689
/**
9690
 * xmlParseContent:
9691
 * @ctxt:  an XML parser context
9692
 *
9693
 * Parse a content sequence. Stops at EOF or '</'.
9694
 *
9695
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9696
 */
9697
9698
void
9699
8.89k
xmlParseContent(xmlParserCtxtPtr ctxt) {
9700
8.89k
    int nameNr = ctxt->nameNr;
9701
9702
8.89k
    xmlParseContentInternal(ctxt);
9703
9704
8.89k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9705
2.08k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9706
2.08k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9707
2.08k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9708
2.08k
                "Premature end of data in tag %s line %d\n",
9709
2.08k
    name, line, NULL);
9710
2.08k
    }
9711
8.89k
}
9712
9713
/**
9714
 * xmlParseElement:
9715
 * @ctxt:  an XML parser context
9716
 *
9717
 * DEPRECATED: Internal function, don't use.
9718
 *
9719
 * parse an XML element
9720
 *
9721
 * [39] element ::= EmptyElemTag | STag content ETag
9722
 *
9723
 * [ WFC: Element Type Match ]
9724
 * The Name in an element's end-tag must match the element type in the
9725
 * start-tag.
9726
 *
9727
 */
9728
9729
void
9730
249k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9731
249k
    if (xmlParseElementStart(ctxt) != 0)
9732
25.6k
        return;
9733
9734
223k
    xmlParseContentInternal(ctxt);
9735
223k
    if (ctxt->instate == XML_PARSER_EOF)
9736
7.17k
  return;
9737
9738
216k
    if (CUR == 0) {
9739
24.9k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9740
24.9k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9741
24.9k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9742
24.9k
                "Premature end of data in tag %s line %d\n",
9743
24.9k
    name, line, NULL);
9744
24.9k
        return;
9745
24.9k
    }
9746
9747
191k
    xmlParseElementEnd(ctxt);
9748
191k
}
9749
9750
/**
9751
 * xmlParseElementStart:
9752
 * @ctxt:  an XML parser context
9753
 *
9754
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9755
 * opening tag was parsed, 1 if an empty element was parsed.
9756
 *
9757
 * Always consumes '<'.
9758
 */
9759
static int
9760
3.50M
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9761
3.50M
    const xmlChar *name;
9762
3.50M
    const xmlChar *prefix = NULL;
9763
3.50M
    const xmlChar *URI = NULL;
9764
3.50M
    xmlParserNodeInfo node_info;
9765
3.50M
    int line, tlen = 0;
9766
3.50M
    xmlNodePtr cur;
9767
3.50M
    int nsNr = ctxt->nsNr;
9768
9769
3.50M
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9770
3.50M
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9771
425
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9772
425
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9773
425
        xmlParserMaxDepth);
9774
425
  xmlHaltParser(ctxt);
9775
425
  return(-1);
9776
425
    }
9777
9778
    /* Capture start position */
9779
3.50M
    if (ctxt->record_info) {
9780
0
        node_info.begin_pos = ctxt->input->consumed +
9781
0
                          (CUR_PTR - ctxt->input->base);
9782
0
  node_info.begin_line = ctxt->input->line;
9783
0
    }
9784
9785
3.50M
    if (ctxt->spaceNr == 0)
9786
0
  spacePush(ctxt, -1);
9787
3.50M
    else if (*ctxt->space == -2)
9788
0
  spacePush(ctxt, -1);
9789
3.50M
    else
9790
3.50M
  spacePush(ctxt, *ctxt->space);
9791
9792
3.50M
    line = ctxt->input->line;
9793
#ifdef LIBXML_SAX1_ENABLED
9794
    if (ctxt->sax2)
9795
#endif /* LIBXML_SAX1_ENABLED */
9796
3.50M
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9797
#ifdef LIBXML_SAX1_ENABLED
9798
    else
9799
  name = xmlParseStartTag(ctxt);
9800
#endif /* LIBXML_SAX1_ENABLED */
9801
3.50M
    if (ctxt->instate == XML_PARSER_EOF)
9802
4.11k
  return(-1);
9803
3.50M
    if (name == NULL) {
9804
84.0k
  spacePop(ctxt);
9805
84.0k
        return(-1);
9806
84.0k
    }
9807
3.41M
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
9808
3.41M
    cur = ctxt->node;
9809
9810
#ifdef LIBXML_VALID_ENABLED
9811
    /*
9812
     * [ VC: Root Element Type ]
9813
     * The Name in the document type declaration must match the element
9814
     * type of the root element.
9815
     */
9816
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9817
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9818
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9819
#endif /* LIBXML_VALID_ENABLED */
9820
9821
    /*
9822
     * Check for an Empty Element.
9823
     */
9824
3.41M
    if ((RAW == '/') && (NXT(1) == '>')) {
9825
1.12M
        SKIP(2);
9826
1.12M
  if (ctxt->sax2) {
9827
1.12M
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9828
1.12M
    (!ctxt->disableSAX))
9829
1.00M
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9830
#ifdef LIBXML_SAX1_ENABLED
9831
  } else {
9832
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9833
    (!ctxt->disableSAX))
9834
    ctxt->sax->endElement(ctxt->userData, name);
9835
#endif /* LIBXML_SAX1_ENABLED */
9836
1.12M
  }
9837
1.12M
  namePop(ctxt);
9838
1.12M
  spacePop(ctxt);
9839
1.12M
  if (nsNr != ctxt->nsNr)
9840
78.9k
      nsPop(ctxt, ctxt->nsNr - nsNr);
9841
1.12M
  if (cur != NULL && ctxt->record_info) {
9842
0
            node_info.node = cur;
9843
0
            node_info.end_pos = ctxt->input->consumed +
9844
0
                                (CUR_PTR - ctxt->input->base);
9845
0
            node_info.end_line = ctxt->input->line;
9846
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9847
0
  }
9848
1.12M
  return(1);
9849
1.12M
    }
9850
2.29M
    if (RAW == '>') {
9851
871k
        NEXT1;
9852
871k
        if (cur != NULL && ctxt->record_info) {
9853
0
            node_info.node = cur;
9854
0
            node_info.end_pos = 0;
9855
0
            node_info.end_line = 0;
9856
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9857
0
        }
9858
1.42M
    } else {
9859
1.42M
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9860
1.42M
         "Couldn't find end of Start Tag %s line %d\n",
9861
1.42M
                    name, line, NULL);
9862
9863
  /*
9864
   * end of parsing of this node.
9865
   */
9866
1.42M
  nodePop(ctxt);
9867
1.42M
  namePop(ctxt);
9868
1.42M
  spacePop(ctxt);
9869
1.42M
  if (nsNr != ctxt->nsNr)
9870
25.5k
      nsPop(ctxt, ctxt->nsNr - nsNr);
9871
1.42M
  return(-1);
9872
1.42M
    }
9873
9874
871k
    return(0);
9875
2.29M
}
9876
9877
/**
9878
 * xmlParseElementEnd:
9879
 * @ctxt:  an XML parser context
9880
 *
9881
 * Parse the end of an XML element. Always consumes '</'.
9882
 */
9883
static void
9884
611k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9885
611k
    xmlNodePtr cur = ctxt->node;
9886
9887
611k
    if (ctxt->nameNr <= 0) {
9888
0
        if ((RAW == '<') && (NXT(1) == '/'))
9889
0
            SKIP(2);
9890
0
        return;
9891
0
    }
9892
9893
    /*
9894
     * parse the end of tag: '</' should be here.
9895
     */
9896
611k
    if (ctxt->sax2) {
9897
611k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9898
611k
  namePop(ctxt);
9899
611k
    }
9900
#ifdef LIBXML_SAX1_ENABLED
9901
    else
9902
  xmlParseEndTag1(ctxt, 0);
9903
#endif /* LIBXML_SAX1_ENABLED */
9904
9905
    /*
9906
     * Capture end position
9907
     */
9908
611k
    if (cur != NULL && ctxt->record_info) {
9909
0
        xmlParserNodeInfoPtr node_info;
9910
9911
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9912
0
        if (node_info != NULL) {
9913
0
            node_info->end_pos = ctxt->input->consumed +
9914
0
                                 (CUR_PTR - ctxt->input->base);
9915
0
            node_info->end_line = ctxt->input->line;
9916
0
        }
9917
0
    }
9918
611k
}
9919
9920
/**
9921
 * xmlParseVersionNum:
9922
 * @ctxt:  an XML parser context
9923
 *
9924
 * DEPRECATED: Internal function, don't use.
9925
 *
9926
 * parse the XML version value.
9927
 *
9928
 * [26] VersionNum ::= '1.' [0-9]+
9929
 *
9930
 * In practice allow [0-9].[0-9]+ at that level
9931
 *
9932
 * Returns the string giving the XML version number, or NULL
9933
 */
9934
xmlChar *
9935
193k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9936
193k
    xmlChar *buf = NULL;
9937
193k
    int len = 0;
9938
193k
    int size = 10;
9939
193k
    xmlChar cur;
9940
9941
193k
    buf = (xmlChar *) xmlMallocAtomic(size);
9942
193k
    if (buf == NULL) {
9943
40
  xmlErrMemory(ctxt, NULL);
9944
40
  return(NULL);
9945
40
    }
9946
193k
    cur = CUR;
9947
193k
    if (!((cur >= '0') && (cur <= '9'))) {
9948
1.49k
  xmlFree(buf);
9949
1.49k
  return(NULL);
9950
1.49k
    }
9951
191k
    buf[len++] = cur;
9952
191k
    NEXT;
9953
191k
    cur=CUR;
9954
191k
    if (cur != '.') {
9955
541
  xmlFree(buf);
9956
541
  return(NULL);
9957
541
    }
9958
191k
    buf[len++] = cur;
9959
191k
    NEXT;
9960
191k
    cur=CUR;
9961
1.05M
    while ((cur >= '0') && (cur <= '9')) {
9962
865k
  if (len + 1 >= size) {
9963
2.06k
      xmlChar *tmp;
9964
9965
2.06k
      size *= 2;
9966
2.06k
      tmp = (xmlChar *) xmlRealloc(buf, size);
9967
2.06k
      if (tmp == NULL) {
9968
1
          xmlFree(buf);
9969
1
    xmlErrMemory(ctxt, NULL);
9970
1
    return(NULL);
9971
1
      }
9972
2.05k
      buf = tmp;
9973
2.05k
  }
9974
865k
  buf[len++] = cur;
9975
865k
  NEXT;
9976
865k
  cur=CUR;
9977
865k
    }
9978
191k
    buf[len] = 0;
9979
191k
    return(buf);
9980
191k
}
9981
9982
/**
9983
 * xmlParseVersionInfo:
9984
 * @ctxt:  an XML parser context
9985
 *
9986
 * DEPRECATED: Internal function, don't use.
9987
 *
9988
 * parse the XML version.
9989
 *
9990
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9991
 *
9992
 * [25] Eq ::= S? '=' S?
9993
 *
9994
 * Returns the version string, e.g. "1.0"
9995
 */
9996
9997
xmlChar *
9998
210k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9999
210k
    xmlChar *version = NULL;
10000
10001
210k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10002
197k
  SKIP(7);
10003
197k
  SKIP_BLANKS;
10004
197k
  if (RAW != '=') {
10005
2.91k
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10006
2.91k
      return(NULL);
10007
2.91k
        }
10008
194k
  NEXT;
10009
194k
  SKIP_BLANKS;
10010
194k
  if (RAW == '"') {
10011
189k
      NEXT;
10012
189k
      version = xmlParseVersionNum(ctxt);
10013
189k
      if (RAW != '"') {
10014
3.62k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10015
3.62k
      } else
10016
185k
          NEXT;
10017
189k
  } else if (RAW == '\''){
10018
3.76k
      NEXT;
10019
3.76k
      version = xmlParseVersionNum(ctxt);
10020
3.76k
      if (RAW != '\'') {
10021
1.35k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10022
1.35k
      } else
10023
2.41k
          NEXT;
10024
3.76k
  } else {
10025
806
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10026
806
  }
10027
194k
    }
10028
207k
    return(version);
10029
210k
}
10030
10031
/**
10032
 * xmlParseEncName:
10033
 * @ctxt:  an XML parser context
10034
 *
10035
 * DEPRECATED: Internal function, don't use.
10036
 *
10037
 * parse the XML encoding name
10038
 *
10039
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10040
 *
10041
 * Returns the encoding name value or NULL
10042
 */
10043
xmlChar *
10044
181k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10045
181k
    xmlChar *buf = NULL;
10046
181k
    int len = 0;
10047
181k
    int size = 10;
10048
181k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10049
0
                    XML_MAX_TEXT_LENGTH :
10050
181k
                    XML_MAX_NAME_LENGTH;
10051
181k
    xmlChar cur;
10052
10053
181k
    cur = CUR;
10054
181k
    if (((cur >= 'a') && (cur <= 'z')) ||
10055
181k
        ((cur >= 'A') && (cur <= 'Z'))) {
10056
181k
  buf = (xmlChar *) xmlMallocAtomic(size);
10057
181k
  if (buf == NULL) {
10058
116
      xmlErrMemory(ctxt, NULL);
10059
116
      return(NULL);
10060
116
  }
10061
10062
180k
  buf[len++] = cur;
10063
180k
  NEXT;
10064
180k
  cur = CUR;
10065
3.09M
  while (((cur >= 'a') && (cur <= 'z')) ||
10066
3.09M
         ((cur >= 'A') && (cur <= 'Z')) ||
10067
3.09M
         ((cur >= '0') && (cur <= '9')) ||
10068
3.09M
         (cur == '.') || (cur == '_') ||
10069
3.09M
         (cur == '-')) {
10070
2.91M
      if (len + 1 >= size) {
10071
25.9k
          xmlChar *tmp;
10072
10073
25.9k
    size *= 2;
10074
25.9k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10075
25.9k
    if (tmp == NULL) {
10076
2
        xmlErrMemory(ctxt, NULL);
10077
2
        xmlFree(buf);
10078
2
        return(NULL);
10079
2
    }
10080
25.9k
    buf = tmp;
10081
25.9k
      }
10082
2.91M
      buf[len++] = cur;
10083
2.91M
            if (len > maxLength) {
10084
35
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10085
35
                xmlFree(buf);
10086
35
                return(NULL);
10087
35
            }
10088
2.91M
      NEXT;
10089
2.91M
      cur = CUR;
10090
2.91M
        }
10091
180k
  buf[len] = 0;
10092
180k
    } else {
10093
652
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10094
652
    }
10095
181k
    return(buf);
10096
181k
}
10097
10098
/**
10099
 * xmlParseEncodingDecl:
10100
 * @ctxt:  an XML parser context
10101
 *
10102
 * DEPRECATED: Internal function, don't use.
10103
 *
10104
 * parse the XML encoding declaration
10105
 *
10106
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10107
 *
10108
 * this setups the conversion filters.
10109
 *
10110
 * Returns the encoding value or NULL
10111
 */
10112
10113
const xmlChar *
10114
200k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10115
200k
    xmlChar *encoding = NULL;
10116
10117
200k
    SKIP_BLANKS;
10118
200k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10119
183k
  SKIP(8);
10120
183k
  SKIP_BLANKS;
10121
183k
  if (RAW != '=') {
10122
829
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10123
829
      return(NULL);
10124
829
        }
10125
182k
  NEXT;
10126
182k
  SKIP_BLANKS;
10127
182k
  if (RAW == '"') {
10128
177k
      NEXT;
10129
177k
      encoding = xmlParseEncName(ctxt);
10130
177k
      if (RAW != '"') {
10131
1.91k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10132
1.91k
    xmlFree((xmlChar *) encoding);
10133
1.91k
    return(NULL);
10134
1.91k
      } else
10135
175k
          NEXT;
10136
177k
  } else if (RAW == '\''){
10137
3.98k
      NEXT;
10138
3.98k
      encoding = xmlParseEncName(ctxt);
10139
3.98k
      if (RAW != '\'') {
10140
262
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10141
262
    xmlFree((xmlChar *) encoding);
10142
262
    return(NULL);
10143
262
      } else
10144
3.72k
          NEXT;
10145
3.98k
  } else {
10146
549
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10147
549
  }
10148
10149
        /*
10150
         * Non standard parsing, allowing the user to ignore encoding
10151
         */
10152
180k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10153
0
      xmlFree((xmlChar *) encoding);
10154
0
            return(NULL);
10155
0
  }
10156
10157
  /*
10158
   * UTF-16 encoding switch has already taken place at this stage,
10159
   * more over the little-endian/big-endian selection is already done
10160
   */
10161
180k
        if ((encoding != NULL) &&
10162
180k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10163
179k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10164
      /*
10165
       * If no encoding was passed to the parser, that we are
10166
       * using UTF-16 and no decoder is present i.e. the
10167
       * document is apparently UTF-8 compatible, then raise an
10168
       * encoding mismatch fatal error
10169
       */
10170
477
      if ((ctxt->encoding == NULL) &&
10171
477
          (ctxt->input->buf != NULL) &&
10172
477
          (ctxt->input->buf->encoder == NULL)) {
10173
391
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10174
391
      "Document labelled UTF-16 but has UTF-8 content\n");
10175
391
      }
10176
477
      if (ctxt->encoding != NULL)
10177
86
    xmlFree((xmlChar *) ctxt->encoding);
10178
477
      ctxt->encoding = encoding;
10179
477
  }
10180
  /*
10181
   * UTF-8 encoding is handled natively
10182
   */
10183
179k
        else if ((encoding != NULL) &&
10184
179k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10185
179k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10186
            /* TODO: Check for encoding mismatch. */
10187
1.40k
      if (ctxt->encoding != NULL)
10188
6
    xmlFree((xmlChar *) ctxt->encoding);
10189
1.40k
      ctxt->encoding = encoding;
10190
1.40k
  }
10191
178k
  else if (encoding != NULL) {
10192
177k
      xmlCharEncodingHandlerPtr handler;
10193
10194
177k
      if (ctxt->input->encoding != NULL)
10195
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10196
177k
      ctxt->input->encoding = encoding;
10197
10198
177k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10199
177k
      if (handler != NULL) {
10200
176k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10201
        /* failed to convert */
10202
320
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10203
320
        return(NULL);
10204
320
    }
10205
176k
      } else {
10206
1.36k
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10207
1.36k
      "Unsupported encoding %s\n", encoding);
10208
1.36k
    return(NULL);
10209
1.36k
      }
10210
177k
  }
10211
180k
    }
10212
195k
    return(encoding);
10213
200k
}
10214
10215
/**
10216
 * xmlParseSDDecl:
10217
 * @ctxt:  an XML parser context
10218
 *
10219
 * DEPRECATED: Internal function, don't use.
10220
 *
10221
 * parse the XML standalone declaration
10222
 *
10223
 * [32] SDDecl ::= S 'standalone' Eq
10224
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10225
 *
10226
 * [ VC: Standalone Document Declaration ]
10227
 * TODO The standalone document declaration must have the value "no"
10228
 * if any external markup declarations contain declarations of:
10229
 *  - attributes with default values, if elements to which these
10230
 *    attributes apply appear in the document without specifications
10231
 *    of values for these attributes, or
10232
 *  - entities (other than amp, lt, gt, apos, quot), if references
10233
 *    to those entities appear in the document, or
10234
 *  - attributes with values subject to normalization, where the
10235
 *    attribute appears in the document with a value which will change
10236
 *    as a result of normalization, or
10237
 *  - element types with element content, if white space occurs directly
10238
 *    within any instance of those types.
10239
 *
10240
 * Returns:
10241
 *   1 if standalone="yes"
10242
 *   0 if standalone="no"
10243
 *  -2 if standalone attribute is missing or invalid
10244
 *    (A standalone value of -2 means that the XML declaration was found,
10245
 *     but no value was specified for the standalone attribute).
10246
 */
10247
10248
int
10249
31.2k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10250
31.2k
    int standalone = -2;
10251
10252
31.2k
    SKIP_BLANKS;
10253
31.2k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10254
7.97k
  SKIP(10);
10255
7.97k
        SKIP_BLANKS;
10256
7.97k
  if (RAW != '=') {
10257
286
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10258
286
      return(standalone);
10259
286
        }
10260
7.69k
  NEXT;
10261
7.69k
  SKIP_BLANKS;
10262
7.69k
        if (RAW == '\''){
10263
3.21k
      NEXT;
10264
3.21k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10265
1.52k
          standalone = 0;
10266
1.52k
                SKIP(2);
10267
1.68k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10268
1.68k
                 (NXT(2) == 's')) {
10269
535
          standalone = 1;
10270
535
    SKIP(3);
10271
1.15k
            } else {
10272
1.15k
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10273
1.15k
      }
10274
3.21k
      if (RAW != '\'') {
10275
1.78k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10276
1.78k
      } else
10277
1.43k
          NEXT;
10278
4.47k
  } else if (RAW == '"'){
10279
4.27k
      NEXT;
10280
4.27k
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10281
2.91k
          standalone = 0;
10282
2.91k
    SKIP(2);
10283
2.91k
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10284
1.36k
                 (NXT(2) == 's')) {
10285
519
          standalone = 1;
10286
519
                SKIP(3);
10287
842
            } else {
10288
842
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10289
842
      }
10290
4.27k
      if (RAW != '"') {
10291
1.43k
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10292
1.43k
      } else
10293
2.83k
          NEXT;
10294
4.27k
  } else {
10295
206
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10296
206
        }
10297
7.69k
    }
10298
30.9k
    return(standalone);
10299
31.2k
}
10300
10301
/**
10302
 * xmlParseXMLDecl:
10303
 * @ctxt:  an XML parser context
10304
 *
10305
 * DEPRECATED: Internal function, don't use.
10306
 *
10307
 * parse an XML declaration header
10308
 *
10309
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10310
 */
10311
10312
void
10313
200k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10314
200k
    xmlChar *version;
10315
10316
    /*
10317
     * This value for standalone indicates that the document has an
10318
     * XML declaration but it does not have a standalone attribute.
10319
     * It will be overwritten later if a standalone attribute is found.
10320
     */
10321
200k
    ctxt->input->standalone = -2;
10322
10323
    /*
10324
     * We know that '<?xml' is here.
10325
     */
10326
200k
    SKIP(5);
10327
10328
200k
    if (!IS_BLANK_CH(RAW)) {
10329
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10330
0
                 "Blank needed after '<?xml'\n");
10331
0
    }
10332
200k
    SKIP_BLANKS;
10333
10334
    /*
10335
     * We must have the VersionInfo here.
10336
     */
10337
200k
    version = xmlParseVersionInfo(ctxt);
10338
200k
    if (version == NULL) {
10339
13.6k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10340
186k
    } else {
10341
186k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10342
      /*
10343
       * Changed here for XML-1.0 5th edition
10344
       */
10345
150k
      if (ctxt->options & XML_PARSE_OLD10) {
10346
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10347
0
                "Unsupported version '%s'\n",
10348
0
                version);
10349
150k
      } else {
10350
150k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10351
143k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10352
143k
                      "Unsupported version '%s'\n",
10353
143k
          version, NULL);
10354
143k
    } else {
10355
7.46k
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10356
7.46k
              "Unsupported version '%s'\n",
10357
7.46k
              version);
10358
7.46k
    }
10359
150k
      }
10360
150k
  }
10361
186k
  if (ctxt->version != NULL)
10362
0
      xmlFree((void *) ctxt->version);
10363
186k
  ctxt->version = version;
10364
186k
    }
10365
10366
    /*
10367
     * We may have the encoding declaration
10368
     */
10369
200k
    if (!IS_BLANK_CH(RAW)) {
10370
24.6k
        if ((RAW == '?') && (NXT(1) == '>')) {
10371
10.4k
      SKIP(2);
10372
10.4k
      return;
10373
10.4k
  }
10374
14.2k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10375
14.2k
    }
10376
190k
    xmlParseEncodingDecl(ctxt);
10377
190k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10378
190k
         (ctxt->instate == XML_PARSER_EOF)) {
10379
  /*
10380
   * The XML REC instructs us to stop parsing right here
10381
   */
10382
1.68k
        return;
10383
1.68k
    }
10384
10385
    /*
10386
     * We may have the standalone status.
10387
     */
10388
188k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10389
161k
        if ((RAW == '?') && (NXT(1) == '>')) {
10390
157k
      SKIP(2);
10391
157k
      return;
10392
157k
  }
10393
4.71k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10394
4.71k
    }
10395
10396
    /*
10397
     * We can grow the input buffer freely at that point
10398
     */
10399
31.2k
    GROW;
10400
10401
31.2k
    SKIP_BLANKS;
10402
31.2k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10403
10404
31.2k
    SKIP_BLANKS;
10405
31.2k
    if ((RAW == '?') && (NXT(1) == '>')) {
10406
5.43k
        SKIP(2);
10407
25.8k
    } else if (RAW == '>') {
10408
        /* Deprecated old WD ... */
10409
2.14k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10410
2.14k
  NEXT;
10411
23.6k
    } else {
10412
23.6k
        int c;
10413
10414
23.6k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10415
5.13M
        while ((c = CUR) != 0) {
10416
5.12M
            NEXT;
10417
5.12M
            if (c == '>')
10418
16.9k
                break;
10419
5.12M
        }
10420
23.6k
    }
10421
31.2k
}
10422
10423
/**
10424
 * xmlParseMisc:
10425
 * @ctxt:  an XML parser context
10426
 *
10427
 * DEPRECATED: Internal function, don't use.
10428
 *
10429
 * parse an XML Misc* optional field.
10430
 *
10431
 * [27] Misc ::= Comment | PI |  S
10432
 */
10433
10434
void
10435
599k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10436
644k
    while (ctxt->instate != XML_PARSER_EOF) {
10437
643k
        SKIP_BLANKS;
10438
643k
        GROW;
10439
643k
        if ((RAW == '<') && (NXT(1) == '?')) {
10440
38.8k
      xmlParsePI(ctxt);
10441
604k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10442
5.84k
      xmlParseComment(ctxt);
10443
599k
        } else {
10444
599k
            break;
10445
599k
        }
10446
643k
    }
10447
599k
}
10448
10449
/**
10450
 * xmlParseDocument:
10451
 * @ctxt:  an XML parser context
10452
 *
10453
 * parse an XML document (and build a tree if using the standard SAX
10454
 * interface).
10455
 *
10456
 * [1] document ::= prolog element Misc*
10457
 *
10458
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10459
 *
10460
 * Returns 0, -1 in case of error. the parser context is augmented
10461
 *                as a result of the parsing.
10462
 */
10463
10464
int
10465
332k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10466
332k
    xmlChar start[4];
10467
332k
    xmlCharEncoding enc;
10468
10469
332k
    xmlInitParser();
10470
10471
332k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10472
0
        return(-1);
10473
10474
332k
    GROW;
10475
10476
    /*
10477
     * SAX: detecting the level.
10478
     */
10479
332k
    xmlDetectSAX2(ctxt);
10480
10481
    /*
10482
     * SAX: beginning of the document processing.
10483
     */
10484
332k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10485
332k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10486
332k
    if (ctxt->instate == XML_PARSER_EOF)
10487
0
  return(-1);
10488
10489
332k
    if ((ctxt->encoding == NULL) &&
10490
332k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10491
  /*
10492
   * Get the 4 first bytes and decode the charset
10493
   * if enc != XML_CHAR_ENCODING_NONE
10494
   * plug some encoding conversion routines.
10495
   */
10496
331k
  start[0] = RAW;
10497
331k
  start[1] = NXT(1);
10498
331k
  start[2] = NXT(2);
10499
331k
  start[3] = NXT(3);
10500
331k
  enc = xmlDetectCharEncoding(&start[0], 4);
10501
331k
  if (enc != XML_CHAR_ENCODING_NONE) {
10502
217k
      xmlSwitchEncoding(ctxt, enc);
10503
217k
  }
10504
331k
    }
10505
10506
10507
332k
    if (CUR == 0) {
10508
2.00k
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10509
2.00k
  return(-1);
10510
2.00k
    }
10511
10512
330k
    GROW;
10513
330k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10514
10515
  /*
10516
   * Note that we will switch encoding on the fly.
10517
   */
10518
200k
  xmlParseXMLDecl(ctxt);
10519
200k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10520
200k
      (ctxt->instate == XML_PARSER_EOF)) {
10521
      /*
10522
       * The XML REC instructs us to stop parsing right here
10523
       */
10524
2.22k
      return(-1);
10525
2.22k
  }
10526
198k
  ctxt->standalone = ctxt->input->standalone;
10527
198k
  SKIP_BLANKS;
10528
198k
    } else {
10529
130k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10530
130k
    }
10531
328k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10532
296k
        ctxt->sax->startDocument(ctxt->userData);
10533
328k
    if (ctxt->instate == XML_PARSER_EOF)
10534
783
  return(-1);
10535
327k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10536
327k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10537
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10538
0
    }
10539
10540
    /*
10541
     * The Misc part of the Prolog
10542
     */
10543
327k
    xmlParseMisc(ctxt);
10544
10545
    /*
10546
     * Then possibly doc type declaration(s) and more Misc
10547
     * (doctypedecl Misc*)?
10548
     */
10549
327k
    GROW;
10550
327k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10551
10552
76.9k
  ctxt->inSubset = 1;
10553
76.9k
  xmlParseDocTypeDecl(ctxt);
10554
76.9k
  if (RAW == '[') {
10555
62.8k
      ctxt->instate = XML_PARSER_DTD;
10556
62.8k
      xmlParseInternalSubset(ctxt);
10557
62.8k
      if (ctxt->instate == XML_PARSER_EOF)
10558
46.6k
    return(-1);
10559
62.8k
  }
10560
10561
  /*
10562
   * Create and update the external subset.
10563
   */
10564
30.3k
  ctxt->inSubset = 2;
10565
30.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10566
30.3k
      (!ctxt->disableSAX))
10567
20.5k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10568
20.5k
                                ctxt->extSubSystem, ctxt->extSubURI);
10569
30.3k
  if (ctxt->instate == XML_PARSER_EOF)
10570
7.13k
      return(-1);
10571
23.1k
  ctxt->inSubset = 0;
10572
10573
23.1k
        xmlCleanSpecialAttr(ctxt);
10574
10575
23.1k
  ctxt->instate = XML_PARSER_PROLOG;
10576
23.1k
  xmlParseMisc(ctxt);
10577
23.1k
    }
10578
10579
    /*
10580
     * Time to start parsing the tree itself
10581
     */
10582
273k
    GROW;
10583
273k
    if (RAW != '<') {
10584
24.8k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10585
24.8k
           "Start tag expected, '<' not found\n");
10586
249k
    } else {
10587
249k
  ctxt->instate = XML_PARSER_CONTENT;
10588
249k
  xmlParseElement(ctxt);
10589
249k
  ctxt->instate = XML_PARSER_EPILOG;
10590
10591
10592
  /*
10593
   * The Misc part at the end
10594
   */
10595
249k
  xmlParseMisc(ctxt);
10596
10597
249k
  if (RAW != 0) {
10598
15.2k
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10599
15.2k
  }
10600
249k
  ctxt->instate = XML_PARSER_EOF;
10601
249k
    }
10602
10603
    /*
10604
     * SAX: end of the document processing.
10605
     */
10606
273k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10607
273k
        ctxt->sax->endDocument(ctxt->userData);
10608
10609
    /*
10610
     * Remove locally kept entity definitions if the tree was not built
10611
     */
10612
273k
    if ((ctxt->myDoc != NULL) &&
10613
273k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10614
2.19k
  xmlFreeDoc(ctxt->myDoc);
10615
2.19k
  ctxt->myDoc = NULL;
10616
2.19k
    }
10617
10618
273k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10619
192k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10620
192k
  if (ctxt->valid)
10621
192k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10622
192k
  if (ctxt->nsWellFormed)
10623
116k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10624
192k
  if (ctxt->options & XML_PARSE_OLD10)
10625
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10626
192k
    }
10627
273k
    if (! ctxt->wellFormed) {
10628
81.1k
  ctxt->valid = 0;
10629
81.1k
  return(-1);
10630
81.1k
    }
10631
192k
    return(0);
10632
273k
}
10633
10634
/**
10635
 * xmlParseExtParsedEnt:
10636
 * @ctxt:  an XML parser context
10637
 *
10638
 * parse a general parsed entity
10639
 * An external general parsed entity is well-formed if it matches the
10640
 * production labeled extParsedEnt.
10641
 *
10642
 * [78] extParsedEnt ::= TextDecl? content
10643
 *
10644
 * Returns 0, -1 in case of error. the parser context is augmented
10645
 *                as a result of the parsing.
10646
 */
10647
10648
int
10649
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10650
0
    xmlChar start[4];
10651
0
    xmlCharEncoding enc;
10652
10653
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10654
0
        return(-1);
10655
10656
0
    xmlDetectSAX2(ctxt);
10657
10658
0
    GROW;
10659
10660
    /*
10661
     * SAX: beginning of the document processing.
10662
     */
10663
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10664
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10665
10666
    /*
10667
     * Get the 4 first bytes and decode the charset
10668
     * if enc != XML_CHAR_ENCODING_NONE
10669
     * plug some encoding conversion routines.
10670
     */
10671
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10672
0
  start[0] = RAW;
10673
0
  start[1] = NXT(1);
10674
0
  start[2] = NXT(2);
10675
0
  start[3] = NXT(3);
10676
0
  enc = xmlDetectCharEncoding(start, 4);
10677
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10678
0
      xmlSwitchEncoding(ctxt, enc);
10679
0
  }
10680
0
    }
10681
10682
10683
0
    if (CUR == 0) {
10684
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10685
0
    }
10686
10687
    /*
10688
     * Check for the XMLDecl in the Prolog.
10689
     */
10690
0
    GROW;
10691
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10692
10693
  /*
10694
   * Note that we will switch encoding on the fly.
10695
   */
10696
0
  xmlParseXMLDecl(ctxt);
10697
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10698
      /*
10699
       * The XML REC instructs us to stop parsing right here
10700
       */
10701
0
      return(-1);
10702
0
  }
10703
0
  SKIP_BLANKS;
10704
0
    } else {
10705
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10706
0
    }
10707
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10708
0
        ctxt->sax->startDocument(ctxt->userData);
10709
0
    if (ctxt->instate == XML_PARSER_EOF)
10710
0
  return(-1);
10711
10712
    /*
10713
     * Doing validity checking on chunk doesn't make sense
10714
     */
10715
0
    ctxt->instate = XML_PARSER_CONTENT;
10716
0
    ctxt->validate = 0;
10717
0
    ctxt->loadsubset = 0;
10718
0
    ctxt->depth = 0;
10719
10720
0
    xmlParseContent(ctxt);
10721
0
    if (ctxt->instate == XML_PARSER_EOF)
10722
0
  return(-1);
10723
10724
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10725
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10726
0
    } else if (RAW != 0) {
10727
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10728
0
    }
10729
10730
    /*
10731
     * SAX: end of the document processing.
10732
     */
10733
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10734
0
        ctxt->sax->endDocument(ctxt->userData);
10735
10736
0
    if (! ctxt->wellFormed) return(-1);
10737
0
    return(0);
10738
0
}
10739
10740
#ifdef LIBXML_PUSH_ENABLED
10741
/************************************************************************
10742
 *                  *
10743
 *    Progressive parsing interfaces        *
10744
 *                  *
10745
 ************************************************************************/
10746
10747
/**
10748
 * xmlParseLookupChar:
10749
 * @ctxt:  an XML parser context
10750
 * @c:  character
10751
 *
10752
 * Check whether the input buffer contains a character.
10753
 */
10754
static int
10755
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10756
    const xmlChar *cur;
10757
10758
    if (ctxt->checkIndex == 0) {
10759
        cur = ctxt->input->cur + 1;
10760
    } else {
10761
        cur = ctxt->input->cur + ctxt->checkIndex;
10762
    }
10763
10764
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10765
        size_t index = ctxt->input->end - ctxt->input->cur;
10766
10767
        if (index > LONG_MAX) {
10768
            ctxt->checkIndex = 0;
10769
            return(1);
10770
        }
10771
        ctxt->checkIndex = index;
10772
        return(0);
10773
    } else {
10774
        ctxt->checkIndex = 0;
10775
        return(1);
10776
    }
10777
}
10778
10779
/**
10780
 * xmlParseLookupString:
10781
 * @ctxt:  an XML parser context
10782
 * @startDelta: delta to apply at the start
10783
 * @str:  string
10784
 * @strLen:  length of string
10785
 *
10786
 * Check whether the input buffer contains a string.
10787
 */
10788
static const xmlChar *
10789
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10790
                     const char *str, size_t strLen) {
10791
    const xmlChar *cur, *term;
10792
10793
    if (ctxt->checkIndex == 0) {
10794
        cur = ctxt->input->cur + startDelta;
10795
    } else {
10796
        cur = ctxt->input->cur + ctxt->checkIndex;
10797
    }
10798
10799
    term = BAD_CAST strstr((const char *) cur, str);
10800
    if (term == NULL) {
10801
        const xmlChar *end = ctxt->input->end;
10802
        size_t index;
10803
10804
        /* Rescan (strLen - 1) characters. */
10805
        if ((size_t) (end - cur) < strLen)
10806
            end = cur;
10807
        else
10808
            end -= strLen - 1;
10809
        index = end - ctxt->input->cur;
10810
        if (index > LONG_MAX) {
10811
            ctxt->checkIndex = 0;
10812
            return(ctxt->input->end - strLen);
10813
        }
10814
        ctxt->checkIndex = index;
10815
    } else {
10816
        ctxt->checkIndex = 0;
10817
    }
10818
10819
    return(term);
10820
}
10821
10822
/**
10823
 * xmlParseLookupCharData:
10824
 * @ctxt:  an XML parser context
10825
 *
10826
 * Check whether the input buffer contains terminated char data.
10827
 */
10828
static int
10829
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10830
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10831
    const xmlChar *end = ctxt->input->end;
10832
    size_t index;
10833
10834
    while (cur < end) {
10835
        if ((*cur == '<') || (*cur == '&')) {
10836
            ctxt->checkIndex = 0;
10837
            return(1);
10838
        }
10839
        cur++;
10840
    }
10841
10842
    index = cur - ctxt->input->cur;
10843
    if (index > LONG_MAX) {
10844
        ctxt->checkIndex = 0;
10845
        return(1);
10846
    }
10847
    ctxt->checkIndex = index;
10848
    return(0);
10849
}
10850
10851
/**
10852
 * xmlParseLookupGt:
10853
 * @ctxt:  an XML parser context
10854
 *
10855
 * Check whether there's enough data in the input buffer to finish parsing
10856
 * a start tag. This has to take quotes into account.
10857
 */
10858
static int
10859
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10860
    const xmlChar *cur;
10861
    const xmlChar *end = ctxt->input->end;
10862
    int state = ctxt->endCheckState;
10863
    size_t index;
10864
10865
    if (ctxt->checkIndex == 0)
10866
        cur = ctxt->input->cur + 1;
10867
    else
10868
        cur = ctxt->input->cur + ctxt->checkIndex;
10869
10870
    while (cur < end) {
10871
        if (state) {
10872
            if (*cur == state)
10873
                state = 0;
10874
        } else if (*cur == '\'' || *cur == '"') {
10875
            state = *cur;
10876
        } else if (*cur == '>') {
10877
            ctxt->checkIndex = 0;
10878
            ctxt->endCheckState = 0;
10879
            return(1);
10880
        }
10881
        cur++;
10882
    }
10883
10884
    index = cur - ctxt->input->cur;
10885
    if (index > LONG_MAX) {
10886
        ctxt->checkIndex = 0;
10887
        ctxt->endCheckState = 0;
10888
        return(1);
10889
    }
10890
    ctxt->checkIndex = index;
10891
    ctxt->endCheckState = state;
10892
    return(0);
10893
}
10894
10895
/**
10896
 * xmlParseLookupInternalSubset:
10897
 * @ctxt:  an XML parser context
10898
 *
10899
 * Check whether there's enough data in the input buffer to finish parsing
10900
 * the internal subset.
10901
 */
10902
static int
10903
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10904
    /*
10905
     * Sorry, but progressive parsing of the internal subset is not
10906
     * supported. We first check that the full content of the internal
10907
     * subset is available and parsing is launched only at that point.
10908
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10909
     * not in a ']]>' sequence which are conditional sections.
10910
     */
10911
    const xmlChar *cur, *start;
10912
    const xmlChar *end = ctxt->input->end;
10913
    int state = ctxt->endCheckState;
10914
    size_t index;
10915
10916
    if (ctxt->checkIndex == 0) {
10917
        cur = ctxt->input->cur + 1;
10918
    } else {
10919
        cur = ctxt->input->cur + ctxt->checkIndex;
10920
    }
10921
    start = cur;
10922
10923
    while (cur < end) {
10924
        if (state == '-') {
10925
            if ((*cur == '-') &&
10926
                (cur[1] == '-') &&
10927
                (cur[2] == '>')) {
10928
                state = 0;
10929
                cur += 3;
10930
                start = cur;
10931
                continue;
10932
            }
10933
        }
10934
        else if (state == ']') {
10935
            if (*cur == '>') {
10936
                ctxt->checkIndex = 0;
10937
                ctxt->endCheckState = 0;
10938
                return(1);
10939
            }
10940
            if (IS_BLANK_CH(*cur)) {
10941
                state = ' ';
10942
            } else if (*cur != ']') {
10943
                state = 0;
10944
                start = cur;
10945
                continue;
10946
            }
10947
        }
10948
        else if (state == ' ') {
10949
            if (*cur == '>') {
10950
                ctxt->checkIndex = 0;
10951
                ctxt->endCheckState = 0;
10952
                return(1);
10953
            }
10954
            if (!IS_BLANK_CH(*cur)) {
10955
                state = 0;
10956
                start = cur;
10957
                continue;
10958
            }
10959
        }
10960
        else if (state != 0) {
10961
            if (*cur == state) {
10962
                state = 0;
10963
                start = cur + 1;
10964
            }
10965
        }
10966
        else if (*cur == '<') {
10967
            if ((cur[1] == '!') &&
10968
                (cur[2] == '-') &&
10969
                (cur[3] == '-')) {
10970
                state = '-';
10971
                cur += 4;
10972
                /* Don't treat <!--> as comment */
10973
                start = cur;
10974
                continue;
10975
            }
10976
        }
10977
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10978
            state = *cur;
10979
        }
10980
10981
        cur++;
10982
    }
10983
10984
    /*
10985
     * Rescan the three last characters to detect "<!--" and "-->"
10986
     * split across chunks.
10987
     */
10988
    if ((state == 0) || (state == '-')) {
10989
        if (cur - start < 3)
10990
            cur = start;
10991
        else
10992
            cur -= 3;
10993
    }
10994
    index = cur - ctxt->input->cur;
10995
    if (index > LONG_MAX) {
10996
        ctxt->checkIndex = 0;
10997
        ctxt->endCheckState = 0;
10998
        return(1);
10999
    }
11000
    ctxt->checkIndex = index;
11001
    ctxt->endCheckState = state;
11002
    return(0);
11003
}
11004
11005
/**
11006
 * xmlCheckCdataPush:
11007
 * @cur: pointer to the block of characters
11008
 * @len: length of the block in bytes
11009
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11010
 *
11011
 * Check that the block of characters is okay as SCdata content [20]
11012
 *
11013
 * Returns the number of bytes to pass if okay, a negative index where an
11014
 *         UTF-8 error occurred otherwise
11015
 */
11016
static int
11017
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11018
    int ix;
11019
    unsigned char c;
11020
    int codepoint;
11021
11022
    if ((utf == NULL) || (len <= 0))
11023
        return(0);
11024
11025
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11026
        c = utf[ix];
11027
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11028
      if (c >= 0x20)
11029
    ix++;
11030
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11031
          ix++;
11032
      else
11033
          return(-ix);
11034
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11035
      if (ix + 2 > len) return(complete ? -ix : ix);
11036
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11037
          return(-ix);
11038
      codepoint = (utf[ix] & 0x1f) << 6;
11039
      codepoint |= utf[ix+1] & 0x3f;
11040
      if (!xmlIsCharQ(codepoint))
11041
          return(-ix);
11042
      ix += 2;
11043
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11044
      if (ix + 3 > len) return(complete ? -ix : ix);
11045
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11046
          ((utf[ix+2] & 0xc0) != 0x80))
11047
        return(-ix);
11048
      codepoint = (utf[ix] & 0xf) << 12;
11049
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11050
      codepoint |= utf[ix+2] & 0x3f;
11051
      if (!xmlIsCharQ(codepoint))
11052
          return(-ix);
11053
      ix += 3;
11054
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11055
      if (ix + 4 > len) return(complete ? -ix : ix);
11056
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11057
          ((utf[ix+2] & 0xc0) != 0x80) ||
11058
    ((utf[ix+3] & 0xc0) != 0x80))
11059
        return(-ix);
11060
      codepoint = (utf[ix] & 0x7) << 18;
11061
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11062
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11063
      codepoint |= utf[ix+3] & 0x3f;
11064
      if (!xmlIsCharQ(codepoint))
11065
          return(-ix);
11066
      ix += 4;
11067
  } else        /* unknown encoding */
11068
      return(-ix);
11069
      }
11070
      return(ix);
11071
}
11072
11073
/**
11074
 * xmlParseTryOrFinish:
11075
 * @ctxt:  an XML parser context
11076
 * @terminate:  last chunk indicator
11077
 *
11078
 * Try to progress on parsing
11079
 *
11080
 * Returns zero if no parsing was possible
11081
 */
11082
static int
11083
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11084
    int ret = 0;
11085
    int tlen;
11086
    size_t avail;
11087
    xmlChar cur, next;
11088
11089
    if (ctxt->input == NULL)
11090
        return(0);
11091
11092
#ifdef DEBUG_PUSH
11093
    switch (ctxt->instate) {
11094
  case XML_PARSER_EOF:
11095
      xmlGenericError(xmlGenericErrorContext,
11096
        "PP: try EOF\n"); break;
11097
  case XML_PARSER_START:
11098
      xmlGenericError(xmlGenericErrorContext,
11099
        "PP: try START\n"); break;
11100
  case XML_PARSER_MISC:
11101
      xmlGenericError(xmlGenericErrorContext,
11102
        "PP: try MISC\n");break;
11103
  case XML_PARSER_COMMENT:
11104
      xmlGenericError(xmlGenericErrorContext,
11105
        "PP: try COMMENT\n");break;
11106
  case XML_PARSER_PROLOG:
11107
      xmlGenericError(xmlGenericErrorContext,
11108
        "PP: try PROLOG\n");break;
11109
  case XML_PARSER_START_TAG:
11110
      xmlGenericError(xmlGenericErrorContext,
11111
        "PP: try START_TAG\n");break;
11112
  case XML_PARSER_CONTENT:
11113
      xmlGenericError(xmlGenericErrorContext,
11114
        "PP: try CONTENT\n");break;
11115
  case XML_PARSER_CDATA_SECTION:
11116
      xmlGenericError(xmlGenericErrorContext,
11117
        "PP: try CDATA_SECTION\n");break;
11118
  case XML_PARSER_END_TAG:
11119
      xmlGenericError(xmlGenericErrorContext,
11120
        "PP: try END_TAG\n");break;
11121
  case XML_PARSER_ENTITY_DECL:
11122
      xmlGenericError(xmlGenericErrorContext,
11123
        "PP: try ENTITY_DECL\n");break;
11124
  case XML_PARSER_ENTITY_VALUE:
11125
      xmlGenericError(xmlGenericErrorContext,
11126
        "PP: try ENTITY_VALUE\n");break;
11127
  case XML_PARSER_ATTRIBUTE_VALUE:
11128
      xmlGenericError(xmlGenericErrorContext,
11129
        "PP: try ATTRIBUTE_VALUE\n");break;
11130
  case XML_PARSER_DTD:
11131
      xmlGenericError(xmlGenericErrorContext,
11132
        "PP: try DTD\n");break;
11133
  case XML_PARSER_EPILOG:
11134
      xmlGenericError(xmlGenericErrorContext,
11135
        "PP: try EPILOG\n");break;
11136
  case XML_PARSER_PI:
11137
      xmlGenericError(xmlGenericErrorContext,
11138
        "PP: try PI\n");break;
11139
        case XML_PARSER_IGNORE:
11140
            xmlGenericError(xmlGenericErrorContext,
11141
        "PP: try IGNORE\n");break;
11142
    }
11143
#endif
11144
11145
    if ((ctxt->input != NULL) &&
11146
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11147
        xmlParserShrink(ctxt);
11148
    }
11149
11150
    while (ctxt->instate != XML_PARSER_EOF) {
11151
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11152
      return(0);
11153
11154
  if (ctxt->input == NULL) break;
11155
  if (ctxt->input->buf != NULL) {
11156
      /*
11157
       * If we are operating on converted input, try to flush
11158
       * remaining chars to avoid them stalling in the non-converted
11159
       * buffer.
11160
       */
11161
      if ((ctxt->input->buf->raw != NULL) &&
11162
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11163
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11164
                                                 ctxt->input);
11165
    size_t current = ctxt->input->cur - ctxt->input->base;
11166
                int res;
11167
11168
    res = xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11169
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11170
                                      base, current);
11171
                if (res < 0) {
11172
                    xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
11173
                    xmlHaltParser(ctxt);
11174
                    return(0);
11175
                }
11176
      }
11177
  }
11178
        avail = ctxt->input->end - ctxt->input->cur;
11179
        if (avail < 1)
11180
      goto done;
11181
        switch (ctxt->instate) {
11182
            case XML_PARSER_EOF:
11183
          /*
11184
     * Document parsing is done !
11185
     */
11186
          goto done;
11187
            case XML_PARSER_START:
11188
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11189
        xmlChar start[4];
11190
        xmlCharEncoding enc;
11191
11192
        /*
11193
         * Very first chars read from the document flow.
11194
         */
11195
        if (avail < 4)
11196
      goto done;
11197
11198
        /*
11199
         * Get the 4 first bytes and decode the charset
11200
         * if enc != XML_CHAR_ENCODING_NONE
11201
         * plug some encoding conversion routines,
11202
         * else xmlSwitchEncoding will set to (default)
11203
         * UTF8.
11204
         */
11205
        start[0] = RAW;
11206
        start[1] = NXT(1);
11207
        start[2] = NXT(2);
11208
        start[3] = NXT(3);
11209
        enc = xmlDetectCharEncoding(start, 4);
11210
                    /*
11211
                     * We need more bytes to detect EBCDIC code pages.
11212
                     * See xmlDetectEBCDIC.
11213
                     */
11214
                    if ((enc == XML_CHAR_ENCODING_EBCDIC) &&
11215
                        (!terminate) && (avail < 200))
11216
                        goto done;
11217
        xmlSwitchEncoding(ctxt, enc);
11218
        break;
11219
    }
11220
11221
    if (avail < 2)
11222
        goto done;
11223
    cur = ctxt->input->cur[0];
11224
    next = ctxt->input->cur[1];
11225
    if (cur == 0) {
11226
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11227
      ctxt->sax->setDocumentLocator(ctxt->userData,
11228
                  &xmlDefaultSAXLocator);
11229
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11230
        xmlHaltParser(ctxt);
11231
#ifdef DEBUG_PUSH
11232
        xmlGenericError(xmlGenericErrorContext,
11233
          "PP: entering EOF\n");
11234
#endif
11235
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11236
      ctxt->sax->endDocument(ctxt->userData);
11237
        goto done;
11238
    }
11239
          if ((cur == '<') && (next == '?')) {
11240
        /* PI or XML decl */
11241
        if (avail < 5) goto done;
11242
        if ((!terminate) &&
11243
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11244
      goto done;
11245
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11246
      ctxt->sax->setDocumentLocator(ctxt->userData,
11247
                  &xmlDefaultSAXLocator);
11248
        if ((ctxt->input->cur[2] == 'x') &&
11249
      (ctxt->input->cur[3] == 'm') &&
11250
      (ctxt->input->cur[4] == 'l') &&
11251
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11252
      ret += 5;
11253
#ifdef DEBUG_PUSH
11254
      xmlGenericError(xmlGenericErrorContext,
11255
        "PP: Parsing XML Decl\n");
11256
#endif
11257
      xmlParseXMLDecl(ctxt);
11258
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11259
          /*
11260
           * The XML REC instructs us to stop parsing right
11261
           * here
11262
           */
11263
          xmlHaltParser(ctxt);
11264
          return(0);
11265
      }
11266
      ctxt->standalone = ctxt->input->standalone;
11267
      if ((ctxt->encoding == NULL) &&
11268
          (ctxt->input->encoding != NULL))
11269
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11270
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11271
          (!ctxt->disableSAX))
11272
          ctxt->sax->startDocument(ctxt->userData);
11273
      ctxt->instate = XML_PARSER_MISC;
11274
#ifdef DEBUG_PUSH
11275
      xmlGenericError(xmlGenericErrorContext,
11276
        "PP: entering MISC\n");
11277
#endif
11278
        } else {
11279
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11280
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11281
          (!ctxt->disableSAX))
11282
          ctxt->sax->startDocument(ctxt->userData);
11283
      ctxt->instate = XML_PARSER_MISC;
11284
#ifdef DEBUG_PUSH
11285
      xmlGenericError(xmlGenericErrorContext,
11286
        "PP: entering MISC\n");
11287
#endif
11288
        }
11289
    } else {
11290
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11291
      ctxt->sax->setDocumentLocator(ctxt->userData,
11292
                  &xmlDefaultSAXLocator);
11293
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11294
        if (ctxt->version == NULL) {
11295
            xmlErrMemory(ctxt, NULL);
11296
      break;
11297
        }
11298
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11299
            (!ctxt->disableSAX))
11300
      ctxt->sax->startDocument(ctxt->userData);
11301
        ctxt->instate = XML_PARSER_MISC;
11302
#ifdef DEBUG_PUSH
11303
        xmlGenericError(xmlGenericErrorContext,
11304
          "PP: entering MISC\n");
11305
#endif
11306
    }
11307
    break;
11308
            case XML_PARSER_START_TAG: {
11309
          const xmlChar *name;
11310
    const xmlChar *prefix = NULL;
11311
    const xmlChar *URI = NULL;
11312
                int line = ctxt->input->line;
11313
    int nsNr = ctxt->nsNr;
11314
11315
    if ((avail < 2) && (ctxt->inputNr == 1))
11316
        goto done;
11317
    cur = ctxt->input->cur[0];
11318
          if (cur != '<') {
11319
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11320
        xmlHaltParser(ctxt);
11321
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11322
      ctxt->sax->endDocument(ctxt->userData);
11323
        goto done;
11324
    }
11325
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11326
                    goto done;
11327
    if (ctxt->spaceNr == 0)
11328
        spacePush(ctxt, -1);
11329
    else if (*ctxt->space == -2)
11330
        spacePush(ctxt, -1);
11331
    else
11332
        spacePush(ctxt, *ctxt->space);
11333
#ifdef LIBXML_SAX1_ENABLED
11334
    if (ctxt->sax2)
11335
#endif /* LIBXML_SAX1_ENABLED */
11336
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11337
#ifdef LIBXML_SAX1_ENABLED
11338
    else
11339
        name = xmlParseStartTag(ctxt);
11340
#endif /* LIBXML_SAX1_ENABLED */
11341
    if (ctxt->instate == XML_PARSER_EOF)
11342
        goto done;
11343
    if (name == NULL) {
11344
        spacePop(ctxt);
11345
        xmlHaltParser(ctxt);
11346
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11347
      ctxt->sax->endDocument(ctxt->userData);
11348
        goto done;
11349
    }
11350
#ifdef LIBXML_VALID_ENABLED
11351
    /*
11352
     * [ VC: Root Element Type ]
11353
     * The Name in the document type declaration must match
11354
     * the element type of the root element.
11355
     */
11356
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11357
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11358
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11359
#endif /* LIBXML_VALID_ENABLED */
11360
11361
    /*
11362
     * Check for an Empty Element.
11363
     */
11364
    if ((RAW == '/') && (NXT(1) == '>')) {
11365
        SKIP(2);
11366
11367
        if (ctxt->sax2) {
11368
      if ((ctxt->sax != NULL) &&
11369
          (ctxt->sax->endElementNs != NULL) &&
11370
          (!ctxt->disableSAX))
11371
          ctxt->sax->endElementNs(ctxt->userData, name,
11372
                                  prefix, URI);
11373
      if (ctxt->nsNr - nsNr > 0)
11374
          nsPop(ctxt, ctxt->nsNr - nsNr);
11375
#ifdef LIBXML_SAX1_ENABLED
11376
        } else {
11377
      if ((ctxt->sax != NULL) &&
11378
          (ctxt->sax->endElement != NULL) &&
11379
          (!ctxt->disableSAX))
11380
          ctxt->sax->endElement(ctxt->userData, name);
11381
#endif /* LIBXML_SAX1_ENABLED */
11382
        }
11383
        if (ctxt->instate == XML_PARSER_EOF)
11384
      goto done;
11385
        spacePop(ctxt);
11386
        if (ctxt->nameNr == 0) {
11387
      ctxt->instate = XML_PARSER_EPILOG;
11388
        } else {
11389
      ctxt->instate = XML_PARSER_CONTENT;
11390
        }
11391
        break;
11392
    }
11393
    if (RAW == '>') {
11394
        NEXT;
11395
    } else {
11396
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11397
           "Couldn't find end of Start Tag %s\n",
11398
           name);
11399
        nodePop(ctxt);
11400
        spacePop(ctxt);
11401
    }
11402
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11403
11404
    ctxt->instate = XML_PARSER_CONTENT;
11405
                break;
11406
      }
11407
            case XML_PARSER_CONTENT: {
11408
    if ((avail < 2) && (ctxt->inputNr == 1))
11409
        goto done;
11410
    cur = ctxt->input->cur[0];
11411
    next = ctxt->input->cur[1];
11412
11413
    if ((cur == '<') && (next == '/')) {
11414
        ctxt->instate = XML_PARSER_END_TAG;
11415
        break;
11416
          } else if ((cur == '<') && (next == '?')) {
11417
        if ((!terminate) &&
11418
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11419
      goto done;
11420
        xmlParsePI(ctxt);
11421
        ctxt->instate = XML_PARSER_CONTENT;
11422
    } else if ((cur == '<') && (next != '!')) {
11423
        ctxt->instate = XML_PARSER_START_TAG;
11424
        break;
11425
    } else if ((cur == '<') && (next == '!') &&
11426
               (ctxt->input->cur[2] == '-') &&
11427
         (ctxt->input->cur[3] == '-')) {
11428
        if ((!terminate) &&
11429
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11430
      goto done;
11431
        xmlParseComment(ctxt);
11432
        ctxt->instate = XML_PARSER_CONTENT;
11433
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11434
        (ctxt->input->cur[2] == '[') &&
11435
        (ctxt->input->cur[3] == 'C') &&
11436
        (ctxt->input->cur[4] == 'D') &&
11437
        (ctxt->input->cur[5] == 'A') &&
11438
        (ctxt->input->cur[6] == 'T') &&
11439
        (ctxt->input->cur[7] == 'A') &&
11440
        (ctxt->input->cur[8] == '[')) {
11441
        SKIP(9);
11442
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11443
        break;
11444
    } else if ((cur == '<') && (next == '!') &&
11445
               (avail < 9)) {
11446
        goto done;
11447
    } else if (cur == '<') {
11448
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11449
                    "detected an error in element content\n");
11450
                    SKIP(1);
11451
    } else if (cur == '&') {
11452
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11453
      goto done;
11454
        xmlParseReference(ctxt);
11455
    } else {
11456
        /* TODO Avoid the extra copy, handle directly !!! */
11457
        /*
11458
         * Goal of the following test is:
11459
         *  - minimize calls to the SAX 'character' callback
11460
         *    when they are mergeable
11461
         *  - handle a problem for isBlank when we only parse
11462
         *    a sequence of blank chars and the next one is
11463
         *    not available to check against '<' presence.
11464
         *  - tries to homogenize the differences in SAX
11465
         *    callbacks between the push and pull versions
11466
         *    of the parser.
11467
         */
11468
        if ((ctxt->inputNr == 1) &&
11469
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11470
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11471
          goto done;
11472
                    }
11473
                    ctxt->checkIndex = 0;
11474
        xmlParseCharDataInternal(ctxt, !terminate);
11475
    }
11476
    break;
11477
      }
11478
            case XML_PARSER_END_TAG:
11479
    if (avail < 2)
11480
        goto done;
11481
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11482
        goto done;
11483
    if (ctxt->sax2) {
11484
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11485
        nameNsPop(ctxt);
11486
    }
11487
#ifdef LIBXML_SAX1_ENABLED
11488
      else
11489
        xmlParseEndTag1(ctxt, 0);
11490
#endif /* LIBXML_SAX1_ENABLED */
11491
    if (ctxt->instate == XML_PARSER_EOF) {
11492
        /* Nothing */
11493
    } else if (ctxt->nameNr == 0) {
11494
        ctxt->instate = XML_PARSER_EPILOG;
11495
    } else {
11496
        ctxt->instate = XML_PARSER_CONTENT;
11497
    }
11498
    break;
11499
            case XML_PARSER_CDATA_SECTION: {
11500
          /*
11501
     * The Push mode need to have the SAX callback for
11502
     * cdataBlock merge back contiguous callbacks.
11503
     */
11504
    const xmlChar *term;
11505
11506
                if (terminate) {
11507
                    /*
11508
                     * Don't call xmlParseLookupString. If 'terminate'
11509
                     * is set, checkIndex is invalid.
11510
                     */
11511
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11512
                                           "]]>");
11513
                } else {
11514
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11515
                }
11516
11517
    if (term == NULL) {
11518
        int tmp, size;
11519
11520
                    if (terminate) {
11521
                        /* Unfinished CDATA section */
11522
                        size = ctxt->input->end - ctxt->input->cur;
11523
                    } else {
11524
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11525
                            goto done;
11526
                        ctxt->checkIndex = 0;
11527
                        /* XXX: Why don't we pass the full buffer? */
11528
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11529
                    }
11530
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11531
                    if (tmp <= 0) {
11532
                        tmp = -tmp;
11533
                        ctxt->input->cur += tmp;
11534
                        goto encoding_error;
11535
                    }
11536
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11537
                        if (ctxt->sax->cdataBlock != NULL)
11538
                            ctxt->sax->cdataBlock(ctxt->userData,
11539
                                                  ctxt->input->cur, tmp);
11540
                        else if (ctxt->sax->characters != NULL)
11541
                            ctxt->sax->characters(ctxt->userData,
11542
                                                  ctxt->input->cur, tmp);
11543
                    }
11544
                    if (ctxt->instate == XML_PARSER_EOF)
11545
                        goto done;
11546
                    SKIPL(tmp);
11547
    } else {
11548
                    int base = term - CUR_PTR;
11549
        int tmp;
11550
11551
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11552
        if ((tmp < 0) || (tmp != base)) {
11553
      tmp = -tmp;
11554
      ctxt->input->cur += tmp;
11555
      goto encoding_error;
11556
        }
11557
        if ((ctxt->sax != NULL) && (base == 0) &&
11558
            (ctxt->sax->cdataBlock != NULL) &&
11559
            (!ctxt->disableSAX)) {
11560
      /*
11561
       * Special case to provide identical behaviour
11562
       * between pull and push parsers on empty CDATA
11563
       * sections
11564
       */
11565
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11566
           (!strncmp((const char *)&ctxt->input->cur[-9],
11567
                     "<![CDATA[", 9)))
11568
           ctxt->sax->cdataBlock(ctxt->userData,
11569
                                 BAD_CAST "", 0);
11570
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11571
      (!ctxt->disableSAX)) {
11572
      if (ctxt->sax->cdataBlock != NULL)
11573
          ctxt->sax->cdataBlock(ctxt->userData,
11574
              ctxt->input->cur, base);
11575
      else if (ctxt->sax->characters != NULL)
11576
          ctxt->sax->characters(ctxt->userData,
11577
              ctxt->input->cur, base);
11578
        }
11579
        if (ctxt->instate == XML_PARSER_EOF)
11580
      goto done;
11581
        SKIPL(base + 3);
11582
        ctxt->instate = XML_PARSER_CONTENT;
11583
#ifdef DEBUG_PUSH
11584
        xmlGenericError(xmlGenericErrorContext,
11585
          "PP: entering CONTENT\n");
11586
#endif
11587
    }
11588
    break;
11589
      }
11590
            case XML_PARSER_MISC:
11591
            case XML_PARSER_PROLOG:
11592
            case XML_PARSER_EPILOG:
11593
    SKIP_BLANKS;
11594
                avail = ctxt->input->end - ctxt->input->cur;
11595
    if (avail < 2)
11596
        goto done;
11597
    cur = ctxt->input->cur[0];
11598
    next = ctxt->input->cur[1];
11599
          if ((cur == '<') && (next == '?')) {
11600
        if ((!terminate) &&
11601
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11602
      goto done;
11603
#ifdef DEBUG_PUSH
11604
        xmlGenericError(xmlGenericErrorContext,
11605
          "PP: Parsing PI\n");
11606
#endif
11607
        xmlParsePI(ctxt);
11608
        if (ctxt->instate == XML_PARSER_EOF)
11609
      goto done;
11610
    } else if ((cur == '<') && (next == '!') &&
11611
        (ctxt->input->cur[2] == '-') &&
11612
        (ctxt->input->cur[3] == '-')) {
11613
        if ((!terminate) &&
11614
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11615
      goto done;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: Parsing Comment\n");
11619
#endif
11620
        xmlParseComment(ctxt);
11621
        if (ctxt->instate == XML_PARSER_EOF)
11622
      goto done;
11623
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11624
                    (cur == '<') && (next == '!') &&
11625
        (ctxt->input->cur[2] == 'D') &&
11626
        (ctxt->input->cur[3] == 'O') &&
11627
        (ctxt->input->cur[4] == 'C') &&
11628
        (ctxt->input->cur[5] == 'T') &&
11629
        (ctxt->input->cur[6] == 'Y') &&
11630
        (ctxt->input->cur[7] == 'P') &&
11631
        (ctxt->input->cur[8] == 'E')) {
11632
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11633
                        goto done;
11634
#ifdef DEBUG_PUSH
11635
        xmlGenericError(xmlGenericErrorContext,
11636
          "PP: Parsing internal subset\n");
11637
#endif
11638
        ctxt->inSubset = 1;
11639
        xmlParseDocTypeDecl(ctxt);
11640
        if (ctxt->instate == XML_PARSER_EOF)
11641
      goto done;
11642
        if (RAW == '[') {
11643
      ctxt->instate = XML_PARSER_DTD;
11644
#ifdef DEBUG_PUSH
11645
      xmlGenericError(xmlGenericErrorContext,
11646
        "PP: entering DTD\n");
11647
#endif
11648
        } else {
11649
      /*
11650
       * Create and update the external subset.
11651
       */
11652
      ctxt->inSubset = 2;
11653
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11654
          (ctxt->sax->externalSubset != NULL))
11655
          ctxt->sax->externalSubset(ctxt->userData,
11656
            ctxt->intSubName, ctxt->extSubSystem,
11657
            ctxt->extSubURI);
11658
      ctxt->inSubset = 0;
11659
      xmlCleanSpecialAttr(ctxt);
11660
      ctxt->instate = XML_PARSER_PROLOG;
11661
#ifdef DEBUG_PUSH
11662
      xmlGenericError(xmlGenericErrorContext,
11663
        "PP: entering PROLOG\n");
11664
#endif
11665
        }
11666
    } else if ((cur == '<') && (next == '!') &&
11667
               (avail <
11668
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11669
        goto done;
11670
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11671
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11672
        xmlHaltParser(ctxt);
11673
#ifdef DEBUG_PUSH
11674
        xmlGenericError(xmlGenericErrorContext,
11675
          "PP: entering EOF\n");
11676
#endif
11677
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11678
      ctxt->sax->endDocument(ctxt->userData);
11679
        goto done;
11680
                } else {
11681
        ctxt->instate = XML_PARSER_START_TAG;
11682
#ifdef DEBUG_PUSH
11683
        xmlGenericError(xmlGenericErrorContext,
11684
          "PP: entering START_TAG\n");
11685
#endif
11686
    }
11687
    break;
11688
            case XML_PARSER_DTD: {
11689
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11690
                    goto done;
11691
    xmlParseInternalSubset(ctxt);
11692
    if (ctxt->instate == XML_PARSER_EOF)
11693
        goto done;
11694
    ctxt->inSubset = 2;
11695
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11696
        (ctxt->sax->externalSubset != NULL))
11697
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11698
          ctxt->extSubSystem, ctxt->extSubURI);
11699
    ctxt->inSubset = 0;
11700
    xmlCleanSpecialAttr(ctxt);
11701
    if (ctxt->instate == XML_PARSER_EOF)
11702
        goto done;
11703
    ctxt->instate = XML_PARSER_PROLOG;
11704
#ifdef DEBUG_PUSH
11705
    xmlGenericError(xmlGenericErrorContext,
11706
      "PP: entering PROLOG\n");
11707
#endif
11708
                break;
11709
      }
11710
            case XML_PARSER_COMMENT:
11711
    xmlGenericError(xmlGenericErrorContext,
11712
      "PP: internal error, state == COMMENT\n");
11713
    ctxt->instate = XML_PARSER_CONTENT;
11714
#ifdef DEBUG_PUSH
11715
    xmlGenericError(xmlGenericErrorContext,
11716
      "PP: entering CONTENT\n");
11717
#endif
11718
    break;
11719
            case XML_PARSER_IGNORE:
11720
    xmlGenericError(xmlGenericErrorContext,
11721
      "PP: internal error, state == IGNORE");
11722
          ctxt->instate = XML_PARSER_DTD;
11723
#ifdef DEBUG_PUSH
11724
    xmlGenericError(xmlGenericErrorContext,
11725
      "PP: entering DTD\n");
11726
#endif
11727
          break;
11728
            case XML_PARSER_PI:
11729
    xmlGenericError(xmlGenericErrorContext,
11730
      "PP: internal error, state == PI\n");
11731
    ctxt->instate = XML_PARSER_CONTENT;
11732
#ifdef DEBUG_PUSH
11733
    xmlGenericError(xmlGenericErrorContext,
11734
      "PP: entering CONTENT\n");
11735
#endif
11736
    break;
11737
            case XML_PARSER_ENTITY_DECL:
11738
    xmlGenericError(xmlGenericErrorContext,
11739
      "PP: internal error, state == ENTITY_DECL\n");
11740
    ctxt->instate = XML_PARSER_DTD;
11741
#ifdef DEBUG_PUSH
11742
    xmlGenericError(xmlGenericErrorContext,
11743
      "PP: entering DTD\n");
11744
#endif
11745
    break;
11746
            case XML_PARSER_ENTITY_VALUE:
11747
    xmlGenericError(xmlGenericErrorContext,
11748
      "PP: internal error, state == ENTITY_VALUE\n");
11749
    ctxt->instate = XML_PARSER_CONTENT;
11750
#ifdef DEBUG_PUSH
11751
    xmlGenericError(xmlGenericErrorContext,
11752
      "PP: entering DTD\n");
11753
#endif
11754
    break;
11755
            case XML_PARSER_ATTRIBUTE_VALUE:
11756
    xmlGenericError(xmlGenericErrorContext,
11757
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
11758
    ctxt->instate = XML_PARSER_START_TAG;
11759
#ifdef DEBUG_PUSH
11760
    xmlGenericError(xmlGenericErrorContext,
11761
      "PP: entering START_TAG\n");
11762
#endif
11763
    break;
11764
            case XML_PARSER_SYSTEM_LITERAL:
11765
    xmlGenericError(xmlGenericErrorContext,
11766
      "PP: internal error, state == SYSTEM_LITERAL\n");
11767
    ctxt->instate = XML_PARSER_START_TAG;
11768
#ifdef DEBUG_PUSH
11769
    xmlGenericError(xmlGenericErrorContext,
11770
      "PP: entering START_TAG\n");
11771
#endif
11772
    break;
11773
            case XML_PARSER_PUBLIC_LITERAL:
11774
    xmlGenericError(xmlGenericErrorContext,
11775
      "PP: internal error, state == PUBLIC_LITERAL\n");
11776
    ctxt->instate = XML_PARSER_START_TAG;
11777
#ifdef DEBUG_PUSH
11778
    xmlGenericError(xmlGenericErrorContext,
11779
      "PP: entering START_TAG\n");
11780
#endif
11781
    break;
11782
  }
11783
    }
11784
done:
11785
#ifdef DEBUG_PUSH
11786
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11787
#endif
11788
    return(ret);
11789
encoding_error:
11790
    if (ctxt->input->end - ctxt->input->cur < 4) {
11791
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11792
         "Input is not proper UTF-8, indicate encoding !\n",
11793
         NULL, NULL);
11794
    } else {
11795
        char buffer[150];
11796
11797
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11798
      ctxt->input->cur[0], ctxt->input->cur[1],
11799
      ctxt->input->cur[2], ctxt->input->cur[3]);
11800
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11801
         "Input is not proper UTF-8, indicate encoding !\n%s",
11802
         BAD_CAST buffer, NULL);
11803
    }
11804
    return(0);
11805
}
11806
11807
/**
11808
 * xmlParseChunk:
11809
 * @ctxt:  an XML parser context
11810
 * @chunk:  an char array
11811
 * @size:  the size in byte of the chunk
11812
 * @terminate:  last chunk indicator
11813
 *
11814
 * Parse a Chunk of memory
11815
 *
11816
 * Returns zero if no error, the xmlParserErrors otherwise.
11817
 */
11818
int
11819
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11820
              int terminate) {
11821
    int end_in_lf = 0;
11822
11823
    if (ctxt == NULL)
11824
        return(XML_ERR_INTERNAL_ERROR);
11825
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11826
        return(ctxt->errNo);
11827
    if (ctxt->instate == XML_PARSER_EOF)
11828
        return(-1);
11829
    if (ctxt->input == NULL)
11830
        return(-1);
11831
11832
    ctxt->progressive = 1;
11833
    if (ctxt->instate == XML_PARSER_START)
11834
        xmlDetectSAX2(ctxt);
11835
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11836
        (chunk[size - 1] == '\r')) {
11837
  end_in_lf = 1;
11838
  size--;
11839
    }
11840
11841
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11842
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
11843
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
11844
  size_t cur = ctxt->input->cur - ctxt->input->base;
11845
  int res;
11846
11847
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11848
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
11849
  if (res < 0) {
11850
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
11851
      xmlHaltParser(ctxt);
11852
      return(ctxt->errNo);
11853
  }
11854
#ifdef DEBUG_PUSH
11855
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11856
#endif
11857
11858
    } else if (ctxt->instate != XML_PARSER_EOF) {
11859
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11860
      xmlParserInputBufferPtr in = ctxt->input->buf;
11861
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
11862
        (in->raw != NULL)) {
11863
    int nbchars;
11864
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
11865
    size_t current = ctxt->input->cur - ctxt->input->base;
11866
11867
    nbchars = xmlCharEncInput(in, terminate);
11868
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
11869
    if (nbchars < 0) {
11870
              xmlFatalErr(ctxt, in->error, NULL);
11871
                    xmlHaltParser(ctxt);
11872
        return(ctxt->errNo);
11873
    }
11874
      }
11875
  }
11876
    }
11877
11878
    xmlParseTryOrFinish(ctxt, terminate);
11879
    if (ctxt->instate == XML_PARSER_EOF)
11880
        return(ctxt->errNo);
11881
11882
    if ((ctxt->input != NULL) &&
11883
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
11884
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
11885
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
11886
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
11887
        xmlHaltParser(ctxt);
11888
    }
11889
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11890
        return(ctxt->errNo);
11891
11892
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11893
        (ctxt->input->buf != NULL)) {
11894
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11895
           ctxt->input);
11896
  size_t current = ctxt->input->cur - ctxt->input->base;
11897
        int res;
11898
11899
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11900
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11901
            base, current);
11902
        if (res < 0) {
11903
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
11904
            xmlHaltParser(ctxt);
11905
            return(ctxt->errNo);
11906
        }
11907
    }
11908
    if (terminate) {
11909
  /*
11910
   * Check for termination
11911
   */
11912
  if ((ctxt->instate != XML_PARSER_EOF) &&
11913
      (ctxt->instate != XML_PARSER_EPILOG)) {
11914
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11915
  }
11916
  if ((ctxt->instate == XML_PARSER_EPILOG) &&
11917
            (ctxt->input->cur < ctxt->input->end)) {
11918
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11919
  }
11920
  if (ctxt->instate != XML_PARSER_EOF) {
11921
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11922
    ctxt->sax->endDocument(ctxt->userData);
11923
  }
11924
  ctxt->instate = XML_PARSER_EOF;
11925
    }
11926
    if (ctxt->wellFormed == 0)
11927
  return((xmlParserErrors) ctxt->errNo);
11928
    else
11929
        return(0);
11930
}
11931
11932
/************************************************************************
11933
 *                  *
11934
 *    I/O front end functions to the parser     *
11935
 *                  *
11936
 ************************************************************************/
11937
11938
/**
11939
 * xmlCreatePushParserCtxt:
11940
 * @sax:  a SAX handler
11941
 * @user_data:  The user data returned on SAX callbacks
11942
 * @chunk:  a pointer to an array of chars
11943
 * @size:  number of chars in the array
11944
 * @filename:  an optional file name or URI
11945
 *
11946
 * Create a parser context for using the XML parser in push mode.
11947
 * If @buffer and @size are non-NULL, the data is used to detect
11948
 * the encoding.  The remaining characters will be parsed so they
11949
 * don't need to be fed in again through xmlParseChunk.
11950
 * To allow content encoding detection, @size should be >= 4
11951
 * The value of @filename is used for fetching external entities
11952
 * and error/warning reports.
11953
 *
11954
 * Returns the new parser context or NULL
11955
 */
11956
11957
xmlParserCtxtPtr
11958
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11959
                        const char *chunk, int size, const char *filename) {
11960
    xmlParserCtxtPtr ctxt;
11961
    xmlParserInputPtr inputStream;
11962
    xmlParserInputBufferPtr buf;
11963
11964
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
11965
    if (buf == NULL) return(NULL);
11966
11967
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11968
    if (ctxt == NULL) {
11969
        xmlErrMemory(NULL, "creating parser: out of memory\n");
11970
  xmlFreeParserInputBuffer(buf);
11971
  return(NULL);
11972
    }
11973
    ctxt->dictNames = 1;
11974
    if (filename == NULL) {
11975
  ctxt->directory = NULL;
11976
    } else {
11977
        ctxt->directory = xmlParserGetDirectory(filename);
11978
    }
11979
11980
    inputStream = xmlNewInputStream(ctxt);
11981
    if (inputStream == NULL) {
11982
  xmlFreeParserCtxt(ctxt);
11983
  xmlFreeParserInputBuffer(buf);
11984
  return(NULL);
11985
    }
11986
11987
    if (filename == NULL)
11988
  inputStream->filename = NULL;
11989
    else {
11990
  inputStream->filename = (char *)
11991
      xmlCanonicPath((const xmlChar *) filename);
11992
  if (inputStream->filename == NULL) {
11993
            xmlFreeInputStream(inputStream);
11994
      xmlFreeParserCtxt(ctxt);
11995
      xmlFreeParserInputBuffer(buf);
11996
      return(NULL);
11997
  }
11998
    }
11999
    inputStream->buf = buf;
12000
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12001
    inputPush(ctxt, inputStream);
12002
12003
    /*
12004
     * If the caller didn't provide an initial 'chunk' for determining
12005
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12006
     * that it can be automatically determined later
12007
     */
12008
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12009
12010
    if ((size != 0) && (chunk != NULL) &&
12011
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12012
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12013
  size_t cur = ctxt->input->cur - ctxt->input->base;
12014
        int res;
12015
12016
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12017
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12018
        if (res < 0) {
12019
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12020
            xmlHaltParser(ctxt);
12021
        }
12022
#ifdef DEBUG_PUSH
12023
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12024
#endif
12025
    }
12026
12027
    return(ctxt);
12028
}
12029
#endif /* LIBXML_PUSH_ENABLED */
12030
12031
/**
12032
 * xmlStopParser:
12033
 * @ctxt:  an XML parser context
12034
 *
12035
 * Blocks further parser processing
12036
 */
12037
void
12038
10.4M
xmlStopParser(xmlParserCtxtPtr ctxt) {
12039
10.4M
    if (ctxt == NULL)
12040
10.4M
        return;
12041
893
    xmlHaltParser(ctxt);
12042
893
    ctxt->errNo = XML_ERR_USER_STOP;
12043
893
}
12044
12045
/**
12046
 * xmlCreateIOParserCtxt:
12047
 * @sax:  a SAX handler
12048
 * @user_data:  The user data returned on SAX callbacks
12049
 * @ioread:  an I/O read function
12050
 * @ioclose:  an I/O close function
12051
 * @ioctx:  an I/O handler
12052
 * @enc:  the charset encoding if known
12053
 *
12054
 * Create a parser context for using the XML parser with an existing
12055
 * I/O stream
12056
 *
12057
 * Returns the new parser context or NULL
12058
 */
12059
xmlParserCtxtPtr
12060
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12061
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12062
0
  void *ioctx, xmlCharEncoding enc) {
12063
0
    xmlParserCtxtPtr ctxt;
12064
0
    xmlParserInputPtr inputStream;
12065
0
    xmlParserInputBufferPtr buf;
12066
12067
0
    if (ioread == NULL) return(NULL);
12068
12069
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12070
0
    if (buf == NULL) {
12071
0
        if (ioclose != NULL)
12072
0
            ioclose(ioctx);
12073
0
        return (NULL);
12074
0
    }
12075
12076
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12077
0
    if (ctxt == NULL) {
12078
0
  xmlFreeParserInputBuffer(buf);
12079
0
  return(NULL);
12080
0
    }
12081
12082
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12083
0
    if (inputStream == NULL) {
12084
0
  xmlFreeParserCtxt(ctxt);
12085
0
  return(NULL);
12086
0
    }
12087
0
    inputPush(ctxt, inputStream);
12088
12089
0
    return(ctxt);
12090
0
}
12091
12092
#ifdef LIBXML_VALID_ENABLED
12093
/************************************************************************
12094
 *                  *
12095
 *    Front ends when parsing a DTD       *
12096
 *                  *
12097
 ************************************************************************/
12098
12099
/**
12100
 * xmlIOParseDTD:
12101
 * @sax:  the SAX handler block or NULL
12102
 * @input:  an Input Buffer
12103
 * @enc:  the charset encoding if known
12104
 *
12105
 * Load and parse a DTD
12106
 *
12107
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12108
 * @input will be freed by the function in any case.
12109
 */
12110
12111
xmlDtdPtr
12112
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12113
        xmlCharEncoding enc) {
12114
    xmlDtdPtr ret = NULL;
12115
    xmlParserCtxtPtr ctxt;
12116
    xmlParserInputPtr pinput = NULL;
12117
    xmlChar start[4];
12118
12119
    if (input == NULL)
12120
  return(NULL);
12121
12122
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12123
    if (ctxt == NULL) {
12124
        xmlFreeParserInputBuffer(input);
12125
  return(NULL);
12126
    }
12127
12128
    /* We are loading a DTD */
12129
    ctxt->options |= XML_PARSE_DTDLOAD;
12130
12131
    xmlDetectSAX2(ctxt);
12132
12133
    /*
12134
     * generate a parser input from the I/O handler
12135
     */
12136
12137
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12138
    if (pinput == NULL) {
12139
        xmlFreeParserInputBuffer(input);
12140
  xmlFreeParserCtxt(ctxt);
12141
  return(NULL);
12142
    }
12143
12144
    /*
12145
     * plug some encoding conversion routines here.
12146
     */
12147
    if (xmlPushInput(ctxt, pinput) < 0) {
12148
  xmlFreeParserCtxt(ctxt);
12149
  return(NULL);
12150
    }
12151
    if (enc != XML_CHAR_ENCODING_NONE) {
12152
        xmlSwitchEncoding(ctxt, enc);
12153
    }
12154
12155
    pinput->filename = NULL;
12156
    pinput->line = 1;
12157
    pinput->col = 1;
12158
    pinput->base = ctxt->input->cur;
12159
    pinput->cur = ctxt->input->cur;
12160
    pinput->free = NULL;
12161
12162
    /*
12163
     * let's parse that entity knowing it's an external subset.
12164
     */
12165
    ctxt->inSubset = 2;
12166
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12167
    if (ctxt->myDoc == NULL) {
12168
  xmlErrMemory(ctxt, "New Doc failed");
12169
  return(NULL);
12170
    }
12171
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12172
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12173
                                 BAD_CAST "none", BAD_CAST "none");
12174
12175
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12176
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12177
  /*
12178
   * Get the 4 first bytes and decode the charset
12179
   * if enc != XML_CHAR_ENCODING_NONE
12180
   * plug some encoding conversion routines.
12181
   */
12182
  start[0] = RAW;
12183
  start[1] = NXT(1);
12184
  start[2] = NXT(2);
12185
  start[3] = NXT(3);
12186
  enc = xmlDetectCharEncoding(start, 4);
12187
  if (enc != XML_CHAR_ENCODING_NONE) {
12188
      xmlSwitchEncoding(ctxt, enc);
12189
  }
12190
    }
12191
12192
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12193
12194
    if (ctxt->myDoc != NULL) {
12195
  if (ctxt->wellFormed) {
12196
      ret = ctxt->myDoc->extSubset;
12197
      ctxt->myDoc->extSubset = NULL;
12198
      if (ret != NULL) {
12199
    xmlNodePtr tmp;
12200
12201
    ret->doc = NULL;
12202
    tmp = ret->children;
12203
    while (tmp != NULL) {
12204
        tmp->doc = NULL;
12205
        tmp = tmp->next;
12206
    }
12207
      }
12208
  } else {
12209
      ret = NULL;
12210
  }
12211
        xmlFreeDoc(ctxt->myDoc);
12212
        ctxt->myDoc = NULL;
12213
    }
12214
    xmlFreeParserCtxt(ctxt);
12215
12216
    return(ret);
12217
}
12218
12219
/**
12220
 * xmlSAXParseDTD:
12221
 * @sax:  the SAX handler block
12222
 * @ExternalID:  a NAME* containing the External ID of the DTD
12223
 * @SystemID:  a NAME* containing the URL to the DTD
12224
 *
12225
 * DEPRECATED: Don't use.
12226
 *
12227
 * Load and parse an external subset.
12228
 *
12229
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12230
 */
12231
12232
xmlDtdPtr
12233
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12234
                          const xmlChar *SystemID) {
12235
    xmlDtdPtr ret = NULL;
12236
    xmlParserCtxtPtr ctxt;
12237
    xmlParserInputPtr input = NULL;
12238
    xmlCharEncoding enc;
12239
    xmlChar* systemIdCanonic;
12240
12241
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12242
12243
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12244
    if (ctxt == NULL) {
12245
  return(NULL);
12246
    }
12247
12248
    /* We are loading a DTD */
12249
    ctxt->options |= XML_PARSE_DTDLOAD;
12250
12251
    /*
12252
     * Canonicalise the system ID
12253
     */
12254
    systemIdCanonic = xmlCanonicPath(SystemID);
12255
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12256
  xmlFreeParserCtxt(ctxt);
12257
  return(NULL);
12258
    }
12259
12260
    /*
12261
     * Ask the Entity resolver to load the damn thing
12262
     */
12263
12264
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12265
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12266
                                   systemIdCanonic);
12267
    if (input == NULL) {
12268
  xmlFreeParserCtxt(ctxt);
12269
  if (systemIdCanonic != NULL)
12270
      xmlFree(systemIdCanonic);
12271
  return(NULL);
12272
    }
12273
12274
    /*
12275
     * plug some encoding conversion routines here.
12276
     */
12277
    if (xmlPushInput(ctxt, input) < 0) {
12278
  xmlFreeParserCtxt(ctxt);
12279
  if (systemIdCanonic != NULL)
12280
      xmlFree(systemIdCanonic);
12281
  return(NULL);
12282
    }
12283
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12284
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12285
  xmlSwitchEncoding(ctxt, enc);
12286
    }
12287
12288
    if (input->filename == NULL)
12289
  input->filename = (char *) systemIdCanonic;
12290
    else
12291
  xmlFree(systemIdCanonic);
12292
    input->line = 1;
12293
    input->col = 1;
12294
    input->base = ctxt->input->cur;
12295
    input->cur = ctxt->input->cur;
12296
    input->free = NULL;
12297
12298
    /*
12299
     * let's parse that entity knowing it's an external subset.
12300
     */
12301
    ctxt->inSubset = 2;
12302
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12303
    if (ctxt->myDoc == NULL) {
12304
  xmlErrMemory(ctxt, "New Doc failed");
12305
  xmlFreeParserCtxt(ctxt);
12306
  return(NULL);
12307
    }
12308
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12309
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12310
                                 ExternalID, SystemID);
12311
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12312
12313
    if (ctxt->myDoc != NULL) {
12314
  if (ctxt->wellFormed) {
12315
      ret = ctxt->myDoc->extSubset;
12316
      ctxt->myDoc->extSubset = NULL;
12317
      if (ret != NULL) {
12318
    xmlNodePtr tmp;
12319
12320
    ret->doc = NULL;
12321
    tmp = ret->children;
12322
    while (tmp != NULL) {
12323
        tmp->doc = NULL;
12324
        tmp = tmp->next;
12325
    }
12326
      }
12327
  } else {
12328
      ret = NULL;
12329
  }
12330
        xmlFreeDoc(ctxt->myDoc);
12331
        ctxt->myDoc = NULL;
12332
    }
12333
    xmlFreeParserCtxt(ctxt);
12334
12335
    return(ret);
12336
}
12337
12338
12339
/**
12340
 * xmlParseDTD:
12341
 * @ExternalID:  a NAME* containing the External ID of the DTD
12342
 * @SystemID:  a NAME* containing the URL to the DTD
12343
 *
12344
 * Load and parse an external subset.
12345
 *
12346
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12347
 */
12348
12349
xmlDtdPtr
12350
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12351
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12352
}
12353
#endif /* LIBXML_VALID_ENABLED */
12354
12355
/************************************************************************
12356
 *                  *
12357
 *    Front ends when parsing an Entity     *
12358
 *                  *
12359
 ************************************************************************/
12360
12361
/**
12362
 * xmlParseCtxtExternalEntity:
12363
 * @ctx:  the existing parsing context
12364
 * @URL:  the URL for the entity to load
12365
 * @ID:  the System ID for the entity to load
12366
 * @lst:  the return value for the set of parsed nodes
12367
 *
12368
 * Parse an external general entity within an existing parsing context
12369
 * An external general parsed entity is well-formed if it matches the
12370
 * production labeled extParsedEnt.
12371
 *
12372
 * [78] extParsedEnt ::= TextDecl? content
12373
 *
12374
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12375
 *    the parser error code otherwise
12376
 */
12377
12378
int
12379
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12380
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12381
0
    void *userData;
12382
12383
0
    if (ctx == NULL) return(-1);
12384
    /*
12385
     * If the user provided their own SAX callbacks, then reuse the
12386
     * userData callback field, otherwise the expected setup in a
12387
     * DOM builder is to have userData == ctxt
12388
     */
12389
0
    if (ctx->userData == ctx)
12390
0
        userData = NULL;
12391
0
    else
12392
0
        userData = ctx->userData;
12393
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12394
0
                                         userData, ctx->depth + 1,
12395
0
                                         URL, ID, lst);
12396
0
}
12397
12398
/**
12399
 * xmlParseExternalEntityPrivate:
12400
 * @doc:  the document the chunk pertains to
12401
 * @oldctxt:  the previous parser context if available
12402
 * @sax:  the SAX handler block (possibly NULL)
12403
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12404
 * @depth:  Used for loop detection, use 0
12405
 * @URL:  the URL for the entity to load
12406
 * @ID:  the System ID for the entity to load
12407
 * @list:  the return value for the set of parsed nodes
12408
 *
12409
 * Private version of xmlParseExternalEntity()
12410
 *
12411
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12412
 *    the parser error code otherwise
12413
 */
12414
12415
static xmlParserErrors
12416
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12417
                xmlSAXHandlerPtr sax,
12418
          void *user_data, int depth, const xmlChar *URL,
12419
11.4k
          const xmlChar *ID, xmlNodePtr *list) {
12420
11.4k
    xmlParserCtxtPtr ctxt;
12421
11.4k
    xmlDocPtr newDoc;
12422
11.4k
    xmlNodePtr newRoot;
12423
11.4k
    xmlParserErrors ret = XML_ERR_OK;
12424
11.4k
    xmlChar start[4];
12425
11.4k
    xmlCharEncoding enc;
12426
12427
11.4k
    if (((depth > 40) &&
12428
11.4k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12429
11.4k
  (depth > 100)) {
12430
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12431
0
                       "Maximum entity nesting depth exceeded");
12432
0
        return(XML_ERR_ENTITY_LOOP);
12433
0
    }
12434
12435
11.4k
    if (list != NULL)
12436
5.12k
        *list = NULL;
12437
11.4k
    if ((URL == NULL) && (ID == NULL))
12438
0
  return(XML_ERR_INTERNAL_ERROR);
12439
11.4k
    if (doc == NULL)
12440
0
  return(XML_ERR_INTERNAL_ERROR);
12441
12442
11.4k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12443
11.4k
                                             oldctxt);
12444
11.4k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12445
5.95k
    if (oldctxt != NULL) {
12446
5.95k
        ctxt->nbErrors = oldctxt->nbErrors;
12447
5.95k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12448
5.95k
    }
12449
5.95k
    xmlDetectSAX2(ctxt);
12450
12451
5.95k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12452
5.95k
    if (newDoc == NULL) {
12453
5
  xmlFreeParserCtxt(ctxt);
12454
5
  return(XML_ERR_INTERNAL_ERROR);
12455
5
    }
12456
5.95k
    newDoc->properties = XML_DOC_INTERNAL;
12457
5.95k
    if (doc) {
12458
5.95k
        newDoc->intSubset = doc->intSubset;
12459
5.95k
        newDoc->extSubset = doc->extSubset;
12460
5.95k
        if (doc->dict) {
12461
5.95k
            newDoc->dict = doc->dict;
12462
5.95k
            xmlDictReference(newDoc->dict);
12463
5.95k
        }
12464
5.95k
        if (doc->URL != NULL) {
12465
5.95k
            newDoc->URL = xmlStrdup(doc->URL);
12466
5.95k
        }
12467
5.95k
    }
12468
5.95k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12469
5.95k
    if (newRoot == NULL) {
12470
1
  if (sax != NULL)
12471
1
  xmlFreeParserCtxt(ctxt);
12472
1
  newDoc->intSubset = NULL;
12473
1
  newDoc->extSubset = NULL;
12474
1
        xmlFreeDoc(newDoc);
12475
1
  return(XML_ERR_INTERNAL_ERROR);
12476
1
    }
12477
5.94k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12478
5.94k
    nodePush(ctxt, newDoc->children);
12479
5.94k
    if (doc == NULL) {
12480
0
        ctxt->myDoc = newDoc;
12481
5.94k
    } else {
12482
5.94k
        ctxt->myDoc = doc;
12483
5.94k
        newRoot->doc = doc;
12484
5.94k
    }
12485
12486
    /*
12487
     * Get the 4 first bytes and decode the charset
12488
     * if enc != XML_CHAR_ENCODING_NONE
12489
     * plug some encoding conversion routines.
12490
     */
12491
5.94k
    GROW;
12492
5.94k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12493
5.07k
  start[0] = RAW;
12494
5.07k
  start[1] = NXT(1);
12495
5.07k
  start[2] = NXT(2);
12496
5.07k
  start[3] = NXT(3);
12497
5.07k
  enc = xmlDetectCharEncoding(start, 4);
12498
5.07k
  if (enc != XML_CHAR_ENCODING_NONE) {
12499
2.61k
      xmlSwitchEncoding(ctxt, enc);
12500
2.61k
  }
12501
5.07k
    }
12502
12503
    /*
12504
     * Parse a possible text declaration first
12505
     */
12506
5.94k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12507
2.25k
  xmlParseTextDecl(ctxt);
12508
        /*
12509
         * An XML-1.0 document can't reference an entity not XML-1.0
12510
         */
12511
2.25k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12512
2.25k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12513
38
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12514
38
                           "Version mismatch between document and entity\n");
12515
38
        }
12516
2.25k
    }
12517
12518
5.94k
    ctxt->instate = XML_PARSER_CONTENT;
12519
5.94k
    ctxt->depth = depth;
12520
5.94k
    if (oldctxt != NULL) {
12521
5.94k
  ctxt->_private = oldctxt->_private;
12522
5.94k
  ctxt->loadsubset = oldctxt->loadsubset;
12523
5.94k
  ctxt->validate = oldctxt->validate;
12524
5.94k
  ctxt->valid = oldctxt->valid;
12525
5.94k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12526
5.94k
        if (oldctxt->validate) {
12527
0
            ctxt->vctxt.error = oldctxt->vctxt.error;
12528
0
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12529
0
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12530
0
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12531
0
        }
12532
5.94k
  ctxt->external = oldctxt->external;
12533
5.94k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12534
5.94k
        ctxt->dict = oldctxt->dict;
12535
5.94k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12536
5.94k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12537
5.94k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12538
5.94k
        ctxt->dictNames = oldctxt->dictNames;
12539
5.94k
        ctxt->attsDefault = oldctxt->attsDefault;
12540
5.94k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12541
5.94k
        ctxt->linenumbers = oldctxt->linenumbers;
12542
5.94k
  ctxt->record_info = oldctxt->record_info;
12543
5.94k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12544
5.94k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12545
5.94k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12546
5.94k
    } else {
12547
  /*
12548
   * Doing validity checking on chunk without context
12549
   * doesn't make sense
12550
   */
12551
0
  ctxt->_private = NULL;
12552
0
  ctxt->validate = 0;
12553
0
  ctxt->external = 2;
12554
0
  ctxt->loadsubset = 0;
12555
0
    }
12556
12557
5.94k
    xmlParseContent(ctxt);
12558
12559
5.94k
    if ((RAW == '<') && (NXT(1) == '/')) {
12560
341
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12561
5.60k
    } else if (RAW != 0) {
12562
31
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12563
31
    }
12564
5.94k
    if (ctxt->node != newDoc->children) {
12565
2.22k
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12566
2.22k
    }
12567
12568
5.94k
    if (!ctxt->wellFormed) {
12569
3.98k
  ret = (xmlParserErrors)ctxt->errNo;
12570
3.98k
        if (oldctxt != NULL) {
12571
3.98k
            oldctxt->errNo = ctxt->errNo;
12572
3.98k
            oldctxt->wellFormed = 0;
12573
3.98k
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12574
3.98k
        }
12575
3.98k
    } else {
12576
1.96k
  if (list != NULL) {
12577
567
      xmlNodePtr cur;
12578
12579
      /*
12580
       * Return the newly created nodeset after unlinking it from
12581
       * they pseudo parent.
12582
       */
12583
567
      cur = newDoc->children->children;
12584
567
      *list = cur;
12585
1.66k
      while (cur != NULL) {
12586
1.09k
    cur->parent = NULL;
12587
1.09k
    cur = cur->next;
12588
1.09k
      }
12589
567
            newDoc->children->children = NULL;
12590
567
  }
12591
1.96k
  ret = XML_ERR_OK;
12592
1.96k
    }
12593
12594
    /*
12595
     * Also record the size of the entity parsed
12596
     */
12597
5.94k
    if (ctxt->input != NULL && oldctxt != NULL) {
12598
5.94k
        unsigned long consumed = ctxt->input->consumed;
12599
12600
5.94k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12601
12602
5.94k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12603
5.94k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12604
12605
5.94k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12606
5.94k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12607
5.94k
    }
12608
12609
5.94k
    if (oldctxt != NULL) {
12610
5.94k
        ctxt->dict = NULL;
12611
5.94k
        ctxt->attsDefault = NULL;
12612
5.94k
        ctxt->attsSpecial = NULL;
12613
5.94k
        oldctxt->nbErrors = ctxt->nbErrors;
12614
5.94k
        oldctxt->nbWarnings = ctxt->nbWarnings;
12615
5.94k
        oldctxt->validate = ctxt->validate;
12616
5.94k
        oldctxt->valid = ctxt->valid;
12617
5.94k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12618
5.94k
        oldctxt->node_seq.length = ctxt->node_seq.length;
12619
5.94k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12620
5.94k
    }
12621
5.94k
    ctxt->node_seq.maximum = 0;
12622
5.94k
    ctxt->node_seq.length = 0;
12623
5.94k
    ctxt->node_seq.buffer = NULL;
12624
5.94k
    xmlFreeParserCtxt(ctxt);
12625
5.94k
    newDoc->intSubset = NULL;
12626
5.94k
    newDoc->extSubset = NULL;
12627
5.94k
    xmlFreeDoc(newDoc);
12628
12629
5.94k
    return(ret);
12630
5.95k
}
12631
12632
#ifdef LIBXML_SAX1_ENABLED
12633
/**
12634
 * xmlParseExternalEntity:
12635
 * @doc:  the document the chunk pertains to
12636
 * @sax:  the SAX handler block (possibly NULL)
12637
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12638
 * @depth:  Used for loop detection, use 0
12639
 * @URL:  the URL for the entity to load
12640
 * @ID:  the System ID for the entity to load
12641
 * @lst:  the return value for the set of parsed nodes
12642
 *
12643
 * Parse an external general entity
12644
 * An external general parsed entity is well-formed if it matches the
12645
 * production labeled extParsedEnt.
12646
 *
12647
 * [78] extParsedEnt ::= TextDecl? content
12648
 *
12649
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12650
 *    the parser error code otherwise
12651
 */
12652
12653
int
12654
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12655
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12656
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12657
                           ID, lst));
12658
}
12659
12660
/**
12661
 * xmlParseBalancedChunkMemory:
12662
 * @doc:  the document the chunk pertains to (must not be NULL)
12663
 * @sax:  the SAX handler block (possibly NULL)
12664
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12665
 * @depth:  Used for loop detection, use 0
12666
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12667
 * @lst:  the return value for the set of parsed nodes
12668
 *
12669
 * Parse a well-balanced chunk of an XML document
12670
 * called by the parser
12671
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12672
 * the content production in the XML grammar:
12673
 *
12674
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12675
 *
12676
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12677
 *    the parser error code otherwise
12678
 */
12679
12680
int
12681
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12682
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12683
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12684
                                                depth, string, lst, 0 );
12685
}
12686
#endif /* LIBXML_SAX1_ENABLED */
12687
12688
/**
12689
 * xmlParseBalancedChunkMemoryInternal:
12690
 * @oldctxt:  the existing parsing context
12691
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12692
 * @user_data:  the user data field for the parser context
12693
 * @lst:  the return value for the set of parsed nodes
12694
 *
12695
 *
12696
 * Parse a well-balanced chunk of an XML document
12697
 * called by the parser
12698
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12699
 * the content production in the XML grammar:
12700
 *
12701
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12702
 *
12703
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12704
 * error code otherwise
12705
 *
12706
 * In case recover is set to 1, the nodelist will not be empty even if
12707
 * the parsed chunk is not well balanced.
12708
 */
12709
static xmlParserErrors
12710
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12711
2.99k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12712
2.99k
    xmlParserCtxtPtr ctxt;
12713
2.99k
    xmlDocPtr newDoc = NULL;
12714
2.99k
    xmlNodePtr newRoot;
12715
2.99k
    xmlSAXHandlerPtr oldsax = NULL;
12716
2.99k
    xmlNodePtr content = NULL;
12717
2.99k
    xmlNodePtr last = NULL;
12718
2.99k
    int size;
12719
2.99k
    xmlParserErrors ret = XML_ERR_OK;
12720
2.99k
#ifdef SAX2
12721
2.99k
    int i;
12722
2.99k
#endif
12723
12724
2.99k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12725
2.99k
        (oldctxt->depth >  100)) {
12726
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12727
0
                       "Maximum entity nesting depth exceeded");
12728
0
  return(XML_ERR_ENTITY_LOOP);
12729
0
    }
12730
12731
12732
2.99k
    if (lst != NULL)
12733
2.96k
        *lst = NULL;
12734
2.99k
    if (string == NULL)
12735
0
        return(XML_ERR_INTERNAL_ERROR);
12736
12737
2.99k
    size = xmlStrlen(string);
12738
12739
2.99k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12740
2.99k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12741
2.95k
    ctxt->nbErrors = oldctxt->nbErrors;
12742
2.95k
    ctxt->nbWarnings = oldctxt->nbWarnings;
12743
2.95k
    if (user_data != NULL)
12744
0
  ctxt->userData = user_data;
12745
2.95k
    else
12746
2.95k
  ctxt->userData = ctxt;
12747
2.95k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12748
2.95k
    ctxt->dict = oldctxt->dict;
12749
2.95k
    ctxt->input_id = oldctxt->input_id;
12750
2.95k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12751
2.95k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12752
2.95k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12753
12754
2.95k
#ifdef SAX2
12755
    /* propagate namespaces down the entity */
12756
5.05k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
12757
2.10k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12758
2.10k
    }
12759
2.95k
#endif
12760
12761
2.95k
    oldsax = ctxt->sax;
12762
2.95k
    ctxt->sax = oldctxt->sax;
12763
2.95k
    xmlDetectSAX2(ctxt);
12764
2.95k
    ctxt->replaceEntities = oldctxt->replaceEntities;
12765
2.95k
    ctxt->options = oldctxt->options;
12766
12767
2.95k
    ctxt->_private = oldctxt->_private;
12768
2.95k
    if (oldctxt->myDoc == NULL) {
12769
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
12770
0
  if (newDoc == NULL) {
12771
0
      ctxt->sax = oldsax;
12772
0
      ctxt->dict = NULL;
12773
0
      xmlFreeParserCtxt(ctxt);
12774
0
      return(XML_ERR_INTERNAL_ERROR);
12775
0
  }
12776
0
  newDoc->properties = XML_DOC_INTERNAL;
12777
0
  newDoc->dict = ctxt->dict;
12778
0
  xmlDictReference(newDoc->dict);
12779
0
  ctxt->myDoc = newDoc;
12780
2.95k
    } else {
12781
2.95k
  ctxt->myDoc = oldctxt->myDoc;
12782
2.95k
        content = ctxt->myDoc->children;
12783
2.95k
  last = ctxt->myDoc->last;
12784
2.95k
    }
12785
2.95k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12786
2.95k
    if (newRoot == NULL) {
12787
8
  ctxt->sax = oldsax;
12788
8
  ctxt->dict = NULL;
12789
8
  xmlFreeParserCtxt(ctxt);
12790
8
  if (newDoc != NULL) {
12791
0
      xmlFreeDoc(newDoc);
12792
0
  }
12793
8
  return(XML_ERR_INTERNAL_ERROR);
12794
8
    }
12795
2.94k
    ctxt->myDoc->children = NULL;
12796
2.94k
    ctxt->myDoc->last = NULL;
12797
2.94k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12798
2.94k
    nodePush(ctxt, ctxt->myDoc->children);
12799
2.94k
    ctxt->instate = XML_PARSER_CONTENT;
12800
2.94k
    ctxt->depth = oldctxt->depth;
12801
12802
2.94k
    ctxt->validate = 0;
12803
2.94k
    ctxt->loadsubset = oldctxt->loadsubset;
12804
2.94k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12805
  /*
12806
   * ID/IDREF registration will be done in xmlValidateElement below
12807
   */
12808
2.93k
  ctxt->loadsubset |= XML_SKIP_IDS;
12809
2.93k
    }
12810
2.94k
    ctxt->dictNames = oldctxt->dictNames;
12811
2.94k
    ctxt->attsDefault = oldctxt->attsDefault;
12812
2.94k
    ctxt->attsSpecial = oldctxt->attsSpecial;
12813
12814
2.94k
    xmlParseContent(ctxt);
12815
2.94k
    if ((RAW == '<') && (NXT(1) == '/')) {
12816
277
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12817
2.66k
    } else if (RAW != 0) {
12818
30
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12819
30
    }
12820
2.94k
    if (ctxt->node != ctxt->myDoc->children) {
12821
433
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12822
433
    }
12823
12824
2.94k
    if (!ctxt->wellFormed) {
12825
1.47k
  ret = (xmlParserErrors)ctxt->errNo;
12826
1.47k
        oldctxt->errNo = ctxt->errNo;
12827
1.47k
        oldctxt->wellFormed = 0;
12828
1.47k
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12829
1.47k
    } else {
12830
1.47k
        ret = XML_ERR_OK;
12831
1.47k
    }
12832
12833
2.94k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
12834
1.47k
  xmlNodePtr cur;
12835
12836
  /*
12837
   * Return the newly created nodeset after unlinking it from
12838
   * they pseudo parent.
12839
   */
12840
1.47k
  cur = ctxt->myDoc->children->children;
12841
1.47k
  *lst = cur;
12842
8.42k
  while (cur != NULL) {
12843
#ifdef LIBXML_VALID_ENABLED
12844
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12845
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12846
    (cur->type == XML_ELEMENT_NODE)) {
12847
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12848
      oldctxt->myDoc, cur);
12849
      }
12850
#endif /* LIBXML_VALID_ENABLED */
12851
6.95k
      cur->parent = NULL;
12852
6.95k
      cur = cur->next;
12853
6.95k
  }
12854
1.47k
  ctxt->myDoc->children->children = NULL;
12855
1.47k
    }
12856
2.94k
    if (ctxt->myDoc != NULL) {
12857
2.94k
  xmlFreeNode(ctxt->myDoc->children);
12858
2.94k
        ctxt->myDoc->children = content;
12859
2.94k
        ctxt->myDoc->last = last;
12860
2.94k
    }
12861
12862
    /*
12863
     * Also record the size of the entity parsed
12864
     */
12865
2.94k
    if (ctxt->input != NULL && oldctxt != NULL) {
12866
2.94k
        unsigned long consumed = ctxt->input->consumed;
12867
12868
2.94k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12869
12870
2.94k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12871
2.94k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12872
2.94k
    }
12873
12874
2.94k
    oldctxt->nbErrors = ctxt->nbErrors;
12875
2.94k
    oldctxt->nbWarnings = ctxt->nbWarnings;
12876
2.94k
    ctxt->sax = oldsax;
12877
2.94k
    ctxt->dict = NULL;
12878
2.94k
    ctxt->attsDefault = NULL;
12879
2.94k
    ctxt->attsSpecial = NULL;
12880
2.94k
    xmlFreeParserCtxt(ctxt);
12881
2.94k
    if (newDoc != NULL) {
12882
0
  xmlFreeDoc(newDoc);
12883
0
    }
12884
12885
2.94k
    return(ret);
12886
2.95k
}
12887
12888
/**
12889
 * xmlParseInNodeContext:
12890
 * @node:  the context node
12891
 * @data:  the input string
12892
 * @datalen:  the input string length in bytes
12893
 * @options:  a combination of xmlParserOption
12894
 * @lst:  the return value for the set of parsed nodes
12895
 *
12896
 * Parse a well-balanced chunk of an XML document
12897
 * within the context (DTD, namespaces, etc ...) of the given node.
12898
 *
12899
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12900
 * the content production in the XML grammar:
12901
 *
12902
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12903
 *
12904
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12905
 * error code otherwise
12906
 */
12907
xmlParserErrors
12908
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12909
0
                      int options, xmlNodePtr *lst) {
12910
0
#ifdef SAX2
12911
0
    xmlParserCtxtPtr ctxt;
12912
0
    xmlDocPtr doc = NULL;
12913
0
    xmlNodePtr fake, cur;
12914
0
    int nsnr = 0;
12915
12916
0
    xmlParserErrors ret = XML_ERR_OK;
12917
12918
    /*
12919
     * check all input parameters, grab the document
12920
     */
12921
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12922
0
        return(XML_ERR_INTERNAL_ERROR);
12923
0
    switch (node->type) {
12924
0
        case XML_ELEMENT_NODE:
12925
0
        case XML_ATTRIBUTE_NODE:
12926
0
        case XML_TEXT_NODE:
12927
0
        case XML_CDATA_SECTION_NODE:
12928
0
        case XML_ENTITY_REF_NODE:
12929
0
        case XML_PI_NODE:
12930
0
        case XML_COMMENT_NODE:
12931
0
        case XML_DOCUMENT_NODE:
12932
0
        case XML_HTML_DOCUMENT_NODE:
12933
0
      break;
12934
0
  default:
12935
0
      return(XML_ERR_INTERNAL_ERROR);
12936
12937
0
    }
12938
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12939
0
           (node->type != XML_DOCUMENT_NODE) &&
12940
0
     (node->type != XML_HTML_DOCUMENT_NODE))
12941
0
  node = node->parent;
12942
0
    if (node == NULL)
12943
0
  return(XML_ERR_INTERNAL_ERROR);
12944
0
    if (node->type == XML_ELEMENT_NODE)
12945
0
  doc = node->doc;
12946
0
    else
12947
0
        doc = (xmlDocPtr) node;
12948
0
    if (doc == NULL)
12949
0
  return(XML_ERR_INTERNAL_ERROR);
12950
12951
    /*
12952
     * allocate a context and set-up everything not related to the
12953
     * node position in the tree
12954
     */
12955
0
    if (doc->type == XML_DOCUMENT_NODE)
12956
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12957
0
#ifdef LIBXML_HTML_ENABLED
12958
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12959
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12960
        /*
12961
         * When parsing in context, it makes no sense to add implied
12962
         * elements like html/body/etc...
12963
         */
12964
0
        options |= HTML_PARSE_NOIMPLIED;
12965
0
    }
12966
0
#endif
12967
0
    else
12968
0
        return(XML_ERR_INTERNAL_ERROR);
12969
12970
0
    if (ctxt == NULL)
12971
0
        return(XML_ERR_NO_MEMORY);
12972
12973
    /*
12974
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12975
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12976
     * we must wait until the last moment to free the original one.
12977
     */
12978
0
    if (doc->dict != NULL) {
12979
0
        if (ctxt->dict != NULL)
12980
0
      xmlDictFree(ctxt->dict);
12981
0
  ctxt->dict = doc->dict;
12982
0
    } else
12983
0
        options |= XML_PARSE_NODICT;
12984
12985
0
    if (doc->encoding != NULL) {
12986
0
        xmlCharEncodingHandlerPtr hdlr;
12987
12988
0
        if (ctxt->encoding != NULL)
12989
0
      xmlFree((xmlChar *) ctxt->encoding);
12990
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
12991
12992
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
12993
0
        if (hdlr != NULL) {
12994
0
            xmlSwitchToEncoding(ctxt, hdlr);
12995
0
  } else {
12996
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
12997
0
        }
12998
0
    }
12999
13000
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13001
0
    xmlDetectSAX2(ctxt);
13002
0
    ctxt->myDoc = doc;
13003
    /* parsing in context, i.e. as within existing content */
13004
0
    ctxt->input_id = 2;
13005
0
    ctxt->instate = XML_PARSER_CONTENT;
13006
13007
0
    fake = xmlNewDocComment(node->doc, NULL);
13008
0
    if (fake == NULL) {
13009
0
        xmlFreeParserCtxt(ctxt);
13010
0
  return(XML_ERR_NO_MEMORY);
13011
0
    }
13012
0
    xmlAddChild(node, fake);
13013
13014
0
    if (node->type == XML_ELEMENT_NODE) {
13015
0
  nodePush(ctxt, node);
13016
  /*
13017
   * initialize the SAX2 namespaces stack
13018
   */
13019
0
  cur = node;
13020
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13021
0
      xmlNsPtr ns = cur->nsDef;
13022
0
      const xmlChar *iprefix, *ihref;
13023
13024
0
      while (ns != NULL) {
13025
0
    if (ctxt->dict) {
13026
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13027
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13028
0
    } else {
13029
0
        iprefix = ns->prefix;
13030
0
        ihref = ns->href;
13031
0
    }
13032
13033
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13034
0
        nsPush(ctxt, iprefix, ihref);
13035
0
        nsnr++;
13036
0
    }
13037
0
    ns = ns->next;
13038
0
      }
13039
0
      cur = cur->parent;
13040
0
  }
13041
0
    }
13042
13043
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13044
  /*
13045
   * ID/IDREF registration will be done in xmlValidateElement below
13046
   */
13047
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13048
0
    }
13049
13050
0
#ifdef LIBXML_HTML_ENABLED
13051
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13052
0
        __htmlParseContent(ctxt);
13053
0
    else
13054
0
#endif
13055
0
  xmlParseContent(ctxt);
13056
13057
0
    nsPop(ctxt, nsnr);
13058
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13059
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13060
0
    } else if (RAW != 0) {
13061
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13062
0
    }
13063
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13064
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13065
0
  ctxt->wellFormed = 0;
13066
0
    }
13067
13068
0
    if (!ctxt->wellFormed) {
13069
0
        if (ctxt->errNo == 0)
13070
0
      ret = XML_ERR_INTERNAL_ERROR;
13071
0
  else
13072
0
      ret = (xmlParserErrors)ctxt->errNo;
13073
0
    } else {
13074
0
        ret = XML_ERR_OK;
13075
0
    }
13076
13077
    /*
13078
     * Return the newly created nodeset after unlinking it from
13079
     * the pseudo sibling.
13080
     */
13081
13082
0
    cur = fake->next;
13083
0
    fake->next = NULL;
13084
0
    node->last = fake;
13085
13086
0
    if (cur != NULL) {
13087
0
  cur->prev = NULL;
13088
0
    }
13089
13090
0
    *lst = cur;
13091
13092
0
    while (cur != NULL) {
13093
0
  cur->parent = NULL;
13094
0
  cur = cur->next;
13095
0
    }
13096
13097
0
    xmlUnlinkNode(fake);
13098
0
    xmlFreeNode(fake);
13099
13100
13101
0
    if (ret != XML_ERR_OK) {
13102
0
        xmlFreeNodeList(*lst);
13103
0
  *lst = NULL;
13104
0
    }
13105
13106
0
    if (doc->dict != NULL)
13107
0
        ctxt->dict = NULL;
13108
0
    xmlFreeParserCtxt(ctxt);
13109
13110
0
    return(ret);
13111
#else /* !SAX2 */
13112
    return(XML_ERR_INTERNAL_ERROR);
13113
#endif
13114
0
}
13115
13116
#ifdef LIBXML_SAX1_ENABLED
13117
/**
13118
 * xmlParseBalancedChunkMemoryRecover:
13119
 * @doc:  the document the chunk pertains to (must not be NULL)
13120
 * @sax:  the SAX handler block (possibly NULL)
13121
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13122
 * @depth:  Used for loop detection, use 0
13123
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13124
 * @lst:  the return value for the set of parsed nodes
13125
 * @recover: return nodes even if the data is broken (use 0)
13126
 *
13127
 *
13128
 * Parse a well-balanced chunk of an XML document
13129
 * called by the parser
13130
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13131
 * the content production in the XML grammar:
13132
 *
13133
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13134
 *
13135
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13136
 *    the parser error code otherwise
13137
 *
13138
 * In case recover is set to 1, the nodelist will not be empty even if
13139
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13140
 * some extent.
13141
 */
13142
int
13143
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13144
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13145
     int recover) {
13146
    xmlParserCtxtPtr ctxt;
13147
    xmlDocPtr newDoc;
13148
    xmlSAXHandlerPtr oldsax = NULL;
13149
    xmlNodePtr content, newRoot;
13150
    int size;
13151
    int ret = 0;
13152
13153
    if (depth > 40) {
13154
  return(XML_ERR_ENTITY_LOOP);
13155
    }
13156
13157
13158
    if (lst != NULL)
13159
        *lst = NULL;
13160
    if (string == NULL)
13161
        return(-1);
13162
13163
    size = xmlStrlen(string);
13164
13165
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13166
    if (ctxt == NULL) return(-1);
13167
    ctxt->userData = ctxt;
13168
    if (sax != NULL) {
13169
  oldsax = ctxt->sax;
13170
        ctxt->sax = sax;
13171
  if (user_data != NULL)
13172
      ctxt->userData = user_data;
13173
    }
13174
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13175
    if (newDoc == NULL) {
13176
  xmlFreeParserCtxt(ctxt);
13177
  return(-1);
13178
    }
13179
    newDoc->properties = XML_DOC_INTERNAL;
13180
    if ((doc != NULL) && (doc->dict != NULL)) {
13181
        xmlDictFree(ctxt->dict);
13182
  ctxt->dict = doc->dict;
13183
  xmlDictReference(ctxt->dict);
13184
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13185
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13186
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13187
  ctxt->dictNames = 1;
13188
    } else {
13189
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13190
    }
13191
    /* doc == NULL is only supported for historic reasons */
13192
    if (doc != NULL) {
13193
  newDoc->intSubset = doc->intSubset;
13194
  newDoc->extSubset = doc->extSubset;
13195
    }
13196
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13197
    if (newRoot == NULL) {
13198
  if (sax != NULL)
13199
      ctxt->sax = oldsax;
13200
  xmlFreeParserCtxt(ctxt);
13201
  newDoc->intSubset = NULL;
13202
  newDoc->extSubset = NULL;
13203
        xmlFreeDoc(newDoc);
13204
  return(-1);
13205
    }
13206
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13207
    nodePush(ctxt, newRoot);
13208
    /* doc == NULL is only supported for historic reasons */
13209
    if (doc == NULL) {
13210
  ctxt->myDoc = newDoc;
13211
    } else {
13212
  ctxt->myDoc = newDoc;
13213
  newDoc->children->doc = doc;
13214
  /* Ensure that doc has XML spec namespace */
13215
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13216
  newDoc->oldNs = doc->oldNs;
13217
    }
13218
    ctxt->instate = XML_PARSER_CONTENT;
13219
    ctxt->input_id = 2;
13220
    ctxt->depth = depth;
13221
13222
    /*
13223
     * Doing validity checking on chunk doesn't make sense
13224
     */
13225
    ctxt->validate = 0;
13226
    ctxt->loadsubset = 0;
13227
    xmlDetectSAX2(ctxt);
13228
13229
    if ( doc != NULL ){
13230
        content = doc->children;
13231
        doc->children = NULL;
13232
        xmlParseContent(ctxt);
13233
        doc->children = content;
13234
    }
13235
    else {
13236
        xmlParseContent(ctxt);
13237
    }
13238
    if ((RAW == '<') && (NXT(1) == '/')) {
13239
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13240
    } else if (RAW != 0) {
13241
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13242
    }
13243
    if (ctxt->node != newDoc->children) {
13244
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13245
    }
13246
13247
    if (!ctxt->wellFormed) {
13248
        if (ctxt->errNo == 0)
13249
      ret = 1;
13250
  else
13251
      ret = ctxt->errNo;
13252
    } else {
13253
      ret = 0;
13254
    }
13255
13256
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13257
  xmlNodePtr cur;
13258
13259
  /*
13260
   * Return the newly created nodeset after unlinking it from
13261
   * they pseudo parent.
13262
   */
13263
  cur = newDoc->children->children;
13264
  *lst = cur;
13265
  while (cur != NULL) {
13266
      xmlSetTreeDoc(cur, doc);
13267
      cur->parent = NULL;
13268
      cur = cur->next;
13269
  }
13270
  newDoc->children->children = NULL;
13271
    }
13272
13273
    if (sax != NULL)
13274
  ctxt->sax = oldsax;
13275
    xmlFreeParserCtxt(ctxt);
13276
    newDoc->intSubset = NULL;
13277
    newDoc->extSubset = NULL;
13278
    /* This leaks the namespace list if doc == NULL */
13279
    newDoc->oldNs = NULL;
13280
    xmlFreeDoc(newDoc);
13281
13282
    return(ret);
13283
}
13284
13285
/**
13286
 * xmlSAXParseEntity:
13287
 * @sax:  the SAX handler block
13288
 * @filename:  the filename
13289
 *
13290
 * DEPRECATED: Don't use.
13291
 *
13292
 * parse an XML external entity out of context and build a tree.
13293
 * It use the given SAX function block to handle the parsing callback.
13294
 * If sax is NULL, fallback to the default DOM tree building routines.
13295
 *
13296
 * [78] extParsedEnt ::= TextDecl? content
13297
 *
13298
 * This correspond to a "Well Balanced" chunk
13299
 *
13300
 * Returns the resulting document tree
13301
 */
13302
13303
xmlDocPtr
13304
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13305
    xmlDocPtr ret;
13306
    xmlParserCtxtPtr ctxt;
13307
13308
    ctxt = xmlCreateFileParserCtxt(filename);
13309
    if (ctxt == NULL) {
13310
  return(NULL);
13311
    }
13312
    if (sax != NULL) {
13313
  if (ctxt->sax != NULL)
13314
      xmlFree(ctxt->sax);
13315
        ctxt->sax = sax;
13316
        ctxt->userData = NULL;
13317
    }
13318
13319
    xmlParseExtParsedEnt(ctxt);
13320
13321
    if (ctxt->wellFormed)
13322
  ret = ctxt->myDoc;
13323
    else {
13324
        ret = NULL;
13325
        xmlFreeDoc(ctxt->myDoc);
13326
        ctxt->myDoc = NULL;
13327
    }
13328
    if (sax != NULL)
13329
        ctxt->sax = NULL;
13330
    xmlFreeParserCtxt(ctxt);
13331
13332
    return(ret);
13333
}
13334
13335
/**
13336
 * xmlParseEntity:
13337
 * @filename:  the filename
13338
 *
13339
 * parse an XML external entity out of context and build a tree.
13340
 *
13341
 * [78] extParsedEnt ::= TextDecl? content
13342
 *
13343
 * This correspond to a "Well Balanced" chunk
13344
 *
13345
 * Returns the resulting document tree
13346
 */
13347
13348
xmlDocPtr
13349
xmlParseEntity(const char *filename) {
13350
    return(xmlSAXParseEntity(NULL, filename));
13351
}
13352
#endif /* LIBXML_SAX1_ENABLED */
13353
13354
/**
13355
 * xmlCreateEntityParserCtxtInternal:
13356
 * @URL:  the entity URL
13357
 * @ID:  the entity PUBLIC ID
13358
 * @base:  a possible base for the target URI
13359
 * @pctx:  parser context used to set options on new context
13360
 *
13361
 * Create a parser context for an external entity
13362
 * Automatic support for ZLIB/Compress compressed document is provided
13363
 * by default if found at compile-time.
13364
 *
13365
 * Returns the new parser context or NULL
13366
 */
13367
static xmlParserCtxtPtr
13368
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13369
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13370
11.4k
        xmlParserCtxtPtr pctx) {
13371
11.4k
    xmlParserCtxtPtr ctxt;
13372
11.4k
    xmlParserInputPtr inputStream;
13373
11.4k
    char *directory = NULL;
13374
11.4k
    xmlChar *uri;
13375
13376
11.4k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13377
11.4k
    if (ctxt == NULL) {
13378
195
  return(NULL);
13379
195
    }
13380
13381
11.2k
    if (pctx != NULL) {
13382
11.2k
        ctxt->options = pctx->options;
13383
11.2k
        ctxt->_private = pctx->_private;
13384
11.2k
  ctxt->input_id = pctx->input_id;
13385
11.2k
    }
13386
13387
    /* Don't read from stdin. */
13388
11.2k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13389
10
        URL = BAD_CAST "./-";
13390
13391
11.2k
    uri = xmlBuildURI(URL, base);
13392
13393
11.2k
    if (uri == NULL) {
13394
4.70k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13395
4.70k
  if (inputStream == NULL) {
13396
2.39k
      xmlFreeParserCtxt(ctxt);
13397
2.39k
      return(NULL);
13398
2.39k
  }
13399
13400
2.31k
  inputPush(ctxt, inputStream);
13401
13402
2.31k
  if ((ctxt->directory == NULL) && (directory == NULL))
13403
2.31k
      directory = xmlParserGetDirectory((char *)URL);
13404
2.31k
  if ((ctxt->directory == NULL) && (directory != NULL))
13405
2.31k
      ctxt->directory = directory;
13406
6.57k
    } else {
13407
6.57k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13408
6.57k
  if (inputStream == NULL) {
13409
2.93k
      xmlFree(uri);
13410
2.93k
      xmlFreeParserCtxt(ctxt);
13411
2.93k
      return(NULL);
13412
2.93k
  }
13413
13414
3.64k
  inputPush(ctxt, inputStream);
13415
13416
3.64k
  if ((ctxt->directory == NULL) && (directory == NULL))
13417
3.64k
      directory = xmlParserGetDirectory((char *)uri);
13418
3.64k
  if ((ctxt->directory == NULL) && (directory != NULL))
13419
3.63k
      ctxt->directory = directory;
13420
3.64k
  xmlFree(uri);
13421
3.64k
    }
13422
5.95k
    return(ctxt);
13423
11.2k
}
13424
13425
/**
13426
 * xmlCreateEntityParserCtxt:
13427
 * @URL:  the entity URL
13428
 * @ID:  the entity PUBLIC ID
13429
 * @base:  a possible base for the target URI
13430
 *
13431
 * Create a parser context for an external entity
13432
 * Automatic support for ZLIB/Compress compressed document is provided
13433
 * by default if found at compile-time.
13434
 *
13435
 * Returns the new parser context or NULL
13436
 */
13437
xmlParserCtxtPtr
13438
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13439
0
                    const xmlChar *base) {
13440
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13441
13442
0
}
13443
13444
/************************************************************************
13445
 *                  *
13446
 *    Front ends when parsing from a file     *
13447
 *                  *
13448
 ************************************************************************/
13449
13450
/**
13451
 * xmlCreateURLParserCtxt:
13452
 * @filename:  the filename or URL
13453
 * @options:  a combination of xmlParserOption
13454
 *
13455
 * Create a parser context for a file or URL content.
13456
 * Automatic support for ZLIB/Compress compressed document is provided
13457
 * by default if found at compile-time and for file accesses
13458
 *
13459
 * Returns the new parser context or NULL
13460
 */
13461
xmlParserCtxtPtr
13462
xmlCreateURLParserCtxt(const char *filename, int options)
13463
0
{
13464
0
    xmlParserCtxtPtr ctxt;
13465
0
    xmlParserInputPtr inputStream;
13466
0
    char *directory = NULL;
13467
13468
0
    ctxt = xmlNewParserCtxt();
13469
0
    if (ctxt == NULL) {
13470
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13471
0
  return(NULL);
13472
0
    }
13473
13474
0
    if (options)
13475
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13476
0
    ctxt->linenumbers = 1;
13477
13478
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13479
0
    if (inputStream == NULL) {
13480
0
  xmlFreeParserCtxt(ctxt);
13481
0
  return(NULL);
13482
0
    }
13483
13484
0
    inputPush(ctxt, inputStream);
13485
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13486
0
        directory = xmlParserGetDirectory(filename);
13487
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13488
0
        ctxt->directory = directory;
13489
13490
0
    return(ctxt);
13491
0
}
13492
13493
/**
13494
 * xmlCreateFileParserCtxt:
13495
 * @filename:  the filename
13496
 *
13497
 * Create a parser context for a file content.
13498
 * Automatic support for ZLIB/Compress compressed document is provided
13499
 * by default if found at compile-time.
13500
 *
13501
 * Returns the new parser context or NULL
13502
 */
13503
xmlParserCtxtPtr
13504
xmlCreateFileParserCtxt(const char *filename)
13505
0
{
13506
0
    return(xmlCreateURLParserCtxt(filename, 0));
13507
0
}
13508
13509
#ifdef LIBXML_SAX1_ENABLED
13510
/**
13511
 * xmlSAXParseFileWithData:
13512
 * @sax:  the SAX handler block
13513
 * @filename:  the filename
13514
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13515
 *             documents
13516
 * @data:  the userdata
13517
 *
13518
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13519
 *
13520
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13521
 * compressed document is provided by default if found at compile-time.
13522
 * It use the given SAX function block to handle the parsing callback.
13523
 * If sax is NULL, fallback to the default DOM tree building routines.
13524
 *
13525
 * User data (void *) is stored within the parser context in the
13526
 * context's _private member, so it is available nearly everywhere in libxml
13527
 *
13528
 * Returns the resulting document tree
13529
 */
13530
13531
xmlDocPtr
13532
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13533
                        int recovery, void *data) {
13534
    xmlDocPtr ret;
13535
    xmlParserCtxtPtr ctxt;
13536
13537
    xmlInitParser();
13538
13539
    ctxt = xmlCreateFileParserCtxt(filename);
13540
    if (ctxt == NULL) {
13541
  return(NULL);
13542
    }
13543
    if (sax != NULL) {
13544
  if (ctxt->sax != NULL)
13545
      xmlFree(ctxt->sax);
13546
        ctxt->sax = sax;
13547
    }
13548
    xmlDetectSAX2(ctxt);
13549
    if (data!=NULL) {
13550
  ctxt->_private = data;
13551
    }
13552
13553
    if (ctxt->directory == NULL)
13554
        ctxt->directory = xmlParserGetDirectory(filename);
13555
13556
    ctxt->recovery = recovery;
13557
13558
    xmlParseDocument(ctxt);
13559
13560
    if ((ctxt->wellFormed) || recovery) {
13561
        ret = ctxt->myDoc;
13562
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13563
      if (ctxt->input->buf->compressed > 0)
13564
    ret->compression = 9;
13565
      else
13566
    ret->compression = ctxt->input->buf->compressed;
13567
  }
13568
    }
13569
    else {
13570
       ret = NULL;
13571
       xmlFreeDoc(ctxt->myDoc);
13572
       ctxt->myDoc = NULL;
13573
    }
13574
    if (sax != NULL)
13575
        ctxt->sax = NULL;
13576
    xmlFreeParserCtxt(ctxt);
13577
13578
    return(ret);
13579
}
13580
13581
/**
13582
 * xmlSAXParseFile:
13583
 * @sax:  the SAX handler block
13584
 * @filename:  the filename
13585
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13586
 *             documents
13587
 *
13588
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13589
 *
13590
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13591
 * compressed document is provided by default if found at compile-time.
13592
 * It use the given SAX function block to handle the parsing callback.
13593
 * If sax is NULL, fallback to the default DOM tree building routines.
13594
 *
13595
 * Returns the resulting document tree
13596
 */
13597
13598
xmlDocPtr
13599
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13600
                          int recovery) {
13601
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13602
}
13603
13604
/**
13605
 * xmlRecoverDoc:
13606
 * @cur:  a pointer to an array of xmlChar
13607
 *
13608
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13609
 *
13610
 * parse an XML in-memory document and build a tree.
13611
 * In the case the document is not Well Formed, a attempt to build a
13612
 * tree is tried anyway
13613
 *
13614
 * Returns the resulting document tree or NULL in case of failure
13615
 */
13616
13617
xmlDocPtr
13618
xmlRecoverDoc(const xmlChar *cur) {
13619
    return(xmlSAXParseDoc(NULL, cur, 1));
13620
}
13621
13622
/**
13623
 * xmlParseFile:
13624
 * @filename:  the filename
13625
 *
13626
 * DEPRECATED: Use xmlReadFile.
13627
 *
13628
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13629
 * compressed document is provided by default if found at compile-time.
13630
 *
13631
 * Returns the resulting document tree if the file was wellformed,
13632
 * NULL otherwise.
13633
 */
13634
13635
xmlDocPtr
13636
xmlParseFile(const char *filename) {
13637
    return(xmlSAXParseFile(NULL, filename, 0));
13638
}
13639
13640
/**
13641
 * xmlRecoverFile:
13642
 * @filename:  the filename
13643
 *
13644
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13645
 *
13646
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13647
 * compressed document is provided by default if found at compile-time.
13648
 * In the case the document is not Well Formed, it attempts to build
13649
 * a tree anyway
13650
 *
13651
 * Returns the resulting document tree or NULL in case of failure
13652
 */
13653
13654
xmlDocPtr
13655
xmlRecoverFile(const char *filename) {
13656
    return(xmlSAXParseFile(NULL, filename, 1));
13657
}
13658
13659
13660
/**
13661
 * xmlSetupParserForBuffer:
13662
 * @ctxt:  an XML parser context
13663
 * @buffer:  a xmlChar * buffer
13664
 * @filename:  a file name
13665
 *
13666
 * DEPRECATED: Don't use.
13667
 *
13668
 * Setup the parser context to parse a new buffer; Clears any prior
13669
 * contents from the parser context. The buffer parameter must not be
13670
 * NULL, but the filename parameter can be
13671
 */
13672
void
13673
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13674
                             const char* filename)
13675
{
13676
    xmlParserInputPtr input;
13677
13678
    if ((ctxt == NULL) || (buffer == NULL))
13679
        return;
13680
13681
    input = xmlNewInputStream(ctxt);
13682
    if (input == NULL) {
13683
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13684
        xmlClearParserCtxt(ctxt);
13685
        return;
13686
    }
13687
13688
    xmlClearParserCtxt(ctxt);
13689
    if (filename != NULL)
13690
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13691
    input->base = buffer;
13692
    input->cur = buffer;
13693
    input->end = &buffer[xmlStrlen(buffer)];
13694
    inputPush(ctxt, input);
13695
}
13696
13697
/**
13698
 * xmlSAXUserParseFile:
13699
 * @sax:  a SAX handler
13700
 * @user_data:  The user data returned on SAX callbacks
13701
 * @filename:  a file name
13702
 *
13703
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13704
 *
13705
 * parse an XML file and call the given SAX handler routines.
13706
 * Automatic support for ZLIB/Compress compressed document is provided
13707
 *
13708
 * Returns 0 in case of success or a error number otherwise
13709
 */
13710
int
13711
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13712
                    const char *filename) {
13713
    int ret = 0;
13714
    xmlParserCtxtPtr ctxt;
13715
13716
    ctxt = xmlCreateFileParserCtxt(filename);
13717
    if (ctxt == NULL) return -1;
13718
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13719
  xmlFree(ctxt->sax);
13720
    ctxt->sax = sax;
13721
    xmlDetectSAX2(ctxt);
13722
13723
    if (user_data != NULL)
13724
  ctxt->userData = user_data;
13725
13726
    xmlParseDocument(ctxt);
13727
13728
    if (ctxt->wellFormed)
13729
  ret = 0;
13730
    else {
13731
        if (ctxt->errNo != 0)
13732
      ret = ctxt->errNo;
13733
  else
13734
      ret = -1;
13735
    }
13736
    if (sax != NULL)
13737
  ctxt->sax = NULL;
13738
    if (ctxt->myDoc != NULL) {
13739
        xmlFreeDoc(ctxt->myDoc);
13740
  ctxt->myDoc = NULL;
13741
    }
13742
    xmlFreeParserCtxt(ctxt);
13743
13744
    return ret;
13745
}
13746
#endif /* LIBXML_SAX1_ENABLED */
13747
13748
/************************************************************************
13749
 *                  *
13750
 *    Front ends when parsing from memory     *
13751
 *                  *
13752
 ************************************************************************/
13753
13754
/**
13755
 * xmlCreateMemoryParserCtxt:
13756
 * @buffer:  a pointer to a char array
13757
 * @size:  the size of the array
13758
 *
13759
 * Create a parser context for an XML in-memory document.
13760
 *
13761
 * Returns the new parser context or NULL
13762
 */
13763
xmlParserCtxtPtr
13764
62.6k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13765
62.6k
    xmlParserCtxtPtr ctxt;
13766
62.6k
    xmlParserInputPtr input;
13767
62.6k
    xmlParserInputBufferPtr buf;
13768
13769
62.6k
    if (buffer == NULL)
13770
4
  return(NULL);
13771
62.6k
    if (size <= 0)
13772
34
  return(NULL);
13773
13774
62.5k
    ctxt = xmlNewParserCtxt();
13775
62.5k
    if (ctxt == NULL)
13776
38
  return(NULL);
13777
13778
62.5k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13779
62.5k
    if (buf == NULL) {
13780
2
  xmlFreeParserCtxt(ctxt);
13781
2
  return(NULL);
13782
2
    }
13783
13784
62.5k
    input = xmlNewInputStream(ctxt);
13785
62.5k
    if (input == NULL) {
13786
1
  xmlFreeParserInputBuffer(buf);
13787
1
  xmlFreeParserCtxt(ctxt);
13788
1
  return(NULL);
13789
1
    }
13790
13791
62.5k
    input->filename = NULL;
13792
62.5k
    input->buf = buf;
13793
62.5k
    xmlBufResetInput(input->buf->buffer, input);
13794
13795
62.5k
    inputPush(ctxt, input);
13796
62.5k
    return(ctxt);
13797
62.5k
}
13798
13799
#ifdef LIBXML_SAX1_ENABLED
13800
/**
13801
 * xmlSAXParseMemoryWithData:
13802
 * @sax:  the SAX handler block
13803
 * @buffer:  an pointer to a char array
13804
 * @size:  the size of the array
13805
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13806
 *             documents
13807
 * @data:  the userdata
13808
 *
13809
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13810
 *
13811
 * parse an XML in-memory block and use the given SAX function block
13812
 * to handle the parsing callback. If sax is NULL, fallback to the default
13813
 * DOM tree building routines.
13814
 *
13815
 * User data (void *) is stored within the parser context in the
13816
 * context's _private member, so it is available nearly everywhere in libxml
13817
 *
13818
 * Returns the resulting document tree
13819
 */
13820
13821
xmlDocPtr
13822
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13823
            int size, int recovery, void *data) {
13824
    xmlDocPtr ret;
13825
    xmlParserCtxtPtr ctxt;
13826
13827
    xmlInitParser();
13828
13829
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13830
    if (ctxt == NULL) return(NULL);
13831
    if (sax != NULL) {
13832
  if (ctxt->sax != NULL)
13833
      xmlFree(ctxt->sax);
13834
        ctxt->sax = sax;
13835
    }
13836
    xmlDetectSAX2(ctxt);
13837
    if (data!=NULL) {
13838
  ctxt->_private=data;
13839
    }
13840
13841
    ctxt->recovery = recovery;
13842
13843
    xmlParseDocument(ctxt);
13844
13845
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13846
    else {
13847
       ret = NULL;
13848
       xmlFreeDoc(ctxt->myDoc);
13849
       ctxt->myDoc = NULL;
13850
    }
13851
    if (sax != NULL)
13852
  ctxt->sax = NULL;
13853
    xmlFreeParserCtxt(ctxt);
13854
13855
    return(ret);
13856
}
13857
13858
/**
13859
 * xmlSAXParseMemory:
13860
 * @sax:  the SAX handler block
13861
 * @buffer:  an pointer to a char array
13862
 * @size:  the size of the array
13863
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13864
 *             documents
13865
 *
13866
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13867
 *
13868
 * parse an XML in-memory block and use the given SAX function block
13869
 * to handle the parsing callback. If sax is NULL, fallback to the default
13870
 * DOM tree building routines.
13871
 *
13872
 * Returns the resulting document tree
13873
 */
13874
xmlDocPtr
13875
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13876
            int size, int recovery) {
13877
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13878
}
13879
13880
/**
13881
 * xmlParseMemory:
13882
 * @buffer:  an pointer to a char array
13883
 * @size:  the size of the array
13884
 *
13885
 * DEPRECATED: Use xmlReadMemory.
13886
 *
13887
 * parse an XML in-memory block and build a tree.
13888
 *
13889
 * Returns the resulting document tree
13890
 */
13891
13892
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13893
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13894
}
13895
13896
/**
13897
 * xmlRecoverMemory:
13898
 * @buffer:  an pointer to a char array
13899
 * @size:  the size of the array
13900
 *
13901
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13902
 *
13903
 * parse an XML in-memory block and build a tree.
13904
 * In the case the document is not Well Formed, an attempt to
13905
 * build a tree is tried anyway
13906
 *
13907
 * Returns the resulting document tree or NULL in case of error
13908
 */
13909
13910
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13911
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13912
}
13913
13914
/**
13915
 * xmlSAXUserParseMemory:
13916
 * @sax:  a SAX handler
13917
 * @user_data:  The user data returned on SAX callbacks
13918
 * @buffer:  an in-memory XML document input
13919
 * @size:  the length of the XML document in bytes
13920
 *
13921
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13922
 *
13923
 * parse an XML in-memory buffer and call the given SAX handler routines.
13924
 *
13925
 * Returns 0 in case of success or a error number otherwise
13926
 */
13927
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13928
        const char *buffer, int size) {
13929
    int ret = 0;
13930
    xmlParserCtxtPtr ctxt;
13931
13932
    xmlInitParser();
13933
13934
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13935
    if (ctxt == NULL) return -1;
13936
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13937
        xmlFree(ctxt->sax);
13938
    ctxt->sax = sax;
13939
    xmlDetectSAX2(ctxt);
13940
13941
    if (user_data != NULL)
13942
  ctxt->userData = user_data;
13943
13944
    xmlParseDocument(ctxt);
13945
13946
    if (ctxt->wellFormed)
13947
  ret = 0;
13948
    else {
13949
        if (ctxt->errNo != 0)
13950
      ret = ctxt->errNo;
13951
  else
13952
      ret = -1;
13953
    }
13954
    if (sax != NULL)
13955
        ctxt->sax = NULL;
13956
    if (ctxt->myDoc != NULL) {
13957
        xmlFreeDoc(ctxt->myDoc);
13958
  ctxt->myDoc = NULL;
13959
    }
13960
    xmlFreeParserCtxt(ctxt);
13961
13962
    return ret;
13963
}
13964
#endif /* LIBXML_SAX1_ENABLED */
13965
13966
/**
13967
 * xmlCreateDocParserCtxt:
13968
 * @cur:  a pointer to an array of xmlChar
13969
 *
13970
 * Creates a parser context for an XML in-memory document.
13971
 *
13972
 * Returns the new parser context or NULL
13973
 */
13974
xmlParserCtxtPtr
13975
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
13976
0
    int len;
13977
13978
0
    if (cur == NULL)
13979
0
  return(NULL);
13980
0
    len = xmlStrlen(cur);
13981
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13982
0
}
13983
13984
#ifdef LIBXML_SAX1_ENABLED
13985
/**
13986
 * xmlSAXParseDoc:
13987
 * @sax:  the SAX handler block
13988
 * @cur:  a pointer to an array of xmlChar
13989
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13990
 *             documents
13991
 *
13992
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13993
 *
13994
 * parse an XML in-memory document and build a tree.
13995
 * It use the given SAX function block to handle the parsing callback.
13996
 * If sax is NULL, fallback to the default DOM tree building routines.
13997
 *
13998
 * Returns the resulting document tree
13999
 */
14000
14001
xmlDocPtr
14002
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14003
    xmlDocPtr ret;
14004
    xmlParserCtxtPtr ctxt;
14005
    xmlSAXHandlerPtr oldsax = NULL;
14006
14007
    if (cur == NULL) return(NULL);
14008
14009
14010
    ctxt = xmlCreateDocParserCtxt(cur);
14011
    if (ctxt == NULL) return(NULL);
14012
    if (sax != NULL) {
14013
        oldsax = ctxt->sax;
14014
        ctxt->sax = sax;
14015
        ctxt->userData = NULL;
14016
    }
14017
    xmlDetectSAX2(ctxt);
14018
14019
    xmlParseDocument(ctxt);
14020
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14021
    else {
14022
       ret = NULL;
14023
       xmlFreeDoc(ctxt->myDoc);
14024
       ctxt->myDoc = NULL;
14025
    }
14026
    if (sax != NULL)
14027
  ctxt->sax = oldsax;
14028
    xmlFreeParserCtxt(ctxt);
14029
14030
    return(ret);
14031
}
14032
14033
/**
14034
 * xmlParseDoc:
14035
 * @cur:  a pointer to an array of xmlChar
14036
 *
14037
 * DEPRECATED: Use xmlReadDoc.
14038
 *
14039
 * parse an XML in-memory document and build a tree.
14040
 *
14041
 * Returns the resulting document tree
14042
 */
14043
14044
xmlDocPtr
14045
xmlParseDoc(const xmlChar *cur) {
14046
    return(xmlSAXParseDoc(NULL, cur, 0));
14047
}
14048
#endif /* LIBXML_SAX1_ENABLED */
14049
14050
#ifdef LIBXML_LEGACY_ENABLED
14051
/************************************************************************
14052
 *                  *
14053
 *  Specific function to keep track of entities references    *
14054
 *  and used by the XSLT debugger         *
14055
 *                  *
14056
 ************************************************************************/
14057
14058
/*
 * Callback installed through xmlSetEntityReferenceFunc() and invoked by
 * xmlAddEntityReference(); NULL means no callback is registered.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14059
14060
/**
14061
 * xmlAddEntityReference:
14062
 * @ent : A valid entity
14063
 * @firstNode : A valid first node for children of entity
14064
 * @lastNode : A valid last node of children entity
14065
 *
14066
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14067
 */
14068
static void
14069
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14070
                      xmlNodePtr lastNode)
14071
{
14072
    if (xmlEntityRefFunc != NULL) {
14073
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14074
    }
14075
}
14076
14077
14078
/**
14079
 * xmlSetEntityReferenceFunc:
14080
 * @func: A valid function
14081
 *
14082
 * Set the function to call call back when a xml reference has been made
14083
 */
14084
void
14085
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14086
{
14087
    xmlEntityRefFunc = func;
14088
}
14089
#endif /* LIBXML_LEGACY_ENABLED */
14090
14091
/************************************************************************
14092
 *                  *
14093
 *        Miscellaneous       *
14094
 *                  *
14095
 ************************************************************************/
14096
14097
/*
 * Non-zero once xmlInitParser() has completed its initialization
 * sequence; reset to 0 at the end of xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
14098
14099
/**
14100
 * xmlInitParser:
14101
 *
14102
 * Initialization function for the XML parser.
14103
 * This is not reentrant. Call once before processing in case of
14104
 * use in multithreaded programs.
14105
 */
14106
14107
void
14108
52.7M
xmlInitParser(void) {
14109
    /*
14110
     * Note that the initialization code must not make memory allocations.
14111
     */
14112
52.7M
    if (xmlParserInitialized != 0)
14113
52.7M
  return;
14114
14115
4
#ifdef LIBXML_THREAD_ENABLED
14116
4
    __xmlGlobalInitMutexLock();
14117
4
    if (xmlParserInitialized == 0) {
14118
4
#endif
14119
#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
14120
        if (xmlFree == free)
14121
            atexit(xmlCleanupParser);
14122
#endif
14123
14124
4
  xmlInitThreadsInternal();
14125
4
  xmlInitGlobalsInternal();
14126
4
  xmlInitMemoryInternal();
14127
4
        __xmlInitializeDict();
14128
4
  xmlInitEncodingInternal();
14129
4
  xmlRegisterDefaultInputCallbacks();
14130
4
#ifdef LIBXML_OUTPUT_ENABLED
14131
4
  xmlRegisterDefaultOutputCallbacks();
14132
4
#endif /* LIBXML_OUTPUT_ENABLED */
14133
4
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
14134
4
  xmlInitXPathInternal();
14135
4
#endif
14136
4
  xmlParserInitialized = 1;
14137
4
#ifdef LIBXML_THREAD_ENABLED
14138
4
    }
14139
4
    __xmlGlobalInitMutexUnlock();
14140
4
#endif
14141
4
}
14142
14143
/**
14144
 * xmlCleanupParser:
14145
 *
14146
 * This function name is somewhat misleading. It does not clean up
14147
 * parser state, it cleans up memory allocated by the library itself.
14148
 * It is a cleanup function for the XML library. It tries to reclaim all
14149
 * related global memory allocated for the library processing.
14150
 * It doesn't deallocate any document related memory. One should
14151
 * call xmlCleanupParser() only when the process has finished using
14152
 * the library and all XML/HTML documents built with it.
14153
 * See also xmlInitParser() which has the opposite function of preparing
14154
 * the library for operations.
14155
 *
14156
 * WARNING: if your application is multithreaded or has plugin support
14157
 *          calling this may crash the application if another thread or
14158
 *          a plugin is still using libxml2. It's sometimes very hard to
14159
 *          guess if libxml2 is in use in the application, some libraries
14160
 *          or plugins may use it without notice. In case of doubt abstain
14161
 *          from calling this function or do it just before calling exit()
14162
 *          to avoid leak reports from valgrind !
14163
 */
14164
14165
void
14166
0
xmlCleanupParser(void) {
14167
0
    if (!xmlParserInitialized)
14168
0
  return;
14169
14170
0
    xmlCleanupCharEncodingHandlers();
14171
0
#ifdef LIBXML_CATALOG_ENABLED
14172
0
    xmlCatalogCleanup();
14173
0
#endif
14174
0
    xmlCleanupDictInternal();
14175
0
    xmlCleanupInputCallbacks();
14176
0
#ifdef LIBXML_OUTPUT_ENABLED
14177
0
    xmlCleanupOutputCallbacks();
14178
0
#endif
14179
#ifdef LIBXML_SCHEMAS_ENABLED
14180
    xmlSchemaCleanupTypes();
14181
    xmlRelaxNGCleanupTypes();
14182
#endif
14183
0
    xmlCleanupGlobalsInternal();
14184
0
    xmlCleanupThreadsInternal();
14185
0
    xmlCleanupMemoryInternal();
14186
0
    xmlParserInitialized = 0;
14187
0
}
14188
14189
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14190
    !defined(_WIN32)
14191
static void
14192
ATTRIBUTE_DESTRUCTOR
14193
0
xmlDestructor(void) {
14194
    /*
14195
     * Calling custom deallocation functions in a destructor can cause
14196
     * problems, for example with Nokogiri.
14197
     */
14198
0
    if (xmlFree == free)
14199
0
        xmlCleanupParser();
14200
0
}
14201
#endif
14202
14203
/************************************************************************
14204
 *                  *
14205
 *  New set (2.6.0) of simpler and more flexible APIs   *
14206
 *                  *
14207
 ************************************************************************/
14208
14209
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed. A NULL @str is
 * ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: the caller supplies the terminating semicolon and the
 * macro can be used safely in an unbraced if/else.
 */
#define DICT_FREE(str)                                              \
    do {                                                            \
        if ((str) && ((!dict) ||                                    \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))      \
            xmlFree((char *)(str));                                 \
    } while (0)
14220
14221
/**
14222
 * xmlCtxtReset:
14223
 * @ctxt: an XML parser context
14224
 *
14225
 * Reset a parser context
14226
 */
14227
void
14228
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14229
0
{
14230
0
    xmlParserInputPtr input;
14231
0
    xmlDictPtr dict;
14232
14233
0
    if (ctxt == NULL)
14234
0
        return;
14235
14236
0
    dict = ctxt->dict;
14237
14238
0
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14239
0
        xmlFreeInputStream(input);
14240
0
    }
14241
0
    ctxt->inputNr = 0;
14242
0
    ctxt->input = NULL;
14243
14244
0
    ctxt->spaceNr = 0;
14245
0
    if (ctxt->spaceTab != NULL) {
14246
0
  ctxt->spaceTab[0] = -1;
14247
0
  ctxt->space = &ctxt->spaceTab[0];
14248
0
    } else {
14249
0
        ctxt->space = NULL;
14250
0
    }
14251
14252
14253
0
    ctxt->nodeNr = 0;
14254
0
    ctxt->node = NULL;
14255
14256
0
    ctxt->nameNr = 0;
14257
0
    ctxt->name = NULL;
14258
14259
0
    ctxt->nsNr = 0;
14260
14261
0
    DICT_FREE(ctxt->version);
14262
0
    ctxt->version = NULL;
14263
0
    DICT_FREE(ctxt->encoding);
14264
0
    ctxt->encoding = NULL;
14265
0
    DICT_FREE(ctxt->directory);
14266
0
    ctxt->directory = NULL;
14267
0
    DICT_FREE(ctxt->extSubURI);
14268
0
    ctxt->extSubURI = NULL;
14269
0
    DICT_FREE(ctxt->extSubSystem);
14270
0
    ctxt->extSubSystem = NULL;
14271
0
    if (ctxt->myDoc != NULL)
14272
0
        xmlFreeDoc(ctxt->myDoc);
14273
0
    ctxt->myDoc = NULL;
14274
14275
0
    ctxt->standalone = -1;
14276
0
    ctxt->hasExternalSubset = 0;
14277
0
    ctxt->hasPErefs = 0;
14278
0
    ctxt->html = 0;
14279
0
    ctxt->external = 0;
14280
0
    ctxt->instate = XML_PARSER_START;
14281
0
    ctxt->token = 0;
14282
14283
0
    ctxt->wellFormed = 1;
14284
0
    ctxt->nsWellFormed = 1;
14285
0
    ctxt->disableSAX = 0;
14286
0
    ctxt->valid = 1;
14287
#if 0
14288
    ctxt->vctxt.userData = ctxt;
14289
    ctxt->vctxt.error = xmlParserValidityError;
14290
    ctxt->vctxt.warning = xmlParserValidityWarning;
14291
#endif
14292
0
    ctxt->record_info = 0;
14293
0
    ctxt->checkIndex = 0;
14294
0
    ctxt->endCheckState = 0;
14295
0
    ctxt->inSubset = 0;
14296
0
    ctxt->errNo = XML_ERR_OK;
14297
0
    ctxt->depth = 0;
14298
0
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14299
0
    ctxt->catalogs = NULL;
14300
0
    ctxt->sizeentities = 0;
14301
0
    ctxt->sizeentcopy = 0;
14302
0
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14303
14304
0
    if (ctxt->attsDefault != NULL) {
14305
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14306
0
        ctxt->attsDefault = NULL;
14307
0
    }
14308
0
    if (ctxt->attsSpecial != NULL) {
14309
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14310
0
        ctxt->attsSpecial = NULL;
14311
0
    }
14312
14313
0
#ifdef LIBXML_CATALOG_ENABLED
14314
0
    if (ctxt->catalogs != NULL)
14315
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14316
0
#endif
14317
0
    ctxt->nbErrors = 0;
14318
0
    ctxt->nbWarnings = 0;
14319
0
    if (ctxt->lastError.code != XML_ERR_OK)
14320
0
        xmlResetError(&ctxt->lastError);
14321
0
}
14322
14323
/**
14324
 * xmlCtxtResetPush:
14325
 * @ctxt: an XML parser context
14326
 * @chunk:  a pointer to an array of chars
14327
 * @size:  number of chars in the array
14328
 * @filename:  an optional file name or URI
14329
 * @encoding:  the document encoding, or NULL
14330
 *
14331
 * Reset a push parser context
14332
 *
14333
 * Returns 0 in case of success and 1 in case of error
14334
 */
14335
int
14336
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14337
                 int size, const char *filename, const char *encoding)
14338
0
{
14339
0
    xmlParserInputPtr inputStream;
14340
0
    xmlParserInputBufferPtr buf;
14341
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14342
14343
0
    if (ctxt == NULL)
14344
0
        return(1);
14345
14346
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14347
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14348
14349
0
    buf = xmlAllocParserInputBuffer(enc);
14350
0
    if (buf == NULL)
14351
0
        return(1);
14352
14353
0
    if (ctxt == NULL) {
14354
0
        xmlFreeParserInputBuffer(buf);
14355
0
        return(1);
14356
0
    }
14357
14358
0
    xmlCtxtReset(ctxt);
14359
14360
0
    if (filename == NULL) {
14361
0
        ctxt->directory = NULL;
14362
0
    } else {
14363
0
        ctxt->directory = xmlParserGetDirectory(filename);
14364
0
    }
14365
14366
0
    inputStream = xmlNewInputStream(ctxt);
14367
0
    if (inputStream == NULL) {
14368
0
        xmlFreeParserInputBuffer(buf);
14369
0
        return(1);
14370
0
    }
14371
14372
0
    if (filename == NULL)
14373
0
        inputStream->filename = NULL;
14374
0
    else
14375
0
        inputStream->filename = (char *)
14376
0
            xmlCanonicPath((const xmlChar *) filename);
14377
0
    inputStream->buf = buf;
14378
0
    xmlBufResetInput(buf->buffer, inputStream);
14379
14380
0
    inputPush(ctxt, inputStream);
14381
14382
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14383
0
        (ctxt->input->buf != NULL)) {
14384
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14385
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14386
0
        int res;
14387
14388
0
        res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14389
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14390
0
        if (res < 0) {
14391
0
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
14392
0
            xmlHaltParser(ctxt);
14393
0
            return(1);
14394
0
        }
14395
#ifdef DEBUG_PUSH
14396
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14397
#endif
14398
0
    }
14399
14400
0
    if (encoding != NULL) {
14401
0
        xmlCharEncodingHandlerPtr hdlr;
14402
14403
0
        if (ctxt->encoding != NULL)
14404
0
      xmlFree((xmlChar *) ctxt->encoding);
14405
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14406
14407
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14408
0
        if (hdlr != NULL) {
14409
0
            xmlSwitchToEncoding(ctxt, hdlr);
14410
0
  } else {
14411
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14412
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14413
0
        }
14414
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14415
0
        xmlSwitchEncoding(ctxt, enc);
14416
0
    }
14417
14418
0
    return(0);
14419
0
}
14420
14421
14422
/**
14423
 * xmlCtxtUseOptionsInternal:
14424
 * @ctxt: an XML parser context
14425
 * @options:  a combination of xmlParserOption
14426
 * @encoding:  the user provided encoding to use
14427
 *
14428
 * Applies the options to the parser context
14429
 *
14430
 * Returns 0 in case of success, the set of unknown or unimplemented options
14431
 *         in case of error.
14432
 */
14433
static int
14434
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14435
406k
{
14436
406k
    if (ctxt == NULL)
14437
0
        return(-1);
14438
406k
    if (encoding != NULL) {
14439
0
        if (ctxt->encoding != NULL)
14440
0
      xmlFree((xmlChar *) ctxt->encoding);
14441
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14442
0
    }
14443
406k
    if (options & XML_PARSE_RECOVER) {
14444
406
        ctxt->recovery = 1;
14445
406
        options -= XML_PARSE_RECOVER;
14446
406
  ctxt->options |= XML_PARSE_RECOVER;
14447
406
    } else
14448
406k
        ctxt->recovery = 0;
14449
406k
    if (options & XML_PARSE_DTDLOAD) {
14450
406k
        ctxt->loadsubset = XML_DETECT_IDS;
14451
406k
        options -= XML_PARSE_DTDLOAD;
14452
406k
  ctxt->options |= XML_PARSE_DTDLOAD;
14453
406k
    } else
14454
406
        ctxt->loadsubset = 0;
14455
406k
    if (options & XML_PARSE_DTDATTR) {
14456
406k
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14457
406k
        options -= XML_PARSE_DTDATTR;
14458
406k
  ctxt->options |= XML_PARSE_DTDATTR;
14459
406k
    }
14460
406k
    if (options & XML_PARSE_NOENT) {
14461
406k
        ctxt->replaceEntities = 1;
14462
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14463
406k
        options -= XML_PARSE_NOENT;
14464
406k
  ctxt->options |= XML_PARSE_NOENT;
14465
406k
    } else
14466
406
        ctxt->replaceEntities = 0;
14467
406k
    if (options & XML_PARSE_PEDANTIC) {
14468
0
        ctxt->pedantic = 1;
14469
0
        options -= XML_PARSE_PEDANTIC;
14470
0
  ctxt->options |= XML_PARSE_PEDANTIC;
14471
0
    } else
14472
406k
        ctxt->pedantic = 0;
14473
406k
    if (options & XML_PARSE_NOBLANKS) {
14474
0
        ctxt->keepBlanks = 0;
14475
0
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14476
0
        options -= XML_PARSE_NOBLANKS;
14477
0
  ctxt->options |= XML_PARSE_NOBLANKS;
14478
0
    } else
14479
406k
        ctxt->keepBlanks = 1;
14480
406k
    if (options & XML_PARSE_DTDVALID) {
14481
0
        ctxt->validate = 1;
14482
0
        if (options & XML_PARSE_NOWARNING)
14483
0
            ctxt->vctxt.warning = NULL;
14484
0
        if (options & XML_PARSE_NOERROR)
14485
0
            ctxt->vctxt.error = NULL;
14486
0
        options -= XML_PARSE_DTDVALID;
14487
0
  ctxt->options |= XML_PARSE_DTDVALID;
14488
0
    } else
14489
406k
        ctxt->validate = 0;
14490
406k
    if (options & XML_PARSE_NOWARNING) {
14491
0
        ctxt->sax->warning = NULL;
14492
0
        options -= XML_PARSE_NOWARNING;
14493
0
    }
14494
406k
    if (options & XML_PARSE_NOERROR) {
14495
0
        ctxt->sax->error = NULL;
14496
0
        ctxt->sax->fatalError = NULL;
14497
0
        options -= XML_PARSE_NOERROR;
14498
0
    }
14499
#ifdef LIBXML_SAX1_ENABLED
14500
    if (options & XML_PARSE_SAX1) {
14501
        ctxt->sax->startElementNs = NULL;
14502
        ctxt->sax->endElementNs = NULL;
14503
        ctxt->sax->initialized = 1;
14504
        options -= XML_PARSE_SAX1;
14505
  ctxt->options |= XML_PARSE_SAX1;
14506
    }
14507
#endif /* LIBXML_SAX1_ENABLED */
14508
406k
    if (options & XML_PARSE_NODICT) {
14509
0
        ctxt->dictNames = 0;
14510
0
        options -= XML_PARSE_NODICT;
14511
0
  ctxt->options |= XML_PARSE_NODICT;
14512
406k
    } else {
14513
406k
        ctxt->dictNames = 1;
14514
406k
    }
14515
406k
    if (options & XML_PARSE_NOCDATA) {
14516
406k
        ctxt->sax->cdataBlock = NULL;
14517
406k
        options -= XML_PARSE_NOCDATA;
14518
406k
  ctxt->options |= XML_PARSE_NOCDATA;
14519
406k
    }
14520
406k
    if (options & XML_PARSE_NSCLEAN) {
14521
0
  ctxt->options |= XML_PARSE_NSCLEAN;
14522
0
        options -= XML_PARSE_NSCLEAN;
14523
0
    }
14524
406k
    if (options & XML_PARSE_NONET) {
14525
0
  ctxt->options |= XML_PARSE_NONET;
14526
0
        options -= XML_PARSE_NONET;
14527
0
    }
14528
406k
    if (options & XML_PARSE_COMPACT) {
14529
0
  ctxt->options |= XML_PARSE_COMPACT;
14530
0
        options -= XML_PARSE_COMPACT;
14531
0
    }
14532
406k
    if (options & XML_PARSE_OLD10) {
14533
0
  ctxt->options |= XML_PARSE_OLD10;
14534
0
        options -= XML_PARSE_OLD10;
14535
0
    }
14536
406k
    if (options & XML_PARSE_NOBASEFIX) {
14537
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
14538
0
        options -= XML_PARSE_NOBASEFIX;
14539
0
    }
14540
406k
    if (options & XML_PARSE_HUGE) {
14541
0
  ctxt->options |= XML_PARSE_HUGE;
14542
0
        options -= XML_PARSE_HUGE;
14543
0
        if (ctxt->dict != NULL)
14544
0
            xmlDictSetLimit(ctxt->dict, 0);
14545
0
    }
14546
406k
    if (options & XML_PARSE_OLDSAX) {
14547
0
  ctxt->options |= XML_PARSE_OLDSAX;
14548
0
        options -= XML_PARSE_OLDSAX;
14549
0
    }
14550
406k
    if (options & XML_PARSE_IGNORE_ENC) {
14551
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14552
0
        options -= XML_PARSE_IGNORE_ENC;
14553
0
    }
14554
406k
    if (options & XML_PARSE_BIG_LINES) {
14555
0
  ctxt->options |= XML_PARSE_BIG_LINES;
14556
0
        options -= XML_PARSE_BIG_LINES;
14557
0
    }
14558
406k
    ctxt->linenumbers = 1;
14559
406k
    return (options);
14560
406k
}
14561
14562
/**
14563
 * xmlCtxtUseOptions:
14564
 * @ctxt: an XML parser context
14565
 * @options:  a combination of xmlParserOption
14566
 *
14567
 * Applies the options to the parser context
14568
 *
14569
 * Returns 0 in case of success, the set of unknown or unimplemented options
14570
 *         in case of error.
14571
 */
14572
int
14573
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14574
347k
{
14575
347k
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14576
347k
}
14577
14578
/**
14579
 * xmlDoRead:
14580
 * @ctxt:  an XML parser context
14581
 * @URL:  the base URL to use for the document
14582
 * @encoding:  the document encoding, or NULL
14583
 * @options:  a combination of xmlParserOption
14584
 * @reuse:  keep the context for reuse
14585
 *
14586
 * Common front-end for the xmlRead functions
14587
 *
14588
 * Returns the resulting document tree or NULL
14589
 */
14590
static xmlDocPtr
14591
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14592
          int options, int reuse)
14593
59.5k
{
14594
59.5k
    xmlDocPtr ret;
14595
14596
59.5k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14597
59.5k
    if (encoding != NULL) {
14598
0
        xmlCharEncodingHandlerPtr hdlr;
14599
14600
        /*
14601
         * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14602
         * caller provided an encoding. Otherwise, we might switch to
14603
         * the encoding from the XML declaration which is likely to
14604
         * break things. Also see xmlSwitchInputEncoding.
14605
         */
14606
0
  hdlr = xmlFindCharEncodingHandler(encoding);
14607
0
  if (hdlr != NULL)
14608
0
      xmlSwitchToEncoding(ctxt, hdlr);
14609
0
    }
14610
59.5k
    if ((URL != NULL) && (ctxt->input != NULL) &&
14611
59.5k
        (ctxt->input->filename == NULL))
14612
59.1k
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14613
59.5k
    xmlParseDocument(ctxt);
14614
59.5k
    if ((ctxt->wellFormed) || ctxt->recovery)
14615
47.0k
        ret = ctxt->myDoc;
14616
12.5k
    else {
14617
12.5k
        ret = NULL;
14618
12.5k
  if (ctxt->myDoc != NULL) {
14619
11.2k
      xmlFreeDoc(ctxt->myDoc);
14620
11.2k
  }
14621
12.5k
    }
14622
59.5k
    ctxt->myDoc = NULL;
14623
59.5k
    if (!reuse) {
14624
59.5k
  xmlFreeParserCtxt(ctxt);
14625
59.5k
    }
14626
14627
59.5k
    return (ret);
14628
59.5k
}
14629
14630
/**
14631
 * xmlReadDoc:
14632
 * @cur:  a pointer to a zero terminated string
14633
 * @URL:  the base URL to use for the document
14634
 * @encoding:  the document encoding, or NULL
14635
 * @options:  a combination of xmlParserOption
14636
 *
14637
 * parse an XML in-memory document and build a tree.
14638
 *
14639
 * Returns the resulting document tree
14640
 */
14641
xmlDocPtr
14642
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14643
0
{
14644
0
    xmlParserCtxtPtr ctxt;
14645
14646
0
    if (cur == NULL)
14647
0
        return (NULL);
14648
0
    xmlInitParser();
14649
14650
0
    ctxt = xmlCreateDocParserCtxt(cur);
14651
0
    if (ctxt == NULL)
14652
0
        return (NULL);
14653
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14654
0
}
14655
14656
/**
14657
 * xmlReadFile:
14658
 * @filename:  a file or URL
14659
 * @encoding:  the document encoding, or NULL
14660
 * @options:  a combination of xmlParserOption
14661
 *
14662
 * parse an XML file from the filesystem or the network.
14663
 *
14664
 * Returns the resulting document tree
14665
 */
14666
xmlDocPtr
14667
xmlReadFile(const char *filename, const char *encoding, int options)
14668
0
{
14669
0
    xmlParserCtxtPtr ctxt;
14670
14671
0
    xmlInitParser();
14672
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
14673
0
    if (ctxt == NULL)
14674
0
        return (NULL);
14675
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14676
0
}
14677
14678
/**
14679
 * xmlReadMemory:
14680
 * @buffer:  a pointer to a char array
14681
 * @size:  the size of the array
14682
 * @URL:  the base URL to use for the document
14683
 * @encoding:  the document encoding, or NULL
14684
 * @options:  a combination of xmlParserOption
14685
 *
14686
 * parse an XML in-memory document and build a tree.
14687
 *
14688
 * Returns the resulting document tree
14689
 */
14690
xmlDocPtr
14691
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14692
59.6k
{
14693
59.6k
    xmlParserCtxtPtr ctxt;
14694
14695
59.6k
    xmlInitParser();
14696
59.6k
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14697
59.6k
    if (ctxt == NULL)
14698
32
        return (NULL);
14699
59.5k
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14700
59.6k
}
14701
14702
/**
14703
 * xmlReadFd:
14704
 * @fd:  an open file descriptor
14705
 * @URL:  the base URL to use for the document
14706
 * @encoding:  the document encoding, or NULL
14707
 * @options:  a combination of xmlParserOption
14708
 *
14709
 * parse an XML from a file descriptor and build a tree.
14710
 * NOTE that the file descriptor will not be closed when the
14711
 *      reader is closed or reset.
14712
 *
14713
 * Returns the resulting document tree
14714
 */
14715
xmlDocPtr
14716
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14717
0
{
14718
0
    xmlParserCtxtPtr ctxt;
14719
0
    xmlParserInputBufferPtr input;
14720
0
    xmlParserInputPtr stream;
14721
14722
0
    if (fd < 0)
14723
0
        return (NULL);
14724
0
    xmlInitParser();
14725
14726
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14727
0
    if (input == NULL)
14728
0
        return (NULL);
14729
0
    input->closecallback = NULL;
14730
0
    ctxt = xmlNewParserCtxt();
14731
0
    if (ctxt == NULL) {
14732
0
        xmlFreeParserInputBuffer(input);
14733
0
        return (NULL);
14734
0
    }
14735
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14736
0
    if (stream == NULL) {
14737
0
        xmlFreeParserInputBuffer(input);
14738
0
  xmlFreeParserCtxt(ctxt);
14739
0
        return (NULL);
14740
0
    }
14741
0
    inputPush(ctxt, stream);
14742
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14743
0
}
14744
14745
/**
14746
 * xmlReadIO:
14747
 * @ioread:  an I/O read function
14748
 * @ioclose:  an I/O close function
14749
 * @ioctx:  an I/O handler
14750
 * @URL:  the base URL to use for the document
14751
 * @encoding:  the document encoding, or NULL
14752
 * @options:  a combination of xmlParserOption
14753
 *
14754
 * parse an XML document from I/O functions and source and build a tree.
14755
 *
14756
 * Returns the resulting document tree
14757
 */
14758
xmlDocPtr
14759
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14760
          void *ioctx, const char *URL, const char *encoding, int options)
14761
0
{
14762
0
    xmlParserCtxtPtr ctxt;
14763
0
    xmlParserInputBufferPtr input;
14764
0
    xmlParserInputPtr stream;
14765
14766
0
    if (ioread == NULL)
14767
0
        return (NULL);
14768
0
    xmlInitParser();
14769
14770
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14771
0
                                         XML_CHAR_ENCODING_NONE);
14772
0
    if (input == NULL) {
14773
0
        if (ioclose != NULL)
14774
0
            ioclose(ioctx);
14775
0
        return (NULL);
14776
0
    }
14777
0
    ctxt = xmlNewParserCtxt();
14778
0
    if (ctxt == NULL) {
14779
0
        xmlFreeParserInputBuffer(input);
14780
0
        return (NULL);
14781
0
    }
14782
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14783
0
    if (stream == NULL) {
14784
0
        xmlFreeParserInputBuffer(input);
14785
0
  xmlFreeParserCtxt(ctxt);
14786
0
        return (NULL);
14787
0
    }
14788
0
    inputPush(ctxt, stream);
14789
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14790
0
}
14791
14792
/**
14793
 * xmlCtxtReadDoc:
14794
 * @ctxt:  an XML parser context
14795
 * @cur:  a pointer to a zero terminated string
14796
 * @URL:  the base URL to use for the document
14797
 * @encoding:  the document encoding, or NULL
14798
 * @options:  a combination of xmlParserOption
14799
 *
14800
 * parse an XML in-memory document and build a tree.
14801
 * This reuses the existing @ctxt parser context
14802
 *
14803
 * Returns the resulting document tree
14804
 */
14805
xmlDocPtr
14806
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14807
               const char *URL, const char *encoding, int options)
14808
0
{
14809
0
    if (cur == NULL)
14810
0
        return (NULL);
14811
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
14812
0
                              encoding, options));
14813
0
}
14814
14815
/**
14816
 * xmlCtxtReadFile:
14817
 * @ctxt:  an XML parser context
14818
 * @filename:  a file or URL
14819
 * @encoding:  the document encoding, or NULL
14820
 * @options:  a combination of xmlParserOption
14821
 *
14822
 * parse an XML file from the filesystem or the network.
14823
 * This reuses the existing @ctxt parser context
14824
 *
14825
 * Returns the resulting document tree
14826
 */
14827
xmlDocPtr
14828
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14829
                const char *encoding, int options)
14830
0
{
14831
0
    xmlParserInputPtr stream;
14832
14833
0
    if (filename == NULL)
14834
0
        return (NULL);
14835
0
    if (ctxt == NULL)
14836
0
        return (NULL);
14837
0
    xmlInitParser();
14838
14839
0
    xmlCtxtReset(ctxt);
14840
14841
0
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14842
0
    if (stream == NULL) {
14843
0
        return (NULL);
14844
0
    }
14845
0
    inputPush(ctxt, stream);
14846
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14847
0
}
14848
14849
/**
14850
 * xmlCtxtReadMemory:
14851
 * @ctxt:  an XML parser context
14852
 * @buffer:  a pointer to a char array
14853
 * @size:  the size of the array
14854
 * @URL:  the base URL to use for the document
14855
 * @encoding:  the document encoding, or NULL
14856
 * @options:  a combination of xmlParserOption
14857
 *
14858
 * parse an XML in-memory document and build a tree.
14859
 * This reuses the existing @ctxt parser context
14860
 *
14861
 * Returns the resulting document tree
14862
 */
14863
xmlDocPtr
14864
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14865
                  const char *URL, const char *encoding, int options)
14866
0
{
14867
0
    xmlParserInputBufferPtr input;
14868
0
    xmlParserInputPtr stream;
14869
14870
0
    if (ctxt == NULL)
14871
0
        return (NULL);
14872
0
    if (buffer == NULL)
14873
0
        return (NULL);
14874
0
    xmlInitParser();
14875
14876
0
    xmlCtxtReset(ctxt);
14877
14878
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14879
0
    if (input == NULL) {
14880
0
  return(NULL);
14881
0
    }
14882
14883
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14884
0
    if (stream == NULL) {
14885
0
  xmlFreeParserInputBuffer(input);
14886
0
  return(NULL);
14887
0
    }
14888
14889
0
    inputPush(ctxt, stream);
14890
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
14891
0
}
14892
14893
/**
14894
 * xmlCtxtReadFd:
14895
 * @ctxt:  an XML parser context
14896
 * @fd:  an open file descriptor
14897
 * @URL:  the base URL to use for the document
14898
 * @encoding:  the document encoding, or NULL
14899
 * @options:  a combination of xmlParserOption
14900
 *
14901
 * parse an XML from a file descriptor and build a tree.
14902
 * This reuses the existing @ctxt parser context
14903
 * NOTE that the file descriptor will not be closed when the
14904
 *      reader is closed or reset.
14905
 *
14906
 * Returns the resulting document tree
14907
 */
14908
xmlDocPtr
14909
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14910
              const char *URL, const char *encoding, int options)
14911
0
{
14912
0
    xmlParserInputBufferPtr input;
14913
0
    xmlParserInputPtr stream;
14914
14915
0
    if (fd < 0)
14916
0
        return (NULL);
14917
0
    if (ctxt == NULL)
14918
0
        return (NULL);
14919
0
    xmlInitParser();
14920
14921
0
    xmlCtxtReset(ctxt);
14922
14923
14924
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14925
0
    if (input == NULL)
14926
0
        return (NULL);
14927
0
    input->closecallback = NULL;
14928
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14929
0
    if (stream == NULL) {
14930
0
        xmlFreeParserInputBuffer(input);
14931
0
        return (NULL);
14932
0
    }
14933
0
    inputPush(ctxt, stream);
14934
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
14935
0
}
14936
14937
/**
14938
 * xmlCtxtReadIO:
14939
 * @ctxt:  an XML parser context
14940
 * @ioread:  an I/O read function
14941
 * @ioclose:  an I/O close function
14942
 * @ioctx:  an I/O handler
14943
 * @URL:  the base URL to use for the document
14944
 * @encoding:  the document encoding, or NULL
14945
 * @options:  a combination of xmlParserOption
14946
 *
14947
 * parse an XML document from I/O functions and source and build a tree.
14948
 * This reuses the existing @ctxt parser context
14949
 *
14950
 * Returns the resulting document tree
14951
 */
14952
xmlDocPtr
14953
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14954
              xmlInputCloseCallback ioclose, void *ioctx,
14955
        const char *URL,
14956
              const char *encoding, int options)
14957
0
{
14958
0
    xmlParserInputBufferPtr input;
14959
0
    xmlParserInputPtr stream;
14960
14961
0
    if (ioread == NULL)
14962
0
        return (NULL);
14963
0
    if (ctxt == NULL)
14964
0
        return (NULL);
14965
0
    xmlInitParser();
14966
14967
0
    xmlCtxtReset(ctxt);
14968
14969
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14970
0
                                         XML_CHAR_ENCODING_NONE);
14971
0
    if (input == NULL) {
14972
0
        if (ioclose != NULL)
14973
0
            ioclose(ioctx);
14974
0
        return (NULL);
14975
0
    }
14976
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14977
0
    if (stream == NULL) {
14978
0
        xmlFreeParserInputBuffer(input);
14979
0
        return (NULL);
14980
0
    }
14981
0
    inputPush(ctxt, stream);
14982
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
14983
0
}
14984