Coverage Report

Created: 2023-06-07 06:51

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/xmlmemory.h>
55
#include <libxml/threads.h>
56
#include <libxml/globals.h>
57
#include <libxml/tree.h>
58
#include <libxml/parser.h>
59
#include <libxml/parserInternals.h>
60
#include <libxml/HTMLparser.h>
61
#include <libxml/valid.h>
62
#include <libxml/entities.h>
63
#include <libxml/xmlerror.h>
64
#include <libxml/encoding.h>
65
#include <libxml/xmlIO.h>
66
#include <libxml/uri.h>
67
#ifdef LIBXML_CATALOG_ENABLED
68
#include <libxml/catalog.h>
69
#endif
70
#ifdef LIBXML_SCHEMAS_ENABLED
71
#include <libxml/xmlschemastypes.h>
72
#include <libxml/relaxng.h>
73
#endif
74
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
75
#include <libxml/xpath.h>
76
#endif
77
78
#include "private/buf.h"
79
#include "private/dict.h"
80
#include "private/enc.h"
81
#include "private/entities.h"
82
#include "private/error.h"
83
#include "private/globals.h"
84
#include "private/html.h"
85
#include "private/io.h"
86
#include "private/memory.h"
87
#include "private/parser.h"
88
#include "private/threads.h"
89
#include "private/xpath.h"
90
91
struct _xmlStartTag {
92
    const xmlChar *prefix;
93
    const xmlChar *URI;
94
    int line;
95
    int nsNr;
96
};
97
98
static xmlParserCtxtPtr
99
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
100
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
101
        xmlParserCtxtPtr pctx);
102
103
static int
104
xmlParseElementStart(xmlParserCtxtPtr ctxt);
105
106
static void
107
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
108
109
/************************************************************************
110
 *                  *
111
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
112
 *                  *
113
 ************************************************************************/
114
115
#define XML_PARSER_BIG_ENTITY 1000
116
#define XML_PARSER_LOT_ENTITY 5000
117
118
/*
119
 * Constants for protection against abusive entity expansion
120
 * ("billion laughs").
121
 */
122
123
/*
124
 * XML_PARSER_NON_LINEAR is roughly the maximum allowed amplification factor
125
 * of serialized output after entity expansion.
126
 */
127
384
#define XML_PARSER_NON_LINEAR 5
128
129
/*
130
 * A certain amount is always allowed.
131
 */
132
173k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
133
134
/*
135
 * Fixed cost for each entity reference. This crudely models processing time
136
 * as well to protect, for example, against exponential expansion of empty
137
 * or very short entities.
138
 */
139
173k
#define XML_ENT_FIXED_COST 20
140
141
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents the parser accepts.
 * This is a safety boundary rather than a limitation of the parser, and
 * it can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
150
151
152
153
#define SAX2 1
154
9.61M
#define XML_PARSER_BIG_BUFFER_SIZE 300
155
107M
#define XML_PARSER_BUFFER_SIZE 100
156
37.9k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
157
158
/**
159
 * XML_PARSER_CHUNK_SIZE
160
 *
161
 * When calling GROW that's the minimal amount of data
162
 * the parser expects to have received. It is not a hard
163
 * limit but an optimization when reading strings like Names
164
 * It is not strictly needed as long as the input's available characters
165
 * are followed by 0, which should be provided by the I/O level
166
 */
167
#define XML_PARSER_CHUNK_SIZE 100
168
169
/*
 * Names of the "xml"-prefixed processing instructions allowed by W3C
 * specifications. The table is terminated by a NULL entry.
 */
static const char* const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
178
179
180
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
181
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
182
                                              const xmlChar **str);
183
184
static xmlParserErrors
185
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
186
                xmlSAXHandlerPtr sax,
187
          void *user_data, int depth, const xmlChar *URL,
188
          const xmlChar *ID, xmlNodePtr *list);
189
190
static int
191
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
192
                          const char *encoding);
193
#ifdef LIBXML_LEGACY_ENABLED
194
static void
195
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
196
                      xmlNodePtr lastNode);
197
#endif /* LIBXML_LEGACY_ENABLED */
198
199
static xmlParserErrors
200
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
201
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
202
203
static int
204
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
205
206
/************************************************************************
207
 *                  *
208
 *    Some factorized error routines        *
209
 *                  *
210
 ************************************************************************/
211
212
/**
213
 * xmlErrAttributeDup:
214
 * @ctxt:  an XML parser context
215
 * @prefix:  the attribute prefix
216
 * @localname:  the attribute localname
217
 *
218
 * Handle a redefinition of attribute error
219
 */
220
static void
221
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
222
                   const xmlChar * localname)
223
5.87k
{
224
5.87k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
225
5.87k
        (ctxt->instate == XML_PARSER_EOF))
226
83
  return;
227
5.79k
    if (ctxt != NULL)
228
5.79k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
229
230
5.79k
    if (prefix == NULL)
231
3.52k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
232
3.52k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
233
3.52k
                        (const char *) localname, NULL, NULL, 0, 0,
234
3.52k
                        "Attribute %s redefined\n", localname);
235
2.27k
    else
236
2.27k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
237
2.27k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
238
2.27k
                        (const char *) prefix, (const char *) localname,
239
2.27k
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
240
2.27k
                        localname);
241
5.79k
    if (ctxt != NULL) {
242
5.79k
  ctxt->wellFormed = 0;
243
5.79k
  if (ctxt->recovery == 0)
244
5.79k
      ctxt->disableSAX = 1;
245
5.79k
    }
246
5.79k
}
247
248
/**
249
 * xmlFatalErrMsg:
250
 * @ctxt:  an XML parser context
251
 * @error:  the error number
252
 * @msg:  the error message
253
 *
254
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
255
 */
256
static void LIBXML_ATTR_FORMAT(3,0)
257
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
258
               const char *msg)
259
169k
{
260
169k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
261
169k
        (ctxt->instate == XML_PARSER_EOF))
262
249
  return;
263
169k
    if (ctxt != NULL)
264
169k
  ctxt->errNo = error;
265
169k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
266
169k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
267
169k
    if (ctxt != NULL) {
268
169k
  ctxt->wellFormed = 0;
269
169k
  if (ctxt->recovery == 0)
270
169k
      ctxt->disableSAX = 1;
271
169k
    }
272
169k
}
273
274
/**
275
 * xmlWarningMsg:
276
 * @ctxt:  an XML parser context
277
 * @error:  the error number
278
 * @msg:  the error message
279
 * @str1:  extra data
280
 * @str2:  extra data
281
 *
282
 * Handle a warning.
283
 */
284
static void LIBXML_ATTR_FORMAT(3,0)
285
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
286
              const char *msg, const xmlChar *str1, const xmlChar *str2)
287
17.0k
{
288
17.0k
    xmlStructuredErrorFunc schannel = NULL;
289
290
17.0k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
291
17.0k
        (ctxt->instate == XML_PARSER_EOF))
292
0
  return;
293
17.0k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
294
17.0k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
295
17.0k
        schannel = ctxt->sax->serror;
296
17.0k
    if (ctxt != NULL) {
297
17.0k
        __xmlRaiseError(schannel,
298
17.0k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
299
17.0k
                    ctxt->userData,
300
17.0k
                    ctxt, NULL, XML_FROM_PARSER, error,
301
17.0k
                    XML_ERR_WARNING, NULL, 0,
302
17.0k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
303
17.0k
        msg, (const char *) str1, (const char *) str2);
304
17.0k
    } else {
305
0
        __xmlRaiseError(schannel, NULL, NULL,
306
0
                    ctxt, NULL, XML_FROM_PARSER, error,
307
0
                    XML_ERR_WARNING, NULL, 0,
308
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
309
0
        msg, (const char *) str1, (const char *) str2);
310
0
    }
311
17.0k
}
312
313
/**
314
 * xmlValidityError:
315
 * @ctxt:  an XML parser context
316
 * @error:  the error number
317
 * @msg:  the error message
318
 * @str1:  extra data
319
 *
320
 * Handle a validity error.
321
 */
322
static void LIBXML_ATTR_FORMAT(3,0)
323
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
324
              const char *msg, const xmlChar *str1, const xmlChar *str2)
325
1.56k
{
326
1.56k
    xmlStructuredErrorFunc schannel = NULL;
327
328
1.56k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
329
1.56k
        (ctxt->instate == XML_PARSER_EOF))
330
0
  return;
331
1.56k
    if (ctxt != NULL) {
332
1.56k
  ctxt->errNo = error;
333
1.56k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
334
1.56k
      schannel = ctxt->sax->serror;
335
1.56k
    }
336
1.56k
    if (ctxt != NULL) {
337
1.56k
        __xmlRaiseError(schannel,
338
1.56k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
339
1.56k
                    ctxt, NULL, XML_FROM_DTD, error,
340
1.56k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
341
1.56k
        (const char *) str2, NULL, 0, 0,
342
1.56k
        msg, (const char *) str1, (const char *) str2);
343
1.56k
  ctxt->valid = 0;
344
1.56k
    } else {
345
0
        __xmlRaiseError(schannel, NULL, NULL,
346
0
                    ctxt, NULL, XML_FROM_DTD, error,
347
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
348
0
        (const char *) str2, NULL, 0, 0,
349
0
        msg, (const char *) str1, (const char *) str2);
350
0
    }
351
1.56k
}
352
353
/**
354
 * xmlFatalErrMsgInt:
355
 * @ctxt:  an XML parser context
356
 * @error:  the error number
357
 * @msg:  the error message
358
 * @val:  an integer value
359
 *
360
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
361
 */
362
static void LIBXML_ATTR_FORMAT(3,0)
363
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
364
                  const char *msg, int val)
365
117k
{
366
117k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
367
117k
        (ctxt->instate == XML_PARSER_EOF))
368
3
  return;
369
117k
    if (ctxt != NULL)
370
117k
  ctxt->errNo = error;
371
117k
    __xmlRaiseError(NULL, NULL, NULL,
372
117k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
373
117k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
374
117k
    if (ctxt != NULL) {
375
117k
  ctxt->wellFormed = 0;
376
117k
  if (ctxt->recovery == 0)
377
117k
      ctxt->disableSAX = 1;
378
117k
    }
379
117k
}
380
381
/**
382
 * xmlFatalErrMsgStrIntStr:
383
 * @ctxt:  an XML parser context
384
 * @error:  the error number
385
 * @msg:  the error message
386
 * @str1:  an string info
387
 * @val:  an integer value
388
 * @str2:  an string info
389
 *
390
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
391
 */
392
static void LIBXML_ATTR_FORMAT(3,0)
393
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
394
                  const char *msg, const xmlChar *str1, int val,
395
      const xmlChar *str2)
396
69.8k
{
397
69.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
398
69.8k
        (ctxt->instate == XML_PARSER_EOF))
399
3
  return;
400
69.8k
    if (ctxt != NULL)
401
69.8k
  ctxt->errNo = error;
402
69.8k
    __xmlRaiseError(NULL, NULL, NULL,
403
69.8k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
404
69.8k
                    NULL, 0, (const char *) str1, (const char *) str2,
405
69.8k
        NULL, val, 0, msg, str1, val, str2);
406
69.8k
    if (ctxt != NULL) {
407
69.8k
  ctxt->wellFormed = 0;
408
69.8k
  if (ctxt->recovery == 0)
409
69.8k
      ctxt->disableSAX = 1;
410
69.8k
    }
411
69.8k
}
412
413
/**
414
 * xmlFatalErrMsgStr:
415
 * @ctxt:  an XML parser context
416
 * @error:  the error number
417
 * @msg:  the error message
418
 * @val:  a string value
419
 *
420
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
421
 */
422
static void LIBXML_ATTR_FORMAT(3,0)
423
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
424
                  const char *msg, const xmlChar * val)
425
203k
{
426
203k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
427
203k
        (ctxt->instate == XML_PARSER_EOF))
428
11
  return;
429
203k
    if (ctxt != NULL)
430
203k
  ctxt->errNo = error;
431
203k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
432
203k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
433
203k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
434
203k
                    val);
435
203k
    if (ctxt != NULL) {
436
203k
  ctxt->wellFormed = 0;
437
203k
  if (ctxt->recovery == 0)
438
203k
      ctxt->disableSAX = 1;
439
203k
    }
440
203k
}
441
442
/**
443
 * xmlErrMsgStr:
444
 * @ctxt:  an XML parser context
445
 * @error:  the error number
446
 * @msg:  the error message
447
 * @val:  a string value
448
 *
449
 * Handle a non fatal parser error
450
 */
451
static void LIBXML_ATTR_FORMAT(3,0)
452
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
453
                  const char *msg, const xmlChar * val)
454
3.24k
{
455
3.24k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
456
3.24k
        (ctxt->instate == XML_PARSER_EOF))
457
0
  return;
458
3.24k
    if (ctxt != NULL)
459
3.24k
  ctxt->errNo = error;
460
3.24k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
461
3.24k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
462
3.24k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
463
3.24k
                    val);
464
3.24k
}
465
466
/**
467
 * xmlNsErr:
468
 * @ctxt:  an XML parser context
469
 * @error:  the error number
470
 * @msg:  the message
471
 * @info1:  extra information string
472
 * @info2:  extra information string
473
 *
474
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
475
 */
476
static void LIBXML_ATTR_FORMAT(3,0)
477
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
478
         const char *msg,
479
         const xmlChar * info1, const xmlChar * info2,
480
         const xmlChar * info3)
481
101k
{
482
101k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
483
101k
        (ctxt->instate == XML_PARSER_EOF))
484
165
  return;
485
101k
    if (ctxt != NULL)
486
101k
  ctxt->errNo = error;
487
101k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
488
101k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
489
101k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
490
101k
                    info1, info2, info3);
491
101k
    if (ctxt != NULL)
492
101k
  ctxt->nsWellFormed = 0;
493
101k
}
494
495
/**
496
 * xmlNsWarn
497
 * @ctxt:  an XML parser context
498
 * @error:  the error number
499
 * @msg:  the message
500
 * @info1:  extra information string
501
 * @info2:  extra information string
502
 *
503
 * Handle a namespace warning error
504
 */
505
static void LIBXML_ATTR_FORMAT(3,0)
506
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
507
         const char *msg,
508
         const xmlChar * info1, const xmlChar * info2,
509
         const xmlChar * info3)
510
2.26k
{
511
2.26k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
512
2.26k
        (ctxt->instate == XML_PARSER_EOF))
513
0
  return;
514
2.26k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
515
2.26k
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
516
2.26k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
517
2.26k
                    info1, info2, info3);
518
2.26k
}
519
520
/*
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of
 * wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest value that can still be added without overflowing. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
527
528
/*
 * Add the size_t quantity @val to *@dst, clamping the result to
 * ULONG_MAX instead of wrapping around.
 *
 * @val is taken as size_t so that the saturation check sees the full
 * value on platforms where size_t is wider than unsigned long; with an
 * unsigned long parameter the argument would be silently truncated
 * before the check could run.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;  /* fits: val <= ULONG_MAX - *dst */
}
535
536
/**
537
 * xmlParserEntityCheck:
538
 * @ctxt:  parser context
539
 * @extra:  sum of unexpanded entity sizes
540
 *
541
 * Check for non-linear entity expansion behaviour.
542
 *
543
 * In some cases like xmlStringDecodeEntities, this function is called
544
 * for each, possibly nested entity and its unexpanded content length.
545
 *
546
 * In other cases like xmlParseReference, it's only called for each
547
 * top-level entity with its unexpanded content length plus the sum of
548
 * the unexpanded content lengths (plus fixed cost) of all nested
549
 * entities.
550
 *
551
 * Summing the unexpanded lengths also adds the length of the reference.
552
 * This is by design. Taking the length of the entity name into account
553
 * discourages attacks that try to waste CPU time with abusively long
554
 * entity names. See test/recurse/lol6.xml for example. Each call also
555
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
556
 * short entities.
557
 *
558
 * Returns 1 on error, 0 on success.
559
 */
560
static int
561
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
562
173k
{
563
173k
    unsigned long consumed;
564
173k
    xmlParserInputPtr input = ctxt->input;
565
173k
    xmlEntityPtr entity = input->entity;
566
567
    /*
568
     * Compute total consumed bytes so far, including input streams of
569
     * external entities.
570
     */
571
173k
    consumed = input->parentConsumed;
572
173k
    if ((entity == NULL) ||
573
173k
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
574
128k
         ((entity->flags & XML_ENT_PARSED) == 0))) {
575
128k
        xmlSaturatedAdd(&consumed, input->consumed);
576
128k
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
577
128k
    }
578
173k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
579
580
    /*
581
     * Add extra cost and some fixed cost.
582
     */
583
173k
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
584
173k
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
585
586
    /*
587
     * It's important to always use saturation arithmetic when tracking
588
     * entity sizes to make the size checks reliable. If "sizeentcopy"
589
     * overflows, we have to abort.
590
     */
591
173k
    if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
592
173k
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
593
384
         (ctxt->sizeentcopy / XML_PARSER_NON_LINEAR > consumed))) {
594
42
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
595
42
                       "Maximum entity amplification factor exceeded");
596
42
        xmlHaltParser(ctxt);
597
42
        return(1);
598
42
    }
599
600
173k
    return(0);
601
173k
}
602
603
/************************************************************************
604
 *                  *
605
 *    Library wide options          *
606
 *                  *
607
 ************************************************************************/
608
609
/**
610
  * xmlHasFeature:
611
  * @feature: the feature to be examined
612
  *
613
  * Examines if the library has been compiled with a given feature.
614
  *
615
  * Returns a non-zero value if the feature exist, otherwise zero.
616
  * Returns zero (0) if the feature does not exist or an unknown
617
  * unknown feature is requested, non-zero otherwise.
618
  */
619
int
620
xmlHasFeature(xmlFeature feature)
621
0
{
622
0
    switch (feature) {
623
0
  case XML_WITH_THREAD:
624
0
#ifdef LIBXML_THREAD_ENABLED
625
0
      return(1);
626
#else
627
      return(0);
628
#endif
629
0
        case XML_WITH_TREE:
630
0
#ifdef LIBXML_TREE_ENABLED
631
0
            return(1);
632
#else
633
            return(0);
634
#endif
635
0
        case XML_WITH_OUTPUT:
636
0
#ifdef LIBXML_OUTPUT_ENABLED
637
0
            return(1);
638
#else
639
            return(0);
640
#endif
641
0
        case XML_WITH_PUSH:
642
0
#ifdef LIBXML_PUSH_ENABLED
643
0
            return(1);
644
#else
645
            return(0);
646
#endif
647
0
        case XML_WITH_READER:
648
0
#ifdef LIBXML_READER_ENABLED
649
0
            return(1);
650
#else
651
            return(0);
652
#endif
653
0
        case XML_WITH_PATTERN:
654
0
#ifdef LIBXML_PATTERN_ENABLED
655
0
            return(1);
656
#else
657
            return(0);
658
#endif
659
0
        case XML_WITH_WRITER:
660
0
#ifdef LIBXML_WRITER_ENABLED
661
0
            return(1);
662
#else
663
            return(0);
664
#endif
665
0
        case XML_WITH_SAX1:
666
0
#ifdef LIBXML_SAX1_ENABLED
667
0
            return(1);
668
#else
669
            return(0);
670
#endif
671
0
        case XML_WITH_FTP:
672
#ifdef LIBXML_FTP_ENABLED
673
            return(1);
674
#else
675
0
            return(0);
676
0
#endif
677
0
        case XML_WITH_HTTP:
678
#ifdef LIBXML_HTTP_ENABLED
679
            return(1);
680
#else
681
0
            return(0);
682
0
#endif
683
0
        case XML_WITH_VALID:
684
0
#ifdef LIBXML_VALID_ENABLED
685
0
            return(1);
686
#else
687
            return(0);
688
#endif
689
0
        case XML_WITH_HTML:
690
0
#ifdef LIBXML_HTML_ENABLED
691
0
            return(1);
692
#else
693
            return(0);
694
#endif
695
0
        case XML_WITH_LEGACY:
696
#ifdef LIBXML_LEGACY_ENABLED
697
            return(1);
698
#else
699
0
            return(0);
700
0
#endif
701
0
        case XML_WITH_C14N:
702
0
#ifdef LIBXML_C14N_ENABLED
703
0
            return(1);
704
#else
705
            return(0);
706
#endif
707
0
        case XML_WITH_CATALOG:
708
0
#ifdef LIBXML_CATALOG_ENABLED
709
0
            return(1);
710
#else
711
            return(0);
712
#endif
713
0
        case XML_WITH_XPATH:
714
0
#ifdef LIBXML_XPATH_ENABLED
715
0
            return(1);
716
#else
717
            return(0);
718
#endif
719
0
        case XML_WITH_XPTR:
720
0
#ifdef LIBXML_XPTR_ENABLED
721
0
            return(1);
722
#else
723
            return(0);
724
#endif
725
0
        case XML_WITH_XINCLUDE:
726
0
#ifdef LIBXML_XINCLUDE_ENABLED
727
0
            return(1);
728
#else
729
            return(0);
730
#endif
731
0
        case XML_WITH_ICONV:
732
0
#ifdef LIBXML_ICONV_ENABLED
733
0
            return(1);
734
#else
735
            return(0);
736
#endif
737
0
        case XML_WITH_ISO8859X:
738
0
#ifdef LIBXML_ISO8859X_ENABLED
739
0
            return(1);
740
#else
741
            return(0);
742
#endif
743
0
        case XML_WITH_UNICODE:
744
0
#ifdef LIBXML_UNICODE_ENABLED
745
0
            return(1);
746
#else
747
            return(0);
748
#endif
749
0
        case XML_WITH_REGEXP:
750
0
#ifdef LIBXML_REGEXP_ENABLED
751
0
            return(1);
752
#else
753
            return(0);
754
#endif
755
0
        case XML_WITH_AUTOMATA:
756
0
#ifdef LIBXML_AUTOMATA_ENABLED
757
0
            return(1);
758
#else
759
            return(0);
760
#endif
761
0
        case XML_WITH_EXPR:
762
#ifdef LIBXML_EXPR_ENABLED
763
            return(1);
764
#else
765
0
            return(0);
766
0
#endif
767
0
        case XML_WITH_SCHEMAS:
768
0
#ifdef LIBXML_SCHEMAS_ENABLED
769
0
            return(1);
770
#else
771
            return(0);
772
#endif
773
0
        case XML_WITH_SCHEMATRON:
774
0
#ifdef LIBXML_SCHEMATRON_ENABLED
775
0
            return(1);
776
#else
777
            return(0);
778
#endif
779
0
        case XML_WITH_MODULES:
780
0
#ifdef LIBXML_MODULES_ENABLED
781
0
            return(1);
782
#else
783
            return(0);
784
#endif
785
0
        case XML_WITH_DEBUG:
786
#ifdef LIBXML_DEBUG_ENABLED
787
            return(1);
788
#else
789
0
            return(0);
790
0
#endif
791
0
        case XML_WITH_DEBUG_MEM:
792
#ifdef DEBUG_MEMORY_LOCATION
793
            return(1);
794
#else
795
0
            return(0);
796
0
#endif
797
0
        case XML_WITH_DEBUG_RUN:
798
0
            return(0);
799
0
        case XML_WITH_ZLIB:
800
0
#ifdef LIBXML_ZLIB_ENABLED
801
0
            return(1);
802
#else
803
            return(0);
804
#endif
805
0
        case XML_WITH_LZMA:
806
0
#ifdef LIBXML_LZMA_ENABLED
807
0
            return(1);
808
#else
809
            return(0);
810
#endif
811
0
        case XML_WITH_ICU:
812
#ifdef LIBXML_ICU_ENABLED
813
            return(1);
814
#else
815
0
            return(0);
816
0
#endif
817
0
        default:
818
0
      break;
819
0
     }
820
0
     return(0);
821
0
}
822
823
/************************************************************************
824
 *                  *
825
 *    SAX2 defaulted attributes handling      *
826
 *                  *
827
 ************************************************************************/
828
829
/**
830
 * xmlDetectSAX2:
831
 * @ctxt:  an XML parser context
832
 *
833
 * Do the SAX2 detection and specific initialization
834
 */
835
static void
836
42.1k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
837
42.1k
    xmlSAXHandlerPtr sax;
838
839
    /* Avoid unused variable warning if features are disabled. */
840
42.1k
    (void) sax;
841
842
42.1k
    if (ctxt == NULL) return;
843
42.1k
    sax = ctxt->sax;
844
42.1k
#ifdef LIBXML_SAX1_ENABLED
845
42.1k
    if ((sax) && (sax->initialized == XML_SAX2_MAGIC))
846
42.1k
        ctxt->sax2 = 1;
847
#else
848
    ctxt->sax2 = 1;
849
#endif /* LIBXML_SAX1_ENABLED */
850
851
42.1k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
852
42.1k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
853
42.1k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
854
42.1k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
855
42.1k
    (ctxt->str_xml_ns == NULL)) {
856
0
        xmlErrMemory(ctxt, NULL);
857
0
    }
858
42.1k
}
859
860
typedef struct _xmlDefAttrs xmlDefAttrs;
861
typedef xmlDefAttrs *xmlDefAttrsPtr;
862
struct _xmlDefAttrs {
863
    int nbAttrs;  /* number of defaulted attributes on that element */
864
    int maxAttrs;       /* the size of the array */
865
#if __STDC_VERSION__ >= 199901L
866
    /* Using a C99 flexible array member avoids UBSan errors. */
867
    const xmlChar *values[]; /* array of localname/prefix/values/external */
868
#else
869
    const xmlChar *values[5];
870
#endif
871
};
872
873
/**
874
 * xmlAttrNormalizeSpace:
875
 * @src: the source string
876
 * @dst: the target string
877
 *
878
 * Normalize the space in non CDATA attribute values:
879
 * If the attribute type is not CDATA, then the XML processor MUST further
880
 * process the normalized attribute value by discarding any leading and
881
 * trailing space (#x20) characters, and by replacing sequences of space
882
 * (#x20) characters by a single space (#x20) character.
883
 * Note that the size of dst need to be at least src, and if one doesn't need
884
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
885
 * passing src as dst is just fine.
886
 *
887
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
888
 *         is needed.
889
 */
890
static xmlChar *
891
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
892
43.9k
{
893
43.9k
    if ((src == NULL) || (dst == NULL))
894
0
        return(NULL);
895
896
54.7k
    while (*src == 0x20) src++;
897
739k
    while (*src != 0) {
898
695k
  if (*src == 0x20) {
899
98.0k
      while (*src == 0x20) src++;
900
41.2k
      if (*src != 0)
901
37.2k
    *dst++ = 0x20;
902
653k
  } else {
903
653k
      *dst++ = *src++;
904
653k
  }
905
695k
    }
906
43.9k
    *dst = 0;
907
43.9k
    if (dst == src)
908
32.1k
       return(NULL);
909
11.7k
    return(dst);
910
43.9k
}
911
912
/**
913
 * xmlAttrNormalizeSpace2:
914
 * @src: the source string
915
 *
916
 * Normalize the space in non CDATA attribute values, a slightly more complex
917
 * front end to avoid allocation problems when running on attribute values
918
 * coming from the input.
919
 *
920
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
921
 *         is needed.
922
 */
923
static const xmlChar *
924
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
925
3.16k
{
926
3.16k
    int i;
927
3.16k
    int remove_head = 0;
928
3.16k
    int need_realloc = 0;
929
3.16k
    const xmlChar *cur;
930
931
3.16k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
932
0
        return(NULL);
933
3.16k
    i = *len;
934
3.16k
    if (i <= 0)
935
788
        return(NULL);
936
937
2.37k
    cur = src;
938
15.8k
    while (*cur == 0x20) {
939
13.4k
        cur++;
940
13.4k
  remove_head++;
941
13.4k
    }
942
45.5k
    while (*cur != 0) {
943
43.5k
  if (*cur == 0x20) {
944
2.88k
      cur++;
945
2.88k
      if ((*cur == 0x20) || (*cur == 0)) {
946
406
          need_realloc = 1;
947
406
    break;
948
406
      }
949
2.88k
  } else
950
40.7k
      cur++;
951
43.5k
    }
952
2.37k
    if (need_realloc) {
953
406
        xmlChar *ret;
954
955
406
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
956
406
  if (ret == NULL) {
957
0
      xmlErrMemory(ctxt, NULL);
958
0
      return(NULL);
959
0
  }
960
406
  xmlAttrNormalizeSpace(ret, ret);
961
406
  *len = strlen((const char *)ret);
962
406
        return(ret);
963
1.96k
    } else if (remove_head) {
964
496
        *len -= remove_head;
965
496
        memmove(src, src + remove_head, 1 + *len);
966
496
  return(src);
967
496
    }
968
1.47k
    return(NULL);
969
2.37k
}
970
971
/**
972
 * xmlAddDefAttrs:
973
 * @ctxt:  an XML parser context
974
 * @fullname:  the element fullname
975
 * @fullattr:  the attribute fullname
976
 * @value:  the attribute value
977
 *
978
 * Add a defaulted attribute for an element
979
 */
980
static void
981
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
982
               const xmlChar *fullname,
983
               const xmlChar *fullattr,
984
43.0k
               const xmlChar *value) {
985
43.0k
    xmlDefAttrsPtr defaults;
986
43.0k
    int len;
987
43.0k
    const xmlChar *name;
988
43.0k
    const xmlChar *prefix;
989
990
    /*
991
     * Allows to detect attribute redefinitions
992
     */
993
43.0k
    if (ctxt->attsSpecial != NULL) {
994
41.4k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
995
28.6k
      return;
996
41.4k
    }
997
998
14.3k
    if (ctxt->attsDefault == NULL) {
999
1.59k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1000
1.59k
  if (ctxt->attsDefault == NULL)
1001
0
      goto mem_error;
1002
1.59k
    }
1003
1004
    /*
1005
     * split the element name into prefix:localname , the string found
1006
     * are within the DTD and then not associated to namespace names.
1007
     */
1008
14.3k
    name = xmlSplitQName3(fullname, &len);
1009
14.3k
    if (name == NULL) {
1010
7.89k
        name = xmlDictLookup(ctxt->dict, fullname, -1);
1011
7.89k
  prefix = NULL;
1012
7.89k
    } else {
1013
6.47k
        name = xmlDictLookup(ctxt->dict, name, -1);
1014
6.47k
  prefix = xmlDictLookup(ctxt->dict, fullname, len);
1015
6.47k
    }
1016
1017
    /*
1018
     * make sure there is some storage
1019
     */
1020
14.3k
    defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1021
14.3k
    if (defaults == NULL) {
1022
2.56k
        defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
1023
2.56k
                     (4 * 5) * sizeof(const xmlChar *));
1024
2.56k
  if (defaults == NULL)
1025
0
      goto mem_error;
1026
2.56k
  defaults->nbAttrs = 0;
1027
2.56k
  defaults->maxAttrs = 4;
1028
2.56k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1029
2.56k
                          defaults, NULL) < 0) {
1030
0
      xmlFree(defaults);
1031
0
      goto mem_error;
1032
0
  }
1033
11.8k
    } else if (defaults->nbAttrs >= defaults->maxAttrs) {
1034
1.74k
        xmlDefAttrsPtr temp;
1035
1036
1.74k
        temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
1037
1.74k
           (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
1038
1.74k
  if (temp == NULL)
1039
0
      goto mem_error;
1040
1.74k
  defaults = temp;
1041
1.74k
  defaults->maxAttrs *= 2;
1042
1.74k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1043
1.74k
                          defaults, NULL) < 0) {
1044
0
      xmlFree(defaults);
1045
0
      goto mem_error;
1046
0
  }
1047
1.74k
    }
1048
1049
    /*
1050
     * Split the element name into prefix:localname , the string found
1051
     * are within the DTD and hen not associated to namespace names.
1052
     */
1053
14.3k
    name = xmlSplitQName3(fullattr, &len);
1054
14.3k
    if (name == NULL) {
1055
9.40k
        name = xmlDictLookup(ctxt->dict, fullattr, -1);
1056
9.40k
  prefix = NULL;
1057
9.40k
    } else {
1058
4.96k
        name = xmlDictLookup(ctxt->dict, name, -1);
1059
4.96k
  prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1060
4.96k
    }
1061
1062
14.3k
    defaults->values[5 * defaults->nbAttrs] = name;
1063
14.3k
    defaults->values[5 * defaults->nbAttrs + 1] = prefix;
1064
    /* intern the string and precompute the end */
1065
14.3k
    len = xmlStrlen(value);
1066
14.3k
    value = xmlDictLookup(ctxt->dict, value, len);
1067
14.3k
    if (value == NULL)
1068
0
        goto mem_error;
1069
14.3k
    defaults->values[5 * defaults->nbAttrs + 2] = value;
1070
14.3k
    defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1071
14.3k
    if (ctxt->external)
1072
0
        defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1073
14.3k
    else
1074
14.3k
        defaults->values[5 * defaults->nbAttrs + 4] = NULL;
1075
14.3k
    defaults->nbAttrs++;
1076
1077
14.3k
    return;
1078
1079
0
mem_error:
1080
0
    xmlErrMemory(ctxt, NULL);
1081
0
    return;
1082
14.3k
}
1083
1084
/**
1085
 * xmlAddSpecialAttr:
1086
 * @ctxt:  an XML parser context
1087
 * @fullname:  the element fullname
1088
 * @fullattr:  the attribute fullname
1089
 * @type:  the attribute type
1090
 *
1091
 * Register this attribute type
1092
 */
1093
static void
1094
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1095
      const xmlChar *fullname,
1096
      const xmlChar *fullattr,
1097
      int type)
1098
51.0k
{
1099
51.0k
    if (ctxt->attsSpecial == NULL) {
1100
1.76k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1101
1.76k
  if (ctxt->attsSpecial == NULL)
1102
0
      goto mem_error;
1103
1.76k
    }
1104
1105
51.0k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1106
36.2k
        return;
1107
1108
14.8k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1109
14.8k
                     (void *) (ptrdiff_t) type);
1110
14.8k
    return;
1111
1112
0
mem_error:
1113
0
    xmlErrMemory(ctxt, NULL);
1114
0
    return;
1115
51.0k
}
1116
1117
/**
1118
 * xmlCleanSpecialAttrCallback:
1119
 *
1120
 * Removes CDATA attributes from the special attribute table
1121
 */
1122
static void
1123
xmlCleanSpecialAttrCallback(void *payload, void *data,
1124
                            const xmlChar *fullname, const xmlChar *fullattr,
1125
3.25k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1126
3.25k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1127
1128
3.25k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1129
438
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1130
438
    }
1131
3.25k
}
1132
1133
/**
1134
 * xmlCleanSpecialAttr:
1135
 * @ctxt:  an XML parser context
1136
 *
1137
 * Trim the list of attributes defined to remove all those of type
1138
 * CDATA as they are not special. This call should be done when finishing
1139
 * to parse the DTD and before starting to parse the document root.
1140
 */
1141
static void
1142
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1143
3.47k
{
1144
3.47k
    if (ctxt->attsSpecial == NULL)
1145
2.81k
        return;
1146
1147
664
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1148
1149
664
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1150
13
        xmlHashFree(ctxt->attsSpecial, NULL);
1151
13
        ctxt->attsSpecial = NULL;
1152
13
    }
1153
664
    return;
1154
3.47k
}
1155
1156
/**
1157
 * xmlCheckLanguageID:
1158
 * @lang:  pointer to the string value
1159
 *
1160
 * DEPRECATED: Internal function, do not use.
1161
 *
1162
 * Checks that the value conforms to the LanguageID production:
1163
 *
1164
 * NOTE: this is somewhat deprecated, those productions were removed from
1165
 *       the XML Second edition.
1166
 *
1167
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1168
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1169
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1170
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1171
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1172
 * [38] Subcode ::= ([a-z] | [A-Z])+
1173
 *
1174
 * The current REC reference the successors of RFC 1766, currently 5646
1175
 *
1176
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1177
 * langtag       = language
1178
 *                 ["-" script]
1179
 *                 ["-" region]
1180
 *                 *("-" variant)
1181
 *                 *("-" extension)
1182
 *                 ["-" privateuse]
1183
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1184
 *                 ["-" extlang]       ; sometimes followed by
1185
 *                                     ; extended language subtags
1186
 *               / 4ALPHA              ; or reserved for future use
1187
 *               / 5*8ALPHA            ; or registered language subtag
1188
 *
1189
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1190
 *                 *2("-" 3ALPHA)      ; permanently reserved
1191
 *
1192
 * script        = 4ALPHA              ; ISO 15924 code
1193
 *
1194
 * region        = 2ALPHA              ; ISO 3166-1 code
1195
 *               / 3DIGIT              ; UN M.49 code
1196
 *
1197
 * variant       = 5*8alphanum         ; registered variants
1198
 *               / (DIGIT 3alphanum)
1199
 *
1200
 * extension     = singleton 1*("-" (2*8alphanum))
1201
 *
1202
 *                                     ; Single alphanumerics
1203
 *                                     ; "x" reserved for private use
1204
 * singleton     = DIGIT               ; 0 - 9
1205
 *               / %x41-57             ; A - W
1206
 *               / %x59-5A             ; Y - Z
1207
 *               / %x61-77             ; a - w
1208
 *               / %x79-7A             ; y - z
1209
 *
1210
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1211
 * The parser below doesn't try to cope with extension or privateuse
1212
 * that could be added but that's not interoperable anyway
1213
 *
1214
 * Returns 1 if correct 0 otherwise
1215
 **/
1216
int
1217
xmlCheckLanguageID(const xmlChar * lang)
1218
0
{
1219
0
    const xmlChar *cur = lang, *nxt;
1220
1221
0
    if (cur == NULL)
1222
0
        return (0);
1223
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1224
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1225
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1226
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1227
        /*
1228
         * Still allow IANA code and user code which were coming
1229
         * from the previous version of the XML-1.0 specification
1230
         * it's deprecated but we should not fail
1231
         */
1232
0
        cur += 2;
1233
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1234
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1235
0
            cur++;
1236
0
        return(cur[0] == 0);
1237
0
    }
1238
0
    nxt = cur;
1239
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1240
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1241
0
           nxt++;
1242
0
    if (nxt - cur >= 4) {
1243
        /*
1244
         * Reserved
1245
         */
1246
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1247
0
            return(0);
1248
0
        return(1);
1249
0
    }
1250
0
    if (nxt - cur < 2)
1251
0
        return(0);
1252
    /* we got an ISO 639 code */
1253
0
    if (nxt[0] == 0)
1254
0
        return(1);
1255
0
    if (nxt[0] != '-')
1256
0
        return(0);
1257
1258
0
    nxt++;
1259
0
    cur = nxt;
1260
    /* now we can have extlang or script or region or variant */
1261
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1262
0
        goto region_m49;
1263
1264
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1265
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1266
0
           nxt++;
1267
0
    if (nxt - cur == 4)
1268
0
        goto script;
1269
0
    if (nxt - cur == 2)
1270
0
        goto region;
1271
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1272
0
        goto variant;
1273
0
    if (nxt - cur != 3)
1274
0
        return(0);
1275
    /* we parsed an extlang */
1276
0
    if (nxt[0] == 0)
1277
0
        return(1);
1278
0
    if (nxt[0] != '-')
1279
0
        return(0);
1280
1281
0
    nxt++;
1282
0
    cur = nxt;
1283
    /* now we can have script or region or variant */
1284
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1285
0
        goto region_m49;
1286
1287
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1288
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1289
0
           nxt++;
1290
0
    if (nxt - cur == 2)
1291
0
        goto region;
1292
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1293
0
        goto variant;
1294
0
    if (nxt - cur != 4)
1295
0
        return(0);
1296
    /* we parsed a script */
1297
0
script:
1298
0
    if (nxt[0] == 0)
1299
0
        return(1);
1300
0
    if (nxt[0] != '-')
1301
0
        return(0);
1302
1303
0
    nxt++;
1304
0
    cur = nxt;
1305
    /* now we can have region or variant */
1306
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1307
0
        goto region_m49;
1308
1309
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1310
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1311
0
           nxt++;
1312
1313
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1314
0
        goto variant;
1315
0
    if (nxt - cur != 2)
1316
0
        return(0);
1317
    /* we parsed a region */
1318
0
region:
1319
0
    if (nxt[0] == 0)
1320
0
        return(1);
1321
0
    if (nxt[0] != '-')
1322
0
        return(0);
1323
1324
0
    nxt++;
1325
0
    cur = nxt;
1326
    /* now we can just have a variant */
1327
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1328
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1329
0
           nxt++;
1330
1331
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1332
0
        return(0);
1333
1334
    /* we parsed a variant */
1335
0
variant:
1336
0
    if (nxt[0] == 0)
1337
0
        return(1);
1338
0
    if (nxt[0] != '-')
1339
0
        return(0);
1340
    /* extensions and private use subtags not checked */
1341
0
    return (1);
1342
1343
0
region_m49:
1344
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1345
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1346
0
        nxt += 3;
1347
0
        goto region;
1348
0
    }
1349
0
    return(0);
1350
0
}
1351
1352
/************************************************************************
1353
 *                  *
1354
 *    Parser stacks related functions and macros    *
1355
 *                  *
1356
 ************************************************************************/
1357
1358
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1359
                                            const xmlChar ** str);
1360
1361
#ifdef SAX2
1362
/**
1363
 * nsPush:
1364
 * @ctxt:  an XML parser context
1365
 * @prefix:  the namespace prefix or NULL
1366
 * @URL:  the namespace name
1367
 *
1368
 * Pushes a new parser namespace on top of the ns stack
1369
 *
1370
 * Returns -1 in case of error, -2 if the namespace should be discarded
1371
 *     and the index in the stack otherwise.
1372
 */
1373
static int
1374
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1375
46.3k
{
1376
46.3k
    if (ctxt->options & XML_PARSE_NSCLEAN) {
1377
0
        int i;
1378
0
  for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
1379
0
      if (ctxt->nsTab[i] == prefix) {
1380
    /* in scope */
1381
0
          if (ctxt->nsTab[i + 1] == URL)
1382
0
        return(-2);
1383
    /* out of scope keep it */
1384
0
    break;
1385
0
      }
1386
0
  }
1387
0
    }
1388
46.3k
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1389
16.0k
  ctxt->nsMax = 10;
1390
16.0k
  ctxt->nsNr = 0;
1391
16.0k
  ctxt->nsTab = (const xmlChar **)
1392
16.0k
                xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1393
16.0k
  if (ctxt->nsTab == NULL) {
1394
0
      xmlErrMemory(ctxt, NULL);
1395
0
      ctxt->nsMax = 0;
1396
0
            return (-1);
1397
0
  }
1398
30.3k
    } else if (ctxt->nsNr >= ctxt->nsMax) {
1399
853
        const xmlChar ** tmp;
1400
853
        ctxt->nsMax *= 2;
1401
853
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1402
853
            ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1403
853
        if (tmp == NULL) {
1404
0
            xmlErrMemory(ctxt, NULL);
1405
0
      ctxt->nsMax /= 2;
1406
0
            return (-1);
1407
0
        }
1408
853
  ctxt->nsTab = tmp;
1409
853
    }
1410
46.3k
    ctxt->nsTab[ctxt->nsNr++] = prefix;
1411
46.3k
    ctxt->nsTab[ctxt->nsNr++] = URL;
1412
46.3k
    return (ctxt->nsNr);
1413
46.3k
}
1414
/**
1415
 * nsPop:
1416
 * @ctxt: an XML parser context
1417
 * @nr:  the number to pop
1418
 *
1419
 * Pops the top @nr parser prefix/namespace from the ns stack
1420
 *
1421
 * Returns the number of namespaces removed
1422
 */
1423
static int
1424
nsPop(xmlParserCtxtPtr ctxt, int nr)
1425
29.5k
{
1426
29.5k
    int i;
1427
1428
29.5k
    if (ctxt->nsTab == NULL) return(0);
1429
29.5k
    if (ctxt->nsNr < nr) {
1430
0
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1431
0
        nr = ctxt->nsNr;
1432
0
    }
1433
29.5k
    if (ctxt->nsNr <= 0)
1434
0
        return (0);
1435
1436
97.9k
    for (i = 0;i < nr;i++) {
1437
68.4k
         ctxt->nsNr--;
1438
68.4k
   ctxt->nsTab[ctxt->nsNr] = NULL;
1439
68.4k
    }
1440
29.5k
    return(nr);
1441
29.5k
}
1442
#endif
1443
1444
static int
1445
17.1k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1446
17.1k
    const xmlChar **atts;
1447
17.1k
    int *attallocs;
1448
17.1k
    int maxatts;
1449
1450
17.1k
    if (nr + 5 > ctxt->maxatts) {
1451
17.1k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1452
17.1k
  atts = (const xmlChar **) xmlMalloc(
1453
17.1k
             maxatts * sizeof(const xmlChar *));
1454
17.1k
  if (atts == NULL) goto mem_error;
1455
17.1k
  attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1456
17.1k
                               (maxatts / 5) * sizeof(int));
1457
17.1k
  if (attallocs == NULL) {
1458
0
            xmlFree(atts);
1459
0
            goto mem_error;
1460
0
        }
1461
17.1k
        if (ctxt->maxatts > 0)
1462
94
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1463
17.1k
        xmlFree(ctxt->atts);
1464
17.1k
  ctxt->atts = atts;
1465
17.1k
  ctxt->attallocs = attallocs;
1466
17.1k
  ctxt->maxatts = maxatts;
1467
17.1k
    }
1468
17.1k
    return(ctxt->maxatts);
1469
0
mem_error:
1470
0
    xmlErrMemory(ctxt, NULL);
1471
0
    return(-1);
1472
17.1k
}
1473
1474
/**
1475
 * inputPush:
1476
 * @ctxt:  an XML parser context
1477
 * @value:  the parser input
1478
 *
1479
 * Pushes a new parser input on top of the input stack
1480
 *
1481
 * Returns -1 in case of error, the index in the stack otherwise
1482
 */
1483
int
1484
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1485
84.5k
{
1486
84.5k
    if ((ctxt == NULL) || (value == NULL))
1487
0
        return(-1);
1488
84.5k
    if (ctxt->inputNr >= ctxt->inputMax) {
1489
0
        size_t newSize = ctxt->inputMax * 2;
1490
0
        xmlParserInputPtr *tmp;
1491
1492
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1493
0
                                               newSize * sizeof(*tmp));
1494
0
        if (tmp == NULL) {
1495
0
            xmlErrMemory(ctxt, NULL);
1496
0
            return (-1);
1497
0
        }
1498
0
        ctxt->inputTab = tmp;
1499
0
        ctxt->inputMax = newSize;
1500
0
    }
1501
84.5k
    ctxt->inputTab[ctxt->inputNr] = value;
1502
84.5k
    ctxt->input = value;
1503
84.5k
    return (ctxt->inputNr++);
1504
84.5k
}
1505
/**
1506
 * inputPop:
1507
 * @ctxt: an XML parser context
1508
 *
1509
 * Pops the top parser input from the input stack
1510
 *
1511
 * Returns the input just removed
1512
 */
1513
xmlParserInputPtr
1514
inputPop(xmlParserCtxtPtr ctxt)
1515
224k
{
1516
224k
    xmlParserInputPtr ret;
1517
1518
224k
    if (ctxt == NULL)
1519
0
        return(NULL);
1520
224k
    if (ctxt->inputNr <= 0)
1521
139k
        return (NULL);
1522
84.5k
    ctxt->inputNr--;
1523
84.5k
    if (ctxt->inputNr > 0)
1524
42.3k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1525
42.1k
    else
1526
42.1k
        ctxt->input = NULL;
1527
84.5k
    ret = ctxt->inputTab[ctxt->inputNr];
1528
84.5k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1529
84.5k
    return (ret);
1530
224k
}
1531
/**
1532
 * nodePush:
1533
 * @ctxt:  an XML parser context
1534
 * @value:  the element node
1535
 *
1536
 * DEPRECATED: Internal function, do not use.
1537
 *
1538
 * Pushes a new element node on top of the node stack
1539
 *
1540
 * Returns -1 in case of error, the index in the stack otherwise
1541
 */
1542
int
1543
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1544
411k
{
1545
411k
    if (ctxt == NULL) return(0);
1546
411k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1547
958
        xmlNodePtr *tmp;
1548
1549
958
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1550
958
                                      ctxt->nodeMax * 2 *
1551
958
                                      sizeof(ctxt->nodeTab[0]));
1552
958
        if (tmp == NULL) {
1553
0
            xmlErrMemory(ctxt, NULL);
1554
0
            return (-1);
1555
0
        }
1556
958
        ctxt->nodeTab = tmp;
1557
958
  ctxt->nodeMax *= 2;
1558
958
    }
1559
411k
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1560
411k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1561
5
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1562
5
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1563
5
        xmlParserMaxDepth);
1564
5
  xmlHaltParser(ctxt);
1565
5
  return(-1);
1566
5
    }
1567
411k
    ctxt->nodeTab[ctxt->nodeNr] = value;
1568
411k
    ctxt->node = value;
1569
411k
    return (ctxt->nodeNr++);
1570
411k
}
1571
1572
/**
1573
 * nodePop:
1574
 * @ctxt: an XML parser context
1575
 *
1576
 * DEPRECATED: Internal function, do not use.
1577
 *
1578
 * Pops the top element node from the node stack
1579
 *
1580
 * Returns the node just removed
1581
 */
1582
xmlNodePtr
1583
nodePop(xmlParserCtxtPtr ctxt)
1584
427k
{
1585
427k
    xmlNodePtr ret;
1586
1587
427k
    if (ctxt == NULL) return(NULL);
1588
427k
    if (ctxt->nodeNr <= 0)
1589
53.3k
        return (NULL);
1590
373k
    ctxt->nodeNr--;
1591
373k
    if (ctxt->nodeNr > 0)
1592
357k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1593
16.3k
    else
1594
16.3k
        ctxt->node = NULL;
1595
373k
    ret = ctxt->nodeTab[ctxt->nodeNr];
1596
373k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
1597
373k
    return (ret);
1598
427k
}
1599
1600
/**
1601
 * nameNsPush:
1602
 * @ctxt:  an XML parser context
1603
 * @value:  the element name
1604
 * @prefix:  the element prefix
1605
 * @URI:  the element namespace name
1606
 * @line:  the current line number for error messages
1607
 * @nsNr:  the number of namespaces pushed on the namespace table
1608
 *
1609
 * Pushes a new element name/prefix/URL on top of the name stack
1610
 *
1611
 * Returns -1 in case of error, the index in the stack otherwise
1612
 */
1613
static int
1614
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1615
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
1616
497k
{
1617
497k
    xmlStartTag *tag;
1618
1619
497k
    if (ctxt->nameNr >= ctxt->nameMax) {
1620
1.97k
        const xmlChar * *tmp;
1621
1.97k
        xmlStartTag *tmp2;
1622
1.97k
        ctxt->nameMax *= 2;
1623
1.97k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1624
1.97k
                                    ctxt->nameMax *
1625
1.97k
                                    sizeof(ctxt->nameTab[0]));
1626
1.97k
        if (tmp == NULL) {
1627
0
      ctxt->nameMax /= 2;
1628
0
      goto mem_error;
1629
0
        }
1630
1.97k
  ctxt->nameTab = tmp;
1631
1.97k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
1632
1.97k
                                    ctxt->nameMax *
1633
1.97k
                                    sizeof(ctxt->pushTab[0]));
1634
1.97k
        if (tmp2 == NULL) {
1635
0
      ctxt->nameMax /= 2;
1636
0
      goto mem_error;
1637
0
        }
1638
1.97k
  ctxt->pushTab = tmp2;
1639
495k
    } else if (ctxt->pushTab == NULL) {
1640
22.5k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
1641
22.5k
                                            sizeof(ctxt->pushTab[0]));
1642
22.5k
        if (ctxt->pushTab == NULL)
1643
0
            goto mem_error;
1644
22.5k
    }
1645
497k
    ctxt->nameTab[ctxt->nameNr] = value;
1646
497k
    ctxt->name = value;
1647
497k
    tag = &ctxt->pushTab[ctxt->nameNr];
1648
497k
    tag->prefix = prefix;
1649
497k
    tag->URI = URI;
1650
497k
    tag->line = line;
1651
497k
    tag->nsNr = nsNr;
1652
497k
    return (ctxt->nameNr++);
1653
0
mem_error:
1654
0
    xmlErrMemory(ctxt, NULL);
1655
0
    return (-1);
1656
497k
}
1657
#ifdef LIBXML_PUSH_ENABLED
1658
/**
1659
 * nameNsPop:
1660
 * @ctxt: an XML parser context
1661
 *
1662
 * Pops the top element/prefix/URI name from the name stack
1663
 *
1664
 * Returns the name just removed
1665
 */
1666
static const xmlChar *
1667
nameNsPop(xmlParserCtxtPtr ctxt)
1668
0
{
1669
0
    const xmlChar *ret;
1670
1671
0
    if (ctxt->nameNr <= 0)
1672
0
        return (NULL);
1673
0
    ctxt->nameNr--;
1674
0
    if (ctxt->nameNr > 0)
1675
0
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1676
0
    else
1677
0
        ctxt->name = NULL;
1678
0
    ret = ctxt->nameTab[ctxt->nameNr];
1679
0
    ctxt->nameTab[ctxt->nameNr] = NULL;
1680
0
    return (ret);
1681
0
}
1682
#endif /* LIBXML_PUSH_ENABLED */
1683
1684
/**
1685
 * namePush:
1686
 * @ctxt:  an XML parser context
1687
 * @value:  the element name
1688
 *
1689
 * DEPRECATED: Internal function, do not use.
1690
 *
1691
 * Pushes a new element name on top of the name stack
1692
 *
1693
 * Returns -1 in case of error, the index in the stack otherwise
1694
 */
1695
int
1696
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
1697
0
{
1698
0
    if (ctxt == NULL) return (-1);
1699
1700
0
    if (ctxt->nameNr >= ctxt->nameMax) {
1701
0
        const xmlChar * *tmp;
1702
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1703
0
                                    ctxt->nameMax * 2 *
1704
0
                                    sizeof(ctxt->nameTab[0]));
1705
0
        if (tmp == NULL) {
1706
0
      goto mem_error;
1707
0
        }
1708
0
  ctxt->nameTab = tmp;
1709
0
        ctxt->nameMax *= 2;
1710
0
    }
1711
0
    ctxt->nameTab[ctxt->nameNr] = value;
1712
0
    ctxt->name = value;
1713
0
    return (ctxt->nameNr++);
1714
0
mem_error:
1715
0
    xmlErrMemory(ctxt, NULL);
1716
0
    return (-1);
1717
0
}
1718
1719
/**
1720
 * namePop:
1721
 * @ctxt: an XML parser context
1722
 *
1723
 * DEPRECATED: Internal function, do not use.
1724
 *
1725
 * Pops the top element name from the name stack
1726
 *
1727
 * Returns the name just removed
1728
 */
1729
const xmlChar *
1730
namePop(xmlParserCtxtPtr ctxt)
1731
444k
{
1732
444k
    const xmlChar *ret;
1733
1734
444k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1735
0
        return (NULL);
1736
444k
    ctxt->nameNr--;
1737
444k
    if (ctxt->nameNr > 0)
1738
425k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1739
18.3k
    else
1740
18.3k
        ctxt->name = NULL;
1741
444k
    ret = ctxt->nameTab[ctxt->nameNr];
1742
444k
    ctxt->nameTab[ctxt->nameNr] = NULL;
1743
444k
    return (ret);
1744
444k
}
1745
1746
518k
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Puts @val on top of the space stack and points ctxt->space at the new
 * top entry, doubling the stack when it is full.
 *
 * Returns -1 in case of memory error, the index in the stack otherwise
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* the table kept its former size */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;
    return(slot);
}
1764
1765
464k
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the space stack and overwrites its slot with
 * -1. ctxt->space is pointed at the entry below it, or at the first slot
 * of the table when the stack becomes empty.
 *
 * Returns the value just removed, or 0 if the stack was already empty
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? (top - 1) : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
1777
1778
/*
1779
 * Macros for accessing the content. Those should be used only by the parser,
1780
 * and not exported.
1781
 *
1782
 * Dirty macros, i.e. one often need to make assumption on the context to
1783
 * use them
1784
 *
1785
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
1786
 *           To be used with extreme caution since operations consuming
1787
 *           characters may move the input buffer to a different location !
1788
 *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
1789
 *           This should be used internally by the parser
1790
 *           only to compare to ASCII values otherwise it would break when
1791
 *           running with UTF-8 encoding.
1792
 *   RAW     same as CUR but in the input buffer, bypass any token
1793
 *           extraction that may have been done
1794
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
1795
 *           to compare on ASCII based substring.
1796
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
1797
 *           strings without newlines within the parser.
1798
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1799
 *           defined char within the parser.
1800
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1801
 *
1802
 *   NEXT    Skip to the next character, this does the proper decoding
1803
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
1804
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
1805
 *   CUR_CHAR(l) returns the current unicode character (int), set l
1806
 *           to the number of xmlChars used for the encoding [0-5].
1807
 *   CUR_SCHAR  same but operate on a string instead of the context
1808
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
1809
 *            the index
1810
 *   GROW, SHRINK  handling of input buffers
1811
 */
1812
1813
9.20M
/*
 * RAW and CUR have the same expansion: the xmlChar found at the current
 * position of the current input. NXT(val) is the xmlChar located val
 * positions further. CUR_PTR and BASE_PTR are the cur and base pointers
 * of the current input. All of them expect a variable named ctxt to be
 * in scope.
 */
#define RAW (*ctxt->input->cur)
1814
9.80M
#define CUR (*ctxt->input->cur)
1815
16.0M
#define NXT(val) ctxt->input->cur[(val)]
1816
1.10M
#define CUR_PTR ctxt->input->cur
1817
151k
#define BASE_PTR ctxt->input->base
1818
1819
/*
 * CMPn( s, c1, ..., cn ) is true when the first n bytes at s, read as
 * unsigned char, are equal to c1 ... cn in that order. Each CMPn is built
 * on top of CMP(n-1) and adds the comparison of one more byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
1820
4.12M
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
1821
2.09M
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
1822
#define CMP5( s, c1, c2, c3, c4, c5 ) \
1823
3.86M
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
1824
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
1825
3.45M
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
1826
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
1827
3.15M
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
1828
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
1829
2.88M
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
1830
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
1831
1.35M
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
1832
1.35M
    ((unsigned char *) s)[ 8 ] == c9 )
1833
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
1834
3.29k
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
1835
3.29k
    ((unsigned char *) s)[ 9 ] == c10 )
1836
1837
796k
/*
 * SKIP(val) moves cur and col forward by val without looking at the
 * skipped bytes, then calls xmlParserGrow() if the byte now under cur
 * is 0.
 */
#define SKIP(val) do {             \
1838
796k
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
1839
796k
    if (*ctxt->input->cur == 0)           \
1840
796k
        xmlParserGrow(ctxt);           \
1841
796k
  } while (0)
1842
1843
0
/*
 * SKIPL(val) moves forward val bytes one at a time: a '\n' increments
 * line and resets col to 1, any other byte increments col. It then calls
 * xmlParserGrow() if the byte now under cur is 0.
 */
#define SKIPL(val) do {             \
1844
0
    int skipl;                \
1845
0
    for(skipl=0; skipl<val; skipl++) {         \
1846
0
  if (*(ctxt->input->cur) == '\n') {       \
1847
0
  ctxt->input->line++; ctxt->input->col = 1;      \
1848
0
  } else ctxt->input->col++;         \
1849
0
  ctxt->input->cur++;           \
1850
0
    }                 \
1851
0
    if (*ctxt->input->cur == 0)           \
1852
0
        xmlParserGrow(ctxt);           \
1853
0
  } while (0)
1854
1855
1.68M
/*
 * SHRINK calls xmlParserShrink() when ctxt->progressive is 0, cur is more
 * than 2 * INPUT_CHUNK bytes past base and fewer than 2 * INPUT_CHUNK
 * bytes remain before end. It expands to a bare if statement which
 * already carries its terminating semicolon.
 */
#define SHRINK if ((ctxt->progressive == 0) &&       \
1856
1.68M
       (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1857
1.68M
       (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
1858
1.68M
  xmlParserShrink(ctxt);
1859
1860
22.4M
/*
 * GROW calls xmlParserGrow() when ctxt->progressive is 0 and fewer than
 * INPUT_CHUNK bytes remain between cur and end. Like SHRINK it expands
 * to a bare if statement including the terminating semicolon.
 */
#define GROW if ((ctxt->progressive == 0) &&       \
1861
22.4M
     (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))  \
1862
22.4M
  xmlParserGrow(ctxt);
1863
1864
2.90M
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1865
1866
9.07M
#define NEXT xmlNextChar(ctxt)
1867
1868
766k
/*
 * NEXT1 advances col and cur by one and calls xmlParserGrow() if the byte
 * now under cur is 0. It expands to a plain brace block, not to a
 * do { } while (0) statement.
 */
#define NEXT1 {               \
1869
766k
  ctxt->input->col++;           \
1870
766k
  ctxt->input->cur++;           \
1871
766k
  if (*ctxt->input->cur == 0)         \
1872
766k
      xmlParserGrow(ctxt);           \
1873
766k
    }
1874
1875
42.4M
/*
 * NEXTL(l) updates line and col from the byte under cur ('\n' starts a
 * new line, anything else counts as a single column) and then advances
 * cur by l bytes.
 */
#define NEXTL(l) do {             \
1876
42.4M
    if (*(ctxt->input->cur) == '\n') {         \
1877
385k
  ctxt->input->line++; ctxt->input->col = 1;      \
1878
42.0M
    } else ctxt->input->col++;           \
1879
42.4M
    ctxt->input->cur += l;        \
1880
42.4M
  } while (0)
1881
1882
43.0M
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1883
42.7M
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1884
1885
/*
 * COPY_BUF(l,b,i,v) stores v at b[i] and increments i when l is 1,
 * otherwise it calls xmlCopyCharMultiByte(&b[i], v) and adds the result
 * to i. It expands to an unbraced if / else statement.
 */
#define COPY_BUF(l,b,i,v)           \
1886
74.8M
    if (l == 1) b[i++] = v;           \
1887
74.8M
    else i += xmlCopyCharMultiByte(&b[i],v)
1888
1889
/**
1890
 * xmlSkipBlankChars:
1891
 * @ctxt:  the XML parser context
1892
 *
1893
 * DEPRECATED: Internal function, do not use.
1894
 *
1895
 * skip all blanks character found at that point in the input streams.
1896
 * It pops up finished entities in the process if allowable at that point.
1897
 *
1898
 * Returns the number of space chars skipped
1899
 */
1900
1901
int
1902
2.90M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1903
2.90M
    int res = 0;
1904
1905
    /*
1906
     * It's Okay to use CUR/NEXT here since all the blanks are on
1907
     * the ASCII range.
1908
     */
1909
2.90M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
1910
2.90M
        (ctxt->instate == XML_PARSER_START)) {
1911
2.19M
  const xmlChar *cur;
1912
  /*
1913
   * if we are in the document content, go really fast
1914
   */
1915
2.19M
  cur = ctxt->input->cur;
1916
2.19M
  while (IS_BLANK_CH(*cur)) {
1917
907k
      if (*cur == '\n') {
1918
139k
    ctxt->input->line++; ctxt->input->col = 1;
1919
767k
      } else {
1920
767k
    ctxt->input->col++;
1921
767k
      }
1922
907k
      cur++;
1923
907k
      if (res < INT_MAX)
1924
907k
    res++;
1925
907k
      if (*cur == 0) {
1926
9.95k
    ctxt->input->cur = cur;
1927
9.95k
    xmlParserGrow(ctxt);
1928
9.95k
    cur = ctxt->input->cur;
1929
9.95k
      }
1930
907k
  }
1931
2.19M
  ctxt->input->cur = cur;
1932
2.19M
    } else {
1933
711k
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
1934
1935
1.30M
  while (ctxt->instate != XML_PARSER_EOF) {
1936
1.30M
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
1937
506k
    NEXT;
1938
793k
      } else if (CUR == '%') {
1939
                /*
1940
                 * Need to handle support of entities branching here
1941
                 */
1942
110k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
1943
70.0k
                    break;
1944
40.5k
          xmlParsePEReference(ctxt);
1945
682k
            } else if (CUR == 0) {
1946
44.3k
                unsigned long consumed;
1947
44.3k
                xmlEntityPtr ent;
1948
1949
44.3k
                if (ctxt->inputNr <= 1)
1950
2.36k
                    break;
1951
1952
41.9k
                consumed = ctxt->input->consumed;
1953
41.9k
                xmlSaturatedAddSizeT(&consumed,
1954
41.9k
                                     ctxt->input->cur - ctxt->input->base);
1955
1956
                /*
1957
                 * Add to sizeentities when parsing an external entity
1958
                 * for the first time.
1959
                 */
1960
41.9k
                ent = ctxt->input->entity;
1961
41.9k
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
1962
41.9k
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
1963
1.53k
                    ent->flags |= XML_ENT_PARSED;
1964
1965
1.53k
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
1966
1.53k
                }
1967
1968
41.9k
                xmlParserEntityCheck(ctxt, consumed);
1969
1970
41.9k
                xmlPopInput(ctxt);
1971
638k
            } else {
1972
638k
                break;
1973
638k
            }
1974
1975
            /*
1976
             * Also increase the counter when entering or exiting a PERef.
1977
             * The spec says: "When a parameter-entity reference is recognized
1978
             * in the DTD and included, its replacement text MUST be enlarged
1979
             * by the attachment of one leading and one following space (#x20)
1980
             * character."
1981
             */
1982
589k
      if (res < INT_MAX)
1983
589k
    res++;
1984
589k
        }
1985
711k
    }
1986
2.90M
    return(res);
1987
2.90M
}
1988
1989
/************************************************************************
1990
 *                  *
1991
 *    Commodity functions to handle entities      *
1992
 *                  *
1993
 ************************************************************************/
1994
1995
/**
1996
 * xmlPopInput:
1997
 * @ctxt:  an XML parser context
1998
 *
1999
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2000
 *          pop it and return the next char.
2001
 *
2002
 * Returns the current xmlChar in the parser context
2003
 */
2004
xmlChar
2005
42.0k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2006
42.0k
    xmlParserInputPtr input;
2007
2008
42.0k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2009
42.0k
    if (xmlParserDebugEntities)
2010
0
  xmlGenericError(xmlGenericErrorContext,
2011
0
    "Popping input %d\n", ctxt->inputNr);
2012
42.0k
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2013
42.0k
        (ctxt->instate != XML_PARSER_EOF))
2014
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2015
0
                    "Unfinished entity outside the DTD");
2016
42.0k
    input = inputPop(ctxt);
2017
42.0k
    if (input->entity != NULL)
2018
42.0k
        input->entity->flags &= ~XML_ENT_EXPANDING;
2019
42.0k
    xmlFreeInputStream(input);
2020
42.0k
    if (*ctxt->input->cur == 0)
2021
227
        xmlParserGrow(ctxt);
2022
42.0k
    return(CUR);
2023
42.0k
}
2024
2025
/**
2026
 * xmlPushInput:
2027
 * @ctxt:  an XML parser context
2028
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2029
 *
2030
 * xmlPushInput: switch to a new input stream which is stacked on top
2031
 *               of the previous one(s).
2032
 * Returns -1 in case of error or the index in the input stack
2033
 */
2034
int
2035
43.3k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2036
43.3k
    int ret;
2037
43.3k
    if (input == NULL) return(-1);
2038
2039
42.3k
    if (xmlParserDebugEntities) {
2040
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2041
0
      xmlGenericError(xmlGenericErrorContext,
2042
0
        "%s(%d): ", ctxt->input->filename,
2043
0
        ctxt->input->line);
2044
0
  xmlGenericError(xmlGenericErrorContext,
2045
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2046
0
    }
2047
42.3k
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2048
42.3k
        (ctxt->inputNr > 100)) {
2049
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2050
0
        while (ctxt->inputNr > 1)
2051
0
            xmlFreeInputStream(inputPop(ctxt));
2052
0
  return(-1);
2053
0
    }
2054
42.3k
    ret = inputPush(ctxt, input);
2055
42.3k
    if (ctxt->instate == XML_PARSER_EOF)
2056
0
        return(-1);
2057
42.3k
    GROW;
2058
42.3k
    return(ret);
2059
42.3k
}
2060
2061
/**
2062
 * xmlParseCharRef:
2063
 * @ctxt:  an XML parser context
2064
 *
2065
 * DEPRECATED: Internal function, don't use.
2066
 *
2067
 * Parse a numeric character reference. Always consumes '&'.
2068
 *
2069
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2070
 *                  '&#x' [0-9a-fA-F]+ ';'
2071
 *
2072
 * [ WFC: Legal Character ]
2073
 * Characters referred to using character references must match the
2074
 * production for Char.
2075
 *
2076
 * Returns the value parsed (as an int), 0 in case of error
2077
 */
2078
int
2079
67.9k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2080
67.9k
    int val = 0;
2081
67.9k
    int count = 0;
2082
2083
    /*
2084
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2085
     */
2086
67.9k
    if ((RAW == '&') && (NXT(1) == '#') &&
2087
67.9k
        (NXT(2) == 'x')) {
2088
47.0k
  SKIP(3);
2089
47.0k
  GROW;
2090
211k
  while (RAW != ';') { /* loop blocked by count */
2091
166k
      if (count++ > 20) {
2092
2.78k
    count = 0;
2093
2.78k
    GROW;
2094
2.78k
                if (ctxt->instate == XML_PARSER_EOF)
2095
3
                    return(0);
2096
2.78k
      }
2097
166k
      if ((RAW >= '0') && (RAW <= '9'))
2098
142k
          val = val * 16 + (CUR - '0');
2099
24.8k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2100
10.8k
          val = val * 16 + (CUR - 'a') + 10;
2101
14.0k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2102
11.6k
          val = val * 16 + (CUR - 'A') + 10;
2103
2.32k
      else {
2104
2.32k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2105
2.32k
    val = 0;
2106
2.32k
    break;
2107
2.32k
      }
2108
164k
      if (val > 0x110000)
2109
32.2k
          val = 0x110000;
2110
2111
164k
      NEXT;
2112
164k
      count++;
2113
164k
  }
2114
47.0k
  if (RAW == ';') {
2115
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2116
44.7k
      ctxt->input->col++;
2117
44.7k
      ctxt->input->cur++;
2118
44.7k
  }
2119
47.0k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2120
20.9k
  SKIP(2);
2121
20.9k
  GROW;
2122
72.9k
  while (RAW != ';') { /* loop blocked by count */
2123
56.4k
      if (count++ > 20) {
2124
666
    count = 0;
2125
666
    GROW;
2126
666
                if (ctxt->instate == XML_PARSER_EOF)
2127
3
                    return(0);
2128
666
      }
2129
56.4k
      if ((RAW >= '0') && (RAW <= '9'))
2130
51.9k
          val = val * 10 + (CUR - '0');
2131
4.47k
      else {
2132
4.47k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2133
4.47k
    val = 0;
2134
4.47k
    break;
2135
4.47k
      }
2136
51.9k
      if (val > 0x110000)
2137
4.82k
          val = 0x110000;
2138
2139
51.9k
      NEXT;
2140
51.9k
      count++;
2141
51.9k
  }
2142
20.9k
  if (RAW == ';') {
2143
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2144
16.4k
      ctxt->input->col++;
2145
16.4k
      ctxt->input->cur++;
2146
16.4k
  }
2147
20.9k
    } else {
2148
0
        if (RAW == '&')
2149
0
            SKIP(1);
2150
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2151
0
    }
2152
2153
    /*
2154
     * [ WFC: Legal Character ]
2155
     * Characters referred to using character references must match the
2156
     * production for Char.
2157
     */
2158
67.9k
    if (val >= 0x110000) {
2159
785
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2160
785
                "xmlParseCharRef: character reference out of bounds\n",
2161
785
          val);
2162
67.2k
    } else if (IS_CHAR(val)) {
2163
59.2k
        return(val);
2164
59.2k
    } else {
2165
7.95k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2166
7.95k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2167
7.95k
                    val);
2168
7.95k
    }
2169
8.73k
    return(0);
2170
67.9k
}
2171
2172
/**
2173
 * xmlParseStringCharRef:
2174
 * @ctxt:  an XML parser context
2175
 * @str:  a pointer to an index in the string
2176
 *
2177
 * parse Reference declarations, variant parsing from a string rather
2178
 * than an an input flow.
2179
 *
2180
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2181
 *                  '&#x' [0-9a-fA-F]+ ';'
2182
 *
2183
 * [ WFC: Legal Character ]
2184
 * Characters referred to using character references must match the
2185
 * production for Char.
2186
 *
2187
 * Returns the value parsed (as an int), 0 in case of error, str will be
2188
 *         updated to the current value of the index
2189
 */
2190
static int
2191
7.72k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2192
7.72k
    const xmlChar *ptr;
2193
7.72k
    xmlChar cur;
2194
7.72k
    int val = 0;
2195
2196
7.72k
    if ((str == NULL) || (*str == NULL)) return(0);
2197
7.72k
    ptr = *str;
2198
7.72k
    cur = *ptr;
2199
7.72k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2200
5.50k
  ptr += 3;
2201
5.50k
  cur = *ptr;
2202
17.8k
  while (cur != ';') { /* Non input consuming loop */
2203
12.9k
      if ((cur >= '0') && (cur <= '9'))
2204
10.0k
          val = val * 16 + (cur - '0');
2205
2.91k
      else if ((cur >= 'a') && (cur <= 'f'))
2206
957
          val = val * 16 + (cur - 'a') + 10;
2207
1.95k
      else if ((cur >= 'A') && (cur <= 'F'))
2208
1.40k
          val = val * 16 + (cur - 'A') + 10;
2209
555
      else {
2210
555
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2211
555
    val = 0;
2212
555
    break;
2213
555
      }
2214
12.3k
      if (val > 0x110000)
2215
1.06k
          val = 0x110000;
2216
2217
12.3k
      ptr++;
2218
12.3k
      cur = *ptr;
2219
12.3k
  }
2220
5.50k
  if (cur == ';')
2221
4.95k
      ptr++;
2222
5.50k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2223
2.22k
  ptr += 2;
2224
2.22k
  cur = *ptr;
2225
13.9k
  while (cur != ';') { /* Non input consuming loops */
2226
12.6k
      if ((cur >= '0') && (cur <= '9'))
2227
11.7k
          val = val * 10 + (cur - '0');
2228
929
      else {
2229
929
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2230
929
    val = 0;
2231
929
    break;
2232
929
      }
2233
11.7k
      if (val > 0x110000)
2234
711
          val = 0x110000;
2235
2236
11.7k
      ptr++;
2237
11.7k
      cur = *ptr;
2238
11.7k
  }
2239
2.22k
  if (cur == ';')
2240
1.29k
      ptr++;
2241
2.22k
    } else {
2242
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2243
0
  return(0);
2244
0
    }
2245
7.72k
    *str = ptr;
2246
2247
    /*
2248
     * [ WFC: Legal Character ]
2249
     * Characters referred to using character references must match the
2250
     * production for Char.
2251
     */
2252
7.72k
    if (val >= 0x110000) {
2253
223
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2254
223
                "xmlParseStringCharRef: character reference out of bounds\n",
2255
223
                val);
2256
7.50k
    } else if (IS_CHAR(val)) {
2257
5.42k
        return(val);
2258
5.42k
    } else {
2259
2.08k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2260
2.08k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2261
2.08k
        val);
2262
2.08k
    }
2263
2.30k
    return(0);
2264
7.72k
}
2265
2266
/**
2267
 * xmlParserHandlePEReference:
2268
 * @ctxt:  the parser context
2269
 *
2270
 * DEPRECATED: Internal function, do not use.
2271
 *
2272
 * [69] PEReference ::= '%' Name ';'
2273
 *
2274
 * [ WFC: No Recursion ]
2275
 * A parsed entity must not contain a recursive
2276
 * reference to itself, either directly or indirectly.
2277
 *
2278
 * [ WFC: Entity Declared ]
2279
 * In a document without any DTD, a document with only an internal DTD
2280
 * subset which contains no parameter entity references, or a document
2281
 * with "standalone='yes'", ...  ... The declaration of a parameter
2282
 * entity must precede any reference to it...
2283
 *
2284
 * [ VC: Entity Declared ]
2285
 * In a document with an external subset or external parameter entities
2286
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2287
 * must precede any reference to it...
2288
 *
2289
 * [ WFC: In DTD ]
2290
 * Parameter-entity references may only appear in the DTD.
2291
 * NOTE: misleading but this is handled.
2292
 *
2293
 * A PEReference may have been detected in the current input stream
2294
 * the handling is done accordingly to
2295
 *      http://www.w3.org/TR/REC-xml#entproc
2296
 * i.e.
2297
 *   - Included in literal in entity values
2298
 *   - Included as Parameter Entity reference within DTDs
2299
 */
2300
void
2301
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2302
0
    switch(ctxt->instate) {
2303
0
  case XML_PARSER_CDATA_SECTION:
2304
0
      return;
2305
0
        case XML_PARSER_COMMENT:
2306
0
      return;
2307
0
  case XML_PARSER_START_TAG:
2308
0
      return;
2309
0
  case XML_PARSER_END_TAG:
2310
0
      return;
2311
0
        case XML_PARSER_EOF:
2312
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2313
0
      return;
2314
0
        case XML_PARSER_PROLOG:
2315
0
  case XML_PARSER_START:
2316
0
  case XML_PARSER_MISC:
2317
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2318
0
      return;
2319
0
  case XML_PARSER_ENTITY_DECL:
2320
0
        case XML_PARSER_CONTENT:
2321
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2322
0
        case XML_PARSER_PI:
2323
0
  case XML_PARSER_SYSTEM_LITERAL:
2324
0
  case XML_PARSER_PUBLIC_LITERAL:
2325
      /* we just ignore it there */
2326
0
      return;
2327
0
        case XML_PARSER_EPILOG:
2328
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2329
0
      return;
2330
0
  case XML_PARSER_ENTITY_VALUE:
2331
      /*
2332
       * NOTE: in the case of entity values, we don't do the
2333
       *       substitution here since we need the literal
2334
       *       entity value to be able to save the internal
2335
       *       subset of the document.
2336
       *       This will be handled by xmlStringDecodeEntities
2337
       */
2338
0
      return;
2339
0
        case XML_PARSER_DTD:
2340
      /*
2341
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2342
       * In the internal DTD subset, parameter-entity references
2343
       * can occur only where markup declarations can occur, not
2344
       * within markup declarations.
2345
       * In that case this is handled in xmlParseMarkupDecl
2346
       */
2347
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2348
0
    return;
2349
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2350
0
    return;
2351
0
            break;
2352
0
        case XML_PARSER_IGNORE:
2353
0
            return;
2354
0
    }
2355
2356
0
    xmlParsePEReference(ctxt);
2357
0
}
2358
2359
/*
 * growBuffer(buffer, n): enlarge a heap buffer to twice its size plus n.
 *
 * Requirements at the point of use:
 *  - a variable named <buffer>_size, expected to be a size_t, holds the
 *    current size and is updated on success;
 *  - a label "mem_error" exists; it is jumped to when the new size wraps
 *    around or when xmlRealloc() fails. In both cases buffer and
 *    <buffer>_size are left untouched.
 */
#define growBuffer(buffer, n) {                                         \
    size_t grown_size = buffer##_size * 2 + n;                          \
    xmlChar *grown;                                                     \
    if (grown_size < buffer##_size) goto mem_error;                     \
    grown = (xmlChar *) xmlRealloc(buffer, grown_size);                 \
    if (grown == NULL) goto mem_error;                                  \
    buffer = grown;                                                     \
    buffer##_size = grown_size;                                         \
}
2373
2374
/**
2375
 * xmlStringDecodeEntitiesInt:
2376
 * @ctxt:  the parser context
2377
 * @str:  the input string
2378
 * @len: the string length
2379
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2380
 * @end:  an end marker xmlChar, 0 if none
2381
 * @end2:  an end marker xmlChar, 0 if none
2382
 * @end3:  an end marker xmlChar, 0 if none
2383
 * @check:  whether to perform entity checks
2384
 */
2385
static xmlChar *
2386
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2387
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2388
114k
                           int check) {
2389
114k
    xmlChar *buffer = NULL;
2390
114k
    size_t buffer_size = 0;
2391
114k
    size_t nbchars = 0;
2392
2393
114k
    xmlChar *current = NULL;
2394
114k
    xmlChar *rep = NULL;
2395
114k
    const xmlChar *last;
2396
114k
    xmlEntityPtr ent;
2397
114k
    int c,l;
2398
2399
114k
    if (str == NULL)
2400
1.73k
        return(NULL);
2401
112k
    last = str + len;
2402
2403
112k
    if (((ctxt->depth > 40) &&
2404
112k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2405
112k
  (ctxt->depth > 100)) {
2406
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2407
0
                       "Maximum entity nesting depth exceeded");
2408
0
  return(NULL);
2409
0
    }
2410
2411
    /*
2412
     * allocate a translation buffer.
2413
     */
2414
112k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2415
112k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2416
112k
    if (buffer == NULL) goto mem_error;
2417
2418
    /*
2419
     * OK loop until we reach one of the ending char or a size limit.
2420
     * we are operating on already parsed values.
2421
     */
2422
112k
    if (str < last)
2423
112k
  c = CUR_SCHAR(str, l);
2424
430
    else
2425
430
        c = 0;
2426
41.8M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2427
41.8M
           (c != end2) && (c != end3) &&
2428
41.8M
           (ctxt->instate != XML_PARSER_EOF)) {
2429
2430
41.7M
  if (c == 0) break;
2431
41.7M
        if ((c == '&') && (str[1] == '#')) {
2432
7.72k
      int val = xmlParseStringCharRef(ctxt, &str);
2433
7.72k
      if (val == 0)
2434
2.30k
                goto int_error;
2435
5.42k
      COPY_BUF(0,buffer,nbchars,val);
2436
5.42k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2437
138
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2438
138
      }
2439
41.7M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2440
161k
      if (xmlParserDebugEntities)
2441
0
    xmlGenericError(xmlGenericErrorContext,
2442
0
      "String decoding Entity Reference: %.30s\n",
2443
0
      str);
2444
161k
      ent = xmlParseStringEntityRef(ctxt, &str);
2445
161k
      if ((ent != NULL) &&
2446
161k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2447
3.44k
    if (ent->content != NULL) {
2448
3.44k
        COPY_BUF(0,buffer,nbchars,ent->content[0]);
2449
3.44k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2450
482
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2451
482
        }
2452
3.44k
    } else {
2453
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2454
0
          "predefined entity has no content\n");
2455
0
                    goto int_error;
2456
0
    }
2457
158k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2458
90.7k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2459
13
                    goto int_error;
2460
2461
90.7k
                if (ent->flags & XML_ENT_EXPANDING) {
2462
10
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2463
10
                    xmlHaltParser(ctxt);
2464
10
                    ent->content[0] = 0;
2465
10
                    goto int_error;
2466
10
                }
2467
2468
90.7k
                ent->flags |= XML_ENT_EXPANDING;
2469
90.7k
    ctxt->depth++;
2470
90.7k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2471
90.7k
                        ent->length, what, 0, 0, 0, check);
2472
90.7k
    ctxt->depth--;
2473
90.7k
                ent->flags &= ~XML_ENT_EXPANDING;
2474
2475
90.7k
    if (rep == NULL) {
2476
24
                    ent->content[0] = 0;
2477
24
                    goto int_error;
2478
24
                }
2479
2480
90.7k
                current = rep;
2481
61.6M
                while (*current != 0) { /* non input consuming loop */
2482
61.5M
                    buffer[nbchars++] = *current++;
2483
61.5M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2484
107k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2485
107k
                    }
2486
61.5M
                }
2487
90.7k
                xmlFree(rep);
2488
90.7k
                rep = NULL;
2489
90.7k
      } else if (ent != NULL) {
2490
1.12k
    int i = xmlStrlen(ent->name);
2491
1.12k
    const xmlChar *cur = ent->name;
2492
2493
1.12k
    buffer[nbchars++] = '&';
2494
1.12k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2495
484
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2496
484
    }
2497
2.59k
    for (;i > 0;i--)
2498
1.46k
        buffer[nbchars++] = *cur++;
2499
1.12k
    buffer[nbchars++] = ';';
2500
1.12k
      }
2501
41.5M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2502
7.24k
      if (xmlParserDebugEntities)
2503
0
    xmlGenericError(xmlGenericErrorContext,
2504
0
      "String decoding PE Reference: %.30s\n", str);
2505
7.24k
      ent = xmlParseStringPEReference(ctxt, &str);
2506
7.24k
      if (ent != NULL) {
2507
3.54k
                if (ent->content == NULL) {
2508
        /*
2509
         * Note: external parsed entities will not be loaded,
2510
         * it is not required for a non-validating parser to
2511
         * complete external PEReferences coming from the
2512
         * internal subset
2513
         */
2514
728
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2515
728
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2516
728
      (ctxt->validate != 0)) {
2517
728
      xmlLoadEntityContent(ctxt, ent);
2518
728
        } else {
2519
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2520
0
      "not validating will not read content for PE entity %s\n",
2521
0
                          ent->name, NULL);
2522
0
        }
2523
728
    }
2524
2525
3.54k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2526
2
                    goto int_error;
2527
2528
3.54k
                if (ent->flags & XML_ENT_EXPANDING) {
2529
102
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2530
102
                    xmlHaltParser(ctxt);
2531
102
                    if (ent->content != NULL)
2532
52
                        ent->content[0] = 0;
2533
102
                    goto int_error;
2534
102
                }
2535
2536
3.44k
                ent->flags |= XML_ENT_EXPANDING;
2537
3.44k
    ctxt->depth++;
2538
3.44k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2539
3.44k
                        ent->length, what, 0, 0, 0, check);
2540
3.44k
    ctxt->depth--;
2541
3.44k
                ent->flags &= ~XML_ENT_EXPANDING;
2542
2543
3.44k
    if (rep == NULL) {
2544
678
                    if (ent->content != NULL)
2545
82
                        ent->content[0] = 0;
2546
678
                    goto int_error;
2547
678
                }
2548
2.76k
                current = rep;
2549
4.01M
                while (*current != 0) { /* non input consuming loop */
2550
4.01M
                    buffer[nbchars++] = *current++;
2551
4.01M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2552
368
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2553
368
                    }
2554
4.01M
                }
2555
2.76k
                xmlFree(rep);
2556
2.76k
                rep = NULL;
2557
2.76k
      }
2558
41.5M
  } else {
2559
41.5M
      COPY_BUF(l,buffer,nbchars,c);
2560
41.5M
      str += l;
2561
41.5M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2562
180k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2563
180k
      }
2564
41.5M
  }
2565
41.7M
  if (str < last)
2566
41.6M
      c = CUR_SCHAR(str, l);
2567
109k
  else
2568
109k
      c = 0;
2569
41.7M
    }
2570
109k
    buffer[nbchars] = 0;
2571
109k
    return(buffer);
2572
2573
0
mem_error:
2574
0
    xmlErrMemory(ctxt, NULL);
2575
3.13k
int_error:
2576
3.13k
    if (rep != NULL)
2577
0
        xmlFree(rep);
2578
3.13k
    if (buffer != NULL)
2579
3.13k
        xmlFree(buffer);
2580
3.13k
    return(NULL);
2581
0
}
2582
2583
/**
2584
 * xmlStringLenDecodeEntities:
2585
 * @ctxt:  the parser context
2586
 * @str:  the input string
2587
 * @len: the string length
2588
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2589
 * @end:  an end marker xmlChar, 0 if none
2590
 * @end2:  an end marker xmlChar, 0 if none
2591
 * @end3:  an end marker xmlChar, 0 if none
2592
 *
2593
 * DEPRECATED: Internal function, don't use.
2594
 *
2595
 * Takes a entity string content and process to do the adequate substitutions.
2596
 *
2597
 * [67] Reference ::= EntityRef | CharRef
2598
 *
2599
 * [69] PEReference ::= '%' Name ';'
2600
 *
2601
 * Returns A newly allocated string with the substitution done. The caller
2602
 *      must deallocate it !
2603
 */
2604
xmlChar *
2605
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2606
                           int what, xmlChar end, xmlChar  end2,
2607
0
                           xmlChar end3) {
2608
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2609
0
        return(NULL);
2610
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
2611
0
                                      end, end2, end3, 0));
2612
0
}
2613
2614
/**
2615
 * xmlStringDecodeEntities:
2616
 * @ctxt:  the parser context
2617
 * @str:  the input string
2618
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2619
 * @end:  an end marker xmlChar, 0 if none
2620
 * @end2:  an end marker xmlChar, 0 if none
2621
 * @end3:  an end marker xmlChar, 0 if none
2622
 *
2623
 * DEPRECATED: Internal function, don't use.
2624
 *
2625
 * Takes a entity string content and process to do the adequate substitutions.
2626
 *
2627
 * [67] Reference ::= EntityRef | CharRef
2628
 *
2629
 * [69] PEReference ::= '%' Name ';'
2630
 *
2631
 * Returns A newly allocated string with the substitution done. The caller
2632
 *      must deallocate it !
2633
 */
2634
xmlChar *
2635
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2636
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2637
0
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
2638
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
2639
0
                                      end, end2, end3, 0));
2640
0
}
2641
2642
/************************************************************************
2643
 *                  *
2644
 *    Commodity functions, cleanup needed ?     *
2645
 *                  *
2646
 ************************************************************************/
2647
2648
/**
2649
 * areBlanks:
2650
 * @ctxt:  an XML parser context
2651
 * @str:  a xmlChar *
2652
 * @len:  the size of @str
2653
 * @blank_chars: we know the chars are blanks
2654
 *
2655
 * Is this a sequence of blank chars that one can ignore ?
2656
 *
2657
 * Returns 1 if ignorable 0 otherwise.
2658
 */
2659
2660
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2661
80.1k
                     int blank_chars) {
2662
80.1k
    int i, ret;
2663
80.1k
    xmlNodePtr lastChild;
2664
2665
    /*
2666
     * Don't spend time trying to differentiate them, the same callback is
2667
     * used !
2668
     */
2669
80.1k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2670
80.1k
  return(0);
2671
2672
    /*
2673
     * Check for xml:space value.
2674
     */
2675
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2676
0
        (*(ctxt->space) == -2))
2677
0
  return(0);
2678
2679
    /*
2680
     * Check that the string is made of blanks
2681
     */
2682
0
    if (blank_chars == 0) {
2683
0
  for (i = 0;i < len;i++)
2684
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2685
0
    }
2686
2687
    /*
2688
     * Look if the element is mixed content in the DTD if available
2689
     */
2690
0
    if (ctxt->node == NULL) return(0);
2691
0
    if (ctxt->myDoc != NULL) {
2692
0
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2693
0
        if (ret == 0) return(1);
2694
0
        if (ret == 1) return(0);
2695
0
    }
2696
2697
    /*
2698
     * Otherwise, heuristic :-\
2699
     */
2700
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2701
0
    if ((ctxt->node->children == NULL) &&
2702
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2703
2704
0
    lastChild = xmlGetLastChild(ctxt->node);
2705
0
    if (lastChild == NULL) {
2706
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2707
0
            (ctxt->node->content != NULL)) return(0);
2708
0
    } else if (xmlNodeIsText(lastChild))
2709
0
        return(0);
2710
0
    else if ((ctxt->node->children != NULL) &&
2711
0
             (xmlNodeIsText(ctxt->node->children)))
2712
0
        return(0);
2713
0
    return(1);
2714
0
}
2715
2716
/************************************************************************
2717
 *                  *
2718
 *    Extra stuff for namespace support     *
2719
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2720
 *                  *
2721
 ************************************************************************/
2722
2723
/**
2724
 * xmlSplitQName:
2725
 * @ctxt:  an XML parser context
2726
 * @name:  an XML parser context
2727
 * @prefix:  a xmlChar **
2728
 *
2729
 * parse an UTF8 encoded XML qualified name string
2730
 *
2731
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2732
 *
2733
 * [NS 6] Prefix ::= NCName
2734
 *
2735
 * [NS 7] LocalPart ::= NCName
2736
 *
2737
 * Returns the local part, and prefix is updated
2738
 *   to get the Prefix if any.
2739
 */
2740
2741
xmlChar *
2742
32.4k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2743
32.4k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2744
32.4k
    xmlChar *buffer = NULL;
2745
32.4k
    int len = 0;
2746
32.4k
    int max = XML_MAX_NAMELEN;
2747
32.4k
    xmlChar *ret = NULL;
2748
32.4k
    const xmlChar *cur = name;
2749
32.4k
    int c;
2750
2751
32.4k
    if (prefix == NULL) return(NULL);
2752
32.4k
    *prefix = NULL;
2753
2754
32.4k
    if (cur == NULL) return(NULL);
2755
2756
#ifndef XML_XML_NAMESPACE
2757
    /* xml: prefix is not really a namespace */
2758
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2759
        (cur[2] == 'l') && (cur[3] == ':'))
2760
  return(xmlStrdup(name));
2761
#endif
2762
2763
    /* nasty but well=formed */
2764
32.4k
    if (cur[0] == ':')
2765
1.57k
  return(xmlStrdup(name));
2766
2767
30.9k
    c = *cur++;
2768
345k
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2769
314k
  buf[len++] = c;
2770
314k
  c = *cur++;
2771
314k
    }
2772
30.9k
    if (len >= max) {
2773
  /*
2774
   * Okay someone managed to make a huge name, so he's ready to pay
2775
   * for the processing speed.
2776
   */
2777
1.08k
  max = len * 2;
2778
2779
1.08k
  buffer = (xmlChar *) xmlMallocAtomic(max);
2780
1.08k
  if (buffer == NULL) {
2781
0
      xmlErrMemory(ctxt, NULL);
2782
0
      return(NULL);
2783
0
  }
2784
1.08k
  memcpy(buffer, buf, len);
2785
47.1k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2786
46.0k
      if (len + 10 > max) {
2787
256
          xmlChar *tmp;
2788
2789
256
    max *= 2;
2790
256
    tmp = (xmlChar *) xmlRealloc(buffer, max);
2791
256
    if (tmp == NULL) {
2792
0
        xmlFree(buffer);
2793
0
        xmlErrMemory(ctxt, NULL);
2794
0
        return(NULL);
2795
0
    }
2796
256
    buffer = tmp;
2797
256
      }
2798
46.0k
      buffer[len++] = c;
2799
46.0k
      c = *cur++;
2800
46.0k
  }
2801
1.08k
  buffer[len] = 0;
2802
1.08k
    }
2803
2804
30.9k
    if ((c == ':') && (*cur == 0)) {
2805
4.43k
        if (buffer != NULL)
2806
349
      xmlFree(buffer);
2807
4.43k
  *prefix = NULL;
2808
4.43k
  return(xmlStrdup(name));
2809
4.43k
    }
2810
2811
26.4k
    if (buffer == NULL)
2812
25.7k
  ret = xmlStrndup(buf, len);
2813
738
    else {
2814
738
  ret = buffer;
2815
738
  buffer = NULL;
2816
738
  max = XML_MAX_NAMELEN;
2817
738
    }
2818
2819
2820
26.4k
    if (c == ':') {
2821
6.98k
  c = *cur;
2822
6.98k
        *prefix = ret;
2823
6.98k
  if (c == 0) {
2824
0
      return(xmlStrndup(BAD_CAST "", 0));
2825
0
  }
2826
6.98k
  len = 0;
2827
2828
  /*
2829
   * Check that the first character is proper to start
2830
   * a new name
2831
   */
2832
6.98k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
2833
6.98k
        ((c >= 0x41) && (c <= 0x5A)) ||
2834
6.98k
        (c == '_') || (c == ':'))) {
2835
2.13k
      int l;
2836
2.13k
      int first = CUR_SCHAR(cur, l);
2837
2838
2.13k
      if (!IS_LETTER(first) && (first != '_')) {
2839
20
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
2840
20
          "Name %s is not XML Namespace compliant\n",
2841
20
          name);
2842
20
      }
2843
2.13k
  }
2844
6.98k
  cur++;
2845
2846
201k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2847
194k
      buf[len++] = c;
2848
194k
      c = *cur++;
2849
194k
  }
2850
6.98k
  if (len >= max) {
2851
      /*
2852
       * Okay someone managed to make a huge name, so he's ready to pay
2853
       * for the processing speed.
2854
       */
2855
861
      max = len * 2;
2856
2857
861
      buffer = (xmlChar *) xmlMallocAtomic(max);
2858
861
      if (buffer == NULL) {
2859
0
          xmlErrMemory(ctxt, NULL);
2860
0
    return(NULL);
2861
0
      }
2862
861
      memcpy(buffer, buf, len);
2863
36.4k
      while (c != 0) { /* tested bigname2.xml */
2864
35.5k
    if (len + 10 > max) {
2865
271
        xmlChar *tmp;
2866
2867
271
        max *= 2;
2868
271
        tmp = (xmlChar *) xmlRealloc(buffer, max);
2869
271
        if (tmp == NULL) {
2870
0
      xmlErrMemory(ctxt, NULL);
2871
0
      xmlFree(buffer);
2872
0
      return(NULL);
2873
0
        }
2874
271
        buffer = tmp;
2875
271
    }
2876
35.5k
    buffer[len++] = c;
2877
35.5k
    c = *cur++;
2878
35.5k
      }
2879
861
      buffer[len] = 0;
2880
861
  }
2881
2882
6.98k
  if (buffer == NULL)
2883
6.12k
      ret = xmlStrndup(buf, len);
2884
861
  else {
2885
861
      ret = buffer;
2886
861
  }
2887
6.98k
    }
2888
2889
26.4k
    return(ret);
2890
26.4k
}
2891
2892
/************************************************************************
2893
 *                  *
2894
 *      The parser itself       *
2895
 *  Relates to http://www.w3.org/TR/REC-xml       *
2896
 *                  *
2897
 ************************************************************************/
2898
2899
/************************************************************************
2900
 *                  *
2901
 *  Routines to parse Name, NCName and NmToken      *
2902
 *                  *
2903
 ************************************************************************/
2904
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds. Each one is bumped by
 * the parsing routine it is named after.
 */
/* fast paths and their fallbacks */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
/* buffer based routines */
static unsigned long nbParseStringName = 0;
static unsigned long nbParseNmToken = 0;
#endif
2912
2913
/*
2914
 * The two following functions are related to the change of accepted
2915
 * characters for Name and NmToken in the Revision 5 of XML-1.0
2916
 * They correspond to the modified production [4] and the new production [4a]
2917
 * changes in that revision. Also note that the macros used for the
2918
 * productions Letter, Digit, CombiningChar and Extender are not needed
2919
 * anymore.
2920
 * We still keep compatibility to pre-revision5 parsing semantic if the
2921
 * new XML_PARSE_OLD10 option is given to the parser.
2922
 */
2923
static int
2924
285k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2925
285k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2926
        /*
2927
   * Use the new checks of production [4] [4a] amd [5] of the
2928
   * Update 5 of XML-1.0
2929
   */
2930
285k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2931
285k
      (((c >= 'a') && (c <= 'z')) ||
2932
285k
       ((c >= 'A') && (c <= 'Z')) ||
2933
285k
       (c == '_') || (c == ':') ||
2934
285k
       ((c >= 0xC0) && (c <= 0xD6)) ||
2935
285k
       ((c >= 0xD8) && (c <= 0xF6)) ||
2936
285k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
2937
285k
       ((c >= 0x370) && (c <= 0x37D)) ||
2938
285k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
2939
285k
       ((c >= 0x200C) && (c <= 0x200D)) ||
2940
285k
       ((c >= 0x2070) && (c <= 0x218F)) ||
2941
285k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2942
285k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
2943
285k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
2944
285k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2945
285k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
2946
239k
      return(1);
2947
285k
    } else {
2948
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
2949
0
      return(1);
2950
0
    }
2951
46.8k
    return(0);
2952
285k
}
2953
2954
static int
2955
5.82M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2956
5.82M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2957
        /*
2958
   * Use the new checks of production [4] [4a] amd [5] of the
2959
   * Update 5 of XML-1.0
2960
   */
2961
5.82M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2962
5.82M
      (((c >= 'a') && (c <= 'z')) ||
2963
5.82M
       ((c >= 'A') && (c <= 'Z')) ||
2964
5.82M
       ((c >= '0') && (c <= '9')) || /* !start */
2965
5.82M
       (c == '_') || (c == ':') ||
2966
5.82M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2967
5.82M
       ((c >= 0xC0) && (c <= 0xD6)) ||
2968
5.82M
       ((c >= 0xD8) && (c <= 0xF6)) ||
2969
5.82M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
2970
5.82M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2971
5.82M
       ((c >= 0x370) && (c <= 0x37D)) ||
2972
5.82M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
2973
5.82M
       ((c >= 0x200C) && (c <= 0x200D)) ||
2974
5.82M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2975
5.82M
       ((c >= 0x2070) && (c <= 0x218F)) ||
2976
5.82M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2977
5.82M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
2978
5.82M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
2979
5.82M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2980
5.82M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
2981
5.58M
       return(1);
2982
5.82M
    } else {
2983
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2984
0
            (c == '.') || (c == '-') ||
2985
0
      (c == '_') || (c == ':') ||
2986
0
      (IS_COMBINING(c)) ||
2987
0
      (IS_EXTENDER(c)))
2988
0
      return(1);
2989
0
    }
2990
235k
    return(0);
2991
5.82M
}
2992
2993
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
2994
                                          int *len, int *alloc, int normalize);
2995
2996
static const xmlChar *
2997
129k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2998
129k
    int len = 0, l;
2999
129k
    int c;
3000
129k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3001
0
                    XML_MAX_TEXT_LENGTH :
3002
129k
                    XML_MAX_NAME_LENGTH;
3003
3004
#ifdef DEBUG
3005
    nbParseNameComplex++;
3006
#endif
3007
3008
    /*
3009
     * Handler for more complex cases
3010
     */
3011
129k
    c = CUR_CHAR(l);
3012
129k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3013
        /*
3014
   * Use the new checks of production [4] [4a] amd [5] of the
3015
   * Update 5 of XML-1.0
3016
   */
3017
129k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3018
129k
      (!(((c >= 'a') && (c <= 'z')) ||
3019
105k
         ((c >= 'A') && (c <= 'Z')) ||
3020
105k
         (c == '_') || (c == ':') ||
3021
105k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3022
105k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3023
105k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3024
105k
         ((c >= 0x370) && (c <= 0x37D)) ||
3025
105k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3026
105k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3027
105k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3028
105k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3029
105k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3030
105k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3031
105k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3032
105k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3033
51.6k
      return(NULL);
3034
51.6k
  }
3035
77.4k
  len += l;
3036
77.4k
  NEXTL(l);
3037
77.4k
  c = CUR_CHAR(l);
3038
4.16M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3039
4.16M
         (((c >= 'a') && (c <= 'z')) ||
3040
4.15M
          ((c >= 'A') && (c <= 'Z')) ||
3041
4.15M
          ((c >= '0') && (c <= '9')) || /* !start */
3042
4.15M
          (c == '_') || (c == ':') ||
3043
4.15M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3044
4.15M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3045
4.15M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3046
4.15M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3047
4.15M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3048
4.15M
          ((c >= 0x370) && (c <= 0x37D)) ||
3049
4.15M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3050
4.15M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3051
4.15M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3052
4.15M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3053
4.15M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3054
4.15M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3055
4.15M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3056
4.15M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3057
4.15M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3058
4.15M
    )) {
3059
4.08M
            if (len <= INT_MAX - l)
3060
4.08M
          len += l;
3061
4.08M
      NEXTL(l);
3062
4.08M
      c = CUR_CHAR(l);
3063
4.08M
  }
3064
77.4k
    } else {
3065
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3066
0
      (!IS_LETTER(c) && (c != '_') &&
3067
0
       (c != ':'))) {
3068
0
      return(NULL);
3069
0
  }
3070
0
  len += l;
3071
0
  NEXTL(l);
3072
0
  c = CUR_CHAR(l);
3073
3074
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3075
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3076
0
    (c == '.') || (c == '-') ||
3077
0
    (c == '_') || (c == ':') ||
3078
0
    (IS_COMBINING(c)) ||
3079
0
    (IS_EXTENDER(c)))) {
3080
0
            if (len <= INT_MAX - l)
3081
0
          len += l;
3082
0
      NEXTL(l);
3083
0
      c = CUR_CHAR(l);
3084
0
  }
3085
0
    }
3086
77.4k
    if (ctxt->instate == XML_PARSER_EOF)
3087
24
        return(NULL);
3088
77.4k
    if (len > maxLength) {
3089
85
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3090
85
        return(NULL);
3091
85
    }
3092
77.3k
    if (ctxt->input->cur - ctxt->input->base < len) {
3093
        /*
3094
         * There were a couple of bugs where PERefs lead to to a change
3095
         * of the buffer. Check the buffer size to avoid passing an invalid
3096
         * pointer to xmlDictLookup.
3097
         */
3098
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3099
0
                    "unexpected change of input buffer");
3100
0
        return (NULL);
3101
0
    }
3102
77.3k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3103
230
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3104
77.0k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3105
77.3k
}
3106
3107
/**
3108
 * xmlParseName:
3109
 * @ctxt:  an XML parser context
3110
 *
3111
 * DEPRECATED: Internal function, don't use.
3112
 *
3113
 * parse an XML name.
3114
 *
3115
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3116
 *                  CombiningChar | Extender
3117
 *
3118
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3119
 *
3120
 * [6] Names ::= Name (#x20 Name)*
3121
 *
3122
 * Returns the Name parsed or NULL
3123
 */
3124
3125
const xmlChar *
3126
399k
xmlParseName(xmlParserCtxtPtr ctxt) {
3127
399k
    const xmlChar *in;
3128
399k
    const xmlChar *ret;
3129
399k
    size_t count = 0;
3130
399k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3131
0
                       XML_MAX_TEXT_LENGTH :
3132
399k
                       XML_MAX_NAME_LENGTH;
3133
3134
399k
    GROW;
3135
399k
    if (ctxt->instate == XML_PARSER_EOF)
3136
7
        return(NULL);
3137
3138
#ifdef DEBUG
3139
    nbParseName++;
3140
#endif
3141
3142
    /*
3143
     * Accelerator for simple ASCII names
3144
     */
3145
399k
    in = ctxt->input->cur;
3146
399k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3147
399k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3148
399k
  (*in == '_') || (*in == ':')) {
3149
298k
  in++;
3150
1.25M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3151
1.25M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3152
1.25M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3153
1.25M
         (*in == '_') || (*in == '-') ||
3154
1.25M
         (*in == ':') || (*in == '.'))
3155
956k
      in++;
3156
298k
  if ((*in > 0) && (*in < 0x80)) {
3157
270k
      count = in - ctxt->input->cur;
3158
270k
            if (count > maxLength) {
3159
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3160
0
                return(NULL);
3161
0
            }
3162
270k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3163
270k
      ctxt->input->cur = in;
3164
270k
      ctxt->input->col += count;
3165
270k
      if (ret == NULL)
3166
0
          xmlErrMemory(ctxt, NULL);
3167
270k
      return(ret);
3168
270k
  }
3169
298k
    }
3170
    /* accelerator for special cases */
3171
129k
    return(xmlParseNameComplex(ctxt));
3172
399k
}
3173
3174
static const xmlChar *
3175
111k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3176
111k
    int len = 0, l;
3177
111k
    int c;
3178
111k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3179
0
                    XML_MAX_TEXT_LENGTH :
3180
111k
                    XML_MAX_NAME_LENGTH;
3181
111k
    size_t startPosition = 0;
3182
3183
#ifdef DEBUG
3184
    nbParseNCNameComplex++;
3185
#endif
3186
3187
    /*
3188
     * Handler for more complex cases
3189
     */
3190
111k
    startPosition = CUR_PTR - BASE_PTR;
3191
111k
    c = CUR_CHAR(l);
3192
111k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3193
111k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3194
72.4k
  return(NULL);
3195
72.4k
    }
3196
3197
3.17M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3198
3.17M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3199
3.14M
        if (len <= INT_MAX - l)
3200
3.14M
      len += l;
3201
3.14M
  NEXTL(l);
3202
3.14M
  c = CUR_CHAR(l);
3203
3.14M
    }
3204
39.3k
    if (ctxt->instate == XML_PARSER_EOF)
3205
24
        return(NULL);
3206
39.2k
    if (len > maxLength) {
3207
31
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3208
31
        return(NULL);
3209
31
    }
3210
39.2k
    return(xmlDictLookup(ctxt->dict, (BASE_PTR + startPosition), len));
3211
39.2k
}
3212
3213
/**
3214
 * xmlParseNCName:
3215
 * @ctxt:  an XML parser context
3216
 * @len:  length of the string parsed
3217
 *
3218
 * parse an XML name.
3219
 *
3220
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3221
 *                      CombiningChar | Extender
3222
 *
3223
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3224
 *
3225
 * Returns the Name parsed or NULL
3226
 */
3227
3228
static const xmlChar *
3229
1.54M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3230
1.54M
    const xmlChar *in, *e;
3231
1.54M
    const xmlChar *ret;
3232
1.54M
    size_t count = 0;
3233
1.54M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3234
0
                       XML_MAX_TEXT_LENGTH :
3235
1.54M
                       XML_MAX_NAME_LENGTH;
3236
3237
#ifdef DEBUG
3238
    nbParseNCName++;
3239
#endif
3240
3241
    /*
3242
     * Accelerator for simple ASCII names
3243
     */
3244
1.54M
    in = ctxt->input->cur;
3245
1.54M
    e = ctxt->input->end;
3246
1.54M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3247
1.54M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3248
1.54M
   (*in == '_')) && (in < e)) {
3249
1.44M
  in++;
3250
7.47M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3251
7.47M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3252
7.47M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3253
7.47M
          (*in == '_') || (*in == '-') ||
3254
7.47M
          (*in == '.')) && (in < e))
3255
6.02M
      in++;
3256
1.44M
  if (in >= e)
3257
482
      goto complex;
3258
1.44M
  if ((*in > 0) && (*in < 0x80)) {
3259
1.43M
      count = in - ctxt->input->cur;
3260
1.43M
            if (count > maxLength) {
3261
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3262
0
                return(NULL);
3263
0
            }
3264
1.43M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3265
1.43M
      ctxt->input->cur = in;
3266
1.43M
      ctxt->input->col += count;
3267
1.43M
      if (ret == NULL) {
3268
0
          xmlErrMemory(ctxt, NULL);
3269
0
      }
3270
1.43M
      return(ret);
3271
1.43M
  }
3272
1.44M
    }
3273
111k
complex:
3274
111k
    return(xmlParseNCNameComplex(ctxt));
3275
1.54M
}
3276
3277
/**
3278
 * xmlParseNameAndCompare:
3279
 * @ctxt:  an XML parser context
3280
 *
3281
 * parse an XML name and compares for match
3282
 * (specialized for endtag parsing)
3283
 *
3284
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3285
 * and the name for mismatch
3286
 */
3287
3288
static const xmlChar *
3289
24.9k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3290
24.9k
    register const xmlChar *cmp = other;
3291
24.9k
    register const xmlChar *in;
3292
24.9k
    const xmlChar *ret;
3293
3294
24.9k
    GROW;
3295
24.9k
    if (ctxt->instate == XML_PARSER_EOF)
3296
3
        return(NULL);
3297
3298
24.9k
    in = ctxt->input->cur;
3299
193k
    while (*in != 0 && *in == *cmp) {
3300
168k
  ++in;
3301
168k
  ++cmp;
3302
168k
    }
3303
24.9k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3304
  /* success */
3305
18.9k
  ctxt->input->col += in - ctxt->input->cur;
3306
18.9k
  ctxt->input->cur = in;
3307
18.9k
  return (const xmlChar*) 1;
3308
18.9k
    }
3309
    /* failure (or end of input buffer), check with full function */
3310
6.03k
    ret = xmlParseName (ctxt);
3311
    /* strings coming from the dictionary direct compare possible */
3312
6.03k
    if (ret == other) {
3313
2.37k
  return (const xmlChar*) 1;
3314
2.37k
    }
3315
3.66k
    return ret;
3316
6.03k
}
3317
3318
/**
3319
 * xmlParseStringName:
3320
 * @ctxt:  an XML parser context
3321
 * @str:  a pointer to the string pointer (IN/OUT)
3322
 *
3323
 * parse an XML name.
3324
 *
3325
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3326
 *                  CombiningChar | Extender
3327
 *
3328
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3329
 *
3330
 * [6] Names ::= Name (#x20 Name)*
3331
 *
3332
 * Returns the Name parsed or NULL. The @str pointer
3333
 * is updated to the current location in the string.
3334
 */
3335
3336
static xmlChar *
3337
179k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3338
179k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3339
179k
    const xmlChar *cur = *str;
3340
179k
    int len = 0, l;
3341
179k
    int c;
3342
179k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3343
0
                    XML_MAX_TEXT_LENGTH :
3344
179k
                    XML_MAX_NAME_LENGTH;
3345
3346
#ifdef DEBUG
3347
    nbParseStringName++;
3348
#endif
3349
3350
179k
    c = CUR_SCHAR(cur, l);
3351
179k
    if (!xmlIsNameStartChar(ctxt, c)) {
3352
2.33k
  return(NULL);
3353
2.33k
    }
3354
3355
177k
    COPY_BUF(l,buf,len,c);
3356
177k
    cur += l;
3357
177k
    c = CUR_SCHAR(cur, l);
3358
545k
    while (xmlIsNameChar(ctxt, c)) {
3359
368k
  COPY_BUF(l,buf,len,c);
3360
368k
  cur += l;
3361
368k
  c = CUR_SCHAR(cur, l);
3362
368k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3363
      /*
3364
       * Okay someone managed to make a huge name, so he's ready to pay
3365
       * for the processing speed.
3366
       */
3367
432
      xmlChar *buffer;
3368
432
      int max = len * 2;
3369
3370
432
      buffer = (xmlChar *) xmlMallocAtomic(max);
3371
432
      if (buffer == NULL) {
3372
0
          xmlErrMemory(ctxt, NULL);
3373
0
    return(NULL);
3374
0
      }
3375
432
      memcpy(buffer, buf, len);
3376
320k
      while (xmlIsNameChar(ctxt, c)) {
3377
320k
    if (len + 10 > max) {
3378
420
        xmlChar *tmp;
3379
3380
420
        max *= 2;
3381
420
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3382
420
        if (tmp == NULL) {
3383
0
      xmlErrMemory(ctxt, NULL);
3384
0
      xmlFree(buffer);
3385
0
      return(NULL);
3386
0
        }
3387
420
        buffer = tmp;
3388
420
    }
3389
320k
    COPY_BUF(l,buffer,len,c);
3390
320k
    cur += l;
3391
320k
    c = CUR_SCHAR(cur, l);
3392
320k
                if (len > maxLength) {
3393
13
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3394
13
                    xmlFree(buffer);
3395
13
                    return(NULL);
3396
13
                }
3397
320k
      }
3398
419
      buffer[len] = 0;
3399
419
      *str = cur;
3400
419
      return(buffer);
3401
432
  }
3402
368k
    }
3403
176k
    if (len > maxLength) {
3404
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3405
0
        return(NULL);
3406
0
    }
3407
176k
    *str = cur;
3408
176k
    return(xmlStrndup(buf, len));
3409
176k
}
3410
3411
/**
3412
 * xmlParseNmtoken:
3413
 * @ctxt:  an XML parser context
3414
 *
3415
 * DEPRECATED: Internal function, don't use.
3416
 *
3417
 * parse an XML Nmtoken.
3418
 *
3419
 * [7] Nmtoken ::= (NameChar)+
3420
 *
3421
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3422
 *
3423
 * Returns the Nmtoken parsed or NULL
3424
 */
3425
3426
xmlChar *
3427
33.1k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3428
33.1k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3429
33.1k
    int len = 0, l;
3430
33.1k
    int c;
3431
33.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3432
0
                    XML_MAX_TEXT_LENGTH :
3433
33.1k
                    XML_MAX_NAME_LENGTH;
3434
3435
#ifdef DEBUG
3436
    nbParseNmToken++;
3437
#endif
3438
3439
33.1k
    c = CUR_CHAR(l);
3440
3441
155k
    while (xmlIsNameChar(ctxt, c)) {
3442
123k
  COPY_BUF(l,buf,len,c);
3443
123k
  NEXTL(l);
3444
123k
  c = CUR_CHAR(l);
3445
123k
  if (len >= XML_MAX_NAMELEN) {
3446
      /*
3447
       * Okay someone managed to make a huge token, so he's ready to pay
3448
       * for the processing speed.
3449
       */
3450
999
      xmlChar *buffer;
3451
999
      int max = len * 2;
3452
3453
999
      buffer = (xmlChar *) xmlMallocAtomic(max);
3454
999
      if (buffer == NULL) {
3455
0
          xmlErrMemory(ctxt, NULL);
3456
0
    return(NULL);
3457
0
      }
3458
999
      memcpy(buffer, buf, len);
3459
1.63M
      while (xmlIsNameChar(ctxt, c)) {
3460
1.62M
    if (len + 10 > max) {
3461
1.80k
        xmlChar *tmp;
3462
3463
1.80k
        max *= 2;
3464
1.80k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3465
1.80k
        if (tmp == NULL) {
3466
0
      xmlErrMemory(ctxt, NULL);
3467
0
      xmlFree(buffer);
3468
0
      return(NULL);
3469
0
        }
3470
1.80k
        buffer = tmp;
3471
1.80k
    }
3472
1.62M
    COPY_BUF(l,buffer,len,c);
3473
1.62M
                if (len > maxLength) {
3474
65
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3475
65
                    xmlFree(buffer);
3476
65
                    return(NULL);
3477
65
                }
3478
1.62M
    NEXTL(l);
3479
1.62M
    c = CUR_CHAR(l);
3480
1.62M
      }
3481
934
      buffer[len] = 0;
3482
934
            if (ctxt->instate == XML_PARSER_EOF) {
3483
7
                xmlFree(buffer);
3484
7
                return(NULL);
3485
7
            }
3486
927
      return(buffer);
3487
934
  }
3488
123k
    }
3489
32.1k
    if (ctxt->instate == XML_PARSER_EOF)
3490
3
        return(NULL);
3491
32.1k
    if (len == 0)
3492
3.16k
        return(NULL);
3493
28.9k
    if (len > maxLength) {
3494
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3495
0
        return(NULL);
3496
0
    }
3497
28.9k
    return(xmlStrndup(buf, len));
3498
28.9k
}
3499
3500
/**
3501
 * xmlParseEntityValue:
3502
 * @ctxt:  an XML parser context
3503
 * @orig:  if non-NULL store a copy of the original entity value
3504
 *
3505
 * DEPRECATED: Internal function, don't use.
3506
 *
3507
 * parse a value for ENTITY declarations
3508
 *
3509
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3510
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3511
 *
3512
 * Returns the EntityValue parsed with reference substituted or NULL
3513
 */
3514
3515
xmlChar *
3516
11.3k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3517
11.3k
    xmlChar *buf = NULL;
3518
11.3k
    int len = 0;
3519
11.3k
    int size = XML_PARSER_BUFFER_SIZE;
3520
11.3k
    int c, l;
3521
11.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3522
0
                    XML_MAX_HUGE_LENGTH :
3523
11.3k
                    XML_MAX_TEXT_LENGTH;
3524
11.3k
    xmlChar stop;
3525
11.3k
    xmlChar *ret = NULL;
3526
11.3k
    const xmlChar *cur = NULL;
3527
11.3k
    xmlParserInputPtr input;
3528
3529
11.3k
    if (RAW == '"') stop = '"';
3530
2.38k
    else if (RAW == '\'') stop = '\'';
3531
0
    else {
3532
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3533
0
  return(NULL);
3534
0
    }
3535
11.3k
    buf = (xmlChar *) xmlMallocAtomic(size);
3536
11.3k
    if (buf == NULL) {
3537
0
  xmlErrMemory(ctxt, NULL);
3538
0
  return(NULL);
3539
0
    }
3540
3541
    /*
3542
     * The content of the entity definition is copied in a buffer.
3543
     */
3544
3545
11.3k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3546
11.3k
    input = ctxt->input;
3547
11.3k
    GROW;
3548
11.3k
    if (ctxt->instate == XML_PARSER_EOF)
3549
1
        goto error;
3550
11.3k
    NEXT;
3551
11.3k
    c = CUR_CHAR(l);
3552
    /*
3553
     * NOTE: 4.4.5 Included in Literal
3554
     * When a parameter entity reference appears in a literal entity
3555
     * value, ... a single or double quote character in the replacement
3556
     * text is always treated as a normal data character and will not
3557
     * terminate the literal.
3558
     * In practice it means we stop the loop only when back at parsing
3559
     * the initial entity and the quote is found
3560
     */
3561
2.35M
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3562
2.35M
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3563
2.34M
  if (len + 5 >= size) {
3564
4.74k
      xmlChar *tmp;
3565
3566
4.74k
      size *= 2;
3567
4.74k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3568
4.74k
      if (tmp == NULL) {
3569
0
    xmlErrMemory(ctxt, NULL);
3570
0
                goto error;
3571
0
      }
3572
4.74k
      buf = tmp;
3573
4.74k
  }
3574
2.34M
  COPY_BUF(l,buf,len,c);
3575
2.34M
  NEXTL(l);
3576
3577
2.34M
  GROW;
3578
2.34M
  c = CUR_CHAR(l);
3579
2.34M
  if (c == 0) {
3580
209
      GROW;
3581
209
      c = CUR_CHAR(l);
3582
209
  }
3583
3584
2.34M
        if (len > maxLength) {
3585
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3586
0
                           "entity value too long\n");
3587
0
            goto error;
3588
0
        }
3589
2.34M
    }
3590
11.3k
    buf[len] = 0;
3591
11.3k
    if (ctxt->instate == XML_PARSER_EOF)
3592
1
        goto error;
3593
11.3k
    if (c != stop) {
3594
227
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3595
227
        goto error;
3596
227
    }
3597
11.1k
    NEXT;
3598
3599
    /*
3600
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3601
     * reference constructs. Note Charref will be handled in
3602
     * xmlStringDecodeEntities()
3603
     */
3604
11.1k
    cur = buf;
3605
2.59M
    while (*cur != 0) { /* non input consuming */
3606
2.58M
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3607
10.4k
      xmlChar *name;
3608
10.4k
      xmlChar tmp = *cur;
3609
10.4k
            int nameOk = 0;
3610
3611
10.4k
      cur++;
3612
10.4k
      name = xmlParseStringName(ctxt, &cur);
3613
10.4k
            if (name != NULL) {
3614
9.84k
                nameOk = 1;
3615
9.84k
                xmlFree(name);
3616
9.84k
            }
3617
10.4k
            if ((nameOk == 0) || (*cur != ';')) {
3618
1.24k
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3619
1.24k
      "EntityValue: '%c' forbidden except for entities references\n",
3620
1.24k
                            tmp);
3621
1.24k
                goto error;
3622
1.24k
      }
3623
9.24k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
3624
9.24k
    (ctxt->inputNr == 1)) {
3625
115
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3626
115
                goto error;
3627
115
      }
3628
9.13k
      if (*cur == 0)
3629
0
          break;
3630
9.13k
  }
3631
2.58M
  cur++;
3632
2.58M
    }
3633
3634
    /*
3635
     * Then PEReference entities are substituted.
3636
     *
3637
     * NOTE: 4.4.7 Bypassed
3638
     * When a general entity reference appears in the EntityValue in
3639
     * an entity declaration, it is bypassed and left as is.
3640
     * so XML_SUBSTITUTE_REF is not set here.
3641
     */
3642
9.78k
    ++ctxt->depth;
3643
9.78k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
3644
9.78k
                                     0, 0, 0, /* check */ 1);
3645
9.78k
    --ctxt->depth;
3646
3647
9.78k
    if (orig != NULL) {
3648
9.78k
        *orig = buf;
3649
9.78k
        buf = NULL;
3650
9.78k
    }
3651
3652
11.3k
error:
3653
11.3k
    if (buf != NULL)
3654
1.58k
        xmlFree(buf);
3655
11.3k
    return(ret);
3656
9.78k
}
3657
3658
/**
3659
 * xmlParseAttValueComplex:
3660
 * @ctxt:  an XML parser context
3661
 * @attlen:   the resulting attribute len
3662
 * @normalize:  whether to apply the inner normalization
3663
 *
3664
 * parse a value for an attribute, this is the fallback function
3665
 * of xmlParseAttValue() when the attribute parsing requires handling
3666
 * of non-ASCII characters, or normalization compaction.
3667
 *
3668
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3669
 */
3670
static xmlChar *
3671
186k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
3672
186k
    xmlChar limit = 0;
3673
186k
    xmlChar *buf = NULL;
3674
186k
    xmlChar *rep = NULL;
3675
186k
    size_t len = 0;
3676
186k
    size_t buf_size = 0;
3677
186k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3678
0
                       XML_MAX_HUGE_LENGTH :
3679
186k
                       XML_MAX_TEXT_LENGTH;
3680
186k
    int c, l, in_space = 0;
3681
186k
    xmlChar *current = NULL;
3682
186k
    xmlEntityPtr ent;
3683
3684
186k
    if (NXT(0) == '"') {
3685
171k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3686
171k
  limit = '"';
3687
171k
        NEXT;
3688
171k
    } else if (NXT(0) == '\'') {
3689
14.9k
  limit = '\'';
3690
14.9k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3691
14.9k
        NEXT;
3692
14.9k
    } else {
3693
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3694
0
  return(NULL);
3695
0
    }
3696
3697
    /*
3698
     * allocate a translation buffer.
3699
     */
3700
186k
    buf_size = XML_PARSER_BUFFER_SIZE;
3701
186k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
3702
186k
    if (buf == NULL) goto mem_error;
3703
3704
    /*
3705
     * OK loop until we reach one of the ending char or a size limit.
3706
     */
3707
186k
    c = CUR_CHAR(l);
3708
12.7M
    while (((NXT(0) != limit) && /* checked */
3709
12.7M
            (IS_CHAR(c)) && (c != '<')) &&
3710
12.7M
            (ctxt->instate != XML_PARSER_EOF)) {
3711
12.5M
  if (c == '&') {
3712
79.0k
      in_space = 0;
3713
79.0k
      if (NXT(1) == '#') {
3714
59.3k
    int val = xmlParseCharRef(ctxt);
3715
3716
59.3k
    if (val == '&') {
3717
536
        if (ctxt->replaceEntities) {
3718
536
      if (len + 10 > buf_size) {
3719
448
          growBuffer(buf, 10);
3720
448
      }
3721
536
      buf[len++] = '&';
3722
536
        } else {
3723
      /*
3724
       * The reparsing will be done in xmlStringGetNodeList()
3725
       * called by the attribute() function in SAX.c
3726
       */
3727
0
      if (len + 10 > buf_size) {
3728
0
          growBuffer(buf, 10);
3729
0
      }
3730
0
      buf[len++] = '&';
3731
0
      buf[len++] = '#';
3732
0
      buf[len++] = '3';
3733
0
      buf[len++] = '8';
3734
0
      buf[len++] = ';';
3735
0
        }
3736
58.8k
    } else if (val != 0) {
3737
54.5k
        if (len + 10 > buf_size) {
3738
442
      growBuffer(buf, 10);
3739
442
        }
3740
54.5k
        len += xmlCopyChar(0, &buf[len], val);
3741
54.5k
    }
3742
59.3k
      } else {
3743
19.6k
    ent = xmlParseEntityRef(ctxt);
3744
19.6k
    if ((ent != NULL) &&
3745
19.6k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3746
1.86k
        if (len + 10 > buf_size) {
3747
398
      growBuffer(buf, 10);
3748
398
        }
3749
1.86k
        if ((ctxt->replaceEntities == 0) &&
3750
1.86k
            (ent->content[0] == '&')) {
3751
0
      buf[len++] = '&';
3752
0
      buf[len++] = '#';
3753
0
      buf[len++] = '3';
3754
0
      buf[len++] = '8';
3755
0
      buf[len++] = ';';
3756
1.86k
        } else {
3757
1.86k
      buf[len++] = ent->content[0];
3758
1.86k
        }
3759
17.7k
    } else if ((ent != NULL) &&
3760
17.7k
               (ctxt->replaceEntities != 0)) {
3761
10.6k
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3762
10.6k
                        if (xmlParserEntityCheck(ctxt, ent->length))
3763
3
                            goto error;
3764
3765
10.6k
      ++ctxt->depth;
3766
10.6k
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
3767
10.6k
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
3768
10.6k
                                /* check */ 1);
3769
10.6k
      --ctxt->depth;
3770
10.6k
      if (rep != NULL) {
3771
8.21k
          current = rep;
3772
39.9M
          while (*current != 0) { /* non input consuming */
3773
39.9M
                                if ((*current == 0xD) || (*current == 0xA) ||
3774
39.9M
                                    (*current == 0x9)) {
3775
28.8k
                                    buf[len++] = 0x20;
3776
28.8k
                                    current++;
3777
28.8k
                                } else
3778
39.9M
                                    buf[len++] = *current++;
3779
39.9M
        if (len + 10 > buf_size) {
3780
6.01k
            growBuffer(buf, 10);
3781
6.01k
        }
3782
39.9M
          }
3783
8.21k
          xmlFree(rep);
3784
8.21k
          rep = NULL;
3785
8.21k
      }
3786
10.6k
        } else {
3787
0
      if (len + 10 > buf_size) {
3788
0
          growBuffer(buf, 10);
3789
0
      }
3790
0
      if (ent->content != NULL)
3791
0
          buf[len++] = ent->content[0];
3792
0
        }
3793
10.6k
    } else if (ent != NULL) {
3794
0
        int i = xmlStrlen(ent->name);
3795
0
        const xmlChar *cur = ent->name;
3796
3797
        /*
3798
                     * We also check for recursion and amplification
3799
                     * when entities are not substituted. They're
3800
                     * often expanded later.
3801
         */
3802
0
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3803
0
      (ent->content != NULL)) {
3804
0
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
3805
0
                            unsigned long oldCopy = ctxt->sizeentcopy;
3806
3807
0
                            ctxt->sizeentcopy = ent->length;
3808
3809
0
                            ++ctxt->depth;
3810
0
                            rep = xmlStringDecodeEntitiesInt(ctxt,
3811
0
                                    ent->content, ent->length,
3812
0
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
3813
0
                                    /* check */ 1);
3814
0
                            --ctxt->depth;
3815
3816
                            /*
3817
                             * If we're parsing DTD content, the entity
3818
                             * might reference other entities which
3819
                             * weren't defined yet, so the check isn't
3820
                             * reliable.
3821
                             */
3822
0
                            if (ctxt->inSubset == 0) {
3823
0
                                ent->flags |= XML_ENT_CHECKED;
3824
0
                                ent->expandedSize = ctxt->sizeentcopy;
3825
0
                            }
3826
3827
0
                            if (rep != NULL) {
3828
0
                                xmlFree(rep);
3829
0
                                rep = NULL;
3830
0
                            } else {
3831
0
                                ent->content[0] = 0;
3832
0
                            }
3833
3834
0
                            if (xmlParserEntityCheck(ctxt, oldCopy))
3835
0
                                goto error;
3836
0
                        } else {
3837
0
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
3838
0
                                goto error;
3839
0
                        }
3840
0
        }
3841
3842
        /*
3843
         * Just output the reference
3844
         */
3845
0
        buf[len++] = '&';
3846
0
        while (len + i + 10 > buf_size) {
3847
0
      growBuffer(buf, i + 10);
3848
0
        }
3849
0
        for (;i > 0;i--)
3850
0
      buf[len++] = *cur++;
3851
0
        buf[len++] = ';';
3852
0
    }
3853
19.6k
      }
3854
12.4M
  } else {
3855
12.4M
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
3856
1.18M
          if ((len != 0) || (!normalize)) {
3857
1.18M
        if ((!normalize) || (!in_space)) {
3858
1.18M
      COPY_BUF(l,buf,len,0x20);
3859
1.18M
      while (len + 10 > buf_size) {
3860
8.10k
          growBuffer(buf, 10);
3861
8.10k
      }
3862
1.18M
        }
3863
1.18M
        in_space = 1;
3864
1.18M
    }
3865
11.3M
      } else {
3866
11.3M
          in_space = 0;
3867
11.3M
    COPY_BUF(l,buf,len,c);
3868
11.3M
    if (len + 10 > buf_size) {
3869
76.7k
        growBuffer(buf, 10);
3870
76.7k
    }
3871
11.3M
      }
3872
12.4M
      NEXTL(l);
3873
12.4M
  }
3874
12.5M
  GROW;
3875
12.5M
  c = CUR_CHAR(l);
3876
12.5M
        if (len > maxLength) {
3877
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3878
0
                           "AttValue length too long\n");
3879
0
            goto mem_error;
3880
0
        }
3881
12.5M
    }
3882
186k
    if (ctxt->instate == XML_PARSER_EOF)
3883
33
        goto error;
3884
3885
186k
    if ((in_space) && (normalize)) {
3886
6.83k
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
3887
646
    }
3888
186k
    buf[len] = 0;
3889
186k
    if (RAW == '<') {
3890
7.14k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
3891
179k
    } else if (RAW != limit) {
3892
10.7k
  if ((c != 0) && (!IS_CHAR(c))) {
3893
7.88k
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3894
7.88k
         "invalid character in attribute value\n");
3895
7.88k
  } else {
3896
2.85k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3897
2.85k
         "AttValue: ' expected\n");
3898
2.85k
        }
3899
10.7k
    } else
3900
168k
  NEXT;
3901
3902
186k
    if (attlen != NULL) *attlen = len;
3903
186k
    return(buf);
3904
3905
0
mem_error:
3906
0
    xmlErrMemory(ctxt, NULL);
3907
36
error:
3908
36
    if (buf != NULL)
3909
36
        xmlFree(buf);
3910
36
    if (rep != NULL)
3911
0
        xmlFree(rep);
3912
36
    return(NULL);
3913
0
}
3914
3915
/**
3916
 * xmlParseAttValue:
3917
 * @ctxt:  an XML parser context
3918
 *
3919
 * DEPRECATED: Internal function, don't use.
3920
 *
3921
 * parse a value for an attribute
3922
 * Note: the parser won't do substitution of entities here, this
3923
 * will be handled later in xmlStringGetNodeList
3924
 *
3925
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3926
 *                   "'" ([^<&'] | Reference)* "'"
3927
 *
3928
 * 3.3.3 Attribute-Value Normalization:
3929
 * Before the value of an attribute is passed to the application or
3930
 * checked for validity, the XML processor must normalize it as follows:
3931
 * - a character reference is processed by appending the referenced
3932
 *   character to the attribute value
3933
 * - an entity reference is processed by recursively processing the
3934
 *   replacement text of the entity
3935
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3936
 *   appending #x20 to the normalized value, except that only a single
3937
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
3938
 *   parsed entity or the literal entity value of an internal parsed entity
3939
 * - other characters are processed by appending them to the normalized value
3940
 * If the declared value is not CDATA, then the XML processor must further
3941
 * process the normalized attribute value by discarding any leading and
3942
 * trailing space (#x20) characters, and by replacing sequences of space
3943
 * (#x20) characters by a single space (#x20) character.
3944
 * All attributes for which no declaration has been read should be treated
3945
 * by a non-validating parser as if declared CDATA.
3946
 *
3947
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3948
 */
3949
3950
3951
xmlChar *
3952
45.6k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
3953
45.6k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
3954
45.6k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
3955
45.6k
}
3956
3957
/**
3958
 * xmlParseSystemLiteral:
3959
 * @ctxt:  an XML parser context
3960
 *
3961
 * DEPRECATED: Internal function, don't use.
3962
 *
3963
 * parse an XML Literal
3964
 *
3965
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3966
 *
3967
 * Returns the SystemLiteral parsed or NULL
3968
 */
3969
3970
xmlChar *
3971
9.23k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3972
9.23k
    xmlChar *buf = NULL;
3973
9.23k
    int len = 0;
3974
9.23k
    int size = XML_PARSER_BUFFER_SIZE;
3975
9.23k
    int cur, l;
3976
9.23k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3977
0
                    XML_MAX_TEXT_LENGTH :
3978
9.23k
                    XML_MAX_NAME_LENGTH;
3979
9.23k
    xmlChar stop;
3980
9.23k
    int state = ctxt->instate;
3981
3982
9.23k
    if (RAW == '"') {
3983
6.76k
        NEXT;
3984
6.76k
  stop = '"';
3985
6.76k
    } else if (RAW == '\'') {
3986
1.83k
        NEXT;
3987
1.83k
  stop = '\'';
3988
1.83k
    } else {
3989
634
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
3990
634
  return(NULL);
3991
634
    }
3992
3993
8.59k
    buf = (xmlChar *) xmlMallocAtomic(size);
3994
8.59k
    if (buf == NULL) {
3995
0
        xmlErrMemory(ctxt, NULL);
3996
0
  return(NULL);
3997
0
    }
3998
8.59k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3999
8.59k
    cur = CUR_CHAR(l);
4000
638k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4001
629k
  if (len + 5 >= size) {
4002
1.03k
      xmlChar *tmp;
4003
4004
1.03k
      size *= 2;
4005
1.03k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4006
1.03k
      if (tmp == NULL) {
4007
0
          xmlFree(buf);
4008
0
    xmlErrMemory(ctxt, NULL);
4009
0
    ctxt->instate = (xmlParserInputState) state;
4010
0
    return(NULL);
4011
0
      }
4012
1.03k
      buf = tmp;
4013
1.03k
  }
4014
629k
  COPY_BUF(l,buf,len,cur);
4015
629k
        if (len > maxLength) {
4016
21
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4017
21
            xmlFree(buf);
4018
21
            ctxt->instate = (xmlParserInputState) state;
4019
21
            return(NULL);
4020
21
        }
4021
629k
  NEXTL(l);
4022
629k
  cur = CUR_CHAR(l);
4023
629k
    }
4024
8.57k
    buf[len] = 0;
4025
8.57k
    if (ctxt->instate == XML_PARSER_EOF) {
4026
8
        xmlFree(buf);
4027
8
        return(NULL);
4028
8
    }
4029
8.56k
    ctxt->instate = (xmlParserInputState) state;
4030
8.56k
    if (!IS_CHAR(cur)) {
4031
624
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4032
7.94k
    } else {
4033
7.94k
  NEXT;
4034
7.94k
    }
4035
8.56k
    return(buf);
4036
8.57k
}
4037
4038
/**
4039
 * xmlParsePubidLiteral:
4040
 * @ctxt:  an XML parser context
4041
 *
4042
 * DEPRECATED: Internal function, don't use.
4043
 *
4044
 * parse an XML public literal
4045
 *
4046
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4047
 *
4048
 * Returns the PubidLiteral parsed or NULL.
4049
 */
4050
4051
xmlChar *
4052
6.92k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4053
6.92k
    xmlChar *buf = NULL;
4054
6.92k
    int len = 0;
4055
6.92k
    int size = XML_PARSER_BUFFER_SIZE;
4056
6.92k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4057
0
                    XML_MAX_TEXT_LENGTH :
4058
6.92k
                    XML_MAX_NAME_LENGTH;
4059
6.92k
    xmlChar cur;
4060
6.92k
    xmlChar stop;
4061
6.92k
    xmlParserInputState oldstate = ctxt->instate;
4062
4063
6.92k
    if (RAW == '"') {
4064
6.20k
        NEXT;
4065
6.20k
  stop = '"';
4066
6.20k
    } else if (RAW == '\'') {
4067
632
        NEXT;
4068
632
  stop = '\'';
4069
632
    } else {
4070
93
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4071
93
  return(NULL);
4072
93
    }
4073
6.83k
    buf = (xmlChar *) xmlMallocAtomic(size);
4074
6.83k
    if (buf == NULL) {
4075
0
  xmlErrMemory(ctxt, NULL);
4076
0
  return(NULL);
4077
0
    }
4078
6.83k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4079
6.83k
    cur = CUR;
4080
28.4k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4081
21.6k
  if (len + 1 >= size) {
4082
78
      xmlChar *tmp;
4083
4084
78
      size *= 2;
4085
78
      tmp = (xmlChar *) xmlRealloc(buf, size);
4086
78
      if (tmp == NULL) {
4087
0
    xmlErrMemory(ctxt, NULL);
4088
0
    xmlFree(buf);
4089
0
    return(NULL);
4090
0
      }
4091
78
      buf = tmp;
4092
78
  }
4093
21.6k
  buf[len++] = cur;
4094
21.6k
        if (len > maxLength) {
4095
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4096
0
            xmlFree(buf);
4097
0
            return(NULL);
4098
0
        }
4099
21.6k
  NEXT;
4100
21.6k
  cur = CUR;
4101
21.6k
    }
4102
6.83k
    buf[len] = 0;
4103
6.83k
    if (ctxt->instate == XML_PARSER_EOF) {
4104
1
        xmlFree(buf);
4105
1
        return(NULL);
4106
1
    }
4107
6.83k
    if (cur != stop) {
4108
2.99k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4109
3.84k
    } else {
4110
3.84k
  NEXTL(1);
4111
3.84k
    }
4112
6.83k
    ctxt->instate = oldstate;
4113
6.83k
    return(buf);
4114
6.83k
}
4115
4116
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4117
4118
/*
 * Lookup table used by the inner loop of the character data fast path.
 *
 * Entry i holds i itself when byte i may be skipped over by that loop
 * and 0 when the loop has to stop and look at the byte more closely.
 * The stopping bytes are: every control character except TAB (so CR and
 * LF are handled separately), '&', '<', ']' and all non-ASCII bytes.
 *
 * Only the ASCII half is spelled out, entries 0x80-0xFF are left to the
 * implicit zero initialization.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly 0x00 */
};
4155
4156
/**
4157
 * xmlParseCharDataInternal:
4158
 * @ctxt:  an XML parser context
4159
 * @partial:  buffer may contain partial UTF-8 sequences
4160
 *
4161
 * Parse character data. Always makes progress if the first char isn't
4162
 * '<' or '&'.
4163
 *
4164
 * The right angle bracket (>) may be represented using the string "&gt;",
4165
 * and must, for compatibility, be escaped using "&gt;" or a character
4166
 * reference when it appears in the string "]]>" in content, when that
4167
 * string is not marking the end of a CDATA section.
4168
 *
4169
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4170
 */
4171
static void
4172
598k
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4173
598k
    const xmlChar *in;
4174
598k
    int nbchar = 0;
4175
598k
    int line = ctxt->input->line;
4176
598k
    int col = ctxt->input->col;
4177
598k
    int ccol;
4178
4179
598k
    GROW;
4180
    /*
4181
     * Accelerated common case where input don't need to be
4182
     * modified before passing it to the handler.
4183
     */
4184
598k
    in = ctxt->input->cur;
4185
702k
    do {
4186
862k
get_more_space:
4187
1.82M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4188
862k
        if (*in == 0xA) {
4189
204k
            do {
4190
204k
                ctxt->input->line++; ctxt->input->col = 1;
4191
204k
                in++;
4192
204k
            } while (*in == 0xA);
4193
160k
            goto get_more_space;
4194
160k
        }
4195
702k
        if (*in == '<') {
4196
192k
            nbchar = in - ctxt->input->cur;
4197
192k
            if (nbchar > 0) {
4198
192k
                const xmlChar *tmp = ctxt->input->cur;
4199
192k
                ctxt->input->cur = in;
4200
4201
192k
                if ((ctxt->sax != NULL) &&
4202
192k
                    (ctxt->sax->ignorableWhitespace !=
4203
192k
                     ctxt->sax->characters)) {
4204
0
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4205
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4206
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4207
0
                                                   tmp, nbchar);
4208
0
                    } else {
4209
0
                        if (ctxt->sax->characters != NULL)
4210
0
                            ctxt->sax->characters(ctxt->userData,
4211
0
                                                  tmp, nbchar);
4212
0
                        if (*ctxt->space == -1)
4213
0
                            *ctxt->space = -2;
4214
0
                    }
4215
192k
                } else if ((ctxt->sax != NULL) &&
4216
192k
                           (ctxt->sax->characters != NULL)) {
4217
192k
                    ctxt->sax->characters(ctxt->userData,
4218
192k
                                          tmp, nbchar);
4219
192k
                }
4220
192k
            }
4221
192k
            return;
4222
192k
        }
4223
4224
584k
get_more:
4225
584k
        ccol = ctxt->input->col;
4226
2.53M
        while (test_char_data[*in]) {
4227
1.95M
            in++;
4228
1.95M
            ccol++;
4229
1.95M
        }
4230
584k
        ctxt->input->col = ccol;
4231
584k
        if (*in == 0xA) {
4232
81.7k
            do {
4233
81.7k
                ctxt->input->line++; ctxt->input->col = 1;
4234
81.7k
                in++;
4235
81.7k
            } while (*in == 0xA);
4236
65.8k
            goto get_more;
4237
65.8k
        }
4238
518k
        if (*in == ']') {
4239
8.63k
            if ((in[1] == ']') && (in[2] == '>')) {
4240
399
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4241
399
                if (ctxt->instate != XML_PARSER_EOF)
4242
399
                    ctxt->input->cur = in + 1;
4243
399
                return;
4244
399
            }
4245
8.23k
            in++;
4246
8.23k
            ctxt->input->col++;
4247
8.23k
            goto get_more;
4248
8.63k
        }
4249
509k
        nbchar = in - ctxt->input->cur;
4250
509k
        if (nbchar > 0) {
4251
284k
            if ((ctxt->sax != NULL) &&
4252
284k
                (ctxt->sax->ignorableWhitespace !=
4253
284k
                 ctxt->sax->characters) &&
4254
284k
                (IS_BLANK_CH(*ctxt->input->cur))) {
4255
0
                const xmlChar *tmp = ctxt->input->cur;
4256
0
                ctxt->input->cur = in;
4257
4258
0
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4259
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4260
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4261
0
                                                       tmp, nbchar);
4262
0
                } else {
4263
0
                    if (ctxt->sax->characters != NULL)
4264
0
                        ctxt->sax->characters(ctxt->userData,
4265
0
                                              tmp, nbchar);
4266
0
                    if (*ctxt->space == -1)
4267
0
                        *ctxt->space = -2;
4268
0
                }
4269
0
                line = ctxt->input->line;
4270
0
                col = ctxt->input->col;
4271
284k
            } else if (ctxt->sax != NULL) {
4272
284k
                if (ctxt->sax->characters != NULL)
4273
284k
                    ctxt->sax->characters(ctxt->userData,
4274
284k
                                          ctxt->input->cur, nbchar);
4275
284k
                line = ctxt->input->line;
4276
284k
                col = ctxt->input->col;
4277
284k
            }
4278
284k
        }
4279
509k
        ctxt->input->cur = in;
4280
509k
        if (*in == 0xD) {
4281
125k
            in++;
4282
125k
            if (*in == 0xA) {
4283
108k
                ctxt->input->cur = in;
4284
108k
                in++;
4285
108k
                ctxt->input->line++; ctxt->input->col = 1;
4286
108k
                continue; /* while */
4287
108k
            }
4288
17.6k
            in--;
4289
17.6k
        }
4290
401k
        if (*in == '<') {
4291
189k
            return;
4292
189k
        }
4293
212k
        if (*in == '&') {
4294
33.0k
            return;
4295
33.0k
        }
4296
179k
        SHRINK;
4297
179k
        GROW;
4298
179k
        if (ctxt->instate == XML_PARSER_EOF)
4299
3
            return;
4300
179k
        in = ctxt->input->cur;
4301
287k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4302
287k
             (*in == 0x09) || (*in == 0x0a));
4303
183k
    ctxt->input->line = line;
4304
183k
    ctxt->input->col = col;
4305
183k
    xmlParseCharDataComplex(ctxt, partial);
4306
183k
}
4307
4308
/**
4309
 * xmlParseCharDataComplex:
4310
 * @ctxt:  an XML parser context
4311
 * @cdata:  int indicating whether we are within a CDATA section
4312
 *
4313
 * Always makes progress if the first char isn't '<' or '&'.
4314
 *
4315
 * parse a CharData section.this is the fallback function
4316
 * of xmlParseCharData() when the parsing requires handling
4317
 * of non-ASCII characters.
4318
 */
4319
static void
4320
183k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4321
183k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4322
183k
    int nbchar = 0;
4323
183k
    int cur, l;
4324
4325
183k
    cur = CUR_CHAR(l);
4326
9.69M
    while ((cur != '<') && /* checked */
4327
9.69M
           (cur != '&') &&
4328
9.69M
     (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
4329
9.50M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4330
282
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4331
282
  }
4332
9.50M
  COPY_BUF(l,buf,nbchar,cur);
4333
  /* move current position before possible calling of ctxt->sax->characters */
4334
9.50M
  NEXTL(l);
4335
9.50M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4336
75.2k
      buf[nbchar] = 0;
4337
4338
      /*
4339
       * OK the segment is to be consumed as chars.
4340
       */
4341
75.2k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4342
29.1k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4343
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4344
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4345
0
                                     buf, nbchar);
4346
29.1k
    } else {
4347
29.1k
        if (ctxt->sax->characters != NULL)
4348
29.1k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4349
29.1k
        if ((ctxt->sax->characters !=
4350
29.1k
             ctxt->sax->ignorableWhitespace) &&
4351
29.1k
      (*ctxt->space == -1))
4352
0
      *ctxt->space = -2;
4353
29.1k
    }
4354
29.1k
      }
4355
75.2k
      nbchar = 0;
4356
            /* something really bad happened in the SAX callback */
4357
75.2k
            if (ctxt->instate != XML_PARSER_CONTENT)
4358
0
                return;
4359
75.2k
            SHRINK;
4360
75.2k
  }
4361
9.50M
  cur = CUR_CHAR(l);
4362
9.50M
    }
4363
183k
    if (ctxt->instate == XML_PARSER_EOF)
4364
55
        return;
4365
183k
    if (nbchar != 0) {
4366
85.6k
        buf[nbchar] = 0;
4367
  /*
4368
   * OK the segment is to be consumed as chars.
4369
   */
4370
85.6k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4371
50.9k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4372
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4373
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4374
50.9k
      } else {
4375
50.9k
    if (ctxt->sax->characters != NULL)
4376
50.9k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4377
50.9k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4378
50.9k
        (*ctxt->space == -1))
4379
0
        *ctxt->space = -2;
4380
50.9k
      }
4381
50.9k
  }
4382
85.6k
    }
4383
    /*
4384
     * cur == 0 can mean
4385
     *
4386
     * - XML_PARSER_EOF or memory error. This is checked above.
4387
     * - An actual 0 character.
4388
     * - End of buffer.
4389
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4390
     */
4391
183k
    if (ctxt->input->cur < ctxt->input->end) {
4392
180k
        if ((cur == 0) && (CUR != 0)) {
4393
83
            if (partial == 0) {
4394
83
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4395
83
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4396
83
                NEXTL(1);
4397
83
            }
4398
180k
        } else if ((cur != '<') && (cur != '&')) {
4399
            /* Generate the error and skip the offending character */
4400
102k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4401
102k
                              "PCDATA invalid Char value %d\n", cur);
4402
102k
            NEXTL(l);
4403
102k
        }
4404
180k
    }
4405
183k
}
4406
4407
/**
4408
 * xmlParseCharData:
4409
 * @ctxt:  an XML parser context
4410
 * @cdata:  unused
4411
 *
4412
 * DEPRECATED: Internal function, don't use.
4413
 */
4414
void
4415
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4416
0
    xmlParseCharDataInternal(ctxt, 0);
4417
0
}
4418
4419
/**
4420
 * xmlParseExternalID:
4421
 * @ctxt:  an XML parser context
4422
 * @publicID:  a xmlChar** receiving PubidLiteral
4423
 * @strict: indicate whether we should restrict parsing to only
4424
 *          production [75], see NOTE below
4425
 *
4426
 * DEPRECATED: Internal function, don't use.
4427
 *
4428
 * Parse an External ID or a Public ID
4429
 *
4430
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4431
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4432
 *
4433
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4434
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4435
 *
4436
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4437
 *
4438
 * Returns the function returns SystemLiteral and in the second
4439
 *                case publicID receives PubidLiteral, is strict is off
4440
 *                it is possible to return NULL and have publicID set.
4441
 */
4442
4443
xmlChar *
4444
20.8k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4445
20.8k
    xmlChar *URI = NULL;
4446
4447
20.8k
    *publicID = NULL;
4448
20.8k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4449
3.21k
        SKIP(6);
4450
3.21k
  if (SKIP_BLANKS == 0) {
4451
143
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4452
143
                     "Space required after 'SYSTEM'\n");
4453
143
  }
4454
3.21k
  URI = xmlParseSystemLiteral(ctxt);
4455
3.21k
  if (URI == NULL) {
4456
299
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4457
299
        }
4458
17.5k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4459
6.92k
        SKIP(6);
4460
6.92k
  if (SKIP_BLANKS == 0) {
4461
1.20k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4462
1.20k
        "Space required after 'PUBLIC'\n");
4463
1.20k
  }
4464
6.92k
  *publicID = xmlParsePubidLiteral(ctxt);
4465
6.92k
  if (*publicID == NULL) {
4466
94
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4467
94
  }
4468
6.92k
  if (strict) {
4469
      /*
4470
       * We don't handle [83] so "S SystemLiteral" is required.
4471
       */
4472
4.35k
      if (SKIP_BLANKS == 0) {
4473
927
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4474
927
      "Space required after the Public Identifier\n");
4475
927
      }
4476
4.35k
  } else {
4477
      /*
4478
       * We handle [83] so we return immediately, if
4479
       * "S SystemLiteral" is not detected. We skip blanks if no
4480
             * system literal was found, but this is harmless since we must
4481
             * be at the end of a NotationDecl.
4482
       */
4483
2.57k
      if (SKIP_BLANKS == 0) return(NULL);
4484
2.02k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4485
2.02k
  }
4486
6.02k
  URI = xmlParseSystemLiteral(ctxt);
4487
6.02k
  if (URI == NULL) {
4488
364
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4489
364
        }
4490
6.02k
    }
4491
19.9k
    return(URI);
4492
20.8k
}
4493
4494
/**
4495
 * xmlParseCommentComplex:
4496
 * @ctxt:  an XML parser context
4497
 * @buf:  the already parsed part of the buffer
4498
 * @len:  number of bytes in the buffer
4499
 * @size:  allocated size of the buffer
4500
 *
4501
 * Skip an XML (SGML) comment <!-- .... -->
4502
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4503
 *  must not occur within comments. "
4504
 * This is the slow routine in case the accelerator for ascii didn't work
4505
 *
4506
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4507
 */
4508
static void
4509
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4510
12.4k
                       size_t len, size_t size) {
4511
12.4k
    int q, ql;
4512
12.4k
    int r, rl;
4513
12.4k
    int cur, l;
4514
12.4k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4515
0
                       XML_MAX_HUGE_LENGTH :
4516
12.4k
                       XML_MAX_TEXT_LENGTH;
4517
12.4k
    int inputid;
4518
4519
12.4k
    inputid = ctxt->input->id;
4520
4521
12.4k
    if (buf == NULL) {
4522
2.65k
        len = 0;
4523
2.65k
  size = XML_PARSER_BUFFER_SIZE;
4524
2.65k
  buf = (xmlChar *) xmlMallocAtomic(size);
4525
2.65k
  if (buf == NULL) {
4526
0
      xmlErrMemory(ctxt, NULL);
4527
0
      return;
4528
0
  }
4529
2.65k
    }
4530
12.4k
    q = CUR_CHAR(ql);
4531
12.4k
    if (q == 0)
4532
944
        goto not_terminated;
4533
11.5k
    if (!IS_CHAR(q)) {
4534
782
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4535
782
                          "xmlParseComment: invalid xmlChar value %d\n",
4536
782
                    q);
4537
782
  xmlFree (buf);
4538
782
  return;
4539
782
    }
4540
10.7k
    NEXTL(ql);
4541
10.7k
    r = CUR_CHAR(rl);
4542
10.7k
    if (r == 0)
4543
395
        goto not_terminated;
4544
10.3k
    if (!IS_CHAR(r)) {
4545
759
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4546
759
                          "xmlParseComment: invalid xmlChar value %d\n",
4547
759
                    r);
4548
759
  xmlFree (buf);
4549
759
  return;
4550
759
    }
4551
9.58k
    NEXTL(rl);
4552
9.58k
    cur = CUR_CHAR(l);
4553
9.58k
    if (cur == 0)
4554
352
        goto not_terminated;
4555
553k
    while (IS_CHAR(cur) && /* checked */
4556
553k
           ((cur != '>') ||
4557
550k
      (r != '-') || (q != '-'))) {
4558
543k
  if ((r == '-') && (q == '-')) {
4559
3.37k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4560
3.37k
  }
4561
543k
  if (len + 5 >= size) {
4562
1.76k
      xmlChar *new_buf;
4563
1.76k
            size_t new_size;
4564
4565
1.76k
      new_size = size * 2;
4566
1.76k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4567
1.76k
      if (new_buf == NULL) {
4568
0
    xmlFree (buf);
4569
0
    xmlErrMemory(ctxt, NULL);
4570
0
    return;
4571
0
      }
4572
1.76k
      buf = new_buf;
4573
1.76k
            size = new_size;
4574
1.76k
  }
4575
543k
  COPY_BUF(ql,buf,len,q);
4576
543k
        if (len > maxLength) {
4577
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4578
0
                         "Comment too big found", NULL);
4579
0
            xmlFree (buf);
4580
0
            return;
4581
0
        }
4582
4583
543k
  q = r;
4584
543k
  ql = rl;
4585
543k
  r = cur;
4586
543k
  rl = l;
4587
4588
543k
  NEXTL(l);
4589
543k
  cur = CUR_CHAR(l);
4590
4591
543k
    }
4592
9.23k
    buf[len] = 0;
4593
9.23k
    if (ctxt->instate == XML_PARSER_EOF) {
4594
5
        xmlFree(buf);
4595
5
        return;
4596
5
    }
4597
9.22k
    if (cur == 0) {
4598
2.02k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4599
2.02k
                       "Comment not terminated \n<!--%.50s\n", buf);
4600
7.20k
    } else if (!IS_CHAR(cur)) {
4601
1.04k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4602
1.04k
                          "xmlParseComment: invalid xmlChar value %d\n",
4603
1.04k
                    cur);
4604
6.16k
    } else {
4605
6.16k
  if (inputid != ctxt->input->id) {
4606
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4607
0
               "Comment doesn't start and stop in the same"
4608
0
                           " entity\n");
4609
0
  }
4610
6.16k
        NEXT;
4611
6.16k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4612
6.16k
      (!ctxt->disableSAX))
4613
1.32k
      ctxt->sax->comment(ctxt->userData, buf);
4614
6.16k
    }
4615
9.22k
    xmlFree(buf);
4616
9.22k
    return;
4617
1.69k
not_terminated:
4618
1.69k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4619
1.69k
       "Comment not terminated\n", NULL);
4620
1.69k
    xmlFree(buf);
4621
1.69k
    return;
4622
9.23k
}
4623
4624
/**
4625
 * xmlParseComment:
4626
 * @ctxt:  an XML parser context
4627
 *
4628
 * DEPRECATED: Internal function, don't use.
4629
 *
4630
 * Parse an XML (SGML) comment. Always consumes '<!'.
4631
 *
4632
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4633
 *  must not occur within comments. "
4634
 *
4635
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4636
 */
4637
void
4638
19.8k
xmlParseComment(xmlParserCtxtPtr ctxt) {
4639
19.8k
    xmlChar *buf = NULL;
4640
19.8k
    size_t size = XML_PARSER_BUFFER_SIZE;
4641
19.8k
    size_t len = 0;
4642
19.8k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4643
0
                       XML_MAX_HUGE_LENGTH :
4644
19.8k
                       XML_MAX_TEXT_LENGTH;
4645
19.8k
    xmlParserInputState state;
4646
19.8k
    const xmlChar *in;
4647
19.8k
    size_t nbchar = 0;
4648
19.8k
    int ccol;
4649
19.8k
    int inputid;
4650
4651
    /*
4652
     * Check that there is a comment right here.
4653
     */
4654
19.8k
    if ((RAW != '<') || (NXT(1) != '!'))
4655
0
        return;
4656
19.8k
    SKIP(2);
4657
19.8k
    if ((RAW != '-') || (NXT(1) != '-'))
4658
3
        return;
4659
19.8k
    state = ctxt->instate;
4660
19.8k
    ctxt->instate = XML_PARSER_COMMENT;
4661
19.8k
    inputid = ctxt->input->id;
4662
19.8k
    SKIP(2);
4663
19.8k
    GROW;
4664
4665
    /*
4666
     * Accelerated common case where input don't need to be
4667
     * modified before passing it to the handler.
4668
     */
4669
19.8k
    in = ctxt->input->cur;
4670
19.9k
    do {
4671
19.9k
  if (*in == 0xA) {
4672
1.14k
      do {
4673
1.14k
    ctxt->input->line++; ctxt->input->col = 1;
4674
1.14k
    in++;
4675
1.14k
      } while (*in == 0xA);
4676
918
  }
4677
50.0k
get_more:
4678
50.0k
        ccol = ctxt->input->col;
4679
420k
  while (((*in > '-') && (*in <= 0x7F)) ||
4680
420k
         ((*in >= 0x20) && (*in < '-')) ||
4681
420k
         (*in == 0x09)) {
4682
370k
        in++;
4683
370k
        ccol++;
4684
370k
  }
4685
50.0k
  ctxt->input->col = ccol;
4686
50.0k
  if (*in == 0xA) {
4687
6.76k
      do {
4688
6.76k
    ctxt->input->line++; ctxt->input->col = 1;
4689
6.76k
    in++;
4690
6.76k
      } while (*in == 0xA);
4691
5.49k
      goto get_more;
4692
5.49k
  }
4693
44.6k
  nbchar = in - ctxt->input->cur;
4694
  /*
4695
   * save current set of data
4696
   */
4697
44.6k
  if (nbchar > 0) {
4698
39.5k
      if ((ctxt->sax != NULL) &&
4699
39.5k
    (ctxt->sax->comment != NULL)) {
4700
39.5k
    if (buf == NULL) {
4701
16.5k
        if ((*in == '-') && (in[1] == '-'))
4702
6.82k
            size = nbchar + 1;
4703
9.68k
        else
4704
9.68k
            size = XML_PARSER_BUFFER_SIZE + nbchar;
4705
16.5k
        buf = (xmlChar *) xmlMallocAtomic(size);
4706
16.5k
        if (buf == NULL) {
4707
0
            xmlErrMemory(ctxt, NULL);
4708
0
      ctxt->instate = state;
4709
0
      return;
4710
0
        }
4711
16.5k
        len = 0;
4712
23.0k
    } else if (len + nbchar + 1 >= size) {
4713
1.10k
        xmlChar *new_buf;
4714
1.10k
        size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
4715
1.10k
        new_buf = (xmlChar *) xmlRealloc(buf, size);
4716
1.10k
        if (new_buf == NULL) {
4717
0
            xmlFree (buf);
4718
0
      xmlErrMemory(ctxt, NULL);
4719
0
      ctxt->instate = state;
4720
0
      return;
4721
0
        }
4722
1.10k
        buf = new_buf;
4723
1.10k
    }
4724
39.5k
    memcpy(&buf[len], ctxt->input->cur, nbchar);
4725
39.5k
    len += nbchar;
4726
39.5k
    buf[len] = 0;
4727
39.5k
      }
4728
39.5k
  }
4729
44.6k
        if (len > maxLength) {
4730
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4731
0
                         "Comment too big found", NULL);
4732
0
            xmlFree (buf);
4733
0
            return;
4734
0
        }
4735
44.6k
  ctxt->input->cur = in;
4736
44.6k
  if (*in == 0xA) {
4737
0
      in++;
4738
0
      ctxt->input->line++; ctxt->input->col = 1;
4739
0
  }
4740
44.6k
  if (*in == 0xD) {
4741
3.06k
      in++;
4742
3.06k
      if (*in == 0xA) {
4743
1.84k
    ctxt->input->cur = in;
4744
1.84k
    in++;
4745
1.84k
    ctxt->input->line++; ctxt->input->col = 1;
4746
1.84k
    goto get_more;
4747
1.84k
      }
4748
1.22k
      in--;
4749
1.22k
  }
4750
42.7k
  SHRINK;
4751
42.7k
  GROW;
4752
42.7k
        if (ctxt->instate == XML_PARSER_EOF) {
4753
6
            xmlFree(buf);
4754
6
            return;
4755
6
        }
4756
42.7k
  in = ctxt->input->cur;
4757
42.7k
  if (*in == '-') {
4758
30.1k
      if (in[1] == '-') {
4759
19.8k
          if (in[2] == '>') {
4760
7.34k
        if (ctxt->input->id != inputid) {
4761
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4762
0
                     "comment doesn't start and stop in the"
4763
0
                                       " same entity\n");
4764
0
        }
4765
7.34k
        SKIP(3);
4766
7.34k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4767
7.34k
            (!ctxt->disableSAX)) {
4768
3.20k
      if (buf != NULL)
4769
3.02k
          ctxt->sax->comment(ctxt->userData, buf);
4770
177
      else
4771
177
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4772
3.20k
        }
4773
7.34k
        if (buf != NULL)
4774
6.69k
            xmlFree(buf);
4775
7.34k
        if (ctxt->instate != XML_PARSER_EOF)
4776
7.34k
      ctxt->instate = state;
4777
7.34k
        return;
4778
7.34k
    }
4779
12.4k
    if (buf != NULL) {
4780
11.6k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4781
11.6k
                          "Double hyphen within comment: "
4782
11.6k
                                      "<!--%.50s\n",
4783
11.6k
              buf);
4784
11.6k
    } else
4785
818
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4786
818
                          "Double hyphen within comment\n", NULL);
4787
12.4k
                if (ctxt->instate == XML_PARSER_EOF) {
4788
0
                    xmlFree(buf);
4789
0
                    return;
4790
0
                }
4791
12.4k
    in++;
4792
12.4k
    ctxt->input->col++;
4793
12.4k
      }
4794
22.7k
      in++;
4795
22.7k
      ctxt->input->col++;
4796
22.7k
      goto get_more;
4797
30.1k
  }
4798
42.7k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
4799
12.4k
    xmlParseCommentComplex(ctxt, buf, len, size);
4800
12.4k
    ctxt->instate = state;
4801
12.4k
    return;
4802
19.8k
}
4803
4804
4805
/**
4806
 * xmlParsePITarget:
4807
 * @ctxt:  an XML parser context
4808
 *
4809
 * DEPRECATED: Internal function, don't use.
4810
 *
4811
 * parse the name of a PI
4812
 *
4813
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4814
 *
4815
 * Returns the PITarget name or NULL
4816
 */
4817
4818
const xmlChar *
4819
26.3k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
4820
26.3k
    const xmlChar *name;
4821
4822
26.3k
    name = xmlParseName(ctxt);
4823
26.3k
    if ((name != NULL) &&
4824
26.3k
        ((name[0] == 'x') || (name[0] == 'X')) &&
4825
26.3k
        ((name[1] == 'm') || (name[1] == 'M')) &&
4826
26.3k
        ((name[2] == 'l') || (name[2] == 'L'))) {
4827
3.74k
  int i;
4828
3.74k
  if ((name[0] == 'x') && (name[1] == 'm') &&
4829
3.74k
      (name[2] == 'l') && (name[3] == 0)) {
4830
1.73k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4831
1.73k
     "XML declaration allowed only at the start of the document\n");
4832
1.73k
      return(name);
4833
2.01k
  } else if (name[3] == 0) {
4834
498
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
4835
498
      return(name);
4836
498
  }
4837
4.43k
  for (i = 0;;i++) {
4838
4.43k
      if (xmlW3CPIs[i] == NULL) break;
4839
3.02k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4840
96
          return(name);
4841
3.02k
  }
4842
1.41k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4843
1.41k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
4844
1.41k
          NULL, NULL);
4845
1.41k
    }
4846
24.0k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4847
1.53k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
4848
1.53k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
4849
1.53k
    }
4850
24.0k
    return(name);
4851
26.3k
}
4852
4853
#ifdef LIBXML_CATALOG_ENABLED
4854
/**
4855
 * xmlParseCatalogPI:
4856
 * @ctxt:  an XML parser context
4857
 * @catalog:  the PI value string
4858
 *
4859
 * parse an XML Catalog Processing Instruction.
4860
 *
4861
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4862
 *
4863
 * Occurs only if allowed by the user and if happening in the Misc
4864
 * part of the document before any doctype information
4865
 * This will add the given catalog to the parsing context in order
4866
 * to be used if there is a resolution need further down in the document
4867
 */
4868
4869
static void
4870
3.00k
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4871
3.00k
    xmlChar *URL = NULL;
4872
3.00k
    const xmlChar *tmp, *base;
4873
3.00k
    xmlChar marker;
4874
4875
3.00k
    tmp = catalog;
4876
3.00k
    while (IS_BLANK_CH(*tmp)) tmp++;
4877
3.00k
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4878
448
  goto error;
4879
2.55k
    tmp += 7;
4880
4.34k
    while (IS_BLANK_CH(*tmp)) tmp++;
4881
2.55k
    if (*tmp != '=') {
4882
735
  return;
4883
735
    }
4884
1.82k
    tmp++;
4885
1.82k
    while (IS_BLANK_CH(*tmp)) tmp++;
4886
1.82k
    marker = *tmp;
4887
1.82k
    if ((marker != '\'') && (marker != '"'))
4888
411
  goto error;
4889
1.40k
    tmp++;
4890
1.40k
    base = tmp;
4891
15.0k
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
4892
1.40k
    if (*tmp == 0)
4893
205
  goto error;
4894
1.20k
    URL = xmlStrndup(base, tmp - base);
4895
1.20k
    tmp++;
4896
1.20k
    while (IS_BLANK_CH(*tmp)) tmp++;
4897
1.20k
    if (*tmp != 0)
4898
823
  goto error;
4899
4900
381
    if (URL != NULL) {
4901
381
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4902
381
  xmlFree(URL);
4903
381
    }
4904
381
    return;
4905
4906
1.88k
error:
4907
1.88k
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4908
1.88k
            "Catalog PI syntax error: %s\n",
4909
1.88k
      catalog, NULL);
4910
1.88k
    if (URL != NULL)
4911
823
  xmlFree(URL);
4912
1.88k
}
4913
#endif
4914
4915
/**
4916
 * xmlParsePI:
4917
 * @ctxt:  an XML parser context
4918
 *
4919
 * DEPRECATED: Internal function, don't use.
4920
 *
4921
 * parse an XML Processing Instruction.
4922
 *
4923
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4924
 *
4925
 * The processing is transferred to SAX once parsed.
4926
 */
4927
4928
void
4929
26.3k
xmlParsePI(xmlParserCtxtPtr ctxt) {
4930
26.3k
    xmlChar *buf = NULL;
4931
26.3k
    size_t len = 0;
4932
26.3k
    size_t size = XML_PARSER_BUFFER_SIZE;
4933
26.3k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4934
0
                       XML_MAX_HUGE_LENGTH :
4935
26.3k
                       XML_MAX_TEXT_LENGTH;
4936
26.3k
    int cur, l;
4937
26.3k
    const xmlChar *target;
4938
26.3k
    xmlParserInputState state;
4939
4940
26.3k
    if ((RAW == '<') && (NXT(1) == '?')) {
4941
26.3k
  int inputid = ctxt->input->id;
4942
26.3k
  state = ctxt->instate;
4943
26.3k
        ctxt->instate = XML_PARSER_PI;
4944
  /*
4945
   * this is a Processing Instruction.
4946
   */
4947
26.3k
  SKIP(2);
4948
4949
  /*
4950
   * Parse the target name and check for special support like
4951
   * namespace.
4952
   */
4953
26.3k
        target = xmlParsePITarget(ctxt);
4954
26.3k
  if (target != NULL) {
4955
24.5k
      if ((RAW == '?') && (NXT(1) == '>')) {
4956
5.35k
    if (inputid != ctxt->input->id) {
4957
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4958
0
                             "PI declaration doesn't start and stop in"
4959
0
                                   " the same entity\n");
4960
0
    }
4961
5.35k
    SKIP(2);
4962
4963
    /*
4964
     * SAX: PI detected.
4965
     */
4966
5.35k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
4967
5.35k
        (ctxt->sax->processingInstruction != NULL))
4968
3.92k
        ctxt->sax->processingInstruction(ctxt->userData,
4969
3.92k
                                         target, NULL);
4970
5.35k
    if (ctxt->instate != XML_PARSER_EOF)
4971
5.35k
        ctxt->instate = state;
4972
5.35k
    return;
4973
5.35k
      }
4974
19.1k
      buf = (xmlChar *) xmlMallocAtomic(size);
4975
19.1k
      if (buf == NULL) {
4976
0
    xmlErrMemory(ctxt, NULL);
4977
0
    ctxt->instate = state;
4978
0
    return;
4979
0
      }
4980
19.1k
      if (SKIP_BLANKS == 0) {
4981
9.87k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4982
9.87k
        "ParsePI: PI %s space expected\n", target);
4983
9.87k
      }
4984
19.1k
      cur = CUR_CHAR(l);
4985
4.55M
      while (IS_CHAR(cur) && /* checked */
4986
4.55M
       ((cur != '?') || (NXT(1) != '>'))) {
4987
4.53M
    if (len + 5 >= size) {
4988
9.29k
        xmlChar *tmp;
4989
9.29k
                    size_t new_size = size * 2;
4990
9.29k
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
4991
9.29k
        if (tmp == NULL) {
4992
0
      xmlErrMemory(ctxt, NULL);
4993
0
      xmlFree(buf);
4994
0
      ctxt->instate = state;
4995
0
      return;
4996
0
        }
4997
9.29k
        buf = tmp;
4998
9.29k
                    size = new_size;
4999
9.29k
    }
5000
4.53M
    COPY_BUF(l,buf,len,cur);
5001
4.53M
                if (len > maxLength) {
5002
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5003
0
                                      "PI %s too big found", target);
5004
0
                    xmlFree(buf);
5005
0
                    ctxt->instate = state;
5006
0
                    return;
5007
0
                }
5008
4.53M
    NEXTL(l);
5009
4.53M
    cur = CUR_CHAR(l);
5010
4.53M
      }
5011
19.1k
      buf[len] = 0;
5012
19.1k
            if (ctxt->instate == XML_PARSER_EOF) {
5013
6
                xmlFree(buf);
5014
6
                return;
5015
6
            }
5016
19.1k
      if (cur != '?') {
5017
8.33k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5018
8.33k
          "ParsePI: PI %s never end ...\n", target);
5019
10.8k
      } else {
5020
10.8k
    if (inputid != ctxt->input->id) {
5021
154
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5022
154
                             "PI declaration doesn't start and stop in"
5023
154
                                   " the same entity\n");
5024
154
    }
5025
10.8k
    SKIP(2);
5026
5027
10.8k
#ifdef LIBXML_CATALOG_ENABLED
5028
10.8k
    if (((state == XML_PARSER_MISC) ||
5029
10.8k
               (state == XML_PARSER_START)) &&
5030
10.8k
        (xmlStrEqual(target, XML_CATALOG_PI))) {
5031
3.00k
        xmlCatalogAllow allow = xmlCatalogGetDefaults();
5032
3.00k
        if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5033
3.00k
      (allow == XML_CATA_ALLOW_ALL))
5034
3.00k
      xmlParseCatalogPI(ctxt, buf);
5035
3.00k
    }
5036
10.8k
#endif
5037
5038
5039
    /*
5040
     * SAX: PI detected.
5041
     */
5042
10.8k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5043
10.8k
        (ctxt->sax->processingInstruction != NULL))
5044
2.20k
        ctxt->sax->processingInstruction(ctxt->userData,
5045
2.20k
                                         target, buf);
5046
10.8k
      }
5047
19.1k
      xmlFree(buf);
5048
19.1k
  } else {
5049
1.80k
      xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5050
1.80k
  }
5051
20.9k
  if (ctxt->instate != XML_PARSER_EOF)
5052
20.9k
      ctxt->instate = state;
5053
20.9k
    }
5054
26.3k
}
5055
5056
/**
5057
 * xmlParseNotationDecl:
5058
 * @ctxt:  an XML parser context
5059
 *
5060
 * DEPRECATED: Internal function, don't use.
5061
 *
5062
 * Parse a notation declaration. Always consumes '<!'.
5063
 *
5064
 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
5065
 *
5066
 * Hence there is actually 3 choices:
5067
 *     'PUBLIC' S PubidLiteral
5068
 *     'PUBLIC' S PubidLiteral S SystemLiteral
5069
 * and 'SYSTEM' S SystemLiteral
5070
 *
5071
 * See the NOTE on xmlParseExternalID().
5072
 */
5073
5074
void
5075
6.02k
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5076
6.02k
    const xmlChar *name;
5077
6.02k
    xmlChar *Pubid;
5078
6.02k
    xmlChar *Systemid;
5079
5080
6.02k
    if ((CUR != '<') || (NXT(1) != '!'))
5081
0
        return;
5082
6.02k
    SKIP(2);
5083
5084
6.02k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5085
5.99k
  int inputid = ctxt->input->id;
5086
5.99k
  SKIP(8);
5087
5.99k
  if (SKIP_BLANKS == 0) {
5088
788
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5089
788
         "Space required after '<!NOTATION'\n");
5090
788
      return;
5091
788
  }
5092
5093
5.20k
        name = xmlParseName(ctxt);
5094
5.20k
  if (name == NULL) {
5095
811
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5096
811
      return;
5097
811
  }
5098
4.39k
  if (xmlStrchr(name, ':') != NULL) {
5099
226
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5100
226
         "colons are forbidden from notation names '%s'\n",
5101
226
         name, NULL, NULL);
5102
226
  }
5103
4.39k
  if (SKIP_BLANKS == 0) {
5104
310
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5105
310
         "Space required after the NOTATION name'\n");
5106
310
      return;
5107
310
  }
5108
5109
  /*
5110
   * Parse the IDs.
5111
   */
5112
4.08k
  Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5113
4.08k
  SKIP_BLANKS;
5114
5115
4.08k
  if (RAW == '>') {
5116
1.68k
      if (inputid != ctxt->input->id) {
5117
963
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5118
963
                         "Notation declaration doesn't start and stop"
5119
963
                               " in the same entity\n");
5120
963
      }
5121
1.68k
      NEXT;
5122
1.68k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5123
1.68k
    (ctxt->sax->notationDecl != NULL))
5124
151
    ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5125
2.40k
  } else {
5126
2.40k
      xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5127
2.40k
  }
5128
4.08k
  if (Systemid != NULL) xmlFree(Systemid);
5129
4.08k
  if (Pubid != NULL) xmlFree(Pubid);
5130
4.08k
    }
5131
6.02k
}
5132
5133
/**
5134
 * xmlParseEntityDecl:
5135
 * @ctxt:  an XML parser context
5136
 *
5137
 * DEPRECATED: Internal function, don't use.
5138
 *
5139
 * Parse an entity declaration. Always consumes '<!'.
5140
 *
5141
 * [70] EntityDecl ::= GEDecl | PEDecl
5142
 *
5143
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5144
 *
5145
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5146
 *
5147
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5148
 *
5149
 * [74] PEDef ::= EntityValue | ExternalID
5150
 *
5151
 * [76] NDataDecl ::= S 'NDATA' S Name
5152
 *
5153
 * [ VC: Notation Declared ]
5154
 * The Name must match the declared name of a notation.
5155
 */
5156
5157
void
5158
20.1k
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5159
20.1k
    const xmlChar *name = NULL;
5160
20.1k
    xmlChar *value = NULL;
5161
20.1k
    xmlChar *URI = NULL, *literal = NULL;
5162
20.1k
    const xmlChar *ndata = NULL;
5163
20.1k
    int isParameter = 0;
5164
20.1k
    xmlChar *orig = NULL;
5165
5166
20.1k
    if ((CUR != '<') || (NXT(1) != '!'))
5167
0
        return;
5168
20.1k
    SKIP(2);
5169
5170
    /* GROW; done in the caller */
5171
20.1k
    if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5172
20.1k
  int inputid = ctxt->input->id;
5173
20.1k
  SKIP(6);
5174
20.1k
  if (SKIP_BLANKS == 0) {
5175
5.82k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176
5.82k
         "Space required after '<!ENTITY'\n");
5177
5.82k
  }
5178
5179
20.1k
  if (RAW == '%') {
5180
5.90k
      NEXT;
5181
5.90k
      if (SKIP_BLANKS == 0) {
5182
118
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5183
118
             "Space required after '%%'\n");
5184
118
      }
5185
5.90k
      isParameter = 1;
5186
5.90k
  }
5187
5188
20.1k
        name = xmlParseName(ctxt);
5189
20.1k
  if (name == NULL) {
5190
532
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5191
532
                     "xmlParseEntityDecl: no name\n");
5192
532
            return;
5193
532
  }
5194
19.5k
  if (xmlStrchr(name, ':') != NULL) {
5195
596
      xmlNsErr(ctxt, XML_NS_ERR_COLON,
5196
596
         "colons are forbidden from entities names '%s'\n",
5197
596
         name, NULL, NULL);
5198
596
  }
5199
19.5k
  if (SKIP_BLANKS == 0) {
5200
4.88k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5201
4.88k
         "Space required after the entity name\n");
5202
4.88k
  }
5203
5204
19.5k
  ctxt->instate = XML_PARSER_ENTITY_DECL;
5205
  /*
5206
   * handle the various case of definitions...
5207
   */
5208
19.5k
  if (isParameter) {
5209
5.47k
      if ((RAW == '"') || (RAW == '\'')) {
5210
2.42k
          value = xmlParseEntityValue(ctxt, &orig);
5211
2.42k
    if (value) {
5212
1.77k
        if ((ctxt->sax != NULL) &&
5213
1.77k
      (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5214
1.01k
      ctxt->sax->entityDecl(ctxt->userData, name,
5215
1.01k
                        XML_INTERNAL_PARAMETER_ENTITY,
5216
1.01k
            NULL, NULL, value);
5217
1.77k
    }
5218
3.05k
      } else {
5219
3.05k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5220
3.05k
    if ((URI == NULL) && (literal == NULL)) {
5221
102
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5222
102
    }
5223
3.05k
    if (URI) {
5224
2.73k
        xmlURIPtr uri;
5225
5226
2.73k
        uri = xmlParseURI((const char *) URI);
5227
2.73k
        if (uri == NULL) {
5228
450
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5229
450
             "Invalid URI: %s\n", URI);
5230
      /*
5231
       * This really ought to be a well formedness error
5232
       * but the XML Core WG decided otherwise c.f. issue
5233
       * E26 of the XML erratas.
5234
       */
5235
2.28k
        } else {
5236
2.28k
      if (uri->fragment != NULL) {
5237
          /*
5238
           * Okay this is foolish to block those but not
5239
           * invalid URIs.
5240
           */
5241
81
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5242
2.20k
      } else {
5243
2.20k
          if ((ctxt->sax != NULL) &&
5244
2.20k
        (!ctxt->disableSAX) &&
5245
2.20k
        (ctxt->sax->entityDecl != NULL))
5246
2.08k
        ctxt->sax->entityDecl(ctxt->userData, name,
5247
2.08k
              XML_EXTERNAL_PARAMETER_ENTITY,
5248
2.08k
              literal, URI, NULL);
5249
2.20k
      }
5250
2.28k
      xmlFreeURI(uri);
5251
2.28k
        }
5252
2.73k
    }
5253
3.05k
      }
5254
14.1k
  } else {
5255
14.1k
      if ((RAW == '"') || (RAW == '\'')) {
5256
8.94k
          value = xmlParseEntityValue(ctxt, &orig);
5257
8.94k
    if ((ctxt->sax != NULL) &&
5258
8.94k
        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5259
2.44k
        ctxt->sax->entityDecl(ctxt->userData, name,
5260
2.44k
        XML_INTERNAL_GENERAL_ENTITY,
5261
2.44k
        NULL, NULL, value);
5262
    /*
5263
     * For expat compatibility in SAX mode.
5264
     */
5265
8.94k
    if ((ctxt->myDoc == NULL) ||
5266
8.94k
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5267
3.12k
        if (ctxt->myDoc == NULL) {
5268
379
      ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5269
379
      if (ctxt->myDoc == NULL) {
5270
0
          xmlErrMemory(ctxt, "New Doc failed");
5271
0
          goto done;
5272
0
      }
5273
379
      ctxt->myDoc->properties = XML_DOC_INTERNAL;
5274
379
        }
5275
3.12k
        if (ctxt->myDoc->intSubset == NULL)
5276
379
      ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5277
379
              BAD_CAST "fake", NULL, NULL);
5278
5279
3.12k
        xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5280
3.12k
                    NULL, NULL, value);
5281
3.12k
    }
5282
8.94k
      } else {
5283
5.16k
          URI = xmlParseExternalID(ctxt, &literal, 1);
5284
5.16k
    if ((URI == NULL) && (literal == NULL)) {
5285
1.40k
        xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5286
1.40k
    }
5287
5.16k
    if (URI) {
5288
3.66k
        xmlURIPtr uri;
5289
5290
3.66k
        uri = xmlParseURI((const char *)URI);
5291
3.66k
        if (uri == NULL) {
5292
841
            xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5293
841
             "Invalid URI: %s\n", URI);
5294
      /*
5295
       * This really ought to be a well formedness error
5296
       * but the XML Core WG decided otherwise c.f. issue
5297
       * E26 of the XML erratas.
5298
       */
5299
2.81k
        } else {
5300
2.81k
      if (uri->fragment != NULL) {
5301
          /*
5302
           * Okay this is foolish to block those but not
5303
           * invalid URIs.
5304
           */
5305
297
          xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5306
297
      }
5307
2.81k
      xmlFreeURI(uri);
5308
2.81k
        }
5309
3.66k
    }
5310
5.16k
    if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5311
805
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5312
805
           "Space required before 'NDATA'\n");
5313
805
    }
5314
5.16k
    if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5315
574
        SKIP(5);
5316
574
        if (SKIP_BLANKS == 0) {
5317
195
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5318
195
               "Space required after 'NDATA'\n");
5319
195
        }
5320
574
        ndata = xmlParseName(ctxt);
5321
574
        if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5322
574
            (ctxt->sax->unparsedEntityDecl != NULL))
5323
187
      ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5324
187
            literal, URI, ndata);
5325
4.59k
    } else {
5326
4.59k
        if ((ctxt->sax != NULL) &&
5327
4.59k
            (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5328
2.15k
      ctxt->sax->entityDecl(ctxt->userData, name,
5329
2.15k
            XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5330
2.15k
            literal, URI, NULL);
5331
        /*
5332
         * For expat compatibility in SAX mode.
5333
         * assuming the entity replacement was asked for
5334
         */
5335
4.59k
        if ((ctxt->replaceEntities != 0) &&
5336
4.59k
      ((ctxt->myDoc == NULL) ||
5337
4.59k
      (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5338
1.32k
      if (ctxt->myDoc == NULL) {
5339
359
          ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5340
359
          if (ctxt->myDoc == NULL) {
5341
0
              xmlErrMemory(ctxt, "New Doc failed");
5342
0
        goto done;
5343
0
          }
5344
359
          ctxt->myDoc->properties = XML_DOC_INTERNAL;
5345
359
      }
5346
5347
1.32k
      if (ctxt->myDoc->intSubset == NULL)
5348
359
          ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5349
359
            BAD_CAST "fake", NULL, NULL);
5350
1.32k
      xmlSAX2EntityDecl(ctxt, name,
5351
1.32k
                  XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5352
1.32k
                  literal, URI, NULL);
5353
1.32k
        }
5354
4.59k
    }
5355
5.16k
      }
5356
14.1k
  }
5357
19.5k
  if (ctxt->instate == XML_PARSER_EOF)
5358
118
      goto done;
5359
19.4k
  SKIP_BLANKS;
5360
19.4k
  if (RAW != '>') {
5361
920
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5362
920
              "xmlParseEntityDecl: entity %s not terminated\n", name);
5363
920
      xmlHaltParser(ctxt);
5364
18.5k
  } else {
5365
18.5k
      if (inputid != ctxt->input->id) {
5366
195
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5367
195
                         "Entity declaration doesn't start and stop in"
5368
195
                               " the same entity\n");
5369
195
      }
5370
18.5k
      NEXT;
5371
18.5k
  }
5372
19.4k
  if (orig != NULL) {
5373
      /*
5374
       * Ugly mechanism to save the raw entity value.
5375
       */
5376
9.68k
      xmlEntityPtr cur = NULL;
5377
5378
9.68k
      if (isParameter) {
5379
2.09k
          if ((ctxt->sax != NULL) &&
5380
2.09k
        (ctxt->sax->getParameterEntity != NULL))
5381
2.09k
        cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5382
7.58k
      } else {
5383
7.58k
          if ((ctxt->sax != NULL) &&
5384
7.58k
        (ctxt->sax->getEntity != NULL))
5385
7.58k
        cur = ctxt->sax->getEntity(ctxt->userData, name);
5386
7.58k
    if ((cur == NULL) && (ctxt->userData==ctxt)) {
5387
2.24k
        cur = xmlSAX2GetEntity(ctxt, name);
5388
2.24k
    }
5389
7.58k
      }
5390
9.68k
            if ((cur != NULL) && (cur->orig == NULL)) {
5391
2.22k
    cur->orig = orig;
5392
2.22k
                orig = NULL;
5393
2.22k
      }
5394
9.68k
  }
5395
5396
19.5k
done:
5397
19.5k
  if (value != NULL) xmlFree(value);
5398
19.5k
  if (URI != NULL) xmlFree(URI);
5399
19.5k
  if (literal != NULL) xmlFree(literal);
5400
19.5k
        if (orig != NULL) xmlFree(orig);
5401
19.5k
    }
5402
20.1k
}
5403
5404
/**
5405
 * xmlParseDefaultDecl:
5406
 * @ctxt:  an XML parser context
5407
 * @value:  Receive a possible fixed default value for the attribute
5408
 *
5409
 * DEPRECATED: Internal function, don't use.
5410
 *
5411
 * Parse an attribute default declaration
5412
 *
5413
 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5414
 *
5415
 * [ VC: Required Attribute ]
5416
 * if the default declaration is the keyword #REQUIRED, then the
5417
 * attribute must be specified for all elements of the type in the
5418
 * attribute-list declaration.
5419
 *
5420
 * [ VC: Attribute Default Legal ]
5421
 * The declared default value must meet the lexical constraints of
5422
 * the declared attribute type c.f. xmlValidateAttributeDecl()
5423
 *
5424
 * [ VC: Fixed Attribute Default ]
5425
 * if an attribute has a default value declared with the #FIXED
5426
 * keyword, instances of that attribute must match the default value.
5427
 *
5428
 * [ WFC: No < in Attribute Values ]
5429
 * handled in xmlParseAttValue()
5430
 *
5431
 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5432
 *          or XML_ATTRIBUTE_FIXED.
5433
 */
5434
5435
int
5436
53.6k
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5437
53.6k
    int val;
5438
53.6k
    xmlChar *ret;
5439
5440
53.6k
    *value = NULL;
5441
53.6k
    if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5442
345
  SKIP(9);
5443
345
  return(XML_ATTRIBUTE_REQUIRED);
5444
345
    }
5445
53.2k
    if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5446
7.62k
  SKIP(8);
5447
7.62k
  return(XML_ATTRIBUTE_IMPLIED);
5448
7.62k
    }
5449
45.6k
    val = XML_ATTRIBUTE_NONE;
5450
45.6k
    if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5451
139
  SKIP(6);
5452
139
  val = XML_ATTRIBUTE_FIXED;
5453
139
  if (SKIP_BLANKS == 0) {
5454
71
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5455
71
         "Space required after '#FIXED'\n");
5456
71
  }
5457
139
    }
5458
45.6k
    ret = xmlParseAttValue(ctxt);
5459
45.6k
    ctxt->instate = XML_PARSER_DTD;
5460
45.6k
    if (ret == NULL) {
5461
1.45k
  xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5462
1.45k
           "Attribute default value declaration error\n");
5463
1.45k
    } else
5464
44.1k
        *value = ret;
5465
45.6k
    return(val);
5466
53.2k
}
5467
5468
/**
5469
 * xmlParseNotationType:
5470
 * @ctxt:  an XML parser context
5471
 *
5472
 * DEPRECATED: Internal function, don't use.
5473
 *
5474
 * parse an Notation attribute type.
5475
 *
5476
 * Note: the leading 'NOTATION' S part has already being parsed...
5477
 *
5478
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5479
 *
5480
 * [ VC: Notation Attributes ]
5481
 * Values of this type must match one of the notation names included
5482
 * in the declaration; all notation names in the declaration must be declared.
5483
 *
5484
 * Returns: the notation attribute tree built while parsing
5485
 */
5486
5487
xmlEnumerationPtr
5488
917
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5489
917
    const xmlChar *name;
5490
917
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5491
5492
917
    if (RAW != '(') {
5493
73
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5494
73
  return(NULL);
5495
73
    }
5496
2.19k
    do {
5497
2.19k
        NEXT;
5498
2.19k
  SKIP_BLANKS;
5499
2.19k
        name = xmlParseName(ctxt);
5500
2.19k
  if (name == NULL) {
5501
86
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5502
86
         "Name expected in NOTATION declaration\n");
5503
86
            xmlFreeEnumeration(ret);
5504
86
      return(NULL);
5505
86
  }
5506
2.11k
  tmp = ret;
5507
9.15k
  while (tmp != NULL) {
5508
7.80k
      if (xmlStrEqual(name, tmp->name)) {
5509
762
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5510
762
    "standalone: attribute notation value token %s duplicated\n",
5511
762
         name, NULL);
5512
762
    if (!xmlDictOwns(ctxt->dict, name))
5513
0
        xmlFree((xmlChar *) name);
5514
762
    break;
5515
762
      }
5516
7.04k
      tmp = tmp->next;
5517
7.04k
  }
5518
2.11k
  if (tmp == NULL) {
5519
1.35k
      cur = xmlCreateEnumeration(name);
5520
1.35k
      if (cur == NULL) {
5521
0
                xmlFreeEnumeration(ret);
5522
0
                return(NULL);
5523
0
            }
5524
1.35k
      if (last == NULL) ret = last = cur;
5525
581
      else {
5526
581
    last->next = cur;
5527
581
    last = cur;
5528
581
      }
5529
1.35k
  }
5530
2.11k
  SKIP_BLANKS;
5531
2.11k
    } while (RAW == '|');
5532
758
    if (RAW != ')') {
5533
347
  xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5534
347
        xmlFreeEnumeration(ret);
5535
347
  return(NULL);
5536
347
    }
5537
411
    NEXT;
5538
411
    return(ret);
5539
758
}
5540
5541
/**
5542
 * xmlParseEnumerationType:
5543
 * @ctxt:  an XML parser context
5544
 *
5545
 * DEPRECATED: Internal function, don't use.
5546
 *
5547
 * parse an Enumeration attribute type.
5548
 *
5549
 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5550
 *
5551
 * [ VC: Enumeration ]
5552
 * Values of this type must match one of the Nmtoken tokens in
5553
 * the declaration
5554
 *
5555
 * Returns: the enumeration attribute tree built while parsing
5556
 */
5557
5558
xmlEnumerationPtr
5559
27.3k
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5560
27.3k
    xmlChar *name;
5561
27.3k
    xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5562
5563
27.3k
    if (RAW != '(') {
5564
695
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5565
695
  return(NULL);
5566
695
    }
5567
27.7k
    do {
5568
27.7k
        NEXT;
5569
27.7k
  SKIP_BLANKS;
5570
27.7k
        name = xmlParseNmtoken(ctxt);
5571
27.7k
  if (name == NULL) {
5572
267
      xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5573
267
      return(ret);
5574
267
  }
5575
27.4k
  tmp = ret;
5576
31.3k
  while (tmp != NULL) {
5577
4.61k
      if (xmlStrEqual(name, tmp->name)) {
5578
799
    xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5579
799
    "standalone: attribute enumeration value token %s duplicated\n",
5580
799
         name, NULL);
5581
799
    if (!xmlDictOwns(ctxt->dict, name))
5582
799
        xmlFree(name);
5583
799
    break;
5584
799
      }
5585
3.81k
      tmp = tmp->next;
5586
3.81k
  }
5587
27.4k
  if (tmp == NULL) {
5588
26.6k
      cur = xmlCreateEnumeration(name);
5589
26.6k
      if (!xmlDictOwns(ctxt->dict, name))
5590
26.6k
    xmlFree(name);
5591
26.6k
      if (cur == NULL) {
5592
0
                xmlFreeEnumeration(ret);
5593
0
                return(NULL);
5594
0
            }
5595
26.6k
      if (last == NULL) ret = last = cur;
5596
322
      else {
5597
322
    last->next = cur;
5598
322
    last = cur;
5599
322
      }
5600
26.6k
  }
5601
27.4k
  SKIP_BLANKS;
5602
27.4k
    } while (RAW == '|');
5603
26.3k
    if (RAW != ')') {
5604
713
  xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5605
713
  return(ret);
5606
713
    }
5607
25.6k
    NEXT;
5608
25.6k
    return(ret);
5609
26.3k
}
5610
5611
/**
5612
 * xmlParseEnumeratedType:
5613
 * @ctxt:  an XML parser context
5614
 * @tree:  the enumeration tree built while parsing
5615
 *
5616
 * DEPRECATED: Internal function, don't use.
5617
 *
5618
 * parse an Enumerated attribute type.
5619
 *
5620
 * [57] EnumeratedType ::= NotationType | Enumeration
5621
 *
5622
 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5623
 *
5624
 *
5625
 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5626
 */
5627
5628
int
5629
28.6k
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5630
28.6k
    if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5631
1.29k
  SKIP(8);
5632
1.29k
  if (SKIP_BLANKS == 0) {
5633
379
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5634
379
         "Space required after 'NOTATION'\n");
5635
379
      return(0);
5636
379
  }
5637
917
  *tree = xmlParseNotationType(ctxt);
5638
917
  if (*tree == NULL) return(0);
5639
411
  return(XML_ATTRIBUTE_NOTATION);
5640
917
    }
5641
27.3k
    *tree = xmlParseEnumerationType(ctxt);
5642
27.3k
    if (*tree == NULL) return(0);
5643
26.3k
    return(XML_ATTRIBUTE_ENUMERATION);
5644
27.3k
}
5645
5646
/**
5647
 * xmlParseAttributeType:
5648
 * @ctxt:  an XML parser context
5649
 * @tree:  the enumeration tree built while parsing
5650
 *
5651
 * DEPRECATED: Internal function, don't use.
5652
 *
5653
 * parse the Attribute list def for an element
5654
 *
5655
 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5656
 *
5657
 * [55] StringType ::= 'CDATA'
5658
 *
5659
 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5660
 *                        'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5661
 *
5662
 * Validity constraints for attribute values syntax are checked in
5663
 * xmlValidateAttributeValue()
5664
 *
5665
 * [ VC: ID ]
5666
 * Values of type ID must match the Name production. A name must not
5667
 * appear more than once in an XML document as a value of this type;
5668
 * i.e., ID values must uniquely identify the elements which bear them.
5669
 *
5670
 * [ VC: One ID per Element Type ]
5671
 * No element type may have more than one ID attribute specified.
5672
 *
5673
 * [ VC: ID Attribute Default ]
5674
 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5675
 *
5676
 * [ VC: IDREF ]
5677
 * Values of type IDREF must match the Name production, and values
5678
 * of type IDREFS must match Names; each IDREF Name must match the value
5679
 * of an ID attribute on some element in the XML document; i.e. IDREF
5680
 * values must match the value of some ID attribute.
5681
 *
5682
 * [ VC: Entity Name ]
5683
 * Values of type ENTITY must match the Name production, values
5684
 * of type ENTITIES must match Names; each Entity Name must match the
5685
 * name of an unparsed entity declared in the DTD.
5686
 *
5687
 * [ VC: Name Token ]
5688
 * Values of type NMTOKEN must match the Nmtoken production; values
5689
 * of type NMTOKENS must match Nmtokens.
5690
 *
5691
 * Returns the attribute type
5692
 */
5693
int
5694
56.3k
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5695
56.3k
    if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
5696
685
  SKIP(5);
5697
685
  return(XML_ATTRIBUTE_CDATA);
5698
55.6k
     } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
5699
3.61k
  SKIP(6);
5700
3.61k
  return(XML_ATTRIBUTE_IDREFS);
5701
52.0k
     } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
5702
886
  SKIP(5);
5703
886
  return(XML_ATTRIBUTE_IDREF);
5704
51.1k
     } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5705
16.0k
        SKIP(2);
5706
16.0k
  return(XML_ATTRIBUTE_ID);
5707
35.1k
     } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5708
2.83k
  SKIP(6);
5709
2.83k
  return(XML_ATTRIBUTE_ENTITY);
5710
32.3k
     } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
5711
503
  SKIP(8);
5712
503
  return(XML_ATTRIBUTE_ENTITIES);
5713
31.8k
     } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
5714
798
  SKIP(8);
5715
798
  return(XML_ATTRIBUTE_NMTOKENS);
5716
31.0k
     } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
5717
2.38k
  SKIP(7);
5718
2.38k
  return(XML_ATTRIBUTE_NMTOKEN);
5719
2.38k
     }
5720
28.6k
     return(xmlParseEnumeratedType(ctxt, tree));
5721
56.3k
}
5722
5723
/**
5724
 * xmlParseAttributeListDecl:
5725
 * @ctxt:  an XML parser context
5726
 *
5727
 * DEPRECATED: Internal function, don't use.
5728
 *
5729
 * Parse an attribute list declaration for an element. Always consumes '<!'.
5730
 *
5731
 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5732
 *
5733
 * [53] AttDef ::= S Name S AttType S DefaultDecl
5734
 *
5735
 */
5736
void
5737
20.6k
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
5738
20.6k
    const xmlChar *elemName;
5739
20.6k
    const xmlChar *attrName;
5740
20.6k
    xmlEnumerationPtr tree;
5741
5742
20.6k
    if ((CUR != '<') || (NXT(1) != '!'))
5743
0
        return;
5744
20.6k
    SKIP(2);
5745
5746
20.6k
    if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
5747
20.5k
  int inputid = ctxt->input->id;
5748
5749
20.5k
  SKIP(7);
5750
20.5k
  if (SKIP_BLANKS == 0) {
5751
3.54k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5752
3.54k
                     "Space required after '<!ATTLIST'\n");
5753
3.54k
  }
5754
20.5k
        elemName = xmlParseName(ctxt);
5755
20.5k
  if (elemName == NULL) {
5756
401
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5757
401
         "ATTLIST: no name for Element\n");
5758
401
      return;
5759
401
  }
5760
20.1k
  SKIP_BLANKS;
5761
20.1k
  GROW;
5762
71.2k
  while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
5763
62.3k
      int type;
5764
62.3k
      int def;
5765
62.3k
      xmlChar *defaultValue = NULL;
5766
5767
62.3k
      GROW;
5768
62.3k
            tree = NULL;
5769
62.3k
      attrName = xmlParseName(ctxt);
5770
62.3k
      if (attrName == NULL) {
5771
4.92k
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5772
4.92k
             "ATTLIST: no name for Attribute\n");
5773
4.92k
    break;
5774
4.92k
      }
5775
57.4k
      GROW;
5776
57.4k
      if (SKIP_BLANKS == 0) {
5777
1.05k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5778
1.05k
            "Space required after the attribute name\n");
5779
1.05k
    break;
5780
1.05k
      }
5781
5782
56.3k
      type = xmlParseAttributeType(ctxt, &tree);
5783
56.3k
      if (type <= 0) {
5784
1.83k
          break;
5785
1.83k
      }
5786
5787
54.5k
      GROW;
5788
54.5k
      if (SKIP_BLANKS == 0) {
5789
926
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5790
926
             "Space required after the attribute type\n");
5791
926
          if (tree != NULL)
5792
729
        xmlFreeEnumeration(tree);
5793
926
    break;
5794
926
      }
5795
5796
53.6k
      def = xmlParseDefaultDecl(ctxt, &defaultValue);
5797
53.6k
      if (def <= 0) {
5798
0
                if (defaultValue != NULL)
5799
0
        xmlFree(defaultValue);
5800
0
          if (tree != NULL)
5801
0
        xmlFreeEnumeration(tree);
5802
0
          break;
5803
0
      }
5804
53.6k
      if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5805
43.5k
          xmlAttrNormalizeSpace(defaultValue, defaultValue);
5806
5807
53.6k
      GROW;
5808
53.6k
            if (RAW != '>') {
5809
45.2k
    if (SKIP_BLANKS == 0) {
5810
2.57k
        xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5811
2.57k
      "Space required after the attribute default value\n");
5812
2.57k
        if (defaultValue != NULL)
5813
1.18k
      xmlFree(defaultValue);
5814
2.57k
        if (tree != NULL)
5815
1.14k
      xmlFreeEnumeration(tree);
5816
2.57k
        break;
5817
2.57k
    }
5818
45.2k
      }
5819
51.0k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5820
51.0k
    (ctxt->sax->attributeDecl != NULL))
5821
32.4k
    ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5822
32.4k
                          type, def, defaultValue, tree);
5823
18.5k
      else if (tree != NULL)
5824
9.48k
    xmlFreeEnumeration(tree);
5825
5826
51.0k
      if ((ctxt->sax2) && (defaultValue != NULL) &&
5827
51.0k
          (def != XML_ATTRIBUTE_IMPLIED) &&
5828
51.0k
    (def != XML_ATTRIBUTE_REQUIRED)) {
5829
43.0k
    xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5830
43.0k
      }
5831
51.0k
      if (ctxt->sax2) {
5832
51.0k
    xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5833
51.0k
      }
5834
51.0k
      if (defaultValue != NULL)
5835
43.0k
          xmlFree(defaultValue);
5836
51.0k
      GROW;
5837
51.0k
  }
5838
20.1k
  if (RAW == '>') {
5839
9.86k
      if (inputid != ctxt->input->id) {
5840
93
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5841
93
                               "Attribute list declaration doesn't start and"
5842
93
                               " stop in the same entity\n");
5843
93
      }
5844
9.86k
      NEXT;
5845
9.86k
  }
5846
20.1k
    }
5847
20.6k
}
5848
5849
/**
5850
 * xmlParseElementMixedContentDecl:
5851
 * @ctxt:  an XML parser context
5852
 * @inputchk:  the input used for the current entity, needed for boundary checks
5853
 *
5854
 * DEPRECATED: Internal function, don't use.
5855
 *
5856
 * parse the declaration for a Mixed Element content
5857
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5858
 *
5859
 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5860
 *                '(' S? '#PCDATA' S? ')'
5861
 *
5862
 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5863
 *
5864
 * [ VC: No Duplicate Types ]
5865
 * The same name must not appear more than once in a single
5866
 * mixed-content declaration.
5867
 *
5868
 * returns: the list of the xmlElementContentPtr describing the element choices
5869
 */
5870
xmlElementContentPtr
5871
964
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
5872
964
    xmlElementContentPtr ret = NULL, cur = NULL, n;
5873
964
    const xmlChar *elem = NULL;
5874
5875
964
    GROW;
5876
964
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
5877
964
  SKIP(7);
5878
964
  SKIP_BLANKS;
5879
964
  if (RAW == ')') {
5880
316
      if (ctxt->input->id != inputchk) {
5881
18
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5882
18
                               "Element content declaration doesn't start and"
5883
18
                               " stop in the same entity\n");
5884
18
      }
5885
316
      NEXT;
5886
316
      ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5887
316
      if (ret == NULL)
5888
0
          return(NULL);
5889
316
      if (RAW == '*') {
5890
113
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
5891
113
    NEXT;
5892
113
      }
5893
316
      return(ret);
5894
316
  }
5895
648
  if ((RAW == '(') || (RAW == '|')) {
5896
257
      ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
5897
257
      if (ret == NULL) return(NULL);
5898
257
  }
5899
2.97k
  while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
5900
2.43k
      NEXT;
5901
2.43k
      if (elem == NULL) {
5902
256
          ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5903
256
    if (ret == NULL) {
5904
0
        xmlFreeDocElementContent(ctxt->myDoc, cur);
5905
0
                    return(NULL);
5906
0
                }
5907
256
    ret->c1 = cur;
5908
256
    if (cur != NULL)
5909
256
        cur->parent = ret;
5910
256
    cur = ret;
5911
2.17k
      } else {
5912
2.17k
          n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
5913
2.17k
    if (n == NULL) {
5914
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
5915
0
                    return(NULL);
5916
0
                }
5917
2.17k
    n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
5918
2.17k
    if (n->c1 != NULL)
5919
2.17k
        n->c1->parent = n;
5920
2.17k
          cur->c2 = n;
5921
2.17k
    if (n != NULL)
5922
2.17k
        n->parent = cur;
5923
2.17k
    cur = n;
5924
2.17k
      }
5925
2.43k
      SKIP_BLANKS;
5926
2.43k
      elem = xmlParseName(ctxt);
5927
2.43k
      if (elem == NULL) {
5928
102
    xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5929
102
      "xmlParseElementMixedContentDecl : Name expected\n");
5930
102
    xmlFreeDocElementContent(ctxt->myDoc, ret);
5931
102
    return(NULL);
5932
102
      }
5933
2.32k
      SKIP_BLANKS;
5934
2.32k
      GROW;
5935
2.32k
  }
5936
546
  if ((RAW == ')') && (NXT(1) == '*')) {
5937
37
      if (elem != NULL) {
5938
37
    cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
5939
37
                                   XML_ELEMENT_CONTENT_ELEMENT);
5940
37
    if (cur->c2 != NULL)
5941
37
        cur->c2->parent = cur;
5942
37
            }
5943
37
            if (ret != NULL)
5944
37
                ret->ocur = XML_ELEMENT_CONTENT_MULT;
5945
37
      if (ctxt->input->id != inputchk) {
5946
18
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5947
18
                               "Element content declaration doesn't start and"
5948
18
                               " stop in the same entity\n");
5949
18
      }
5950
37
      SKIP(2);
5951
509
  } else {
5952
509
      xmlFreeDocElementContent(ctxt->myDoc, ret);
5953
509
      xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
5954
509
      return(NULL);
5955
509
  }
5956
5957
546
    } else {
5958
0
  xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
5959
0
    }
5960
37
    return(ret);
5961
964
}
5962
5963
/**
5964
 * xmlParseElementChildrenContentDeclPriv:
5965
 * @ctxt:  an XML parser context
5966
 * @inputchk:  the input used for the current entity, needed for boundary checks
5967
 * @depth: the level of recursion
5968
 *
5969
 * parse the declaration for a Mixed Element content
5970
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5971
 *
5972
 *
5973
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5974
 *
5975
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5976
 *
5977
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5978
 *
5979
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5980
 *
5981
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5982
 * TODO Parameter-entity replacement text must be properly nested
5983
 *  with parenthesized groups. That is to say, if either of the
5984
 *  opening or closing parentheses in a choice, seq, or Mixed
5985
 *  construct is contained in the replacement text for a parameter
5986
 *  entity, both must be contained in the same replacement text. For
5987
 *  interoperability, if a parameter-entity reference appears in a
5988
 *  choice, seq, or Mixed construct, its replacement text should not
5989
 *  be empty, and neither the first nor last non-blank character of
5990
 *  the replacement text should be a connector (| or ,).
5991
 *
5992
 * Returns the tree of xmlElementContentPtr describing the element
5993
 *          hierarchy.
5994
 */
5995
static xmlElementContentPtr
5996
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5997
40.1k
                                       int depth) {
5998
40.1k
    xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
5999
40.1k
    const xmlChar *elem;
6000
40.1k
    xmlChar type = 0;
6001
6002
40.1k
    if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6003
40.1k
        (depth >  2048)) {
6004
3
        xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6005
3
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6006
3
                          depth);
6007
3
  return(NULL);
6008
3
    }
6009
40.1k
    SKIP_BLANKS;
6010
40.1k
    GROW;
6011
40.1k
    if (RAW == '(') {
6012
18.5k
  int inputid = ctxt->input->id;
6013
6014
        /* Recurse on first child */
6015
18.5k
  NEXT;
6016
18.5k
  SKIP_BLANKS;
6017
18.5k
        cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6018
18.5k
                                                           depth + 1);
6019
18.5k
        if (cur == NULL)
6020
13.4k
            return(NULL);
6021
5.07k
  SKIP_BLANKS;
6022
5.07k
  GROW;
6023
21.6k
    } else {
6024
21.6k
  elem = xmlParseName(ctxt);
6025
21.6k
  if (elem == NULL) {
6026
450
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6027
450
      return(NULL);
6028
450
  }
6029
21.2k
        cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6030
21.2k
  if (cur == NULL) {
6031
0
      xmlErrMemory(ctxt, NULL);
6032
0
      return(NULL);
6033
0
  }
6034
21.2k
  GROW;
6035
21.2k
  if (RAW == '?') {
6036
1.20k
      cur->ocur = XML_ELEMENT_CONTENT_OPT;
6037
1.20k
      NEXT;
6038
19.9k
  } else if (RAW == '*') {
6039
6.10k
      cur->ocur = XML_ELEMENT_CONTENT_MULT;
6040
6.10k
      NEXT;
6041
13.8k
  } else if (RAW == '+') {
6042
1.10k
      cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6043
1.10k
      NEXT;
6044
12.7k
  } else {
6045
12.7k
      cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6046
12.7k
  }
6047
21.2k
  GROW;
6048
21.2k
    }
6049
26.2k
    SKIP_BLANKS;
6050
51.7k
    while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6051
        /*
6052
   * Each loop we parse one separator and one element.
6053
   */
6054
37.8k
        if (RAW == ',') {
6055
10.6k
      if (type == 0) type = CUR;
6056
6057
      /*
6058
       * Detect "Name | Name , Name" error
6059
       */
6060
8.28k
      else if (type != CUR) {
6061
1
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6062
1
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6063
1
                      type);
6064
1
    if ((last != NULL) && (last != ret))
6065
1
        xmlFreeDocElementContent(ctxt->myDoc, last);
6066
1
    if (ret != NULL)
6067
1
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6068
1
    return(NULL);
6069
1
      }
6070
10.6k
      NEXT;
6071
6072
10.6k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6073
10.6k
      if (op == NULL) {
6074
0
    if ((last != NULL) && (last != ret))
6075
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6076
0
          xmlFreeDocElementContent(ctxt->myDoc, ret);
6077
0
    return(NULL);
6078
0
      }
6079
10.6k
      if (last == NULL) {
6080
2.35k
    op->c1 = ret;
6081
2.35k
    if (ret != NULL)
6082
2.35k
        ret->parent = op;
6083
2.35k
    ret = cur = op;
6084
8.28k
      } else {
6085
8.28k
          cur->c2 = op;
6086
8.28k
    if (op != NULL)
6087
8.28k
        op->parent = cur;
6088
8.28k
    op->c1 = last;
6089
8.28k
    if (last != NULL)
6090
8.28k
        last->parent = op;
6091
8.28k
    cur =op;
6092
8.28k
    last = NULL;
6093
8.28k
      }
6094
27.2k
  } else if (RAW == '|') {
6095
25.0k
      if (type == 0) type = CUR;
6096
6097
      /*
6098
       * Detect "Name , Name | Name" error
6099
       */
6100
7.37k
      else if (type != CUR) {
6101
4
    xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6102
4
        "xmlParseElementChildrenContentDecl : '%c' expected\n",
6103
4
          type);
6104
4
    if ((last != NULL) && (last != ret))
6105
4
        xmlFreeDocElementContent(ctxt->myDoc, last);
6106
4
    if (ret != NULL)
6107
4
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6108
4
    return(NULL);
6109
4
      }
6110
25.0k
      NEXT;
6111
6112
25.0k
      op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6113
25.0k
      if (op == NULL) {
6114
0
    if ((last != NULL) && (last != ret))
6115
0
        xmlFreeDocElementContent(ctxt->myDoc, last);
6116
0
    if (ret != NULL)
6117
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6118
0
    return(NULL);
6119
0
      }
6120
25.0k
      if (last == NULL) {
6121
17.6k
    op->c1 = ret;
6122
17.6k
    if (ret != NULL)
6123
17.6k
        ret->parent = op;
6124
17.6k
    ret = cur = op;
6125
17.6k
      } else {
6126
7.36k
          cur->c2 = op;
6127
7.36k
    if (op != NULL)
6128
7.36k
        op->parent = cur;
6129
7.36k
    op->c1 = last;
6130
7.36k
    if (last != NULL)
6131
7.36k
        last->parent = op;
6132
7.36k
    cur =op;
6133
7.36k
    last = NULL;
6134
7.36k
      }
6135
25.0k
  } else {
6136
2.21k
      xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6137
2.21k
      if ((last != NULL) && (last != ret))
6138
1.41k
          xmlFreeDocElementContent(ctxt->myDoc, last);
6139
2.21k
      if (ret != NULL)
6140
2.21k
    xmlFreeDocElementContent(ctxt->myDoc, ret);
6141
2.21k
      return(NULL);
6142
2.21k
  }
6143
35.6k
  GROW;
6144
35.6k
  SKIP_BLANKS;
6145
35.6k
  GROW;
6146
35.6k
  if (RAW == '(') {
6147
16.5k
      int inputid = ctxt->input->id;
6148
      /* Recurse on second child */
6149
16.5k
      NEXT;
6150
16.5k
      SKIP_BLANKS;
6151
16.5k
      last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6152
16.5k
                                                          depth + 1);
6153
16.5k
            if (last == NULL) {
6154
9.80k
    if (ret != NULL)
6155
9.80k
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6156
9.80k
    return(NULL);
6157
9.80k
            }
6158
6.77k
      SKIP_BLANKS;
6159
19.0k
  } else {
6160
19.0k
      elem = xmlParseName(ctxt);
6161
19.0k
      if (elem == NULL) {
6162
426
    xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6163
426
    if (ret != NULL)
6164
426
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6165
426
    return(NULL);
6166
426
      }
6167
18.6k
      last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6168
18.6k
      if (last == NULL) {
6169
0
    if (ret != NULL)
6170
0
        xmlFreeDocElementContent(ctxt->myDoc, ret);
6171
0
    return(NULL);
6172
0
      }
6173
18.6k
      if (RAW == '?') {
6174
470
    last->ocur = XML_ELEMENT_CONTENT_OPT;
6175
470
    NEXT;
6176
18.1k
      } else if (RAW == '*') {
6177
585
    last->ocur = XML_ELEMENT_CONTENT_MULT;
6178
585
    NEXT;
6179
17.6k
      } else if (RAW == '+') {
6180
2.36k
    last->ocur = XML_ELEMENT_CONTENT_PLUS;
6181
2.36k
    NEXT;
6182
15.2k
      } else {
6183
15.2k
    last->ocur = XML_ELEMENT_CONTENT_ONCE;
6184
15.2k
      }
6185
18.6k
  }
6186
25.4k
  SKIP_BLANKS;
6187
25.4k
  GROW;
6188
25.4k
    }
6189
13.8k
    if ((cur != NULL) && (last != NULL)) {
6190
8.37k
        cur->c2 = last;
6191
8.37k
  if (last != NULL)
6192
8.37k
      last->parent = cur;
6193
8.37k
    }
6194
13.8k
    if (ctxt->input->id != inputchk) {
6195
221
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6196
221
                       "Element content declaration doesn't start and stop in"
6197
221
                       " the same entity\n");
6198
221
    }
6199
13.8k
    NEXT;
6200
13.8k
    if (RAW == '?') {
6201
958
  if (ret != NULL) {
6202
958
      if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6203
958
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6204
250
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6205
708
      else
6206
708
          ret->ocur = XML_ELEMENT_CONTENT_OPT;
6207
958
  }
6208
958
  NEXT;
6209
12.8k
    } else if (RAW == '*') {
6210
3.83k
  if (ret != NULL) {
6211
3.83k
      ret->ocur = XML_ELEMENT_CONTENT_MULT;
6212
3.83k
      cur = ret;
6213
      /*
6214
       * Some normalization:
6215
       * (a | b* | c?)* == (a | b | c)*
6216
       */
6217
6.88k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6218
3.04k
    if ((cur->c1 != NULL) &&
6219
3.04k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6220
3.04k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6221
1.28k
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6222
3.04k
    if ((cur->c2 != NULL) &&
6223
3.04k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6224
3.04k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6225
692
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6226
3.04k
    cur = cur->c2;
6227
3.04k
      }
6228
3.83k
  }
6229
3.83k
  NEXT;
6230
9.04k
    } else if (RAW == '+') {
6231
5.98k
  if (ret != NULL) {
6232
5.98k
      int found = 0;
6233
6234
5.98k
      if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6235
5.98k
          (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6236
1.38k
          ret->ocur = XML_ELEMENT_CONTENT_MULT;
6237
4.59k
      else
6238
4.59k
          ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6239
      /*
6240
       * Some normalization:
6241
       * (a | b*)+ == (a | b)*
6242
       * (a | b?)+ == (a | b)*
6243
       */
6244
13.9k
      while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6245
8.00k
    if ((cur->c1 != NULL) &&
6246
8.00k
              ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6247
8.00k
         (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6248
820
        cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6249
820
        found = 1;
6250
820
    }
6251
8.00k
    if ((cur->c2 != NULL) &&
6252
8.00k
              ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6253
8.00k
         (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6254
1.92k
        cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6255
1.92k
        found = 1;
6256
1.92k
    }
6257
8.00k
    cur = cur->c2;
6258
8.00k
      }
6259
5.98k
      if (found)
6260
2.54k
    ret->ocur = XML_ELEMENT_CONTENT_MULT;
6261
5.98k
  }
6262
5.98k
  NEXT;
6263
5.98k
    }
6264
13.8k
    return(ret);
6265
26.2k
}
6266
6267
/**
6268
 * xmlParseElementChildrenContentDecl:
6269
 * @ctxt:  an XML parser context
6270
 * @inputchk:  the input used for the current entity, needed for boundary checks
6271
 *
6272
 * DEPRECATED: Internal function, don't use.
6273
 *
6274
 * parse the declaration for a Mixed Element content
6275
 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6276
 *
6277
 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6278
 *
6279
 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6280
 *
6281
 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6282
 *
6283
 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6284
 *
6285
 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6286
 * TODO Parameter-entity replacement text must be properly nested
6287
 *  with parenthesized groups. That is to say, if either of the
6288
 *  opening or closing parentheses in a choice, seq, or Mixed
6289
 *  construct is contained in the replacement text for a parameter
6290
 *  entity, both must be contained in the same replacement text. For
6291
 *  interoperability, if a parameter-entity reference appears in a
6292
 *  choice, seq, or Mixed construct, its replacement text should not
6293
 *  be empty, and neither the first nor last non-blank character of
6294
 *  the replacement text should be a connector (| or ,).
6295
 *
6296
 * Returns the tree of xmlElementContentPtr describing the element
6297
 *          hierarchy.
6298
 */
6299
xmlElementContentPtr
6300
0
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6301
    /* stub left for API/ABI compat */
6302
0
    return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6303
0
}
6304
6305
/**
6306
 * xmlParseElementContentDecl:
6307
 * @ctxt:  an XML parser context
6308
 * @name:  the name of the element being defined.
6309
 * @result:  the Element Content pointer will be stored here if any
6310
 *
6311
 * DEPRECATED: Internal function, don't use.
6312
 *
6313
 * parse the declaration for an Element content either Mixed or Children,
6314
 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6315
 *
6316
 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6317
 *
6318
 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6319
 */
6320
6321
int
6322
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6323
6.04k
                           xmlElementContentPtr *result) {
6324
6325
6.04k
    xmlElementContentPtr tree = NULL;
6326
6.04k
    int inputid = ctxt->input->id;
6327
6.04k
    int res;
6328
6329
6.04k
    *result = NULL;
6330
6331
6.04k
    if (RAW != '(') {
6332
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6333
0
    "xmlParseElementContentDecl : %s '(' expected\n", name);
6334
0
  return(-1);
6335
0
    }
6336
6.04k
    NEXT;
6337
6.04k
    GROW;
6338
6.04k
    if (ctxt->instate == XML_PARSER_EOF)
6339
1
        return(-1);
6340
6.04k
    SKIP_BLANKS;
6341
6.04k
    if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6342
964
        tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6343
964
  res = XML_ELEMENT_TYPE_MIXED;
6344
5.07k
    } else {
6345
5.07k
        tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6346
5.07k
  res = XML_ELEMENT_TYPE_ELEMENT;
6347
5.07k
    }
6348
6.04k
    SKIP_BLANKS;
6349
6.04k
    *result = tree;
6350
6.04k
    return(res);
6351
6.04k
}
6352
6353
/**
6354
 * xmlParseElementDecl:
6355
 * @ctxt:  an XML parser context
6356
 *
6357
 * DEPRECATED: Internal function, don't use.
6358
 *
6359
 * Parse an element declaration. Always consumes '<!'.
6360
 *
6361
 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6362
 *
6363
 * [ VC: Unique Element Type Declaration ]
6364
 * No element type may be declared more than once
6365
 *
6366
 * Returns the type of the element, or -1 in case of error
6367
 */
6368
int
6369
8.25k
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6370
8.25k
    const xmlChar *name;
6371
8.25k
    int ret = -1;
6372
8.25k
    xmlElementContentPtr content  = NULL;
6373
6374
8.25k
    if ((CUR != '<') || (NXT(1) != '!'))
6375
0
        return(ret);
6376
8.25k
    SKIP(2);
6377
6378
    /* GROW; done in the caller */
6379
8.25k
    if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6380
8.22k
  int inputid = ctxt->input->id;
6381
6382
8.22k
  SKIP(7);
6383
8.22k
  if (SKIP_BLANKS == 0) {
6384
80
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6385
80
               "Space required after 'ELEMENT'\n");
6386
80
      return(-1);
6387
80
  }
6388
8.14k
        name = xmlParseName(ctxt);
6389
8.14k
  if (name == NULL) {
6390
384
      xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6391
384
         "xmlParseElementDecl: no name for Element\n");
6392
384
      return(-1);
6393
384
  }
6394
7.75k
  if (SKIP_BLANKS == 0) {
6395
3.23k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6396
3.23k
         "Space required after the element name\n");
6397
3.23k
  }
6398
7.75k
  if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6399
131
      SKIP(5);
6400
      /*
6401
       * Element must always be empty.
6402
       */
6403
131
      ret = XML_ELEMENT_TYPE_EMPTY;
6404
7.62k
  } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6405
7.62k
             (NXT(2) == 'Y')) {
6406
464
      SKIP(3);
6407
      /*
6408
       * Element is a generic container.
6409
       */
6410
464
      ret = XML_ELEMENT_TYPE_ANY;
6411
7.16k
  } else if (RAW == '(') {
6412
6.04k
      ret = xmlParseElementContentDecl(ctxt, name, &content);
6413
6.04k
  } else {
6414
      /*
6415
       * [ WFC: PEs in Internal Subset ] error handling.
6416
       */
6417
1.12k
      if ((RAW == '%') && (ctxt->external == 0) &&
6418
1.12k
          (ctxt->inputNr == 1)) {
6419
558
    xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6420
558
    "PEReference: forbidden within markup decl in internal subset\n");
6421
565
      } else {
6422
565
    xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6423
565
          "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6424
565
            }
6425
1.12k
      return(-1);
6426
1.12k
  }
6427
6428
6.63k
  SKIP_BLANKS;
6429
6430
6.63k
  if (RAW != '>') {
6431
4.06k
      xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6432
4.06k
      if (content != NULL) {
6433
355
    xmlFreeDocElementContent(ctxt->myDoc, content);
6434
355
      }
6435
4.06k
  } else {
6436
2.57k
      if (inputid != ctxt->input->id) {
6437
78
    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6438
78
                               "Element declaration doesn't start and stop in"
6439
78
                               " the same entity\n");
6440
78
      }
6441
6442
2.57k
      NEXT;
6443
2.57k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6444
2.57k
    (ctxt->sax->elementDecl != NULL)) {
6445
979
    if (content != NULL)
6446
776
        content->parent = NULL;
6447
979
          ctxt->sax->elementDecl(ctxt->userData, name, ret,
6448
979
                           content);
6449
979
    if ((content != NULL) && (content->parent == NULL)) {
6450
        /*
6451
         * this is a trick: if xmlAddElementDecl is called,
6452
         * instead of copying the full tree it is plugged directly
6453
         * if called from the parser. Avoid duplicating the
6454
         * interfaces or change the API/ABI
6455
         */
6456
416
        xmlFreeDocElementContent(ctxt->myDoc, content);
6457
416
    }
6458
1.59k
      } else if (content != NULL) {
6459
1.20k
    xmlFreeDocElementContent(ctxt->myDoc, content);
6460
1.20k
      }
6461
2.57k
  }
6462
6.63k
    }
6463
6.66k
    return(ret);
6464
8.25k
}
6465
6466
/**
6467
 * xmlParseConditionalSections
6468
 * @ctxt:  an XML parser context
6469
 *
6470
 * Parse a conditional section. Always consumes '<!['.
6471
 *
6472
 * [61] conditionalSect ::= includeSect | ignoreSect
6473
 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6474
 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6475
 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6476
 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6477
 */
6478
6479
static void
6480
1.23k
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6481
1.23k
    int *inputIds = NULL;
6482
1.23k
    size_t inputIdsSize = 0;
6483
1.23k
    size_t depth = 0;
6484
6485
5.06k
    while (ctxt->instate != XML_PARSER_EOF) {
6486
5.06k
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6487
1.80k
            int id = ctxt->input->id;
6488
6489
1.80k
            SKIP(3);
6490
1.80k
            SKIP_BLANKS;
6491
6492
1.80k
            if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6493
772
                SKIP(7);
6494
772
                SKIP_BLANKS;
6495
772
                if (RAW != '[') {
6496
1
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6497
1
                    xmlHaltParser(ctxt);
6498
1
                    goto error;
6499
1
                }
6500
771
                if (ctxt->input->id != id) {
6501
1
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6502
1
                                   "All markup of the conditional section is"
6503
1
                                   " not in the same entity\n");
6504
1
                }
6505
771
                NEXT;
6506
6507
771
                if (inputIdsSize <= depth) {
6508
257
                    int *tmp;
6509
6510
257
                    inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6511
257
                    tmp = (int *) xmlRealloc(inputIds,
6512
257
                            inputIdsSize * sizeof(int));
6513
257
                    if (tmp == NULL) {
6514
0
                        xmlErrMemory(ctxt, NULL);
6515
0
                        goto error;
6516
0
                    }
6517
257
                    inputIds = tmp;
6518
257
                }
6519
771
                inputIds[depth] = id;
6520
771
                depth++;
6521
1.03k
            } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6522
1.01k
                size_t ignoreDepth = 0;
6523
6524
1.01k
                SKIP(6);
6525
1.01k
                SKIP_BLANKS;
6526
1.01k
                if (RAW != '[') {
6527
4
                    xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6528
4
                    xmlHaltParser(ctxt);
6529
4
                    goto error;
6530
4
                }
6531
1.01k
                if (ctxt->input->id != id) {
6532
221
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6533
221
                                   "All markup of the conditional section is"
6534
221
                                   " not in the same entity\n");
6535
221
                }
6536
1.01k
                NEXT;
6537
6538
110k
                while (RAW != 0) {
6539
109k
                    if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6540
882
                        SKIP(3);
6541
882
                        ignoreDepth++;
6542
                        /* Check for integer overflow */
6543
882
                        if (ignoreDepth == 0) {
6544
0
                            xmlErrMemory(ctxt, NULL);
6545
0
                            goto error;
6546
0
                        }
6547
108k
                    } else if ((RAW == ']') && (NXT(1) == ']') &&
6548
108k
                               (NXT(2) == '>')) {
6549
773
                        if (ignoreDepth == 0)
6550
309
                            break;
6551
464
                        SKIP(3);
6552
464
                        ignoreDepth--;
6553
107k
                    } else {
6554
107k
                        NEXT;
6555
107k
                    }
6556
109k
                }
6557
6558
1.01k
    if (RAW == 0) {
6559
703
        xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6560
703
                    goto error;
6561
703
    }
6562
309
                if (ctxt->input->id != id) {
6563
214
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6564
214
                                   "All markup of the conditional section is"
6565
214
                                   " not in the same entity\n");
6566
214
                }
6567
309
                SKIP(3);
6568
309
            } else {
6569
17
                xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6570
17
                xmlHaltParser(ctxt);
6571
17
                goto error;
6572
17
            }
6573
3.25k
        } else if ((depth > 0) &&
6574
3.25k
                   (RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6575
140
            depth--;
6576
140
            if (ctxt->input->id != inputIds[depth]) {
6577
3
                xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6578
3
                               "All markup of the conditional section is not"
6579
3
                               " in the same entity\n");
6580
3
            }
6581
140
            SKIP(3);
6582
3.11k
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6583
3.05k
            xmlParseMarkupDecl(ctxt);
6584
3.05k
        } else {
6585
60
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6586
60
            xmlHaltParser(ctxt);
6587
60
            goto error;
6588
60
        }
6589
6590
4.27k
        if (depth == 0)
6591
448
            break;
6592
6593
3.83k
        SKIP_BLANKS;
6594
3.83k
        SHRINK;
6595
3.83k
        GROW;
6596
3.83k
    }
6597
6598
1.23k
error:
6599
1.23k
    xmlFree(inputIds);
6600
1.23k
}
6601
6602
/**
6603
 * xmlParseMarkupDecl:
6604
 * @ctxt:  an XML parser context
6605
 *
6606
 * DEPRECATED: Internal function, don't use.
6607
 *
6608
 * Parse markup declarations. Always consumes '<!' or '<?'.
6609
 *
6610
 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6611
 *                     NotationDecl | PI | Comment
6612
 *
6613
 * [ VC: Proper Declaration/PE Nesting ]
6614
 * Parameter-entity replacement text must be properly nested with
6615
 * markup declarations. That is to say, if either the first character
6616
 * or the last character of a markup declaration (markupdecl above) is
6617
 * contained in the replacement text for a parameter-entity reference,
6618
 * both must be contained in the same replacement text.
6619
 *
6620
 * [ WFC: PEs in Internal Subset ]
6621
 * In the internal DTD subset, parameter-entity references can occur
6622
 * only where markup declarations can occur, not within markup declarations.
6623
 * (This does not apply to references that occur in external parameter
6624
 * entities or to the external subset.)
6625
 */
6626
void
6627
80.9k
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6628
80.9k
    GROW;
6629
80.9k
    if (CUR == '<') {
6630
80.9k
        if (NXT(1) == '!') {
6631
68.4k
      switch (NXT(2)) {
6632
28.4k
          case 'E':
6633
28.4k
        if (NXT(3) == 'L')
6634
8.25k
      xmlParseElementDecl(ctxt);
6635
20.1k
        else if (NXT(3) == 'N')
6636
20.1k
      xmlParseEntityDecl(ctxt);
6637
12
                    else
6638
12
                        SKIP(2);
6639
28.4k
        break;
6640
20.6k
          case 'A':
6641
20.6k
        xmlParseAttributeListDecl(ctxt);
6642
20.6k
        break;
6643
6.02k
          case 'N':
6644
6.02k
        xmlParseNotationDecl(ctxt);
6645
6.02k
        break;
6646
11.7k
          case '-':
6647
11.7k
        xmlParseComment(ctxt);
6648
11.7k
        break;
6649
1.71k
    default:
6650
        /* there is an error but it will be detected later */
6651
1.71k
                    SKIP(2);
6652
1.71k
        break;
6653
68.4k
      }
6654
68.4k
  } else if (NXT(1) == '?') {
6655
12.4k
      xmlParsePI(ctxt);
6656
12.4k
  }
6657
80.9k
    }
6658
6659
    /*
6660
     * detect requirement to exit there and act accordingly
6661
     * and avoid having instate overridden later on
6662
     */
6663
80.9k
    if (ctxt->instate == XML_PARSER_EOF)
6664
1.05k
        return;
6665
6666
79.8k
    ctxt->instate = XML_PARSER_DTD;
6667
79.8k
}
6668
6669
/**
6670
 * xmlParseTextDecl:
6671
 * @ctxt:  an XML parser context
6672
 *
6673
 * DEPRECATED: Internal function, don't use.
6674
 *
6675
 * parse an XML declaration header for external entities
6676
 *
6677
 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6678
 */
6679
6680
void
6681
17.3k
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6682
17.3k
    xmlChar *version;
6683
17.3k
    const xmlChar *encoding;
6684
17.3k
    int oldstate;
6685
6686
    /*
6687
     * We know that '<?xml' is here.
6688
     */
6689
17.3k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
6690
17.3k
  SKIP(5);
6691
17.3k
    } else {
6692
0
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
6693
0
  return;
6694
0
    }
6695
6696
    /* Avoid expansion of parameter entities when skipping blanks. */
6697
17.3k
    oldstate = ctxt->instate;
6698
17.3k
    ctxt->instate = XML_PARSER_START;
6699
6700
17.3k
    if (SKIP_BLANKS == 0) {
6701
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6702
0
           "Space needed after '<?xml'\n");
6703
0
    }
6704
6705
    /*
6706
     * We may have the VersionInfo here.
6707
     */
6708
17.3k
    version = xmlParseVersionInfo(ctxt);
6709
17.3k
    if (version == NULL)
6710
16.0k
  version = xmlCharStrdup(XML_DEFAULT_VERSION);
6711
1.30k
    else {
6712
1.30k
  if (SKIP_BLANKS == 0) {
6713
375
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6714
375
               "Space needed here\n");
6715
375
  }
6716
1.30k
    }
6717
17.3k
    ctxt->input->version = version;
6718
6719
    /*
6720
     * We must have the encoding declaration
6721
     */
6722
17.3k
    encoding = xmlParseEncodingDecl(ctxt);
6723
17.3k
    if (ctxt->instate == XML_PARSER_EOF)
6724
2
        return;
6725
17.3k
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6726
  /*
6727
   * The XML REC instructs us to stop parsing right here
6728
   */
6729
475
        ctxt->instate = oldstate;
6730
475
        return;
6731
475
    }
6732
16.9k
    if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6733
170
  xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6734
170
           "Missing encoding in text declaration\n");
6735
170
    }
6736
6737
16.9k
    SKIP_BLANKS;
6738
16.9k
    if ((RAW == '?') && (NXT(1) == '>')) {
6739
3.85k
        SKIP(2);
6740
13.0k
    } else if (RAW == '>') {
6741
        /* Deprecated old WD ... */
6742
1.20k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6743
1.20k
  NEXT;
6744
11.8k
    } else {
6745
11.8k
        int c;
6746
6747
11.8k
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
6748
5.99M
        while ((c = CUR) != 0) {
6749
5.99M
            NEXT;
6750
5.99M
            if (c == '>')
6751
6.39k
                break;
6752
5.99M
        }
6753
11.8k
    }
6754
6755
16.9k
    ctxt->instate = oldstate;
6756
16.9k
}
6757
6758
/**
6759
 * xmlParseExternalSubset:
6760
 * @ctxt:  an XML parser context
6761
 * @ExternalID: the external identifier
6762
 * @SystemID: the system identifier (or URL)
6763
 *
6764
 * parse Markup declarations from an external subset
6765
 *
6766
 * [30] extSubset ::= textDecl? extSubsetDecl
6767
 *
6768
 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6769
 */
6770
void
6771
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6772
0
                       const xmlChar *SystemID) {
6773
0
    xmlDetectSAX2(ctxt);
6774
0
    GROW;
6775
6776
0
    if ((ctxt->encoding == NULL) &&
6777
0
        (ctxt->input->end - ctxt->input->cur >= 4)) {
6778
0
        xmlChar start[4];
6779
0
  xmlCharEncoding enc;
6780
6781
0
  start[0] = RAW;
6782
0
  start[1] = NXT(1);
6783
0
  start[2] = NXT(2);
6784
0
  start[3] = NXT(3);
6785
0
  enc = xmlDetectCharEncoding(start, 4);
6786
0
  if (enc != XML_CHAR_ENCODING_NONE)
6787
0
      xmlSwitchEncoding(ctxt, enc);
6788
0
    }
6789
6790
0
    if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
6791
0
  xmlParseTextDecl(ctxt);
6792
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6793
      /*
6794
       * The XML REC instructs us to stop parsing right here
6795
       */
6796
0
      xmlHaltParser(ctxt);
6797
0
      return;
6798
0
  }
6799
0
    }
6800
0
    if (ctxt->myDoc == NULL) {
6801
0
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
6802
0
  if (ctxt->myDoc == NULL) {
6803
0
      xmlErrMemory(ctxt, "New Doc failed");
6804
0
      return;
6805
0
  }
6806
0
  ctxt->myDoc->properties = XML_DOC_INTERNAL;
6807
0
    }
6808
0
    if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6809
0
        xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6810
6811
0
    ctxt->instate = XML_PARSER_DTD;
6812
0
    ctxt->external = 1;
6813
0
    SKIP_BLANKS;
6814
0
    while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
6815
0
  GROW;
6816
0
        if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6817
0
            xmlParseConditionalSections(ctxt);
6818
0
        } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6819
0
            xmlParseMarkupDecl(ctxt);
6820
0
        } else {
6821
0
            xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6822
0
            xmlHaltParser(ctxt);
6823
0
            return;
6824
0
        }
6825
0
        SKIP_BLANKS;
6826
0
        SHRINK;
6827
0
    }
6828
6829
0
    if (RAW != 0) {
6830
0
  xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6831
0
    }
6832
6833
0
}
6834
6835
/**
6836
 * xmlParseReference:
6837
 * @ctxt:  an XML parser context
6838
 *
6839
 * DEPRECATED: Internal function, don't use.
6840
 *
6841
 * parse and handle entity references in content, depending on the SAX
6842
 * interface, this may end-up in a call to character() if this is a
6843
 * CharRef, a predefined entity, if there is no reference() callback.
6844
 * or if the parser was asked to switch to that mode.
6845
 *
6846
 * Always consumes '&'.
6847
 *
6848
 * [67] Reference ::= EntityRef | CharRef
6849
 */
6850
void
6851
58.4k
xmlParseReference(xmlParserCtxtPtr ctxt) {
6852
58.4k
    xmlEntityPtr ent;
6853
58.4k
    xmlChar *val;
6854
58.4k
    int was_checked;
6855
58.4k
    xmlNodePtr list = NULL;
6856
58.4k
    xmlParserErrors ret = XML_ERR_OK;
6857
6858
6859
58.4k
    if (RAW != '&')
6860
0
        return;
6861
6862
    /*
6863
     * Simple case of a CharRef
6864
     */
6865
58.4k
    if (NXT(1) == '#') {
6866
8.60k
  int i = 0;
6867
8.60k
  xmlChar out[16];
6868
8.60k
  int hex = NXT(2);
6869
8.60k
  int value = xmlParseCharRef(ctxt);
6870
6871
8.60k
  if (value == 0)
6872
4.40k
      return;
6873
4.20k
  if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6874
      /*
6875
       * So we are using non-UTF-8 buffers
6876
       * Check that the char fit on 8bits, if not
6877
       * generate a CharRef.
6878
       */
6879
2.19k
      if (value <= 0xFF) {
6880
1.02k
    out[0] = value;
6881
1.02k
    out[1] = 0;
6882
1.02k
    if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6883
1.02k
        (!ctxt->disableSAX))
6884
0
        ctxt->sax->characters(ctxt->userData, out, 1);
6885
1.17k
      } else {
6886
1.17k
    if ((hex == 'x') || (hex == 'X'))
6887
826
        snprintf((char *)out, sizeof(out), "#x%X", value);
6888
344
    else
6889
344
        snprintf((char *)out, sizeof(out), "#%d", value);
6890
1.17k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6891
1.17k
        (!ctxt->disableSAX))
6892
0
        ctxt->sax->reference(ctxt->userData, out);
6893
1.17k
      }
6894
2.19k
  } else {
6895
      /*
6896
       * Just encode the value in UTF-8
6897
       */
6898
2.01k
      COPY_BUF(0 ,out, i, value);
6899
2.01k
      out[i] = 0;
6900
2.01k
      if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6901
2.01k
    (!ctxt->disableSAX))
6902
1.56k
    ctxt->sax->characters(ctxt->userData, out, i);
6903
2.01k
  }
6904
4.20k
  return;
6905
8.60k
    }
6906
6907
    /*
6908
     * We are seeing an entity reference
6909
     */
6910
49.8k
    ent = xmlParseEntityRef(ctxt);
6911
49.8k
    if (ent == NULL) return;
6912
38.1k
    if (!ctxt->wellFormed)
6913
11.4k
  return;
6914
26.7k
    was_checked = ent->flags & XML_ENT_PARSED;
6915
6916
    /* special case of predefined entities */
6917
26.7k
    if ((ent->name == NULL) ||
6918
26.7k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6919
96
  val = ent->content;
6920
96
  if (val == NULL) return;
6921
  /*
6922
   * inline the entity.
6923
   */
6924
96
  if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6925
96
      (!ctxt->disableSAX))
6926
96
      ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6927
96
  return;
6928
96
    }
6929
6930
    /*
6931
     * The first reference to the entity trigger a parsing phase
6932
     * where the ent->children is filled with the result from
6933
     * the parsing.
6934
     * Note: external parsed entities will not be loaded, it is not
6935
     * required for a non-validating parser, unless the parsing option
6936
     * of validating, or substituting entities were given. Doing so is
6937
     * far more secure as the parser will only process data coming from
6938
     * the document entity by default.
6939
     */
6940
26.6k
    if (((ent->flags & XML_ENT_PARSED) == 0) &&
6941
26.6k
        ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
6942
2.10k
         (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
6943
2.10k
  unsigned long oldsizeentcopy = ctxt->sizeentcopy;
6944
6945
  /*
6946
   * This is a bit hackish but this seems the best
6947
   * way to make sure both SAX and DOM entity support
6948
   * behaves okay.
6949
   */
6950
2.10k
  void *user_data;
6951
2.10k
  if (ctxt->userData == ctxt)
6952
2.10k
      user_data = NULL;
6953
0
  else
6954
0
      user_data = ctxt->userData;
6955
6956
        /* Avoid overflow as much as possible */
6957
2.10k
        ctxt->sizeentcopy = 0;
6958
6959
2.10k
        if (ent->flags & XML_ENT_EXPANDING) {
6960
14
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6961
14
            xmlHaltParser(ctxt);
6962
14
            return;
6963
14
        }
6964
6965
2.09k
        ent->flags |= XML_ENT_EXPANDING;
6966
6967
  /*
6968
   * Check that this entity is well formed
6969
   * 4.3.2: An internal general parsed entity is well-formed
6970
   * if its replacement text matches the production labeled
6971
   * content.
6972
   */
6973
2.09k
  if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6974
359
      ctxt->depth++;
6975
359
      ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6976
359
                                                user_data, &list);
6977
359
      ctxt->depth--;
6978
6979
1.73k
  } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6980
1.73k
      ctxt->depth++;
6981
1.73k
      ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6982
1.73k
                                     user_data, ctxt->depth, ent->URI,
6983
1.73k
             ent->ExternalID, &list);
6984
1.73k
      ctxt->depth--;
6985
1.73k
  } else {
6986
0
      ret = XML_ERR_ENTITY_PE_INTERNAL;
6987
0
      xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6988
0
       "invalid entity type found\n", NULL);
6989
0
  }
6990
6991
2.09k
        ent->flags &= ~XML_ENT_EXPANDING;
6992
2.09k
        ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
6993
2.09k
        ent->expandedSize = ctxt->sizeentcopy;
6994
2.09k
  if (ret == XML_ERR_ENTITY_LOOP) {
6995
17
            xmlHaltParser(ctxt);
6996
17
      xmlFreeNodeList(list);
6997
17
      return;
6998
17
  }
6999
2.07k
  if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7000
0
      xmlFreeNodeList(list);
7001
0
      return;
7002
0
  }
7003
7004
2.07k
  if ((ret == XML_ERR_OK) && (list != NULL)) {
7005
284
            ent->children = list;
7006
            /*
7007
             * Prune it directly in the generated document
7008
             * except for single text nodes.
7009
             */
7010
284
            if ((ctxt->replaceEntities == 0) ||
7011
284
                (ctxt->parseMode == XML_PARSE_READER) ||
7012
284
                ((list->type == XML_TEXT_NODE) &&
7013
284
                 (list->next == NULL))) {
7014
85
                ent->owner = 1;
7015
170
                while (list != NULL) {
7016
85
                    list->parent = (xmlNodePtr) ent;
7017
85
                    if (list->doc != ent->doc)
7018
0
                        xmlSetTreeDoc(list, ent->doc);
7019
85
                    if (list->next == NULL)
7020
85
                        ent->last = list;
7021
85
                    list = list->next;
7022
85
                }
7023
85
                list = NULL;
7024
199
            } else {
7025
199
                ent->owner = 0;
7026
4.11k
                while (list != NULL) {
7027
3.91k
                    list->parent = (xmlNodePtr) ctxt->node;
7028
3.91k
                    list->doc = ctxt->myDoc;
7029
3.91k
                    if (list->next == NULL)
7030
199
                        ent->last = list;
7031
3.91k
                    list = list->next;
7032
3.91k
                }
7033
199
                list = ent->children;
7034
#ifdef LIBXML_LEGACY_ENABLED
7035
                if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7036
                    xmlAddEntityReference(ent, list, NULL);
7037
#endif /* LIBXML_LEGACY_ENABLED */
7038
199
            }
7039
1.79k
  } else if ((ret != XML_ERR_OK) &&
7040
1.79k
       (ret != XML_WAR_UNDECLARED_ENTITY)) {
7041
995
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7042
995
         "Entity '%s' failed to parse\n", ent->name);
7043
995
            if (ent->content != NULL)
7044
117
                ent->content[0] = 0;
7045
995
  } else if (list != NULL) {
7046
0
      xmlFreeNodeList(list);
7047
0
      list = NULL;
7048
0
  }
7049
7050
        /* Prevent entity from being parsed and expanded twice (Bug 760367). */
7051
2.07k
        was_checked = 0;
7052
2.07k
    }
7053
7054
    /*
7055
     * Now that the entity content has been gathered
7056
     * provide it to the application, this can take different forms based
7057
     * on the parsing modes.
7058
     */
7059
26.5k
    if (ent->children == NULL) {
7060
  /*
7061
   * Probably running in SAX mode and the callbacks don't
7062
   * build the entity content. So unless we already went
7063
   * though parsing for first checking go though the entity
7064
   * content to generate callbacks associated to the entity
7065
   */
7066
23.3k
  if (was_checked != 0) {
7067
21.5k
      void *user_data;
7068
      /*
7069
       * This is a bit hackish but this seems the best
7070
       * way to make sure both SAX and DOM entity support
7071
       * behaves okay.
7072
       */
7073
21.5k
      if (ctxt->userData == ctxt)
7074
21.5k
    user_data = NULL;
7075
0
      else
7076
0
    user_data = ctxt->userData;
7077
7078
21.5k
      if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7079
3.68k
    ctxt->depth++;
7080
3.68k
    ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7081
3.68k
           ent->content, user_data, NULL);
7082
3.68k
    ctxt->depth--;
7083
17.8k
      } else if (ent->etype ==
7084
17.8k
           XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7085
17.8k
          unsigned long oldsizeentities = ctxt->sizeentities;
7086
7087
17.8k
    ctxt->depth++;
7088
17.8k
    ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7089
17.8k
         ctxt->sax, user_data, ctxt->depth,
7090
17.8k
         ent->URI, ent->ExternalID, NULL);
7091
17.8k
    ctxt->depth--;
7092
7093
                /* Undo the change to sizeentities */
7094
17.8k
                ctxt->sizeentities = oldsizeentities;
7095
17.8k
      } else {
7096
0
    ret = XML_ERR_ENTITY_PE_INTERNAL;
7097
0
    xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7098
0
           "invalid entity type found\n", NULL);
7099
0
      }
7100
21.5k
      if (ret == XML_ERR_ENTITY_LOOP) {
7101
0
    xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7102
0
    return;
7103
0
      }
7104
21.5k
            if (xmlParserEntityCheck(ctxt, 0))
7105
0
                return;
7106
21.5k
  }
7107
23.3k
  if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7108
23.3k
      (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7109
      /*
7110
       * Entity reference callback comes second, it's somewhat
7111
       * superfluous but a compatibility to historical behaviour
7112
       */
7113
0
      ctxt->sax->reference(ctxt->userData, ent->name);
7114
0
  }
7115
23.3k
  return;
7116
23.3k
    }
7117
7118
    /*
7119
     * We also check for amplification if entities aren't substituted.
7120
     * They might be expanded later.
7121
     */
7122
3.22k
    if ((was_checked != 0) &&
7123
3.22k
        (xmlParserEntityCheck(ctxt, ent->expandedSize)))
7124
6
        return;
7125
7126
    /*
7127
     * If we didn't get any children for the entity being built
7128
     */
7129
3.22k
    if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7130
3.22k
  (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7131
  /*
7132
   * Create a node.
7133
   */
7134
0
  ctxt->sax->reference(ctxt->userData, ent->name);
7135
0
  return;
7136
0
    }
7137
7138
3.22k
    if (ctxt->replaceEntities)  {
7139
  /*
7140
   * There is a problem on the handling of _private for entities
7141
   * (bug 155816): Should we copy the content of the field from
7142
   * the entity (possibly overwriting some value set by the user
7143
   * when a copy is created), should we leave it alone, or should
7144
   * we try to take care of different situations?  The problem
7145
   * is exacerbated by the usage of this field by the xmlReader.
7146
   * To fix this bug, we look at _private on the created node
7147
   * and, if it's NULL, we copy in whatever was in the entity.
7148
   * If it's not NULL we leave it alone.  This is somewhat of a
7149
   * hack - maybe we should have further tests to determine
7150
   * what to do.
7151
   */
7152
3.22k
  if (ctxt->node != NULL) {
7153
      /*
7154
       * Seems we are generating the DOM content, do
7155
       * a simple tree copy for all references except the first
7156
       * In the first occurrence list contains the replacement.
7157
       */
7158
3.22k
      if (((list == NULL) && (ent->owner == 0)) ||
7159
3.22k
    (ctxt->parseMode == XML_PARSE_READER)) {
7160
0
    xmlNodePtr nw = NULL, cur, firstChild = NULL;
7161
7162
    /*
7163
     * when operating on a reader, the entities definitions
7164
     * are always owning the entities subtree.
7165
    if (ctxt->parseMode == XML_PARSE_READER)
7166
        ent->owner = 1;
7167
     */
7168
7169
0
    cur = ent->children;
7170
0
    while (cur != NULL) {
7171
0
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7172
0
        if (nw != NULL) {
7173
0
      if (nw->_private == NULL)
7174
0
          nw->_private = cur->_private;
7175
0
      if (firstChild == NULL){
7176
0
          firstChild = nw;
7177
0
      }
7178
0
      nw = xmlAddChild(ctxt->node, nw);
7179
0
        }
7180
0
        if (cur == ent->last) {
7181
      /*
7182
       * needed to detect some strange empty
7183
       * node cases in the reader tests
7184
       */
7185
0
      if ((ctxt->parseMode == XML_PARSE_READER) &&
7186
0
          (nw != NULL) &&
7187
0
          (nw->type == XML_ELEMENT_NODE) &&
7188
0
          (nw->children == NULL))
7189
0
          nw->extra = 1;
7190
7191
0
      break;
7192
0
        }
7193
0
        cur = cur->next;
7194
0
    }
7195
#ifdef LIBXML_LEGACY_ENABLED
7196
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7197
      xmlAddEntityReference(ent, firstChild, nw);
7198
#endif /* LIBXML_LEGACY_ENABLED */
7199
3.22k
      } else if ((list == NULL) || (ctxt->inputNr > 0)) {
7200
3.22k
    xmlNodePtr nw = NULL, cur, next, last,
7201
3.22k
         firstChild = NULL;
7202
7203
    /*
7204
     * Copy the entity child list and make it the new
7205
     * entity child list. The goal is to make sure any
7206
     * ID or REF referenced will be the one from the
7207
     * document content and not the entity copy.
7208
     */
7209
3.22k
    cur = ent->children;
7210
3.22k
    ent->children = NULL;
7211
3.22k
    last = ent->last;
7212
3.22k
    ent->last = NULL;
7213
16.6k
    while (cur != NULL) {
7214
16.6k
        next = cur->next;
7215
16.6k
        cur->next = NULL;
7216
16.6k
        cur->parent = NULL;
7217
16.6k
        nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7218
16.6k
        if (nw != NULL) {
7219
16.6k
      if (nw->_private == NULL)
7220
16.6k
          nw->_private = cur->_private;
7221
16.6k
      if (firstChild == NULL){
7222
3.22k
          firstChild = cur;
7223
3.22k
      }
7224
16.6k
      xmlAddChild((xmlNodePtr) ent, nw);
7225
16.6k
        }
7226
16.6k
        xmlAddChild(ctxt->node, cur);
7227
16.6k
        if (cur == last)
7228
3.22k
      break;
7229
13.4k
        cur = next;
7230
13.4k
    }
7231
3.22k
    if (ent->owner == 0)
7232
199
        ent->owner = 1;
7233
#ifdef LIBXML_LEGACY_ENABLED
7234
    if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7235
      xmlAddEntityReference(ent, firstChild, nw);
7236
#endif /* LIBXML_LEGACY_ENABLED */
7237
3.22k
      } else {
7238
0
    const xmlChar *nbktext;
7239
7240
    /*
7241
     * the name change is to avoid coalescing of the
7242
     * node with a possible previous text one which
7243
     * would make ent->children a dangling pointer
7244
     */
7245
0
    nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7246
0
          -1);
7247
0
    if (ent->children->type == XML_TEXT_NODE)
7248
0
        ent->children->name = nbktext;
7249
0
    if ((ent->last != ent->children) &&
7250
0
        (ent->last->type == XML_TEXT_NODE))
7251
0
        ent->last->name = nbktext;
7252
0
    xmlAddChildList(ctxt->node, ent->children);
7253
0
      }
7254
7255
      /*
7256
       * This is to avoid a nasty side effect, see
7257
       * characters() in SAX.c
7258
       */
7259
3.22k
      ctxt->nodemem = 0;
7260
3.22k
      ctxt->nodelen = 0;
7261
3.22k
      return;
7262
3.22k
  }
7263
3.22k
    }
7264
3.22k
}
7265
7266
/**
7267
 * xmlParseEntityRef:
7268
 * @ctxt:  an XML parser context
7269
 *
7270
 * DEPRECATED: Internal function, don't use.
7271
 *
7272
 * Parse an entitiy reference. Always consumes '&'.
7273
 *
7274
 * [68] EntityRef ::= '&' Name ';'
7275
 *
7276
 * [ WFC: Entity Declared ]
7277
 * In a document without any DTD, a document with only an internal DTD
7278
 * subset which contains no parameter entity references, or a document
7279
 * with "standalone='yes'", the Name given in the entity reference
7280
 * must match that in an entity declaration, except that well-formed
7281
 * documents need not declare any of the following entities: amp, lt,
7282
 * gt, apos, quot.  The declaration of a parameter entity must precede
7283
 * any reference to it.  Similarly, the declaration of a general entity
7284
 * must precede any reference to it which appears in a default value in an
7285
 * attribute-list declaration. Note that if entities are declared in the
7286
 * external subset or in external parameter entities, a non-validating
7287
 * processor is not obligated to read and process their declarations;
7288
 * for such documents, the rule that an entity must be declared is a
7289
 * well-formedness constraint only if standalone='yes'.
7290
 *
7291
 * [ WFC: Parsed Entity ]
7292
 * An entity reference must not contain the name of an unparsed entity
7293
 *
7294
 * Returns the xmlEntityPtr if found, or NULL otherwise.
7295
 */
7296
xmlEntityPtr
7297
69.4k
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7298
69.4k
    const xmlChar *name;
7299
69.4k
    xmlEntityPtr ent = NULL;
7300
7301
69.4k
    GROW;
7302
69.4k
    if (ctxt->instate == XML_PARSER_EOF)
7303
1
        return(NULL);
7304
7305
69.4k
    if (RAW != '&')
7306
0
        return(NULL);
7307
69.4k
    NEXT;
7308
69.4k
    name = xmlParseName(ctxt);
7309
69.4k
    if (name == NULL) {
7310
6.95k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7311
6.95k
           "xmlParseEntityRef: no name\n");
7312
6.95k
        return(NULL);
7313
6.95k
    }
7314
62.5k
    if (RAW != ';') {
7315
7.41k
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7316
7.41k
  return(NULL);
7317
7.41k
    }
7318
55.0k
    NEXT;
7319
7320
    /*
7321
     * Predefined entities override any extra definition
7322
     */
7323
55.0k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7324
55.0k
        ent = xmlGetPredefinedEntity(name);
7325
55.0k
        if (ent != NULL)
7326
2.45k
            return(ent);
7327
55.0k
    }
7328
7329
    /*
7330
     * Ask first SAX for entity resolution, otherwise try the
7331
     * entities which may have stored in the parser context.
7332
     */
7333
52.6k
    if (ctxt->sax != NULL) {
7334
52.6k
  if (ctxt->sax->getEntity != NULL)
7335
52.6k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7336
52.6k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7337
52.6k
      (ctxt->options & XML_PARSE_OLDSAX))
7338
0
      ent = xmlGetPredefinedEntity(name);
7339
52.6k
  if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7340
52.6k
      (ctxt->userData==ctxt)) {
7341
358
      ent = xmlSAX2GetEntity(ctxt, name);
7342
358
  }
7343
52.6k
    }
7344
52.6k
    if (ctxt->instate == XML_PARSER_EOF)
7345
3
  return(NULL);
7346
    /*
7347
     * [ WFC: Entity Declared ]
7348
     * In a document without any DTD, a document with only an
7349
     * internal DTD subset which contains no parameter entity
7350
     * references, or a document with "standalone='yes'", the
7351
     * Name given in the entity reference must match that in an
7352
     * entity declaration, except that well-formed documents
7353
     * need not declare any of the following entities: amp, lt,
7354
     * gt, apos, quot.
7355
     * The declaration of a parameter entity must precede any
7356
     * reference to it.
7357
     * Similarly, the declaration of a general entity must
7358
     * precede any reference to it which appears in a default
7359
     * value in an attribute-list declaration. Note that if
7360
     * entities are declared in the external subset or in
7361
     * external parameter entities, a non-validating processor
7362
     * is not obligated to read and process their declarations;
7363
     * for such documents, the rule that an entity must be
7364
     * declared is a well-formedness constraint only if
7365
     * standalone='yes'.
7366
     */
7367
52.6k
    if (ent == NULL) {
7368
4.46k
  if ((ctxt->standalone == 1) ||
7369
4.46k
      ((ctxt->hasExternalSubset == 0) &&
7370
4.05k
       (ctxt->hasPErefs == 0))) {
7371
2.77k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7372
2.77k
         "Entity '%s' not defined\n", name);
7373
2.77k
  } else {
7374
1.69k
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7375
1.69k
         "Entity '%s' not defined\n", name);
7376
1.69k
      if ((ctxt->inSubset == 0) &&
7377
1.69k
    (ctxt->sax != NULL) &&
7378
1.69k
    (ctxt->sax->reference != NULL)) {
7379
1.40k
    ctxt->sax->reference(ctxt->userData, name);
7380
1.40k
      }
7381
1.69k
  }
7382
4.46k
  ctxt->valid = 0;
7383
4.46k
    }
7384
7385
    /*
7386
     * [ WFC: Parsed Entity ]
7387
     * An entity reference must not contain the name of an
7388
     * unparsed entity
7389
     */
7390
48.1k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7391
202
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7392
202
     "Entity reference to unparsed entity %s\n", name);
7393
202
    }
7394
7395
    /*
7396
     * [ WFC: No External Entity References ]
7397
     * Attribute values cannot contain direct or indirect
7398
     * entity references to external entities.
7399
     */
7400
47.9k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7401
47.9k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7402
1.13k
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7403
1.13k
       "Attribute references external entity '%s'\n", name);
7404
1.13k
    }
7405
    /*
7406
     * [ WFC: No < in Attribute Values ]
7407
     * The replacement text of any entity referred to directly or
7408
     * indirectly in an attribute value (other than "&lt;") must
7409
     * not contain a <.
7410
     */
7411
46.8k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7412
46.8k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7413
9.34k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7414
477
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7415
202
                ent->flags |= XML_ENT_CONTAINS_LT;
7416
477
            ent->flags |= XML_ENT_CHECKED_LT;
7417
477
        }
7418
9.34k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7419
4.34k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7420
4.34k
                    "'<' in entity '%s' is not allowed in attributes "
7421
4.34k
                    "values\n", name);
7422
9.34k
    }
7423
7424
    /*
7425
     * Internal check, no parameter entities here ...
7426
     */
7427
37.4k
    else {
7428
37.4k
  switch (ent->etype) {
7429
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7430
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7431
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7432
0
       "Attempt to reference the parameter entity '%s'\n",
7433
0
            name);
7434
0
      break;
7435
37.4k
      default:
7436
37.4k
      break;
7437
37.4k
  }
7438
37.4k
    }
7439
7440
    /*
7441
     * [ WFC: No Recursion ]
7442
     * A parsed entity must not contain a recursive reference
7443
     * to itself, either directly or indirectly.
7444
     * Done somewhere else
7445
     */
7446
52.6k
    return(ent);
7447
52.6k
}
7448
7449
/**
7450
 * xmlParseStringEntityRef:
7451
 * @ctxt:  an XML parser context
7452
 * @str:  a pointer to an index in the string
7453
 *
7454
 * parse ENTITY references declarations, but this version parses it from
7455
 * a string value.
7456
 *
7457
 * [68] EntityRef ::= '&' Name ';'
7458
 *
7459
 * [ WFC: Entity Declared ]
7460
 * In a document without any DTD, a document with only an internal DTD
7461
 * subset which contains no parameter entity references, or a document
7462
 * with "standalone='yes'", the Name given in the entity reference
7463
 * must match that in an entity declaration, except that well-formed
7464
 * documents need not declare any of the following entities: amp, lt,
7465
 * gt, apos, quot.  The declaration of a parameter entity must precede
7466
 * any reference to it.  Similarly, the declaration of a general entity
7467
 * must precede any reference to it which appears in a default value in an
7468
 * attribute-list declaration. Note that if entities are declared in the
7469
 * external subset or in external parameter entities, a non-validating
7470
 * processor is not obligated to read and process their declarations;
7471
 * for such documents, the rule that an entity must be declared is a
7472
 * well-formedness constraint only if standalone='yes'.
7473
 *
7474
 * [ WFC: Parsed Entity ]
7475
 * An entity reference must not contain the name of an unparsed entity
7476
 *
7477
 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7478
 * is updated to the current location in the string.
7479
 */
7480
static xmlEntityPtr
7481
161k
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7482
161k
    xmlChar *name;
7483
161k
    const xmlChar *ptr;
7484
161k
    xmlChar cur;
7485
161k
    xmlEntityPtr ent = NULL;
7486
7487
161k
    if ((str == NULL) || (*str == NULL))
7488
0
        return(NULL);
7489
161k
    ptr = *str;
7490
161k
    cur = *ptr;
7491
161k
    if (cur != '&')
7492
0
  return(NULL);
7493
7494
161k
    ptr++;
7495
161k
    name = xmlParseStringName(ctxt, &ptr);
7496
161k
    if (name == NULL) {
7497
414
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7498
414
           "xmlParseStringEntityRef: no name\n");
7499
414
  *str = ptr;
7500
414
  return(NULL);
7501
414
    }
7502
161k
    if (*ptr != ';') {
7503
302
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7504
302
        xmlFree(name);
7505
302
  *str = ptr;
7506
302
  return(NULL);
7507
302
    }
7508
161k
    ptr++;
7509
7510
7511
    /*
7512
     * Predefined entities override any extra definition
7513
     */
7514
161k
    if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7515
161k
        ent = xmlGetPredefinedEntity(name);
7516
161k
        if (ent != NULL) {
7517
3.44k
            xmlFree(name);
7518
3.44k
            *str = ptr;
7519
3.44k
            return(ent);
7520
3.44k
        }
7521
161k
    }
7522
7523
    /*
7524
     * Ask first SAX for entity resolution, otherwise try the
7525
     * entities which may have stored in the parser context.
7526
     */
7527
157k
    if (ctxt->sax != NULL) {
7528
157k
  if (ctxt->sax->getEntity != NULL)
7529
157k
      ent = ctxt->sax->getEntity(ctxt->userData, name);
7530
157k
  if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7531
0
      ent = xmlGetPredefinedEntity(name);
7532
157k
  if ((ent == NULL) && (ctxt->userData==ctxt)) {
7533
65.6k
      ent = xmlSAX2GetEntity(ctxt, name);
7534
65.6k
  }
7535
157k
    }
7536
157k
    if (ctxt->instate == XML_PARSER_EOF) {
7537
0
  xmlFree(name);
7538
0
  return(NULL);
7539
0
    }
7540
7541
    /*
7542
     * [ WFC: Entity Declared ]
7543
     * In a document without any DTD, a document with only an
7544
     * internal DTD subset which contains no parameter entity
7545
     * references, or a document with "standalone='yes'", the
7546
     * Name given in the entity reference must match that in an
7547
     * entity declaration, except that well-formed documents
7548
     * need not declare any of the following entities: amp, lt,
7549
     * gt, apos, quot.
7550
     * The declaration of a parameter entity must precede any
7551
     * reference to it.
7552
     * Similarly, the declaration of a general entity must
7553
     * precede any reference to it which appears in a default
7554
     * value in an attribute-list declaration. Note that if
7555
     * entities are declared in the external subset or in
7556
     * external parameter entities, a non-validating processor
7557
     * is not obligated to read and process their declarations;
7558
     * for such documents, the rule that an entity must be
7559
     * declared is a well-formedness constraint only if
7560
     * standalone='yes'.
7561
     */
7562
157k
    if (ent == NULL) {
7563
65.6k
  if ((ctxt->standalone == 1) ||
7564
65.6k
      ((ctxt->hasExternalSubset == 0) &&
7565
65.6k
       (ctxt->hasPErefs == 0))) {
7566
65.4k
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7567
65.4k
         "Entity '%s' not defined\n", name);
7568
65.4k
  } else {
7569
261
      xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7570
261
        "Entity '%s' not defined\n",
7571
261
        name);
7572
261
  }
7573
  /* TODO ? check regressions ctxt->valid = 0; */
7574
65.6k
    }
7575
7576
    /*
7577
     * [ WFC: Parsed Entity ]
7578
     * An entity reference must not contain the name of an
7579
     * unparsed entity
7580
     */
7581
91.8k
    else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7582
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7583
0
     "Entity reference to unparsed entity %s\n", name);
7584
0
    }
7585
7586
    /*
7587
     * [ WFC: No External Entity References ]
7588
     * Attribute values cannot contain direct or indirect
7589
     * entity references to external entities.
7590
     */
7591
91.8k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7592
91.8k
       (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7593
205
  xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7594
205
   "Attribute references external entity '%s'\n", name);
7595
205
    }
7596
    /*
7597
     * [ WFC: No < in Attribute Values ]
7598
     * The replacement text of any entity referred to directly or
7599
     * indirectly in an attribute value (other than "&lt;") must
7600
     * not contain a <.
7601
     */
7602
91.6k
    else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7603
91.6k
       (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7604
91.6k
  if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7605
213
            if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7606
127
                ent->flags |= XML_ENT_CONTAINS_LT;
7607
213
            ent->flags |= XML_ENT_CHECKED_LT;
7608
213
        }
7609
91.6k
        if (ent->flags & XML_ENT_CONTAINS_LT)
7610
80.5k
            xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7611
80.5k
                    "'<' in entity '%s' is not allowed in attributes "
7612
80.5k
                    "values\n", name);
7613
91.6k
    }
7614
7615
    /*
7616
     * Internal check, no parameter entities here ...
7617
     */
7618
0
    else {
7619
0
  switch (ent->etype) {
7620
0
      case XML_INTERNAL_PARAMETER_ENTITY:
7621
0
      case XML_EXTERNAL_PARAMETER_ENTITY:
7622
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7623
0
       "Attempt to reference the parameter entity '%s'\n",
7624
0
          name);
7625
0
      break;
7626
0
      default:
7627
0
      break;
7628
0
  }
7629
0
    }
7630
7631
    /*
7632
     * [ WFC: No Recursion ]
7633
     * A parsed entity must not contain a recursive reference
7634
     * to itself, either directly or indirectly.
7635
     * Done somewhere else
7636
     */
7637
7638
157k
    xmlFree(name);
7639
157k
    *str = ptr;
7640
157k
    return(ent);
7641
157k
}
7642
7643
/**
7644
 * xmlParsePEReference:
7645
 * @ctxt:  an XML parser context
7646
 *
7647
 * DEPRECATED: Internal function, don't use.
7648
 *
7649
 * Parse a parameter entity reference. Always consumes '%'.
7650
 *
7651
 * The entity content is handled directly by pushing it's content as
7652
 * a new input stream.
7653
 *
7654
 * [69] PEReference ::= '%' Name ';'
7655
 *
7656
 * [ WFC: No Recursion ]
7657
 * A parsed entity must not contain a recursive
7658
 * reference to itself, either directly or indirectly.
7659
 *
7660
 * [ WFC: Entity Declared ]
7661
 * In a document without any DTD, a document with only an internal DTD
7662
 * subset which contains no parameter entity references, or a document
7663
 * with "standalone='yes'", ...  ... The declaration of a parameter
7664
 * entity must precede any reference to it...
7665
 *
7666
 * [ VC: Entity Declared ]
7667
 * In a document with an external subset or external parameter entities
7668
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7669
 * must precede any reference to it...
7670
 *
7671
 * [ WFC: In DTD ]
7672
 * Parameter-entity references may only appear in the DTD.
7673
 * NOTE: misleading but this is handled.
7674
 */
7675
void
7676
xmlParsePEReference(xmlParserCtxtPtr ctxt)
7677
93.9k
{
7678
93.9k
    const xmlChar *name;
7679
93.9k
    xmlEntityPtr entity = NULL;
7680
93.9k
    xmlParserInputPtr input;
7681
7682
93.9k
    if (RAW != '%')
7683
0
        return;
7684
93.9k
    NEXT;
7685
93.9k
    name = xmlParseName(ctxt);
7686
93.9k
    if (name == NULL) {
7687
30.7k
  xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
7688
30.7k
  return;
7689
30.7k
    }
7690
63.2k
    if (xmlParserDebugEntities)
7691
0
  xmlGenericError(xmlGenericErrorContext,
7692
0
    "PEReference: %s\n", name);
7693
63.2k
    if (RAW != ';') {
7694
11.7k
  xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
7695
11.7k
        return;
7696
11.7k
    }
7697
7698
51.4k
    NEXT;
7699
7700
    /*
7701
     * Request the entity from SAX
7702
     */
7703
51.4k
    if ((ctxt->sax != NULL) &&
7704
51.4k
  (ctxt->sax->getParameterEntity != NULL))
7705
51.4k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7706
51.4k
    if (ctxt->instate == XML_PARSER_EOF)
7707
1
  return;
7708
51.4k
    if (entity == NULL) {
7709
  /*
7710
   * [ WFC: Entity Declared ]
7711
   * In a document without any DTD, a document with only an
7712
   * internal DTD subset which contains no parameter entity
7713
   * references, or a document with "standalone='yes'", ...
7714
   * ... The declaration of a parameter entity must precede
7715
   * any reference to it...
7716
   */
7717
8.62k
  if ((ctxt->standalone == 1) ||
7718
8.62k
      ((ctxt->hasExternalSubset == 0) &&
7719
8.41k
       (ctxt->hasPErefs == 0))) {
7720
670
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7721
670
            "PEReference: %%%s; not found\n",
7722
670
            name);
7723
7.95k
  } else {
7724
      /*
7725
       * [ VC: Entity Declared ]
7726
       * In a document with an external subset or external
7727
       * parameter entities with "standalone='no'", ...
7728
       * ... The declaration of a parameter entity must
7729
       * precede any reference to it...
7730
       */
7731
7.95k
            if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
7732
0
                xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
7733
0
                                 "PEReference: %%%s; not found\n",
7734
0
                                 name, NULL);
7735
0
            } else
7736
7.95k
                xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7737
7.95k
                              "PEReference: %%%s; not found\n",
7738
7.95k
                              name, NULL);
7739
7.95k
            ctxt->valid = 0;
7740
7.95k
  }
7741
42.8k
    } else {
7742
  /*
7743
   * Internal checking in case the entity quest barfed
7744
   */
7745
42.8k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7746
42.8k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7747
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7748
0
      "Internal: %%%s; is not a parameter entity\n",
7749
0
        name, NULL);
7750
42.8k
  } else {
7751
42.8k
            xmlChar start[4];
7752
42.8k
            xmlCharEncoding enc;
7753
42.8k
            unsigned long parentConsumed;
7754
42.8k
            xmlEntityPtr oldEnt;
7755
7756
42.8k
      if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7757
42.8k
          ((ctxt->options & XML_PARSE_NOENT) == 0) &&
7758
42.8k
    ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
7759
42.8k
    ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
7760
42.8k
    ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
7761
42.8k
    (ctxt->replaceEntities == 0) &&
7762
42.8k
    (ctxt->validate == 0))
7763
0
    return;
7764
7765
42.8k
            if (entity->flags & XML_ENT_EXPANDING) {
7766
19
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7767
19
                xmlHaltParser(ctxt);
7768
19
                return;
7769
19
            }
7770
7771
            /* Must be computed from old input before pushing new input. */
7772
42.8k
            parentConsumed = ctxt->input->parentConsumed;
7773
42.8k
            oldEnt = ctxt->input->entity;
7774
42.8k
            if ((oldEnt == NULL) ||
7775
42.8k
                ((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7776
42.5k
                 ((oldEnt->flags & XML_ENT_PARSED) == 0))) {
7777
42.5k
                xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
7778
42.5k
                xmlSaturatedAddSizeT(&parentConsumed,
7779
42.5k
                                     ctxt->input->cur - ctxt->input->base);
7780
42.5k
            }
7781
7782
42.8k
      input = xmlNewEntityInputStream(ctxt, entity);
7783
42.8k
      if (xmlPushInput(ctxt, input) < 0) {
7784
1.01k
                xmlFreeInputStream(input);
7785
1.01k
    return;
7786
1.01k
            }
7787
7788
41.8k
            entity->flags |= XML_ENT_EXPANDING;
7789
7790
41.8k
            input->parentConsumed = parentConsumed;
7791
7792
41.8k
      if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
7793
                /*
7794
                 * Get the 4 first bytes and decode the charset
7795
                 * if enc != XML_CHAR_ENCODING_NONE
7796
                 * plug some encoding conversion routines.
7797
                 * Note that, since we may have some non-UTF8
7798
                 * encoding (like UTF16, bug 135229), the 'length'
7799
                 * is not known, but we can calculate based upon
7800
                 * the amount of data in the buffer.
7801
                 */
7802
28.5k
                GROW
7803
28.5k
                if (ctxt->instate == XML_PARSER_EOF)
7804
0
                    return;
7805
28.5k
                if ((ctxt->input->end - ctxt->input->cur)>=4) {
7806
28.2k
                    start[0] = RAW;
7807
28.2k
                    start[1] = NXT(1);
7808
28.2k
                    start[2] = NXT(2);
7809
28.2k
                    start[3] = NXT(3);
7810
28.2k
                    enc = xmlDetectCharEncoding(start, 4);
7811
28.2k
                    if (enc != XML_CHAR_ENCODING_NONE) {
7812
18.3k
                        xmlSwitchEncoding(ctxt, enc);
7813
18.3k
                    }
7814
28.2k
                }
7815
7816
28.5k
                if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7817
28.5k
                    (IS_BLANK_CH(NXT(5)))) {
7818
13.9k
                    xmlParseTextDecl(ctxt);
7819
13.9k
                }
7820
28.5k
            }
7821
41.8k
  }
7822
42.8k
    }
7823
50.4k
    ctxt->hasPErefs = 1;
7824
50.4k
}
7825
7826
/**
7827
 * xmlLoadEntityContent:
7828
 * @ctxt:  an XML parser context
7829
 * @entity: an unloaded system entity
7830
 *
7831
 * Load the original content of the given system entity from the
7832
 * ExternalID/SystemID given. This is to be used for Included in Literal
7833
 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7834
 *
7835
 * Returns 0 in case of success and -1 in case of failure
7836
 */
7837
static int
7838
728
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7839
728
    xmlParserInputPtr input;
7840
728
    xmlBufferPtr buf;
7841
728
    int l, c;
7842
7843
728
    if ((ctxt == NULL) || (entity == NULL) ||
7844
728
        ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7845
728
   (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7846
728
  (entity->content != NULL)) {
7847
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7848
0
              "xmlLoadEntityContent parameter error");
7849
0
        return(-1);
7850
0
    }
7851
7852
728
    if (xmlParserDebugEntities)
7853
0
  xmlGenericError(xmlGenericErrorContext,
7854
0
    "Reading %s entity content input\n", entity->name);
7855
7856
728
    buf = xmlBufferCreate();
7857
728
    if (buf == NULL) {
7858
0
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7859
0
              "xmlLoadEntityContent parameter error");
7860
0
        return(-1);
7861
0
    }
7862
728
    xmlBufferSetAllocationScheme(buf, XML_BUFFER_ALLOC_DOUBLEIT);
7863
7864
728
    input = xmlNewEntityInputStream(ctxt, entity);
7865
728
    if (input == NULL) {
7866
195
  xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7867
195
              "xmlLoadEntityContent input error");
7868
195
  xmlBufferFree(buf);
7869
195
        return(-1);
7870
195
    }
7871
7872
    /*
7873
     * Push the entity as the current input, read char by char
7874
     * saving to the buffer until the end of the entity or an error
7875
     */
7876
533
    if (xmlPushInput(ctxt, input) < 0) {
7877
0
        xmlBufferFree(buf);
7878
0
  xmlFreeInputStream(input);
7879
0
  return(-1);
7880
0
    }
7881
7882
533
    GROW;
7883
533
    c = CUR_CHAR(l);
7884
2.50M
    while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7885
2.50M
           (IS_CHAR(c))) {
7886
2.50M
        xmlBufferAdd(buf, ctxt->input->cur, l);
7887
2.50M
  NEXTL(l);
7888
2.50M
  c = CUR_CHAR(l);
7889
2.50M
    }
7890
533
    if (ctxt->instate == XML_PARSER_EOF) {
7891
0
  xmlBufferFree(buf);
7892
0
  return(-1);
7893
0
    }
7894
7895
533
    if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7896
82
        xmlSaturatedAdd(&ctxt->sizeentities, ctxt->input->consumed);
7897
82
        xmlPopInput(ctxt);
7898
451
    } else if (!IS_CHAR(c)) {
7899
451
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7900
451
                          "xmlLoadEntityContent: invalid char value %d\n",
7901
451
                    c);
7902
451
  xmlBufferFree(buf);
7903
451
  return(-1);
7904
451
    }
7905
82
    entity->content = buf->content;
7906
82
    entity->length = buf->use;
7907
82
    buf->content = NULL;
7908
82
    xmlBufferFree(buf);
7909
7910
82
    return(0);
7911
533
}
7912
7913
/**
7914
 * xmlParseStringPEReference:
7915
 * @ctxt:  an XML parser context
7916
 * @str:  a pointer to an index in the string
7917
 *
7918
 * parse PEReference declarations
7919
 *
7920
 * [69] PEReference ::= '%' Name ';'
7921
 *
7922
 * [ WFC: No Recursion ]
7923
 * A parsed entity must not contain a recursive
7924
 * reference to itself, either directly or indirectly.
7925
 *
7926
 * [ WFC: Entity Declared ]
7927
 * In a document without any DTD, a document with only an internal DTD
7928
 * subset which contains no parameter entity references, or a document
7929
 * with "standalone='yes'", ...  ... The declaration of a parameter
7930
 * entity must precede any reference to it...
7931
 *
7932
 * [ VC: Entity Declared ]
7933
 * In a document with an external subset or external parameter entities
7934
 * with "standalone='no'", ...  ... The declaration of a parameter entity
7935
 * must precede any reference to it...
7936
 *
7937
 * [ WFC: In DTD ]
7938
 * Parameter-entity references may only appear in the DTD.
7939
 * NOTE: misleading but this is handled.
7940
 *
7941
 * Returns the string of the entity content.
7942
 *         str is updated to the current value of the index
7943
 */
7944
static xmlEntityPtr
7945
7.24k
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7946
7.24k
    const xmlChar *ptr;
7947
7.24k
    xmlChar cur;
7948
7.24k
    xmlChar *name;
7949
7.24k
    xmlEntityPtr entity = NULL;
7950
7951
7.24k
    if ((str == NULL) || (*str == NULL)) return(NULL);
7952
7.24k
    ptr = *str;
7953
7.24k
    cur = *ptr;
7954
7.24k
    if (cur != '%')
7955
0
        return(NULL);
7956
7.24k
    ptr++;
7957
7.24k
    name = xmlParseStringName(ctxt, &ptr);
7958
7.24k
    if (name == NULL) {
7959
1.29k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7960
1.29k
           "xmlParseStringPEReference: no name\n");
7961
1.29k
  *str = ptr;
7962
1.29k
  return(NULL);
7963
1.29k
    }
7964
5.95k
    cur = *ptr;
7965
5.95k
    if (cur != ';') {
7966
174
  xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7967
174
  xmlFree(name);
7968
174
  *str = ptr;
7969
174
  return(NULL);
7970
174
    }
7971
5.77k
    ptr++;
7972
7973
    /*
7974
     * Request the entity from SAX
7975
     */
7976
5.77k
    if ((ctxt->sax != NULL) &&
7977
5.77k
  (ctxt->sax->getParameterEntity != NULL))
7978
5.77k
  entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7979
5.77k
    if (ctxt->instate == XML_PARSER_EOF) {
7980
0
  xmlFree(name);
7981
0
  *str = ptr;
7982
0
  return(NULL);
7983
0
    }
7984
5.77k
    if (entity == NULL) {
7985
  /*
7986
   * [ WFC: Entity Declared ]
7987
   * In a document without any DTD, a document with only an
7988
   * internal DTD subset which contains no parameter entity
7989
   * references, or a document with "standalone='yes'", ...
7990
   * ... The declaration of a parameter entity must precede
7991
   * any reference to it...
7992
   */
7993
2.23k
  if ((ctxt->standalone == 1) ||
7994
2.23k
      ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7995
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7996
0
     "PEReference: %%%s; not found\n", name);
7997
2.23k
  } else {
7998
      /*
7999
       * [ VC: Entity Declared ]
8000
       * In a document with an external subset or external
8001
       * parameter entities with "standalone='no'", ...
8002
       * ... The declaration of a parameter entity must
8003
       * precede any reference to it...
8004
       */
8005
2.23k
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8006
2.23k
        "PEReference: %%%s; not found\n",
8007
2.23k
        name, NULL);
8008
2.23k
      ctxt->valid = 0;
8009
2.23k
  }
8010
3.54k
    } else {
8011
  /*
8012
   * Internal checking in case the entity quest barfed
8013
   */
8014
3.54k
  if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8015
3.54k
      (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8016
0
      xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8017
0
        "%%%s; is not a parameter entity\n",
8018
0
        name, NULL);
8019
0
  }
8020
3.54k
    }
8021
5.77k
    ctxt->hasPErefs = 1;
8022
5.77k
    xmlFree(name);
8023
5.77k
    *str = ptr;
8024
5.77k
    return(entity);
8025
5.77k
}
8026
8027
/**
8028
 * xmlParseDocTypeDecl:
8029
 * @ctxt:  an XML parser context
8030
 *
8031
 * DEPRECATED: Internal function, don't use.
8032
 *
8033
 * parse a DOCTYPE declaration
8034
 *
8035
 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8036
 *                      ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8037
 *
8038
 * [ VC: Root Element Type ]
8039
 * The Name in the document type declaration must match the element
8040
 * type of the root element.
8041
 */
8042
8043
void
8044
8.50k
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8045
8.50k
    const xmlChar *name = NULL;
8046
8.50k
    xmlChar *ExternalID = NULL;
8047
8.50k
    xmlChar *URI = NULL;
8048
8049
    /*
8050
     * We know that '<!DOCTYPE' has been detected.
8051
     */
8052
8.50k
    SKIP(9);
8053
8054
8.50k
    SKIP_BLANKS;
8055
8056
    /*
8057
     * Parse the DOCTYPE name.
8058
     */
8059
8.50k
    name = xmlParseName(ctxt);
8060
8.50k
    if (name == NULL) {
8061
929
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8062
929
           "xmlParseDocTypeDecl : no DOCTYPE name !\n");
8063
929
    }
8064
8.50k
    ctxt->intSubName = name;
8065
8066
8.50k
    SKIP_BLANKS;
8067
8068
    /*
8069
     * Check for SystemID and ExternalID
8070
     */
8071
8.50k
    URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8072
8073
8.50k
    if ((URI != NULL) || (ExternalID != NULL)) {
8074
117
        ctxt->hasExternalSubset = 1;
8075
117
    }
8076
8.50k
    ctxt->extSubURI = URI;
8077
8.50k
    ctxt->extSubSystem = ExternalID;
8078
8079
8.50k
    SKIP_BLANKS;
8080
8081
    /*
8082
     * Create and update the internal subset.
8083
     */
8084
8.50k
    if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8085
8.50k
  (!ctxt->disableSAX))
8086
7.03k
  ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8087
8.50k
    if (ctxt->instate == XML_PARSER_EOF)
8088
1
  return;
8089
8090
    /*
8091
     * Is there any internal subset declarations ?
8092
     * they are handled separately in xmlParseInternalSubset()
8093
     */
8094
8.50k
    if (RAW == '[')
8095
8.08k
  return;
8096
8097
    /*
8098
     * We should be at the end of the DOCTYPE declaration.
8099
     */
8100
425
    if (RAW != '>') {
8101
318
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8102
318
    }
8103
425
    NEXT;
8104
425
}
8105
8106
/**
8107
 * xmlParseInternalSubset:
8108
 * @ctxt:  an XML parser context
8109
 *
8110
 * parse the internal subset declaration
8111
 *
8112
 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8113
 */
8114
8115
static void
8116
8.27k
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8117
    /*
8118
     * Is there any DTD definition ?
8119
     */
8120
8.27k
    if (RAW == '[') {
8121
8.27k
        int baseInputNr = ctxt->inputNr;
8122
8.27k
        ctxt->instate = XML_PARSER_DTD;
8123
8.27k
        NEXT;
8124
  /*
8125
   * Parse the succession of Markup declarations and
8126
   * PEReferences.
8127
   * Subsequence (markupdecl | PEReference | S)*
8128
   */
8129
8.27k
  SKIP_BLANKS;
8130
140k
  while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8131
140k
               (ctxt->instate != XML_PARSER_EOF)) {
8132
8133
            /*
8134
             * Conditional sections are allowed from external entities included
8135
             * by PE References in the internal subset.
8136
             */
8137
136k
            if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8138
136k
                (RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8139
1.23k
                xmlParseConditionalSections(ctxt);
8140
135k
            } else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8141
77.8k
          xmlParseMarkupDecl(ctxt);
8142
77.8k
            } else if (RAW == '%') {
8143
53.4k
          xmlParsePEReference(ctxt);
8144
53.4k
            } else {
8145
3.86k
    xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8146
3.86k
                        "xmlParseInternalSubset: error detected in"
8147
3.86k
                        " Markup declaration\n");
8148
3.86k
                xmlHaltParser(ctxt);
8149
3.86k
                return;
8150
3.86k
            }
8151
132k
      SKIP_BLANKS;
8152
132k
            SHRINK;
8153
132k
            GROW;
8154
132k
  }
8155
4.40k
  if (RAW == ']') {
8156
3.24k
      NEXT;
8157
3.24k
      SKIP_BLANKS;
8158
3.24k
  }
8159
4.40k
    }
8160
8161
    /*
8162
     * We should be at the end of the DOCTYPE declaration.
8163
     */
8164
4.40k
    if (RAW != '>') {
8165
1.36k
  xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8166
1.36k
  return;
8167
1.36k
    }
8168
3.04k
    NEXT;
8169
3.04k
}
8170
8171
#ifdef LIBXML_SAX1_ENABLED
8172
/**
8173
 * xmlParseAttribute:
8174
 * @ctxt:  an XML parser context
8175
 * @value:  a xmlChar ** used to store the value of the attribute
8176
 *
8177
 * DEPRECATED: Internal function, don't use.
8178
 *
8179
 * parse an attribute
8180
 *
8181
 * [41] Attribute ::= Name Eq AttValue
8182
 *
8183
 * [ WFC: No External Entity References ]
8184
 * Attribute values cannot contain direct or indirect entity references
8185
 * to external entities.
8186
 *
8187
 * [ WFC: No < in Attribute Values ]
8188
 * The replacement text of any entity referred to directly or indirectly in
8189
 * an attribute value (other than "&lt;") must not contain a <.
8190
 *
8191
 * [ VC: Attribute Value Type ]
8192
 * The attribute must have been declared; the value must be of the type
8193
 * declared for it.
8194
 *
8195
 * [25] Eq ::= S? '=' S?
8196
 *
8197
 * With namespace:
8198
 *
8199
 * [NS 11] Attribute ::= QName Eq AttValue
8200
 *
8201
 * Also the case QName == xmlns:??? is handled independently as a namespace
8202
 * definition.
8203
 *
8204
 * Returns the attribute name, and the value in *value.
8205
 */
8206
8207
const xmlChar *
8208
0
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8209
0
    const xmlChar *name;
8210
0
    xmlChar *val;
8211
8212
0
    *value = NULL;
8213
0
    GROW;
8214
0
    name = xmlParseName(ctxt);
8215
0
    if (name == NULL) {
8216
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8217
0
                 "error parsing attribute name\n");
8218
0
        return(NULL);
8219
0
    }
8220
8221
    /*
8222
     * read the value
8223
     */
8224
0
    SKIP_BLANKS;
8225
0
    if (RAW == '=') {
8226
0
        NEXT;
8227
0
  SKIP_BLANKS;
8228
0
  val = xmlParseAttValue(ctxt);
8229
0
  ctxt->instate = XML_PARSER_CONTENT;
8230
0
    } else {
8231
0
  xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8232
0
         "Specification mandates value for attribute %s\n", name);
8233
0
  return(name);
8234
0
    }
8235
8236
    /*
8237
     * Check that xml:lang conforms to the specification
8238
     * No more registered as an error, just generate a warning now
8239
     * since this was deprecated in XML second edition
8240
     */
8241
0
    if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8242
0
  if (!xmlCheckLanguageID(val)) {
8243
0
      xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8244
0
              "Malformed value for xml:lang : %s\n",
8245
0
        val, NULL);
8246
0
  }
8247
0
    }
8248
8249
    /*
8250
     * Check that xml:space conforms to the specification
8251
     */
8252
0
    if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8253
0
  if (xmlStrEqual(val, BAD_CAST "default"))
8254
0
      *(ctxt->space) = 0;
8255
0
  else if (xmlStrEqual(val, BAD_CAST "preserve"))
8256
0
      *(ctxt->space) = 1;
8257
0
  else {
8258
0
    xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8259
0
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8260
0
                                 val, NULL);
8261
0
  }
8262
0
    }
8263
8264
0
    *value = val;
8265
0
    return(name);
8266
0
}
8267
8268
/**
8269
 * xmlParseStartTag:
8270
 * @ctxt:  an XML parser context
8271
 *
8272
 * DEPRECATED: Internal function, don't use.
8273
 *
8274
 * Parse a start tag. Always consumes '<'.
8275
 *
8276
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8277
 *
8278
 * [ WFC: Unique Att Spec ]
8279
 * No attribute name may appear more than once in the same start-tag or
8280
 * empty-element tag.
8281
 *
8282
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8283
 *
8284
 * [ WFC: Unique Att Spec ]
8285
 * No attribute name may appear more than once in the same start-tag or
8286
 * empty-element tag.
8287
 *
8288
 * With namespace:
8289
 *
8290
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8291
 *
8292
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8293
 *
8294
 * Returns the element name parsed
8295
 */
8296
8297
const xmlChar *
8298
0
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8299
0
    const xmlChar *name;
8300
0
    const xmlChar *attname;
8301
0
    xmlChar *attvalue;
8302
0
    const xmlChar **atts = ctxt->atts;
8303
0
    int nbatts = 0;
8304
0
    int maxatts = ctxt->maxatts;
8305
0
    int i;
8306
8307
0
    if (RAW != '<') return(NULL);
8308
0
    NEXT1;
8309
8310
0
    name = xmlParseName(ctxt);
8311
0
    if (name == NULL) {
8312
0
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8313
0
       "xmlParseStartTag: invalid element name\n");
8314
0
        return(NULL);
8315
0
    }
8316
8317
    /*
8318
     * Now parse the attributes, it ends up with the ending
8319
     *
8320
     * (S Attribute)* S?
8321
     */
8322
0
    SKIP_BLANKS;
8323
0
    GROW;
8324
8325
0
    while (((RAW != '>') &&
8326
0
     ((RAW != '/') || (NXT(1) != '>')) &&
8327
0
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8328
0
  attname = xmlParseAttribute(ctxt, &attvalue);
8329
0
        if (attname == NULL) {
8330
0
      xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8331
0
         "xmlParseStartTag: problem parsing attributes\n");
8332
0
      break;
8333
0
  }
8334
0
        if (attvalue != NULL) {
8335
      /*
8336
       * [ WFC: Unique Att Spec ]
8337
       * No attribute name may appear more than once in the same
8338
       * start-tag or empty-element tag.
8339
       */
8340
0
      for (i = 0; i < nbatts;i += 2) {
8341
0
          if (xmlStrEqual(atts[i], attname)) {
8342
0
        xmlErrAttributeDup(ctxt, NULL, attname);
8343
0
        xmlFree(attvalue);
8344
0
        goto failed;
8345
0
    }
8346
0
      }
8347
      /*
8348
       * Add the pair to atts
8349
       */
8350
0
      if (atts == NULL) {
8351
0
          maxatts = 22; /* allow for 10 attrs by default */
8352
0
          atts = (const xmlChar **)
8353
0
           xmlMalloc(maxatts * sizeof(xmlChar *));
8354
0
    if (atts == NULL) {
8355
0
        xmlErrMemory(ctxt, NULL);
8356
0
        if (attvalue != NULL)
8357
0
      xmlFree(attvalue);
8358
0
        goto failed;
8359
0
    }
8360
0
    ctxt->atts = atts;
8361
0
    ctxt->maxatts = maxatts;
8362
0
      } else if (nbatts + 4 > maxatts) {
8363
0
          const xmlChar **n;
8364
8365
0
          maxatts *= 2;
8366
0
          n = (const xmlChar **) xmlRealloc((void *) atts,
8367
0
               maxatts * sizeof(const xmlChar *));
8368
0
    if (n == NULL) {
8369
0
        xmlErrMemory(ctxt, NULL);
8370
0
        if (attvalue != NULL)
8371
0
      xmlFree(attvalue);
8372
0
        goto failed;
8373
0
    }
8374
0
    atts = n;
8375
0
    ctxt->atts = atts;
8376
0
    ctxt->maxatts = maxatts;
8377
0
      }
8378
0
      atts[nbatts++] = attname;
8379
0
      atts[nbatts++] = attvalue;
8380
0
      atts[nbatts] = NULL;
8381
0
      atts[nbatts + 1] = NULL;
8382
0
  } else {
8383
0
      if (attvalue != NULL)
8384
0
    xmlFree(attvalue);
8385
0
  }
8386
8387
0
failed:
8388
8389
0
  GROW
8390
0
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8391
0
      break;
8392
0
  if (SKIP_BLANKS == 0) {
8393
0
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8394
0
         "attributes construct error\n");
8395
0
  }
8396
0
  SHRINK;
8397
0
        GROW;
8398
0
    }
8399
8400
    /*
8401
     * SAX: Start of Element !
8402
     */
8403
0
    if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8404
0
  (!ctxt->disableSAX)) {
8405
0
  if (nbatts > 0)
8406
0
      ctxt->sax->startElement(ctxt->userData, name, atts);
8407
0
  else
8408
0
      ctxt->sax->startElement(ctxt->userData, name, NULL);
8409
0
    }
8410
8411
0
    if (atts != NULL) {
8412
        /* Free only the content strings */
8413
0
        for (i = 1;i < nbatts;i+=2)
8414
0
      if (atts[i] != NULL)
8415
0
         xmlFree((xmlChar *) atts[i]);
8416
0
    }
8417
0
    return(name);
8418
0
}
8419
8420
/**
8421
 * xmlParseEndTag1:
8422
 * @ctxt:  an XML parser context
8423
 * @line:  line of the start tag
8424
 * @nsNr:  number of namespaces on the start tag
8425
 *
8426
 * Parse an end tag. Always consumes '</'.
8427
 *
8428
 * [42] ETag ::= '</' Name S? '>'
8429
 *
8430
 * With namespace
8431
 *
8432
 * [NS 9] ETag ::= '</' QName S? '>'
8433
 */
8434
8435
static void
8436
0
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8437
0
    const xmlChar *name;
8438
8439
0
    GROW;
8440
0
    if ((RAW != '<') || (NXT(1) != '/')) {
8441
0
  xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8442
0
           "xmlParseEndTag: '</' not found\n");
8443
0
  return;
8444
0
    }
8445
0
    SKIP(2);
8446
8447
0
    name = xmlParseNameAndCompare(ctxt,ctxt->name);
8448
8449
    /*
8450
     * We should definitely be at the ending "S? '>'" part
8451
     */
8452
0
    GROW;
8453
0
    SKIP_BLANKS;
8454
0
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8455
0
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8456
0
    } else
8457
0
  NEXT1;
8458
8459
    /*
8460
     * [ WFC: Element Type Match ]
8461
     * The Name in an element's end-tag must match the element type in the
8462
     * start-tag.
8463
     *
8464
     */
8465
0
    if (name != (xmlChar*)1) {
8466
0
        if (name == NULL) name = BAD_CAST "unparsable";
8467
0
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8468
0
         "Opening and ending tag mismatch: %s line %d and %s\n",
8469
0
                    ctxt->name, line, name);
8470
0
    }
8471
8472
    /*
8473
     * SAX: End of Tag
8474
     */
8475
0
    if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8476
0
  (!ctxt->disableSAX))
8477
0
        ctxt->sax->endElement(ctxt->userData, ctxt->name);
8478
8479
0
    namePop(ctxt);
8480
0
    spacePop(ctxt);
8481
0
    return;
8482
0
}
8483
8484
/**
8485
 * xmlParseEndTag:
8486
 * @ctxt:  an XML parser context
8487
 *
8488
 * DEPRECATED: Internal function, don't use.
8489
 *
8490
 * parse an end of tag
8491
 *
8492
 * [42] ETag ::= '</' Name S? '>'
8493
 *
8494
 * With namespace
8495
 *
8496
 * [NS 9] ETag ::= '</' QName S? '>'
8497
 */
8498
8499
void
8500
0
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8501
0
    /* Delegate to xmlParseEndTag1, passing 0 as the start tag line. */
    xmlParseEndTag1(ctxt, 0);
8502
0
}
8503
#endif /* LIBXML_SAX1_ENABLED */
8504
8505
/************************************************************************
8506
 *                  *
8507
 *          SAX 2 specific operations       *
8508
 *                  *
8509
 ************************************************************************/
8510
8511
/*
8512
 * xmlGetNamespace:
8513
 * @ctxt:  an XML parser context
8514
 * @prefix:  the prefix to lookup
8515
 *
8516
 * Lookup the namespace name for the @prefix (which ca be NULL)
8517
 * The prefix must come from the @ctxt->dict dictionary
8518
 *
8519
 * Returns the namespace name or NULL if not bound
8520
 */
8521
static const xmlChar *
8522
565k
xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8523
565k
    int i;
8524
8525
565k
    if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
8526
1.19M
    for (i = ctxt->nsNr - 2;i >= 0;i-=2)
8527
1.06M
        if (ctxt->nsTab[i] == prefix) {
8528
407k
      if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8529
2.10k
          return(NULL);
8530
405k
      return(ctxt->nsTab[i + 1]);
8531
407k
  }
8532
125k
    return(NULL);
8533
533k
}
8534
8535
/**
8536
 * xmlParseQName:
8537
 * @ctxt:  an XML parser context
8538
 * @prefix:  pointer to store the prefix part
8539
 *
8540
 * parse an XML Namespace QName
8541
 *
8542
 * [6]  QName  ::= (Prefix ':')? LocalPart
8543
 * [7]  Prefix  ::= NCName
8544
 * [8]  LocalPart  ::= NCName
8545
 *
8546
 * Returns the Name parsed or NULL
8547
 */
8548
8549
static const xmlChar *
8550
1.10M
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8551
1.10M
    const xmlChar *l, *p;
8552
8553
1.10M
    GROW;
8554
1.10M
    if (ctxt->instate == XML_PARSER_EOF)
8555
3
        return(NULL);
8556
8557
1.10M
    l = xmlParseNCName(ctxt);
8558
1.10M
    if (l == NULL) {
8559
67.1k
        if (CUR == ':') {
8560
21.4k
      l = xmlParseName(ctxt);
8561
21.4k
      if (l != NULL) {
8562
21.4k
          xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8563
21.4k
             "Failed to parse QName '%s'\n", l, NULL, NULL);
8564
21.4k
    *prefix = NULL;
8565
21.4k
    return(l);
8566
21.4k
      }
8567
21.4k
  }
8568
45.7k
        return(NULL);
8569
67.1k
    }
8570
1.03M
    if (CUR == ':') {
8571
442k
        NEXT;
8572
442k
  p = l;
8573
442k
  l = xmlParseNCName(ctxt);
8574
442k
  if (l == NULL) {
8575
5.37k
      xmlChar *tmp;
8576
8577
5.37k
            if (ctxt->instate == XML_PARSER_EOF)
8578
14
                return(NULL);
8579
5.36k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8580
5.36k
               "Failed to parse QName '%s:'\n", p, NULL, NULL);
8581
5.36k
      l = xmlParseNmtoken(ctxt);
8582
5.36k
      if (l == NULL) {
8583
2.97k
                if (ctxt->instate == XML_PARSER_EOF)
8584
10
                    return(NULL);
8585
2.96k
    tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8586
2.96k
            } else {
8587
2.38k
    tmp = xmlBuildQName(l, p, NULL, 0);
8588
2.38k
    xmlFree((char *)l);
8589
2.38k
      }
8590
5.35k
      p = xmlDictLookup(ctxt->dict, tmp, -1);
8591
5.35k
      if (tmp != NULL) xmlFree(tmp);
8592
5.35k
      *prefix = NULL;
8593
5.35k
      return(p);
8594
5.36k
  }
8595
436k
  if (CUR == ':') {
8596
11.6k
      xmlChar *tmp;
8597
8598
11.6k
            xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8599
11.6k
               "Failed to parse QName '%s:%s:'\n", p, l, NULL);
8600
11.6k
      NEXT;
8601
11.6k
      tmp = (xmlChar *) xmlParseName(ctxt);
8602
11.6k
      if (tmp != NULL) {
8603
10.9k
          tmp = xmlBuildQName(tmp, l, NULL, 0);
8604
10.9k
    l = xmlDictLookup(ctxt->dict, tmp, -1);
8605
10.9k
    if (tmp != NULL) xmlFree(tmp);
8606
10.9k
    *prefix = p;
8607
10.9k
    return(l);
8608
10.9k
      }
8609
745
            if (ctxt->instate == XML_PARSER_EOF)
8610
10
                return(NULL);
8611
735
      tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8612
735
      l = xmlDictLookup(ctxt->dict, tmp, -1);
8613
735
      if (tmp != NULL) xmlFree(tmp);
8614
735
      *prefix = p;
8615
735
      return(l);
8616
745
  }
8617
425k
  *prefix = p;
8618
425k
    } else
8619
591k
        *prefix = NULL;
8620
1.01M
    return(l);
8621
1.03M
}
8622
8623
/**
8624
 * xmlParseQNameAndCompare:
8625
 * @ctxt:  an XML parser context
8626
 * @name:  the localname
8627
 * @prefix:  the prefix, if any.
8628
 *
8629
 * parse an XML name and compares for match
8630
 * (specialized for endtag parsing)
8631
 *
8632
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8633
 * and the name for mismatch
8634
 */
8635
8636
static const xmlChar *
8637
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8638
75.9k
                        xmlChar const *prefix) {
8639
75.9k
    const xmlChar *cmp;
8640
75.9k
    const xmlChar *in;
8641
75.9k
    const xmlChar *ret;
8642
75.9k
    const xmlChar *prefix2;
8643
8644
75.9k
    if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8645
8646
75.9k
    GROW;
8647
75.9k
    in = ctxt->input->cur;
8648
8649
75.9k
    cmp = prefix;
8650
249k
    while (*in != 0 && *in == *cmp) {
8651
173k
  ++in;
8652
173k
  ++cmp;
8653
173k
    }
8654
75.9k
    if ((*cmp == 0) && (*in == ':')) {
8655
74.4k
        in++;
8656
74.4k
  cmp = name;
8657
706k
  while (*in != 0 && *in == *cmp) {
8658
632k
      ++in;
8659
632k
      ++cmp;
8660
632k
  }
8661
74.4k
  if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
8662
      /* success */
8663
72.8k
            ctxt->input->col += in - ctxt->input->cur;
8664
72.8k
      ctxt->input->cur = in;
8665
72.8k
      return((const xmlChar*) 1);
8666
72.8k
  }
8667
74.4k
    }
8668
    /*
8669
     * all strings coms from the dictionary, equality can be done directly
8670
     */
8671
3.09k
    ret = xmlParseQName (ctxt, &prefix2);
8672
3.09k
    if ((ret == name) && (prefix == prefix2))
8673
318
  return((const xmlChar*) 1);
8674
2.77k
    return ret;
8675
3.09k
}
8676
8677
/**
8678
 * xmlParseAttValueInternal:
8679
 * @ctxt:  an XML parser context
8680
 * @len:  attribute len result
8681
 * @alloc:  whether the attribute was reallocated as a new string
8682
 * @normalize:  if 1 then further non-CDATA normalization must be done
8683
 *
8684
 * parse a value for an attribute.
8685
 * NOTE: if no normalization is needed, the routine will return pointers
8686
 *       directly from the data buffer.
8687
 *
8688
 * 3.3.3 Attribute-Value Normalization:
8689
 * Before the value of an attribute is passed to the application or
8690
 * checked for validity, the XML processor must normalize it as follows:
8691
 * - a character reference is processed by appending the referenced
8692
 *   character to the attribute value
8693
 * - an entity reference is processed by recursively processing the
8694
 *   replacement text of the entity
8695
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8696
 *   appending #x20 to the normalized value, except that only a single
8697
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
8698
 *   parsed entity or the literal entity value of an internal parsed entity
8699
 * - other characters are processed by appending them to the normalized value
8700
 * If the declared value is not CDATA, then the XML processor must further
8701
 * process the normalized attribute value by discarding any leading and
8702
 * trailing space (#x20) characters, and by replacing sequences of space
8703
 * (#x20) characters by a single space (#x20) character.
8704
 * All attributes for which no declaration has been read should be treated
8705
 * by a non-validating parser as if declared CDATA.
8706
 *
8707
 * Returns the AttValue parsed or NULL. The value has to be freed by the
8708
 *     caller if it was copied, this can be detected by val[*len] == 0.
8709
 */
8710
8711
/*
 * GROW_PARSE_ATT_VALUE_INTERNAL:
 *
 * Helper for scanning directly in the input buffer: remembers
 * ctxt->input->base, runs GROW, and makes the enclosing function
 * return NULL if the parser is in the XML_PARSER_EOF state afterwards.
 * If the base pointer changed, @start and @in are shifted by the same
 * delta so they keep designating the same bytes; @end is then reloaded
 * from ctxt->input->end.
 *
 * The expansion declares a local named oldbase and is not wrapped in a
 * block, so each use must sit inside its own braces.
 */
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
8712
1.21k
    const xmlChar *oldbase = ctxt->input->base;\
8713
1.21k
    GROW;\
8714
1.21k
    if (ctxt->instate == XML_PARSER_EOF)\
8715
1.21k
        return(NULL);\
8716
1.21k
    if (oldbase != ctxt->input->base) {\
8717
0
        ptrdiff_t delta = ctxt->input->base - oldbase;\
8718
0
        start = start + delta;\
8719
0
        in = in + delta;\
8720
0
    }\
8721
1.21k
    end = ctxt->input->end;
8722
8723
static xmlChar *
8724
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8725
                         int normalize)
8726
591k
{
8727
591k
    xmlChar limit = 0;
8728
591k
    const xmlChar *in = NULL, *start, *end, *last;
8729
591k
    xmlChar *ret = NULL;
8730
591k
    int line, col;
8731
591k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
8732
0
                    XML_MAX_HUGE_LENGTH :
8733
591k
                    XML_MAX_TEXT_LENGTH;
8734
8735
591k
    GROW;
8736
591k
    in = (xmlChar *) CUR_PTR;
8737
591k
    line = ctxt->input->line;
8738
591k
    col = ctxt->input->col;
8739
591k
    if (*in != '"' && *in != '\'') {
8740
2.74k
        xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
8741
2.74k
        return (NULL);
8742
2.74k
    }
8743
588k
    ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
8744
8745
    /*
8746
     * try to handle in this routine the most common case where no
8747
     * allocation of a new string is required and where content is
8748
     * pure ASCII.
8749
     */
8750
588k
    limit = *in++;
8751
588k
    col++;
8752
588k
    end = ctxt->input->end;
8753
588k
    start = in;
8754
588k
    if (in >= end) {
8755
224
        GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8756
224
    }
8757
588k
    if (normalize) {
8758
        /*
8759
   * Skip any leading spaces
8760
   */
8761
7.94k
  while ((in < end) && (*in != limit) &&
8762
7.94k
         ((*in == 0x20) || (*in == 0x9) ||
8763
7.25k
          (*in == 0xA) || (*in == 0xD))) {
8764
2.81k
      if (*in == 0xA) {
8765
689
          line++; col = 1;
8766
2.12k
      } else {
8767
2.12k
          col++;
8768
2.12k
      }
8769
2.81k
      in++;
8770
2.81k
      start = in;
8771
2.81k
      if (in >= end) {
8772
9
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8773
9
                if ((in - start) > maxLength) {
8774
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8775
0
                                   "AttValue length too long\n");
8776
0
                    return(NULL);
8777
0
                }
8778
9
      }
8779
2.81k
  }
8780
18.2k
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8781
18.2k
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8782
13.4k
      col++;
8783
13.4k
      if ((*in++ == 0x20) && (*in == 0x20)) break;
8784
13.1k
      if (in >= end) {
8785
24
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8786
24
                if ((in - start) > maxLength) {
8787
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8788
0
                                   "AttValue length too long\n");
8789
0
                    return(NULL);
8790
0
                }
8791
24
      }
8792
13.1k
  }
8793
5.13k
  last = in;
8794
  /*
8795
   * skip the trailing blanks
8796
   */
8797
5.47k
  while ((last[-1] == 0x20) && (last > start)) last--;
8798
8.54k
  while ((in < end) && (*in != limit) &&
8799
8.54k
         ((*in == 0x20) || (*in == 0x9) ||
8800
6.52k
          (*in == 0xA) || (*in == 0xD))) {
8801
3.40k
      if (*in == 0xA) {
8802
628
          line++, col = 1;
8803
2.78k
      } else {
8804
2.78k
          col++;
8805
2.78k
      }
8806
3.40k
      in++;
8807
3.40k
      if (in >= end) {
8808
16
    const xmlChar *oldbase = ctxt->input->base;
8809
16
    GROW;
8810
16
                if (ctxt->instate == XML_PARSER_EOF)
8811
0
                    return(NULL);
8812
16
    if (oldbase != ctxt->input->base) {
8813
0
        ptrdiff_t delta = ctxt->input->base - oldbase;
8814
0
        start = start + delta;
8815
0
        in = in + delta;
8816
0
        last = last + delta;
8817
0
    }
8818
16
    end = ctxt->input->end;
8819
16
                if ((in - start) > maxLength) {
8820
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8821
0
                                   "AttValue length too long\n");
8822
0
                    return(NULL);
8823
0
                }
8824
16
      }
8825
3.40k
  }
8826
5.13k
        if ((in - start) > maxLength) {
8827
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8828
0
                           "AttValue length too long\n");
8829
0
            return(NULL);
8830
0
        }
8831
5.13k
  if (*in != limit) goto need_complex;
8832
583k
    } else {
8833
8.14M
  while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8834
8.14M
         (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8835
7.56M
      in++;
8836
7.56M
      col++;
8837
7.56M
      if (in >= end) {
8838
962
                GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
8839
962
                if ((in - start) > maxLength) {
8840
0
                    xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8841
0
                                   "AttValue length too long\n");
8842
0
                    return(NULL);
8843
0
                }
8844
962
      }
8845
7.56M
  }
8846
583k
  last = in;
8847
583k
        if ((in - start) > maxLength) {
8848
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8849
0
                           "AttValue length too long\n");
8850
0
            return(NULL);
8851
0
        }
8852
583k
  if (*in != limit) goto need_complex;
8853
583k
    }
8854
402k
    in++;
8855
402k
    col++;
8856
402k
    if (len != NULL) {
8857
374k
        if (alloc) *alloc = 0;
8858
374k
        *len = last - start;
8859
374k
        ret = (xmlChar *) start;
8860
374k
    } else {
8861
28.2k
        if (alloc) *alloc = 1;
8862
28.2k
        ret = xmlStrndup(start, last - start);
8863
28.2k
    }
8864
402k
    CUR_PTR = in;
8865
402k
    ctxt->input->line = line;
8866
402k
    ctxt->input->col = col;
8867
402k
    return ret;
8868
186k
need_complex:
8869
186k
    if (alloc) *alloc = 1;
8870
186k
    return xmlParseAttValueComplex(ctxt, len, normalize);
8871
588k
}
8872
8873
/**
8874
 * xmlParseAttribute2:
8875
 * @ctxt:  an XML parser context
8876
 * @pref:  the element prefix
8877
 * @elem:  the element name
8878
 * @prefix:  a xmlChar ** used to store the value of the attribute prefix
8879
 * @value:  a xmlChar ** used to store the value of the attribute
8880
 * @len:  an int * to save the length of the attribute
8881
 * @alloc:  an int * to indicate if the attribute was allocated
8882
 *
8883
 * parse an attribute in the new SAX2 framework.
8884
 *
8885
 * Returns the attribute name, and the value in *value, .
8886
 */
8887
8888
static const xmlChar *
8889
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
8890
                   const xmlChar * pref, const xmlChar * elem,
8891
                   const xmlChar ** prefix, xmlChar ** value,
8892
                   int *len, int *alloc)
8893
578k
{
8894
578k
    const xmlChar *name;
8895
578k
    xmlChar *val, *internal_val = NULL;
8896
578k
    int normalize = 0;
8897
8898
578k
    *value = NULL;
8899
578k
    GROW;
8900
578k
    name = xmlParseQName(ctxt, prefix);
8901
578k
    if (name == NULL) {
8902
24.8k
        xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8903
24.8k
                       "error parsing attribute name\n");
8904
24.8k
        return (NULL);
8905
24.8k
    }
8906
8907
    /*
8908
     * get the type if needed
8909
     */
8910
554k
    if (ctxt->attsSpecial != NULL) {
8911
11.7k
        int type;
8912
8913
11.7k
        type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
8914
11.7k
                                                 pref, elem, *prefix, name);
8915
11.7k
        if (type != 0)
8916
5.34k
            normalize = 1;
8917
11.7k
    }
8918
8919
    /*
8920
     * read the value
8921
     */
8922
554k
    SKIP_BLANKS;
8923
554k
    if (RAW == '=') {
8924
546k
        NEXT;
8925
546k
        SKIP_BLANKS;
8926
546k
        val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8927
546k
        if (val == NULL)
8928
1.33k
            return (NULL);
8929
544k
  if (normalize) {
8930
      /*
8931
       * Sometimes a second normalisation pass for spaces is needed
8932
       * but that only happens if charrefs or entities references
8933
       * have been used in the attribute value, i.e. the attribute
8934
       * value have been extracted in an allocated string already.
8935
       */
8936
5.13k
      if (*alloc) {
8937
3.16k
          const xmlChar *val2;
8938
8939
3.16k
          val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
8940
3.16k
    if ((val2 != NULL) && (val2 != val)) {
8941
406
        xmlFree(val);
8942
406
        val = (xmlChar *) val2;
8943
406
    }
8944
3.16k
      }
8945
5.13k
  }
8946
544k
        ctxt->instate = XML_PARSER_CONTENT;
8947
544k
    } else {
8948
8.10k
        xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8949
8.10k
                          "Specification mandates value for attribute %s\n",
8950
8.10k
                          name);
8951
8.10k
        return (name);
8952
8.10k
    }
8953
8954
544k
    if (*prefix == ctxt->str_xml) {
8955
        /*
8956
         * Check that xml:lang conforms to the specification
8957
         * No more registered as an error, just generate a warning now
8958
         * since this was deprecated in XML second edition
8959
         */
8960
28.0k
        if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8961
0
            internal_val = xmlStrndup(val, *len);
8962
0
            if (!xmlCheckLanguageID(internal_val)) {
8963
0
                xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8964
0
                              "Malformed value for xml:lang : %s\n",
8965
0
                              internal_val, NULL);
8966
0
            }
8967
0
        }
8968
8969
        /*
8970
         * Check that xml:space conforms to the specification
8971
         */
8972
28.0k
        if (xmlStrEqual(name, BAD_CAST "space")) {
8973
650
            internal_val = xmlStrndup(val, *len);
8974
650
            if (xmlStrEqual(internal_val, BAD_CAST "default"))
8975
3
                *(ctxt->space) = 0;
8976
647
            else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8977
197
                *(ctxt->space) = 1;
8978
450
            else {
8979
450
                xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8980
450
                              "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8981
450
                              internal_val, NULL);
8982
450
            }
8983
650
        }
8984
28.0k
        if (internal_val) {
8985
650
            xmlFree(internal_val);
8986
650
        }
8987
28.0k
    }
8988
8989
544k
    *value = val;
8990
544k
    return (name);
8991
554k
}
8992
/**
8993
 * xmlParseStartTag2:
8994
 * @ctxt:  an XML parser context
8995
 *
8996
 * Parse a start tag. Always consumes '<'.
8997
 *
8998
 * This routine is called when running SAX2 parsing
8999
 *
9000
 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9001
 *
9002
 * [ WFC: Unique Att Spec ]
9003
 * No attribute name may appear more than once in the same start-tag or
9004
 * empty-element tag.
9005
 *
9006
 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9007
 *
9008
 * [ WFC: Unique Att Spec ]
9009
 * No attribute name may appear more than once in the same start-tag or
9010
 * empty-element tag.
9011
 *
9012
 * With namespace:
9013
 *
9014
 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9015
 *
9016
 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9017
 *
9018
 * Returns the element name parsed
9019
 */
9020
9021
static const xmlChar *
9022
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9023
518k
                  const xmlChar **URI, int *tlen) {
9024
518k
    const xmlChar *localname;
9025
518k
    const xmlChar *prefix;
9026
518k
    const xmlChar *attname;
9027
518k
    const xmlChar *aprefix;
9028
518k
    const xmlChar *nsname;
9029
518k
    xmlChar *attvalue;
9030
518k
    const xmlChar **atts = ctxt->atts;
9031
518k
    int maxatts = ctxt->maxatts;
9032
518k
    int nratts, nbatts, nbdef, inputid;
9033
518k
    int i, j, nbNs, attval;
9034
518k
    size_t cur;
9035
518k
    int nsNr = ctxt->nsNr;
9036
9037
518k
    if (RAW != '<') return(NULL);
9038
518k
    NEXT1;
9039
9040
518k
    cur = ctxt->input->cur - ctxt->input->base;
9041
518k
    inputid = ctxt->input->id;
9042
518k
    nbatts = 0;
9043
518k
    nratts = 0;
9044
518k
    nbdef = 0;
9045
518k
    nbNs = 0;
9046
518k
    attval = 0;
9047
    /* Forget any namespaces added during an earlier parse of this element. */
9048
518k
    ctxt->nsNr = nsNr;
9049
9050
518k
    localname = xmlParseQName(ctxt, &prefix);
9051
518k
    if (localname == NULL) {
9052
20.7k
  xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9053
20.7k
           "StartTag: invalid element name\n");
9054
20.7k
        return(NULL);
9055
20.7k
    }
9056
497k
    *tlen = ctxt->input->cur - ctxt->input->base - cur;
9057
9058
    /*
9059
     * Now parse the attributes, it ends up with the ending
9060
     *
9061
     * (S Attribute)* S?
9062
     */
9063
497k
    SKIP_BLANKS;
9064
497k
    GROW;
9065
9066
695k
    while (((RAW != '>') &&
9067
695k
     ((RAW != '/') || (NXT(1) != '>')) &&
9068
695k
     (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9069
578k
  int len = -1, alloc = 0;
9070
9071
578k
  attname = xmlParseAttribute2(ctxt, prefix, localname,
9072
578k
                               &aprefix, &attvalue, &len, &alloc);
9073
578k
        if (attname == NULL) {
9074
26.1k
      xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9075
26.1k
           "xmlParseStartTag: problem parsing attributes\n");
9076
26.1k
      break;
9077
26.1k
  }
9078
552k
        if (attvalue == NULL)
9079
8.10k
            goto next_attr;
9080
544k
  if (len < 0) len = xmlStrlen(attvalue);
9081
9082
544k
        if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9083
18.1k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9084
18.1k
            xmlURIPtr uri;
9085
9086
18.1k
            if (URL == NULL) {
9087
0
                xmlErrMemory(ctxt, "dictionary allocation failure");
9088
0
                if ((attvalue != NULL) && (alloc != 0))
9089
0
                    xmlFree(attvalue);
9090
0
                localname = NULL;
9091
0
                goto done;
9092
0
            }
9093
18.1k
            if (*URL != 0) {
9094
17.4k
                uri = xmlParseURI((const char *) URL);
9095
17.4k
                if (uri == NULL) {
9096
12.3k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9097
12.3k
                             "xmlns: '%s' is not a valid URI\n",
9098
12.3k
                                       URL, NULL, NULL);
9099
12.3k
                } else {
9100
5.05k
                    if (uri->scheme == NULL) {
9101
2.26k
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9102
2.26k
                                  "xmlns: URI %s is not absolute\n",
9103
2.26k
                                  URL, NULL, NULL);
9104
2.26k
                    }
9105
5.05k
                    xmlFreeURI(uri);
9106
5.05k
                }
9107
17.4k
                if (URL == ctxt->str_xml_ns) {
9108
198
                    if (attname != ctxt->str_xml) {
9109
198
                        xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9110
198
                     "xml namespace URI cannot be the default namespace\n",
9111
198
                                 NULL, NULL, NULL);
9112
198
                    }
9113
198
                    goto next_attr;
9114
198
                }
9115
17.2k
                if ((len == 29) &&
9116
17.2k
                    (xmlStrEqual(URL,
9117
714
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9118
228
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9119
228
                         "reuse of the xmlns namespace name is forbidden\n",
9120
228
                             NULL, NULL, NULL);
9121
228
                    goto next_attr;
9122
228
                }
9123
17.2k
            }
9124
            /*
9125
             * check that it's not a defined namespace
9126
             */
9127
19.5k
            for (j = 1;j <= nbNs;j++)
9128
3.15k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9129
1.25k
                    break;
9130
17.6k
            if (j <= nbNs)
9131
1.25k
                xmlErrAttributeDup(ctxt, NULL, attname);
9132
16.4k
            else
9133
16.4k
                if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
9134
9135
526k
        } else if (aprefix == ctxt->str_xmlns) {
9136
24.2k
            const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9137
24.2k
            xmlURIPtr uri;
9138
9139
24.2k
            if (attname == ctxt->str_xml) {
9140
692
                if (URL != ctxt->str_xml_ns) {
9141
498
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9142
498
                             "xml namespace prefix mapped to wrong URI\n",
9143
498
                             NULL, NULL, NULL);
9144
498
                }
9145
                /*
9146
                 * Do not keep a namespace definition node
9147
                 */
9148
692
                goto next_attr;
9149
692
            }
9150
23.5k
            if (URL == ctxt->str_xml_ns) {
9151
88
                if (attname != ctxt->str_xml) {
9152
88
                    xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9153
88
                             "xml namespace URI mapped to wrong prefix\n",
9154
88
                             NULL, NULL, NULL);
9155
88
                }
9156
88
                goto next_attr;
9157
88
            }
9158
23.4k
            if (attname == ctxt->str_xmlns) {
9159
360
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9160
360
                         "redefinition of the xmlns prefix is forbidden\n",
9161
360
                         NULL, NULL, NULL);
9162
360
                goto next_attr;
9163
360
            }
9164
23.0k
            if ((len == 29) &&
9165
23.0k
                (xmlStrEqual(URL,
9166
660
                             BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9167
397
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9168
397
                         "reuse of the xmlns namespace name is forbidden\n",
9169
397
                         NULL, NULL, NULL);
9170
397
                goto next_attr;
9171
397
            }
9172
22.6k
            if ((URL == NULL) || (URL[0] == 0)) {
9173
413
                xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9174
413
                         "xmlns:%s: Empty XML namespace is not allowed\n",
9175
413
                              attname, NULL, NULL);
9176
413
                goto next_attr;
9177
22.2k
            } else {
9178
22.2k
                uri = xmlParseURI((const char *) URL);
9179
22.2k
                if (uri == NULL) {
9180
3.38k
                    xmlNsErr(ctxt, XML_WAR_NS_URI,
9181
3.38k
                         "xmlns:%s: '%s' is not a valid URI\n",
9182
3.38k
                                       attname, URL, NULL);
9183
18.9k
                } else {
9184
18.9k
                    if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9185
0
                        xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9186
0
                                  "xmlns:%s: URI %s is not absolute\n",
9187
0
                                  attname, URL, NULL);
9188
0
                    }
9189
18.9k
                    xmlFreeURI(uri);
9190
18.9k
                }
9191
22.2k
            }
9192
9193
            /*
9194
             * check that it's not a defined namespace
9195
             */
9196
27.3k
            for (j = 1;j <= nbNs;j++)
9197
5.99k
                if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9198
927
                    break;
9199
22.2k
            if (j <= nbNs)
9200
927
                xmlErrAttributeDup(ctxt, aprefix, attname);
9201
21.3k
            else
9202
21.3k
                if (nsPush(ctxt, attname, URL) > 0) nbNs++;
9203
9204
502k
        } else {
9205
            /*
9206
             * Add the pair to atts
9207
             */
9208
502k
            if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9209
17.0k
                if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9210
0
                    goto next_attr;
9211
0
                }
9212
17.0k
                maxatts = ctxt->maxatts;
9213
17.0k
                atts = ctxt->atts;
9214
17.0k
            }
9215
502k
            ctxt->attallocs[nratts++] = alloc;
9216
502k
            atts[nbatts++] = attname;
9217
502k
            atts[nbatts++] = aprefix;
9218
            /*
9219
             * The namespace URI field is used temporarily to point at the
9220
             * base of the current input buffer for non-alloced attributes.
9221
             * When the input buffer is reallocated, all the pointers become
9222
             * invalid, but they can be reconstructed later.
9223
             */
9224
502k
            if (alloc)
9225
156k
                atts[nbatts++] = NULL;
9226
346k
            else
9227
346k
                atts[nbatts++] = ctxt->input->base;
9228
502k
            atts[nbatts++] = attvalue;
9229
502k
            attvalue += len;
9230
502k
            atts[nbatts++] = attvalue;
9231
            /*
9232
             * tag if some deallocation is needed
9233
             */
9234
502k
            if (alloc != 0) attval = 1;
9235
502k
            attvalue = NULL; /* moved into atts */
9236
502k
        }
9237
9238
552k
next_attr:
9239
552k
        if ((attvalue != NULL) && (alloc != 0)) {
9240
14.0k
            xmlFree(attvalue);
9241
14.0k
            attvalue = NULL;
9242
14.0k
        }
9243
9244
552k
  GROW
9245
552k
        if (ctxt->instate == XML_PARSER_EOF)
9246
14
            break;
9247
552k
  if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9248
328k
      break;
9249
224k
  if (SKIP_BLANKS == 0) {
9250
27.5k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9251
27.5k
         "attributes construct error\n");
9252
27.5k
      break;
9253
27.5k
  }
9254
197k
        GROW;
9255
197k
    }
9256
9257
497k
    if (ctxt->input->id != inputid) {
9258
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9259
0
                    "Unexpected change of input\n");
9260
0
        localname = NULL;
9261
0
        goto done;
9262
0
    }
9263
9264
    /* Reconstruct attribute value pointers. */
9265
1.00M
    for (i = 0, j = 0; j < nratts; i += 5, j++) {
9266
502k
        if (atts[i+2] != NULL) {
9267
            /*
9268
             * Arithmetic on dangling pointers is technically undefined
9269
             * behavior, but well...
9270
             */
9271
346k
            const xmlChar *old = atts[i+2];
9272
346k
            atts[i+2]  = NULL;    /* Reset repurposed namespace URI */
9273
346k
            atts[i+3] = ctxt->input->base + (atts[i+3] - old);  /* value */
9274
346k
            atts[i+4] = ctxt->input->base + (atts[i+4] - old);  /* valuend */
9275
346k
        }
9276
502k
    }
9277
9278
    /*
9279
     * The attributes defaulting
9280
     */
9281
497k
    if (ctxt->attsDefault != NULL) {
9282
13.3k
        xmlDefAttrsPtr defaults;
9283
9284
13.3k
  defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9285
13.3k
  if (defaults != NULL) {
9286
25.2k
      for (i = 0;i < defaults->nbAttrs;i++) {
9287
15.7k
          attname = defaults->values[5 * i];
9288
15.7k
    aprefix = defaults->values[5 * i + 1];
9289
9290
                /*
9291
     * special work for namespaces defaulted defs
9292
     */
9293
15.7k
    if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9294
        /*
9295
         * check that it's not a defined namespace
9296
         */
9297
5.00k
        for (j = 1;j <= nbNs;j++)
9298
2.57k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9299
560
          break;
9300
2.99k
              if (j <= nbNs) continue;
9301
9302
2.43k
        nsname = xmlGetNamespace(ctxt, NULL);
9303
2.43k
        if (nsname != defaults->values[5 * i + 2]) {
9304
1.57k
      if (nsPush(ctxt, NULL,
9305
1.57k
                 defaults->values[5 * i + 2]) > 0)
9306
1.57k
          nbNs++;
9307
1.57k
        }
9308
12.7k
    } else if (aprefix == ctxt->str_xmlns) {
9309
        /*
9310
         * check that it's not a defined namespace
9311
         */
9312
7.87k
        for (j = 1;j <= nbNs;j++)
9313
3.61k
            if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9314
760
          break;
9315
5.02k
              if (j <= nbNs) continue;
9316
9317
4.26k
        nsname = xmlGetNamespace(ctxt, attname);
9318
4.26k
        if (nsname != defaults->values[5 * i + 2]) {
9319
3.28k
      if (nsPush(ctxt, attname,
9320
3.28k
                 defaults->values[5 * i + 2]) > 0)
9321
3.28k
          nbNs++;
9322
3.28k
        }
9323
7.77k
    } else {
9324
        /*
9325
         * check that it's not a defined attribute
9326
         */
9327
12.1k
        for (j = 0;j < nbatts;j+=5) {
9328
5.52k
      if ((attname == atts[j]) && (aprefix == atts[j+1]))
9329
1.14k
          break;
9330
5.52k
        }
9331
7.77k
        if (j < nbatts) continue;
9332
9333
6.63k
        if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9334
157
      if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9335
0
                            localname = NULL;
9336
0
                            goto done;
9337
0
      }
9338
157
      maxatts = ctxt->maxatts;
9339
157
      atts = ctxt->atts;
9340
157
        }
9341
6.63k
        atts[nbatts++] = attname;
9342
6.63k
        atts[nbatts++] = aprefix;
9343
6.63k
        if (aprefix == NULL)
9344
2.97k
      atts[nbatts++] = NULL;
9345
3.65k
        else
9346
3.65k
            atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
9347
6.63k
        atts[nbatts++] = defaults->values[5 * i + 2];
9348
6.63k
        atts[nbatts++] = defaults->values[5 * i + 3];
9349
6.63k
        if ((ctxt->standalone == 1) &&
9350
6.63k
            (defaults->values[5 * i + 4] != NULL)) {
9351
0
      xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9352
0
    "standalone: attribute %s on %s defaulted from external subset\n",
9353
0
                                   attname, localname);
9354
0
        }
9355
6.63k
        nbdef++;
9356
6.63k
    }
9357
15.7k
      }
9358
9.49k
  }
9359
13.3k
    }
9360
9361
    /*
9362
     * The attributes checkings
9363
     */
9364
1.00M
    for (i = 0; i < nbatts;i += 5) {
9365
        /*
9366
  * The default namespace does not apply to attribute names.
9367
  */
9368
508k
  if (atts[i + 1] != NULL) {
9369
56.8k
      nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9370
56.8k
      if (nsname == NULL) {
9371
15.8k
    xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9372
15.8k
        "Namespace prefix %s for %s on %s is not defined\n",
9373
15.8k
        atts[i + 1], atts[i], localname);
9374
15.8k
      }
9375
56.8k
      atts[i + 2] = nsname;
9376
56.8k
  } else
9377
452k
      nsname = NULL;
9378
  /*
9379
   * [ WFC: Unique Att Spec ]
9380
   * No attribute name may appear more than once in the same
9381
   * start-tag or empty-element tag.
9382
   * As extended by the Namespace in XML REC.
9383
   */
9384
785k
        for (j = 0; j < i;j += 5) {
9385
281k
      if (atts[i] == atts[j]) {
9386
7.88k
          if (atts[i+1] == atts[j+1]) {
9387
3.69k
        xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9388
3.69k
        break;
9389
3.69k
    }
9390
4.18k
    if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9391
1.08k
        xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9392
1.08k
           "Namespaced Attribute %s in '%s' redefined\n",
9393
1.08k
           atts[i], nsname, NULL);
9394
1.08k
        break;
9395
1.08k
    }
9396
4.18k
      }
9397
281k
  }
9398
508k
    }
9399
9400
497k
    nsname = xmlGetNamespace(ctxt, prefix);
9401
497k
    if ((prefix != NULL) && (nsname == NULL)) {
9402
25.5k
  xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9403
25.5k
           "Namespace prefix %s on %s is not defined\n",
9404
25.5k
     prefix, localname, NULL);
9405
25.5k
    }
9406
497k
    *pref = prefix;
9407
497k
    *URI = nsname;
9408
9409
    /*
9410
     * SAX: Start of Element !
9411
     */
9412
497k
    if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9413
497k
  (!ctxt->disableSAX)) {
9414
399k
  if (nbNs > 0)
9415
18.6k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9416
18.6k
        nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9417
18.6k
        nbatts / 5, nbdef, atts);
9418
381k
  else
9419
381k
      ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9420
381k
                    nsname, 0, NULL, nbatts / 5, nbdef, atts);
9421
399k
    }
9422
9423
497k
done:
9424
    /*
9425
     * Free up attribute allocated strings if needed
9426
     */
9427
497k
    if (attval != 0) {
9428
342k
  for (i = 3,j = 0; j < nratts;i += 5,j++)
9429
209k
      if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9430
156k
          xmlFree((xmlChar *) atts[i]);
9431
132k
    }
9432
9433
497k
    return(localname);
9434
497k
}
9435
9436
/**
9437
 * xmlParseEndTag2:
9438
 * @ctxt:  an XML parser context
9439
 * @line:  line of the start tag
9440
 * @nsNr:  number of namespaces on the start tag
9441
 *
9442
 * Parse an end tag. Always consumes '</'.
9443
 *
9444
 * [42] ETag ::= '</' Name S? '>'
9445
 *
9446
 * With namespace
9447
 *
9448
 * [NS 9] ETag ::= '</' QName S? '>'
9449
 */
9450
9451
static void
9452
100k
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
9453
100k
    const xmlChar *name;
9454
9455
100k
    GROW;
9456
100k
    if ((RAW != '<') || (NXT(1) != '/')) {
9457
3
  xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
9458
3
  return;
9459
3
    }
9460
100k
    SKIP(2);
9461
9462
100k
    if (tag->prefix == NULL)
9463
24.9k
        name = xmlParseNameAndCompare(ctxt, ctxt->name);
9464
75.9k
    else
9465
75.9k
        name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
9466
9467
    /*
9468
     * We should definitely be at the ending "S? '>'" part
9469
     */
9470
100k
    GROW;
9471
100k
    if (ctxt->instate == XML_PARSER_EOF)
9472
7
        return;
9473
100k
    SKIP_BLANKS;
9474
100k
    if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
9475
7.42k
  xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
9476
7.42k
    } else
9477
93.5k
  NEXT1;
9478
9479
    /*
9480
     * [ WFC: Element Type Match ]
9481
     * The Name in an element's end-tag must match the element type in the
9482
     * start-tag.
9483
     *
9484
     */
9485
100k
    if (name != (xmlChar*)1) {
9486
6.43k
        if (name == NULL) name = BAD_CAST "unparsable";
9487
6.43k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
9488
6.43k
         "Opening and ending tag mismatch: %s line %d and %s\n",
9489
6.43k
                    ctxt->name, tag->line, name);
9490
6.43k
    }
9491
9492
    /*
9493
     * SAX: End of Tag
9494
     */
9495
100k
    if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9496
100k
  (!ctxt->disableSAX))
9497
88.2k
  ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
9498
88.2k
                                tag->URI);
9499
9500
100k
    spacePop(ctxt);
9501
100k
    if (tag->nsNr != 0)
9502
14.7k
  nsPop(ctxt, tag->nsNr);
9503
100k
}
9504
9505
/**
9506
 * xmlParseCDSect:
9507
 * @ctxt:  an XML parser context
9508
 *
9509
 * DEPRECATED: Internal function, don't use.
9510
 *
9511
 * Parse escaped pure raw content. Always consumes '<!['.
9512
 *
9513
 * [18] CDSect ::= CDStart CData CDEnd
9514
 *
9515
 * [19] CDStart ::= '<![CDATA['
9516
 *
9517
 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9518
 *
9519
 * [21] CDEnd ::= ']]>'
9520
 */
9521
void
9522
5.85k
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9523
5.85k
    xmlChar *buf = NULL;
9524
5.85k
    int len = 0;
9525
5.85k
    int size = XML_PARSER_BUFFER_SIZE;
9526
5.85k
    int r, rl;
9527
5.85k
    int s, sl;
9528
5.85k
    int cur, l;
9529
5.85k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9530
0
                    XML_MAX_HUGE_LENGTH :
9531
5.85k
                    XML_MAX_TEXT_LENGTH;
9532
9533
5.85k
    if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
9534
0
        return;
9535
5.85k
    SKIP(3);
9536
9537
5.85k
    if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
9538
0
        return;
9539
5.85k
    SKIP(6);
9540
9541
5.85k
    ctxt->instate = XML_PARSER_CDATA_SECTION;
9542
5.85k
    r = CUR_CHAR(rl);
9543
5.85k
    if (!IS_CHAR(r)) {
9544
510
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9545
510
        goto out;
9546
510
    }
9547
5.34k
    NEXTL(rl);
9548
5.34k
    s = CUR_CHAR(sl);
9549
5.34k
    if (!IS_CHAR(s)) {
9550
1.03k
  xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
9551
1.03k
        goto out;
9552
1.03k
    }
9553
4.31k
    NEXTL(sl);
9554
4.31k
    cur = CUR_CHAR(l);
9555
4.31k
    buf = (xmlChar *) xmlMallocAtomic(size);
9556
4.31k
    if (buf == NULL) {
9557
0
  xmlErrMemory(ctxt, NULL);
9558
0
        goto out;
9559
0
    }
9560
649k
    while (IS_CHAR(cur) &&
9561
649k
           ((r != ']') || (s != ']') || (cur != '>'))) {
9562
644k
  if (len + 5 >= size) {
9563
777
      xmlChar *tmp;
9564
9565
777
      tmp = (xmlChar *) xmlRealloc(buf, size * 2);
9566
777
      if (tmp == NULL) {
9567
0
    xmlErrMemory(ctxt, NULL);
9568
0
                goto out;
9569
0
      }
9570
777
      buf = tmp;
9571
777
      size *= 2;
9572
777
  }
9573
644k
  COPY_BUF(rl,buf,len,r);
9574
644k
        if (len > maxLength) {
9575
0
            xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9576
0
                           "CData section too big found\n");
9577
0
            goto out;
9578
0
        }
9579
644k
  r = s;
9580
644k
  rl = sl;
9581
644k
  s = cur;
9582
644k
  sl = l;
9583
644k
  NEXTL(l);
9584
644k
  cur = CUR_CHAR(l);
9585
644k
    }
9586
4.31k
    buf[len] = 0;
9587
4.31k
    if (ctxt->instate == XML_PARSER_EOF) {
9588
11
        xmlFree(buf);
9589
11
        return;
9590
11
    }
9591
4.30k
    if (cur != '>') {
9592
2.41k
  xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9593
2.41k
                       "CData section not finished\n%.50s\n", buf);
9594
2.41k
        goto out;
9595
2.41k
    }
9596
1.89k
    NEXTL(l);
9597
9598
    /*
9599
     * OK the buffer is to be consumed as cdata.
9600
     */
9601
1.89k
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9602
1.56k
  if (ctxt->sax->cdataBlock != NULL)
9603
1.56k
      ctxt->sax->cdataBlock(ctxt->userData, buf, len);
9604
0
  else if (ctxt->sax->characters != NULL)
9605
0
      ctxt->sax->characters(ctxt->userData, buf, len);
9606
1.56k
    }
9607
9608
5.84k
out:
9609
5.84k
    if (ctxt->instate != XML_PARSER_EOF)
9610
5.84k
        ctxt->instate = XML_PARSER_CONTENT;
9611
5.84k
    xmlFree(buf);
9612
5.84k
}
9613
9614
/**
9615
 * xmlParseContentInternal:
9616
 * @ctxt:  an XML parser context
9617
 *
9618
 * Parse a content sequence. Stops at EOF or '</'. Leaves checking of
9619
 * unexpected EOF to the caller.
9620
 */
9621
9622
static void
9623
31.3k
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
9624
31.3k
    int nameNr = ctxt->nameNr;
9625
9626
31.3k
    GROW;
9627
1.28M
    while ((RAW != 0) &&
9628
1.28M
     (ctxt->instate != XML_PARSER_EOF)) {
9629
1.26M
  const xmlChar *cur = ctxt->input->cur;
9630
9631
  /*
9632
   * First case : a Processing Instruction.
9633
   */
9634
1.26M
  if ((*cur == '<') && (cur[1] == '?')) {
9635
3.61k
      xmlParsePI(ctxt);
9636
3.61k
  }
9637
9638
  /*
9639
   * Second case : a CDSection
9640
   */
9641
  /* 2.6.0 test was *cur not RAW */
9642
1.26M
  else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
9643
5.85k
      xmlParseCDSect(ctxt);
9644
5.85k
  }
9645
9646
  /*
9647
   * Third case :  a comment
9648
   */
9649
1.26M
  else if ((*cur == '<') && (NXT(1) == '!') &&
9650
1.26M
     (NXT(2) == '-') && (NXT(3) == '-')) {
9651
5.24k
      xmlParseComment(ctxt);
9652
5.24k
      ctxt->instate = XML_PARSER_CONTENT;
9653
5.24k
  }
9654
9655
  /*
9656
   * Fourth case :  a sub-element.
9657
   */
9658
1.25M
  else if (*cur == '<') {
9659
597k
            if (NXT(1) == '/') {
9660
100k
                if (ctxt->nameNr <= nameNr)
9661
14.4k
                    break;
9662
86.5k
          xmlParseElementEnd(ctxt);
9663
496k
            } else {
9664
496k
          xmlParseElementStart(ctxt);
9665
496k
            }
9666
597k
  }
9667
9668
  /*
9669
   * Fifth case : a reference. If if has not been resolved,
9670
   *    parsing returns it's Name, create the node
9671
   */
9672
9673
656k
  else if (*cur == '&') {
9674
58.4k
      xmlParseReference(ctxt);
9675
58.4k
  }
9676
9677
  /*
9678
   * Last case, text. Note that References are handled directly.
9679
   */
9680
598k
  else {
9681
598k
      xmlParseCharDataInternal(ctxt, 0);
9682
598k
  }
9683
9684
1.25M
  SHRINK;
9685
1.25M
  GROW;
9686
1.25M
    }
9687
31.3k
}
9688
9689
/**
9690
 * xmlParseContent:
9691
 * @ctxt:  an XML parser context
9692
 *
9693
 * Parse a content sequence. Stops at EOF or '</'.
9694
 *
9695
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9696
 */
9697
9698
void
9699
11.4k
xmlParseContent(xmlParserCtxtPtr ctxt) {
9700
11.4k
    int nameNr = ctxt->nameNr;
9701
9702
11.4k
    xmlParseContentInternal(ctxt);
9703
9704
11.4k
    if ((ctxt->instate != XML_PARSER_EOF) && (ctxt->nameNr > nameNr)) {
9705
783
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9706
783
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9707
783
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9708
783
                "Premature end of data in tag %s line %d\n",
9709
783
    name, line, NULL);
9710
783
    }
9711
11.4k
}
9712
9713
/**
9714
 * xmlParseElement:
9715
 * @ctxt:  an XML parser context
9716
 *
9717
 * DEPRECATED: Internal function, don't use.
9718
 *
9719
 * parse an XML element
9720
 *
9721
 * [39] element ::= EmptyElemTag | STag content ETag
9722
 *
9723
 * [ WFC: Element Type Match ]
9724
 * The Name in an element's end-tag must match the element type in the
9725
 * start-tag.
9726
 *
9727
 */
9728
9729
void
9730
21.7k
xmlParseElement(xmlParserCtxtPtr ctxt) {
9731
21.7k
    if (xmlParseElementStart(ctxt) != 0)
9732
1.88k
        return;
9733
9734
19.9k
    xmlParseContentInternal(ctxt);
9735
19.9k
    if (ctxt->instate == XML_PARSER_EOF)
9736
150
  return;
9737
9738
19.7k
    if (CUR == 0) {
9739
5.32k
        const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
9740
5.32k
        int line = ctxt->pushTab[ctxt->nameNr - 1].line;
9741
5.32k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
9742
5.32k
                "Premature end of data in tag %s line %d\n",
9743
5.32k
    name, line, NULL);
9744
5.32k
        return;
9745
5.32k
    }
9746
9747
14.4k
    xmlParseElementEnd(ctxt);
9748
14.4k
}
9749
9750
/**
9751
 * xmlParseElementStart:
9752
 * @ctxt:  an XML parser context
9753
 *
9754
 * Parse the start of an XML element. Returns -1 in case of error, 0 if an
9755
 * opening tag was parsed, 1 if an empty element was parsed.
9756
 *
9757
 * Always consumes '<'.
9758
 */
9759
static int
9760
518k
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
9761
518k
    const xmlChar *name;
9762
518k
    const xmlChar *prefix = NULL;
9763
518k
    const xmlChar *URI = NULL;
9764
518k
    xmlParserNodeInfo node_info;
9765
518k
    int line, tlen = 0;
9766
518k
    xmlNodePtr cur;
9767
518k
    int nsNr = ctxt->nsNr;
9768
9769
518k
    if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9770
518k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9771
10
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9772
10
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9773
10
        xmlParserMaxDepth);
9774
10
  xmlHaltParser(ctxt);
9775
10
  return(-1);
9776
10
    }
9777
9778
    /* Capture start position */
9779
518k
    if (ctxt->record_info) {
9780
0
        node_info.begin_pos = ctxt->input->consumed +
9781
0
                          (CUR_PTR - ctxt->input->base);
9782
0
  node_info.begin_line = ctxt->input->line;
9783
0
    }
9784
9785
518k
    if (ctxt->spaceNr == 0)
9786
21.7k
  spacePush(ctxt, -1);
9787
496k
    else if (*ctxt->space == -2)
9788
0
  spacePush(ctxt, -1);
9789
496k
    else
9790
496k
  spacePush(ctxt, *ctxt->space);
9791
9792
518k
    line = ctxt->input->line;
9793
518k
#ifdef LIBXML_SAX1_ENABLED
9794
518k
    if (ctxt->sax2)
9795
518k
#endif /* LIBXML_SAX1_ENABLED */
9796
518k
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
9797
0
#ifdef LIBXML_SAX1_ENABLED
9798
0
    else
9799
0
  name = xmlParseStartTag(ctxt);
9800
518k
#endif /* LIBXML_SAX1_ENABLED */
9801
518k
    if (ctxt->instate == XML_PARSER_EOF)
9802
117
  return(-1);
9803
518k
    if (name == NULL) {
9804
20.6k
  spacePop(ctxt);
9805
20.6k
        return(-1);
9806
20.6k
    }
9807
497k
    nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
9808
497k
    cur = ctxt->node;
9809
9810
497k
#ifdef LIBXML_VALID_ENABLED
9811
    /*
9812
     * [ VC: Root Element Type ]
9813
     * The Name in the document type declaration must match the element
9814
     * type of the root element.
9815
     */
9816
497k
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9817
497k
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
9818
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9819
497k
#endif /* LIBXML_VALID_ENABLED */
9820
9821
    /*
9822
     * Check for an Empty Element.
9823
     */
9824
497k
    if ((RAW == '/') && (NXT(1) == '>')) {
9825
285k
        SKIP(2);
9826
285k
  if (ctxt->sax2) {
9827
285k
      if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9828
285k
    (!ctxt->disableSAX))
9829
281k
    ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
9830
285k
#ifdef LIBXML_SAX1_ENABLED
9831
285k
  } else {
9832
0
      if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9833
0
    (!ctxt->disableSAX))
9834
0
    ctxt->sax->endElement(ctxt->userData, name);
9835
0
#endif /* LIBXML_SAX1_ENABLED */
9836
0
  }
9837
285k
  namePop(ctxt);
9838
285k
  spacePop(ctxt);
9839
285k
  if (nsNr != ctxt->nsNr)
9840
1.17k
      nsPop(ctxt, ctxt->nsNr - nsNr);
9841
285k
  if (cur != NULL && ctxt->record_info) {
9842
0
            node_info.node = cur;
9843
0
            node_info.end_pos = ctxt->input->consumed +
9844
0
                                (CUR_PTR - ctxt->input->base);
9845
0
            node_info.end_line = ctxt->input->line;
9846
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9847
0
  }
9848
285k
  return(1);
9849
285k
    }
9850
211k
    if (RAW == '>') {
9851
154k
        NEXT1;
9852
154k
        if (cur != NULL && ctxt->record_info) {
9853
0
            node_info.node = cur;
9854
0
            node_info.end_pos = 0;
9855
0
            node_info.end_line = 0;
9856
0
            xmlParserAddNodeInfo(ctxt, &node_info);
9857
0
        }
9858
154k
    } else {
9859
57.3k
        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9860
57.3k
         "Couldn't find end of Start Tag %s line %d\n",
9861
57.3k
                    name, line, NULL);
9862
9863
  /*
9864
   * end of parsing of this node.
9865
   */
9866
57.3k
  nodePop(ctxt);
9867
57.3k
  namePop(ctxt);
9868
57.3k
  spacePop(ctxt);
9869
57.3k
  if (nsNr != ctxt->nsNr)
9870
13.6k
      nsPop(ctxt, ctxt->nsNr - nsNr);
9871
57.3k
  return(-1);
9872
57.3k
    }
9873
9874
154k
    return(0);
9875
211k
}
9876
9877
/**
9878
 * xmlParseElementEnd:
9879
 * @ctxt:  an XML parser context
9880
 *
9881
 * Parse the end of an XML element. Always consumes '</'.
9882
 */
9883
static void
9884
100k
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
9885
100k
    xmlNodePtr cur = ctxt->node;
9886
9887
100k
    if (ctxt->nameNr <= 0) {
9888
0
        if ((RAW == '<') && (NXT(1) == '/'))
9889
0
            SKIP(2);
9890
0
        return;
9891
0
    }
9892
9893
    /*
9894
     * parse the end of tag: '</' should be here.
9895
     */
9896
100k
    if (ctxt->sax2) {
9897
100k
  xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
9898
100k
  namePop(ctxt);
9899
100k
    }
9900
0
#ifdef LIBXML_SAX1_ENABLED
9901
0
    else
9902
0
  xmlParseEndTag1(ctxt, 0);
9903
100k
#endif /* LIBXML_SAX1_ENABLED */
9904
9905
    /*
9906
     * Capture end position
9907
     */
9908
100k
    if (cur != NULL && ctxt->record_info) {
9909
0
        xmlParserNodeInfoPtr node_info;
9910
9911
0
        node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
9912
0
        if (node_info != NULL) {
9913
0
            node_info->end_pos = ctxt->input->consumed +
9914
0
                                 (CUR_PTR - ctxt->input->base);
9915
0
            node_info->end_line = ctxt->input->line;
9916
0
        }
9917
0
    }
9918
100k
}
9919
9920
/**
9921
 * xmlParseVersionNum:
9922
 * @ctxt:  an XML parser context
9923
 *
9924
 * DEPRECATED: Internal function, don't use.
9925
 *
9926
 * parse the XML version value.
9927
 *
9928
 * [26] VersionNum ::= '1.' [0-9]+
9929
 *
9930
 * In practice allow [0-9].[0-9]+ at that level
9931
 *
9932
 * Returns the string giving the XML version number, or NULL
9933
 */
9934
xmlChar *
9935
10.0k
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9936
10.0k
    xmlChar *buf = NULL;
9937
10.0k
    int len = 0;
9938
10.0k
    int size = 10;
9939
10.0k
    xmlChar cur;
9940
9941
10.0k
    buf = (xmlChar *) xmlMallocAtomic(size);
9942
10.0k
    if (buf == NULL) {
9943
0
  xmlErrMemory(ctxt, NULL);
9944
0
  return(NULL);
9945
0
    }
9946
10.0k
    cur = CUR;
9947
10.0k
    if (!((cur >= '0') && (cur <= '9'))) {
9948
317
  xmlFree(buf);
9949
317
  return(NULL);
9950
317
    }
9951
9.77k
    buf[len++] = cur;
9952
9.77k
    NEXT;
9953
9.77k
    cur=CUR;
9954
9.77k
    if (cur != '.') {
9955
249
  xmlFree(buf);
9956
249
  return(NULL);
9957
249
    }
9958
9.52k
    buf[len++] = cur;
9959
9.52k
    NEXT;
9960
9.52k
    cur=CUR;
9961
25.5k
    while ((cur >= '0') && (cur <= '9')) {
9962
15.9k
  if (len + 1 >= size) {
9963
462
      xmlChar *tmp;
9964
9965
462
      size *= 2;
9966
462
      tmp = (xmlChar *) xmlRealloc(buf, size);
9967
462
      if (tmp == NULL) {
9968
0
          xmlFree(buf);
9969
0
    xmlErrMemory(ctxt, NULL);
9970
0
    return(NULL);
9971
0
      }
9972
462
      buf = tmp;
9973
462
  }
9974
15.9k
  buf[len++] = cur;
9975
15.9k
  NEXT;
9976
15.9k
  cur=CUR;
9977
15.9k
    }
9978
9.52k
    buf[len] = 0;
9979
9.52k
    return(buf);
9980
9.52k
}
9981
9982
/**
9983
 * xmlParseVersionInfo:
9984
 * @ctxt:  an XML parser context
9985
 *
9986
 * DEPRECATED: Internal function, don't use.
9987
 *
9988
 * parse the XML version.
9989
 *
9990
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
9991
 *
9992
 * [25] Eq ::= S? '=' S?
9993
 *
9994
 * Returns the version string, e.g. "1.0"
9995
 */
9996
9997
xmlChar *
9998
27.1k
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9999
27.1k
    xmlChar *version = NULL;
10000
10001
27.1k
    if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10002
10.7k
  SKIP(7);
10003
10.7k
  SKIP_BLANKS;
10004
10.7k
  if (RAW != '=') {
10005
403
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10006
403
      return(NULL);
10007
403
        }
10008
10.3k
  NEXT;
10009
10.3k
  SKIP_BLANKS;
10010
10.3k
  if (RAW == '"') {
10011
8.90k
      NEXT;
10012
8.90k
      version = xmlParseVersionNum(ctxt);
10013
8.90k
      if (RAW != '"') {
10014
433
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10015
433
      } else
10016
8.47k
          NEXT;
10017
8.90k
  } else if (RAW == '\''){
10018
1.18k
      NEXT;
10019
1.18k
      version = xmlParseVersionNum(ctxt);
10020
1.18k
      if (RAW != '\'') {
10021
497
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10022
497
      } else
10023
692
          NEXT;
10024
1.18k
  } else {
10025
207
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10026
207
  }
10027
10.3k
    }
10028
26.7k
    return(version);
10029
27.1k
}
10030
10031
/**
10032
 * xmlParseEncName:
10033
 * @ctxt:  an XML parser context
10034
 *
10035
 * DEPRECATED: Internal function, don't use.
10036
 *
10037
 * parse the XML encoding name
10038
 *
10039
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10040
 *
10041
 * Returns the encoding name value or NULL
10042
 */
10043
xmlChar *
10044
14.9k
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10045
14.9k
    xmlChar *buf = NULL;
10046
14.9k
    int len = 0;
10047
14.9k
    int size = 10;
10048
14.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10049
0
                    XML_MAX_TEXT_LENGTH :
10050
14.9k
                    XML_MAX_NAME_LENGTH;
10051
14.9k
    xmlChar cur;
10052
10053
14.9k
    cur = CUR;
10054
14.9k
    if (((cur >= 'a') && (cur <= 'z')) ||
10055
14.9k
        ((cur >= 'A') && (cur <= 'Z'))) {
10056
14.4k
  buf = (xmlChar *) xmlMallocAtomic(size);
10057
14.4k
  if (buf == NULL) {
10058
0
      xmlErrMemory(ctxt, NULL);
10059
0
      return(NULL);
10060
0
  }
10061
10062
14.4k
  buf[len++] = cur;
10063
14.4k
  NEXT;
10064
14.4k
  cur = CUR;
10065
163k
  while (((cur >= 'a') && (cur <= 'z')) ||
10066
163k
         ((cur >= 'A') && (cur <= 'Z')) ||
10067
163k
         ((cur >= '0') && (cur <= '9')) ||
10068
163k
         (cur == '.') || (cur == '_') ||
10069
163k
         (cur == '-')) {
10070
148k
      if (len + 1 >= size) {
10071
7.57k
          xmlChar *tmp;
10072
10073
7.57k
    size *= 2;
10074
7.57k
    tmp = (xmlChar *) xmlRealloc(buf, size);
10075
7.57k
    if (tmp == NULL) {
10076
0
        xmlErrMemory(ctxt, NULL);
10077
0
        xmlFree(buf);
10078
0
        return(NULL);
10079
0
    }
10080
7.57k
    buf = tmp;
10081
7.57k
      }
10082
148k
      buf[len++] = cur;
10083
148k
            if (len > maxLength) {
10084
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10085
0
                xmlFree(buf);
10086
0
                return(NULL);
10087
0
            }
10088
148k
      NEXT;
10089
148k
      cur = CUR;
10090
148k
        }
10091
14.4k
  buf[len] = 0;
10092
14.4k
    } else {
10093
459
  xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10094
459
    }
10095
14.9k
    return(buf);
10096
14.9k
}
10097
10098
/**
10099
 * xmlParseEncodingDecl:
10100
 * @ctxt:  an XML parser context
10101
 *
10102
 * DEPRECATED: Internal function, don't use.
10103
 *
10104
 * parse the XML encoding declaration
10105
 *
10106
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'")
10107
 *
10108
 * this setups the conversion filters.
10109
 *
10110
 * Returns the encoding value or NULL
10111
 */
10112
10113
const xmlChar *
10114
25.8k
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10115
25.8k
    xmlChar *encoding = NULL;
10116
10117
25.8k
    SKIP_BLANKS;
10118
25.8k
    if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
10119
15.3k
  SKIP(8);
10120
15.3k
  SKIP_BLANKS;
10121
15.3k
  if (RAW != '=') {
10122
241
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10123
241
      return(NULL);
10124
241
        }
10125
15.1k
  NEXT;
10126
15.1k
  SKIP_BLANKS;
10127
15.1k
  if (RAW == '"') {
10128
14.6k
      NEXT;
10129
14.6k
      encoding = xmlParseEncName(ctxt);
10130
14.6k
      if (RAW != '"') {
10131
597
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10132
597
    xmlFree((xmlChar *) encoding);
10133
597
    return(NULL);
10134
597
      } else
10135
14.0k
          NEXT;
10136
14.6k
  } else if (RAW == '\''){
10137
272
      NEXT;
10138
272
      encoding = xmlParseEncName(ctxt);
10139
272
      if (RAW != '\'') {
10140
69
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10141
69
    xmlFree((xmlChar *) encoding);
10142
69
    return(NULL);
10143
69
      } else
10144
203
          NEXT;
10145
272
  } else {
10146
201
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10147
201
  }
10148
10149
        /*
10150
         * Non standard parsing, allowing the user to ignore encoding
10151
         */
10152
14.4k
        if (ctxt->options & XML_PARSE_IGNORE_ENC) {
10153
0
      xmlFree((xmlChar *) encoding);
10154
0
            return(NULL);
10155
0
  }
10156
10157
  /*
10158
   * UTF-16 encoding switch has already taken place at this stage,
10159
   * more over the little-endian/big-endian selection is already done
10160
   */
10161
14.4k
        if ((encoding != NULL) &&
10162
14.4k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10163
14.0k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
10164
      /*
10165
       * If no encoding was passed to the parser, that we are
10166
       * using UTF-16 and no decoder is present i.e. the
10167
       * document is apparently UTF-8 compatible, then raise an
10168
       * encoding mismatch fatal error
10169
       */
10170
148
      if ((ctxt->encoding == NULL) &&
10171
148
          (ctxt->input->buf != NULL) &&
10172
148
          (ctxt->input->buf->encoder == NULL)) {
10173
16
    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10174
16
      "Document labelled UTF-16 but has UTF-8 content\n");
10175
16
      }
10176
148
      if (ctxt->encoding != NULL)
10177
132
    xmlFree((xmlChar *) ctxt->encoding);
10178
148
      ctxt->encoding = encoding;
10179
148
  }
10180
  /*
10181
   * UTF-8 encoding is handled natively
10182
   */
10183
14.3k
        else if ((encoding != NULL) &&
10184
14.3k
      ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10185
13.9k
       (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
10186
            /* TODO: Check for encoding mismatch. */
10187
2.77k
      if (ctxt->encoding != NULL)
10188
243
    xmlFree((xmlChar *) ctxt->encoding);
10189
2.77k
      ctxt->encoding = encoding;
10190
2.77k
  }
10191
11.5k
  else if (encoding != NULL) {
10192
11.1k
      xmlCharEncodingHandlerPtr handler;
10193
10194
11.1k
      if (ctxt->input->encoding != NULL)
10195
0
    xmlFree((xmlChar *) ctxt->input->encoding);
10196
11.1k
      ctxt->input->encoding = encoding;
10197
10198
11.1k
            handler = xmlFindCharEncodingHandler((const char *) encoding);
10199
11.1k
      if (handler != NULL) {
10200
10.3k
    if (xmlSwitchToEncoding(ctxt, handler) < 0) {
10201
        /* failed to convert */
10202
6
        ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
10203
6
        return(NULL);
10204
6
    }
10205
10.3k
      } else {
10206
826
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
10207
826
      "Unsupported encoding %s\n", encoding);
10208
826
    return(NULL);
10209
826
      }
10210
11.1k
  }
10211
14.4k
    }
10212
24.0k
    return(encoding);
10213
25.8k
}
10214
10215
/**
10216
 * xmlParseSDDecl:
10217
 * @ctxt:  an XML parser context
10218
 *
10219
 * DEPRECATED: Internal function, don't use.
10220
 *
10221
 * parse the XML standalone declaration
10222
 *
10223
 * [32] SDDecl ::= S 'standalone' Eq
10224
 *                 (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10225
 *
10226
 * [ VC: Standalone Document Declaration ]
10227
 * TODO The standalone document declaration must have the value "no"
10228
 * if any external markup declarations contain declarations of:
10229
 *  - attributes with default values, if elements to which these
10230
 *    attributes apply appear in the document without specifications
10231
 *    of values for these attributes, or
10232
 *  - entities (other than amp, lt, gt, apos, quot), if references
10233
 *    to those entities appear in the document, or
10234
 *  - attributes with values subject to normalization, where the
10235
 *    attribute appears in the document with a value which will change
10236
 *    as a result of normalization, or
10237
 *  - element types with element content, if white space occurs directly
10238
 *    within any instance of those types.
10239
 *
10240
 * Returns:
10241
 *   1 if standalone="yes"
10242
 *   0 if standalone="no"
10243
 *  -2 if standalone attribute is missing or invalid
10244
 *    (A standalone value of -2 means that the XML declaration was found,
10245
 *     but no value was specified for the standalone attribute).
10246
 */
10247
10248
int
10249
3.29k
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10250
3.29k
    int standalone = -2;
10251
10252
3.29k
    SKIP_BLANKS;
10253
3.29k
    if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10254
72
  SKIP(10);
10255
72
        SKIP_BLANKS;
10256
72
  if (RAW != '=') {
10257
2
      xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10258
2
      return(standalone);
10259
2
        }
10260
70
  NEXT;
10261
70
  SKIP_BLANKS;
10262
70
        if (RAW == '\''){
10263
58
      NEXT;
10264
58
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10265
2
          standalone = 0;
10266
2
                SKIP(2);
10267
56
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10268
56
                 (NXT(2) == 's')) {
10269
47
          standalone = 1;
10270
47
    SKIP(3);
10271
47
            } else {
10272
9
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10273
9
      }
10274
58
      if (RAW != '\'') {
10275
15
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10276
15
      } else
10277
43
          NEXT;
10278
58
  } else if (RAW == '"'){
10279
11
      NEXT;
10280
11
      if ((RAW == 'n') && (NXT(1) == 'o')) {
10281
2
          standalone = 0;
10282
2
    SKIP(2);
10283
9
      } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10284
9
                 (NXT(2) == 's')) {
10285
2
          standalone = 1;
10286
2
                SKIP(3);
10287
7
            } else {
10288
7
    xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10289
7
      }
10290
11
      if (RAW != '"') {
10291
10
    xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10292
10
      } else
10293
1
          NEXT;
10294
11
  } else {
10295
1
      xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10296
1
        }
10297
70
    }
10298
3.29k
    return(standalone);
10299
3.29k
}
10300
10301
/**
10302
 * xmlParseXMLDecl:
10303
 * @ctxt:  an XML parser context
10304
 *
10305
 * DEPRECATED: Internal function, don't use.
10306
 *
10307
 * parse an XML declaration header
10308
 *
10309
 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10310
 */
10311
10312
void
10313
9.76k
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10314
9.76k
    xmlChar *version;
10315
10316
    /*
10317
     * This value for standalone indicates that the document has an
10318
     * XML declaration but it does not have a standalone attribute.
10319
     * It will be overwritten later if a standalone attribute is found.
10320
     */
10321
9.76k
    ctxt->input->standalone = -2;
10322
10323
    /*
10324
     * We know that '<?xml' is here.
10325
     */
10326
9.76k
    SKIP(5);
10327
10328
9.76k
    if (!IS_BLANK_CH(RAW)) {
10329
0
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10330
0
                 "Blank needed after '<?xml'\n");
10331
0
    }
10332
9.76k
    SKIP_BLANKS;
10333
10334
    /*
10335
     * We must have the VersionInfo here.
10336
     */
10337
9.76k
    version = xmlParseVersionInfo(ctxt);
10338
9.76k
    if (version == NULL) {
10339
1.54k
  xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10340
8.22k
    } else {
10341
8.22k
  if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10342
      /*
10343
       * Changed here for XML-1.0 5th edition
10344
       */
10345
3.18k
      if (ctxt->options & XML_PARSE_OLD10) {
10346
0
    xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10347
0
                "Unsupported version '%s'\n",
10348
0
                version);
10349
3.18k
      } else {
10350
3.18k
          if ((version[0] == '1') && ((version[1] == '.'))) {
10351
3.06k
        xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10352
3.06k
                      "Unsupported version '%s'\n",
10353
3.06k
          version, NULL);
10354
3.06k
    } else {
10355
121
        xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10356
121
              "Unsupported version '%s'\n",
10357
121
              version);
10358
121
    }
10359
3.18k
      }
10360
3.18k
  }
10361
8.22k
  if (ctxt->version != NULL)
10362
0
      xmlFree((void *) ctxt->version);
10363
8.22k
  ctxt->version = version;
10364
8.22k
    }
10365
10366
    /*
10367
     * We may have the encoding declaration
10368
     */
10369
9.76k
    if (!IS_BLANK_CH(RAW)) {
10370
2.87k
        if ((RAW == '?') && (NXT(1) == '>')) {
10371
1.33k
      SKIP(2);
10372
1.33k
      return;
10373
1.33k
  }
10374
1.54k
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10375
1.54k
    }
10376
8.43k
    xmlParseEncodingDecl(ctxt);
10377
8.43k
    if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10378
8.43k
         (ctxt->instate == XML_PARSER_EOF)) {
10379
  /*
10380
   * The XML REC instructs us to stop parsing right here
10381
   */
10382
355
        return;
10383
355
    }
10384
10385
    /*
10386
     * We may have the standalone status.
10387
     */
10388
8.08k
    if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
10389
5.24k
        if ((RAW == '?') && (NXT(1) == '>')) {
10390
4.78k
      SKIP(2);
10391
4.78k
      return;
10392
4.78k
  }
10393
457
  xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10394
457
    }
10395
10396
    /*
10397
     * We can grow the input buffer freely at that point
10398
     */
10399
3.29k
    GROW;
10400
10401
3.29k
    SKIP_BLANKS;
10402
3.29k
    ctxt->input->standalone = xmlParseSDDecl(ctxt);
10403
10404
3.29k
    SKIP_BLANKS;
10405
3.29k
    if ((RAW == '?') && (NXT(1) == '>')) {
10406
1.86k
        SKIP(2);
10407
1.86k
    } else if (RAW == '>') {
10408
        /* Deprecated old WD ... */
10409
489
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10410
489
  NEXT;
10411
947
    } else {
10412
947
        int c;
10413
10414
947
  xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
10415
47.5k
        while ((c = CUR) != 0) {
10416
47.2k
            NEXT;
10417
47.2k
            if (c == '>')
10418
661
                break;
10419
47.2k
        }
10420
947
    }
10421
3.29k
}
10422
10423
/**
10424
 * xmlParseMisc:
10425
 * @ctxt:  an XML parser context
10426
 *
10427
 * DEPRECATED: Internal function, don't use.
10428
 *
10429
 * parse an XML Misc* optional field.
10430
 *
10431
 * [27] Misc ::= Comment | PI |  S
10432
 */
10433
10434
void
10435
55.2k
xmlParseMisc(xmlParserCtxtPtr ctxt) {
10436
68.4k
    while (ctxt->instate != XML_PARSER_EOF) {
10437
68.4k
        SKIP_BLANKS;
10438
68.4k
        GROW;
10439
68.4k
        if ((RAW == '<') && (NXT(1) == '?')) {
10440
10.2k
      xmlParsePI(ctxt);
10441
58.1k
        } else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
10442
2.88k
      xmlParseComment(ctxt);
10443
55.2k
        } else {
10444
55.2k
            break;
10445
55.2k
        }
10446
68.4k
    }
10447
55.2k
}
10448
10449
/**
10450
 * xmlParseDocument:
10451
 * @ctxt:  an XML parser context
10452
 *
10453
 * parse an XML document (and build a tree if using the standard SAX
10454
 * interface).
10455
 *
10456
 * [1] document ::= prolog element Misc*
10457
 *
10458
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10459
 *
10460
 * Returns 0, -1 in case of error. the parser context is augmented
10461
 *                as a result of the parsing.
10462
 */
10463
10464
int
10465
30.6k
xmlParseDocument(xmlParserCtxtPtr ctxt) {
10466
30.6k
    xmlChar start[4];
10467
30.6k
    xmlCharEncoding enc;
10468
10469
30.6k
    xmlInitParser();
10470
10471
30.6k
    if ((ctxt == NULL) || (ctxt->input == NULL))
10472
0
        return(-1);
10473
10474
30.6k
    GROW;
10475
10476
    /*
10477
     * SAX: detecting the level.
10478
     */
10479
30.6k
    xmlDetectSAX2(ctxt);
10480
10481
    /*
10482
     * SAX: beginning of the document processing.
10483
     */
10484
30.6k
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10485
30.6k
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10486
30.6k
    if (ctxt->instate == XML_PARSER_EOF)
10487
0
  return(-1);
10488
10489
30.6k
    if ((ctxt->encoding == NULL) &&
10490
30.6k
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
10491
  /*
10492
   * Get the 4 first bytes and decode the charset
10493
   * if enc != XML_CHAR_ENCODING_NONE
10494
   * plug some encoding conversion routines.
10495
   */
10496
28.8k
  start[0] = RAW;
10497
28.8k
  start[1] = NXT(1);
10498
28.8k
  start[2] = NXT(2);
10499
28.8k
  start[3] = NXT(3);
10500
28.8k
  enc = xmlDetectCharEncoding(&start[0], 4);
10501
28.8k
  if (enc != XML_CHAR_ENCODING_NONE) {
10502
10.8k
      xmlSwitchEncoding(ctxt, enc);
10503
10.8k
  }
10504
28.8k
    }
10505
10506
10507
30.6k
    if (CUR == 0) {
10508
285
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10509
285
  return(-1);
10510
285
    }
10511
10512
30.4k
    GROW;
10513
30.4k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10514
10515
  /*
10516
   * Note that we will switch encoding on the fly.
10517
   */
10518
9.76k
  xmlParseXMLDecl(ctxt);
10519
9.76k
  if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
10520
9.76k
      (ctxt->instate == XML_PARSER_EOF)) {
10521
      /*
10522
       * The XML REC instructs us to stop parsing right here
10523
       */
10524
369
      return(-1);
10525
369
  }
10526
9.39k
  ctxt->standalone = ctxt->input->standalone;
10527
9.39k
  SKIP_BLANKS;
10528
20.6k
    } else {
10529
20.6k
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10530
20.6k
    }
10531
30.0k
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10532
28.5k
        ctxt->sax->startDocument(ctxt->userData);
10533
30.0k
    if (ctxt->instate == XML_PARSER_EOF)
10534
14
  return(-1);
10535
30.0k
    if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10536
30.0k
        (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10537
0
  ctxt->myDoc->compression = ctxt->input->buf->compressed;
10538
0
    }
10539
10540
    /*
10541
     * The Misc part of the Prolog
10542
     */
10543
30.0k
    xmlParseMisc(ctxt);
10544
10545
    /*
10546
     * Then possibly doc type declaration(s) and more Misc
10547
     * (doctypedecl Misc*)?
10548
     */
10549
30.0k
    GROW;
10550
30.0k
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
10551
10552
8.50k
  ctxt->inSubset = 1;
10553
8.50k
  xmlParseDocTypeDecl(ctxt);
10554
8.50k
  if (RAW == '[') {
10555
8.27k
      ctxt->instate = XML_PARSER_DTD;
10556
8.27k
      xmlParseInternalSubset(ctxt);
10557
8.27k
      if (ctxt->instate == XML_PARSER_EOF)
10558
5.03k
    return(-1);
10559
8.27k
  }
10560
10561
  /*
10562
   * Create and update the external subset.
10563
   */
10564
3.47k
  ctxt->inSubset = 2;
10565
3.47k
  if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10566
3.47k
      (!ctxt->disableSAX))
10567
2.55k
      ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10568
2.55k
                                ctxt->extSubSystem, ctxt->extSubURI);
10569
3.47k
  if (ctxt->instate == XML_PARSER_EOF)
10570
1
      return(-1);
10571
3.47k
  ctxt->inSubset = 0;
10572
10573
3.47k
        xmlCleanSpecialAttr(ctxt);
10574
10575
3.47k
  ctxt->instate = XML_PARSER_PROLOG;
10576
3.47k
  xmlParseMisc(ctxt);
10577
3.47k
    }
10578
10579
    /*
10580
     * Time to start parsing the tree itself
10581
     */
10582
24.9k
    GROW;
10583
24.9k
    if (RAW != '<') {
10584
3.19k
  xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10585
3.19k
           "Start tag expected, '<' not found\n");
10586
21.7k
    } else {
10587
21.7k
  ctxt->instate = XML_PARSER_CONTENT;
10588
21.7k
  xmlParseElement(ctxt);
10589
21.7k
  ctxt->instate = XML_PARSER_EPILOG;
10590
10591
10592
  /*
10593
   * The Misc part at the end
10594
   */
10595
21.7k
  xmlParseMisc(ctxt);
10596
10597
21.7k
  if (RAW != 0) {
10598
747
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
10599
747
  }
10600
21.7k
  ctxt->instate = XML_PARSER_EOF;
10601
21.7k
    }
10602
10603
    /*
10604
     * SAX: end of the document processing.
10605
     */
10606
24.9k
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10607
24.9k
        ctxt->sax->endDocument(ctxt->userData);
10608
10609
    /*
10610
     * Remove locally kept entity definitions if the tree was not built
10611
     */
10612
24.9k
    if ((ctxt->myDoc != NULL) &&
10613
24.9k
  (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10614
230
  xmlFreeDoc(ctxt->myDoc);
10615
230
  ctxt->myDoc = NULL;
10616
230
    }
10617
10618
24.9k
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10619
14.2k
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10620
14.2k
  if (ctxt->valid)
10621
14.2k
      ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10622
14.2k
  if (ctxt->nsWellFormed)
10623
12.6k
      ctxt->myDoc->properties |= XML_DOC_NSVALID;
10624
14.2k
  if (ctxt->options & XML_PARSE_OLD10)
10625
0
      ctxt->myDoc->properties |= XML_DOC_OLD10;
10626
14.2k
    }
10627
24.9k
    if (! ctxt->wellFormed) {
10628
10.6k
  ctxt->valid = 0;
10629
10.6k
  return(-1);
10630
10.6k
    }
10631
14.2k
    return(0);
10632
24.9k
}
10633
10634
/**
10635
 * xmlParseExtParsedEnt:
10636
 * @ctxt:  an XML parser context
10637
 *
10638
 * parse a general parsed entity
10639
 * An external general parsed entity is well-formed if it matches the
10640
 * production labeled extParsedEnt.
10641
 *
10642
 * [78] extParsedEnt ::= TextDecl? content
10643
 *
10644
 * Returns 0, -1 in case of error. the parser context is augmented
10645
 *                as a result of the parsing.
10646
 */
10647
10648
int
10649
0
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10650
0
    xmlChar start[4];
10651
0
    xmlCharEncoding enc;
10652
10653
0
    if ((ctxt == NULL) || (ctxt->input == NULL))
10654
0
        return(-1);
10655
10656
0
    xmlDetectSAX2(ctxt);
10657
10658
0
    GROW;
10659
10660
    /*
10661
     * SAX: beginning of the document processing.
10662
     */
10663
0
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10664
0
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10665
10666
    /*
10667
     * Get the 4 first bytes and decode the charset
10668
     * if enc != XML_CHAR_ENCODING_NONE
10669
     * plug some encoding conversion routines.
10670
     */
10671
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10672
0
  start[0] = RAW;
10673
0
  start[1] = NXT(1);
10674
0
  start[2] = NXT(2);
10675
0
  start[3] = NXT(3);
10676
0
  enc = xmlDetectCharEncoding(start, 4);
10677
0
  if (enc != XML_CHAR_ENCODING_NONE) {
10678
0
      xmlSwitchEncoding(ctxt, enc);
10679
0
  }
10680
0
    }
10681
10682
10683
0
    if (CUR == 0) {
10684
0
  xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
10685
0
    }
10686
10687
    /*
10688
     * Check for the XMLDecl in the Prolog.
10689
     */
10690
0
    GROW;
10691
0
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
10692
10693
  /*
10694
   * Note that we will switch encoding on the fly.
10695
   */
10696
0
  xmlParseXMLDecl(ctxt);
10697
0
  if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10698
      /*
10699
       * The XML REC instructs us to stop parsing right here
10700
       */
10701
0
      return(-1);
10702
0
  }
10703
0
  SKIP_BLANKS;
10704
0
    } else {
10705
0
  ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10706
0
    }
10707
0
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10708
0
        ctxt->sax->startDocument(ctxt->userData);
10709
0
    if (ctxt->instate == XML_PARSER_EOF)
10710
0
  return(-1);
10711
10712
    /*
10713
     * Doing validity checking on chunk doesn't make sense
10714
     */
10715
0
    ctxt->instate = XML_PARSER_CONTENT;
10716
0
    ctxt->validate = 0;
10717
0
    ctxt->loadsubset = 0;
10718
0
    ctxt->depth = 0;
10719
10720
0
    xmlParseContent(ctxt);
10721
0
    if (ctxt->instate == XML_PARSER_EOF)
10722
0
  return(-1);
10723
10724
0
    if ((RAW == '<') && (NXT(1) == '/')) {
10725
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
10726
0
    } else if (RAW != 0) {
10727
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
10728
0
    }
10729
10730
    /*
10731
     * SAX: end of the document processing.
10732
     */
10733
0
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10734
0
        ctxt->sax->endDocument(ctxt->userData);
10735
10736
0
    if (! ctxt->wellFormed) return(-1);
10737
0
    return(0);
10738
0
}
10739
10740
#ifdef LIBXML_PUSH_ENABLED
10741
/************************************************************************
10742
 *                  *
10743
 *    Progressive parsing interfaces        *
10744
 *                  *
10745
 ************************************************************************/
10746
10747
/**
10748
 * xmlParseLookupChar:
10749
 * @ctxt:  an XML parser context
10750
 * @c:  character
10751
 *
10752
 * Check whether the input buffer contains a character.
10753
 */
10754
static int
10755
0
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
10756
0
    const xmlChar *cur;
10757
10758
0
    if (ctxt->checkIndex == 0) {
10759
0
        cur = ctxt->input->cur + 1;
10760
0
    } else {
10761
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10762
0
    }
10763
10764
0
    if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
10765
0
        size_t index = ctxt->input->end - ctxt->input->cur;
10766
10767
0
        if (index > LONG_MAX) {
10768
0
            ctxt->checkIndex = 0;
10769
0
            return(1);
10770
0
        }
10771
0
        ctxt->checkIndex = index;
10772
0
        return(0);
10773
0
    } else {
10774
0
        ctxt->checkIndex = 0;
10775
0
        return(1);
10776
0
    }
10777
0
}
10778
10779
/**
10780
 * xmlParseLookupString:
10781
 * @ctxt:  an XML parser context
10782
 * @startDelta: delta to apply at the start
10783
 * @str:  string
10784
 * @strLen:  length of string
10785
 *
10786
 * Check whether the input buffer contains a string.
10787
 */
10788
static const xmlChar *
10789
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
10790
0
                     const char *str, size_t strLen) {
10791
0
    const xmlChar *cur, *term;
10792
10793
0
    if (ctxt->checkIndex == 0) {
10794
0
        cur = ctxt->input->cur + startDelta;
10795
0
    } else {
10796
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10797
0
    }
10798
10799
0
    term = BAD_CAST strstr((const char *) cur, str);
10800
0
    if (term == NULL) {
10801
0
        const xmlChar *end = ctxt->input->end;
10802
0
        size_t index;
10803
10804
        /* Rescan (strLen - 1) characters. */
10805
0
        if ((size_t) (end - cur) < strLen)
10806
0
            end = cur;
10807
0
        else
10808
0
            end -= strLen - 1;
10809
0
        index = end - ctxt->input->cur;
10810
0
        if (index > LONG_MAX) {
10811
0
            ctxt->checkIndex = 0;
10812
0
            return(ctxt->input->end - strLen);
10813
0
        }
10814
0
        ctxt->checkIndex = index;
10815
0
    } else {
10816
0
        ctxt->checkIndex = 0;
10817
0
    }
10818
10819
0
    return(term);
10820
0
}
10821
10822
/**
10823
 * xmlParseLookupCharData:
10824
 * @ctxt:  an XML parser context
10825
 *
10826
 * Check whether the input buffer contains terminated char data.
10827
 */
10828
static int
10829
0
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
10830
0
    const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
10831
0
    const xmlChar *end = ctxt->input->end;
10832
0
    size_t index;
10833
10834
0
    while (cur < end) {
10835
0
        if ((*cur == '<') || (*cur == '&')) {
10836
0
            ctxt->checkIndex = 0;
10837
0
            return(1);
10838
0
        }
10839
0
        cur++;
10840
0
    }
10841
10842
0
    index = cur - ctxt->input->cur;
10843
0
    if (index > LONG_MAX) {
10844
0
        ctxt->checkIndex = 0;
10845
0
        return(1);
10846
0
    }
10847
0
    ctxt->checkIndex = index;
10848
0
    return(0);
10849
0
}
10850
10851
/**
10852
 * xmlParseLookupGt:
10853
 * @ctxt:  an XML parser context
10854
 *
10855
 * Check whether there's enough data in the input buffer to finish parsing
10856
 * a start tag. This has to take quotes into account.
10857
 */
10858
static int
10859
0
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
10860
0
    const xmlChar *cur;
10861
0
    const xmlChar *end = ctxt->input->end;
10862
0
    int state = ctxt->endCheckState;
10863
0
    size_t index;
10864
10865
0
    if (ctxt->checkIndex == 0)
10866
0
        cur = ctxt->input->cur + 1;
10867
0
    else
10868
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10869
10870
0
    while (cur < end) {
10871
0
        if (state) {
10872
0
            if (*cur == state)
10873
0
                state = 0;
10874
0
        } else if (*cur == '\'' || *cur == '"') {
10875
0
            state = *cur;
10876
0
        } else if (*cur == '>') {
10877
0
            ctxt->checkIndex = 0;
10878
0
            ctxt->endCheckState = 0;
10879
0
            return(1);
10880
0
        }
10881
0
        cur++;
10882
0
    }
10883
10884
0
    index = cur - ctxt->input->cur;
10885
0
    if (index > LONG_MAX) {
10886
0
        ctxt->checkIndex = 0;
10887
0
        ctxt->endCheckState = 0;
10888
0
        return(1);
10889
0
    }
10890
0
    ctxt->checkIndex = index;
10891
0
    ctxt->endCheckState = state;
10892
0
    return(0);
10893
0
}
10894
10895
/**
10896
 * xmlParseLookupInternalSubset:
10897
 * @ctxt:  an XML parser context
10898
 *
10899
 * Check whether there's enough data in the input buffer to finish parsing
10900
 * the internal subset.
10901
 */
10902
static int
10903
0
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
10904
    /*
10905
     * Sorry, but progressive parsing of the internal subset is not
10906
     * supported. We first check that the full content of the internal
10907
     * subset is available and parsing is launched only at that point.
10908
     * Internal subset ends with "']' S? '>'" in an unescaped section and
10909
     * not in a ']]>' sequence which are conditional sections.
10910
     */
10911
0
    const xmlChar *cur, *start;
10912
0
    const xmlChar *end = ctxt->input->end;
10913
0
    int state = ctxt->endCheckState;
10914
0
    size_t index;
10915
10916
0
    if (ctxt->checkIndex == 0) {
10917
0
        cur = ctxt->input->cur + 1;
10918
0
    } else {
10919
0
        cur = ctxt->input->cur + ctxt->checkIndex;
10920
0
    }
10921
0
    start = cur;
10922
10923
0
    while (cur < end) {
10924
0
        if (state == '-') {
10925
0
            if ((*cur == '-') &&
10926
0
                (cur[1] == '-') &&
10927
0
                (cur[2] == '>')) {
10928
0
                state = 0;
10929
0
                cur += 3;
10930
0
                start = cur;
10931
0
                continue;
10932
0
            }
10933
0
        }
10934
0
        else if (state == ']') {
10935
0
            if (*cur == '>') {
10936
0
                ctxt->checkIndex = 0;
10937
0
                ctxt->endCheckState = 0;
10938
0
                return(1);
10939
0
            }
10940
0
            if (IS_BLANK_CH(*cur)) {
10941
0
                state = ' ';
10942
0
            } else if (*cur != ']') {
10943
0
                state = 0;
10944
0
                start = cur;
10945
0
                continue;
10946
0
            }
10947
0
        }
10948
0
        else if (state == ' ') {
10949
0
            if (*cur == '>') {
10950
0
                ctxt->checkIndex = 0;
10951
0
                ctxt->endCheckState = 0;
10952
0
                return(1);
10953
0
            }
10954
0
            if (!IS_BLANK_CH(*cur)) {
10955
0
                state = 0;
10956
0
                start = cur;
10957
0
                continue;
10958
0
            }
10959
0
        }
10960
0
        else if (state != 0) {
10961
0
            if (*cur == state) {
10962
0
                state = 0;
10963
0
                start = cur + 1;
10964
0
            }
10965
0
        }
10966
0
        else if (*cur == '<') {
10967
0
            if ((cur[1] == '!') &&
10968
0
                (cur[2] == '-') &&
10969
0
                (cur[3] == '-')) {
10970
0
                state = '-';
10971
0
                cur += 4;
10972
                /* Don't treat <!--> as comment */
10973
0
                start = cur;
10974
0
                continue;
10975
0
            }
10976
0
        }
10977
0
        else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
10978
0
            state = *cur;
10979
0
        }
10980
10981
0
        cur++;
10982
0
    }
10983
10984
    /*
10985
     * Rescan the three last characters to detect "<!--" and "-->"
10986
     * split across chunks.
10987
     */
10988
0
    if ((state == 0) || (state == '-')) {
10989
0
        if (cur - start < 3)
10990
0
            cur = start;
10991
0
        else
10992
0
            cur -= 3;
10993
0
    }
10994
0
    index = cur - ctxt->input->cur;
10995
0
    if (index > LONG_MAX) {
10996
0
        ctxt->checkIndex = 0;
10997
0
        ctxt->endCheckState = 0;
10998
0
        return(1);
10999
0
    }
11000
0
    ctxt->checkIndex = index;
11001
0
    ctxt->endCheckState = state;
11002
0
    return(0);
11003
0
}
11004
11005
/**
11006
 * xmlCheckCdataPush:
11007
 * @cur: pointer to the block of characters
11008
 * @len: length of the block in bytes
11009
 * @complete: 1 if complete CDATA block is passed in, 0 if partial block
11010
 *
11011
 * Check that the block of characters is okay as SCdata content [20]
11012
 *
11013
 * Returns the number of bytes to pass if okay, a negative index where an
11014
 *         UTF-8 error occurred otherwise
11015
 */
11016
static int
11017
0
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11018
0
    int ix;
11019
0
    unsigned char c;
11020
0
    int codepoint;
11021
11022
0
    if ((utf == NULL) || (len <= 0))
11023
0
        return(0);
11024
11025
0
    for (ix = 0; ix < len;) {      /* string is 0-terminated */
11026
0
        c = utf[ix];
11027
0
        if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11028
0
      if (c >= 0x20)
11029
0
    ix++;
11030
0
      else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11031
0
          ix++;
11032
0
      else
11033
0
          return(-ix);
11034
0
  } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11035
0
      if (ix + 2 > len) return(complete ? -ix : ix);
11036
0
      if ((utf[ix+1] & 0xc0 ) != 0x80)
11037
0
          return(-ix);
11038
0
      codepoint = (utf[ix] & 0x1f) << 6;
11039
0
      codepoint |= utf[ix+1] & 0x3f;
11040
0
      if (!xmlIsCharQ(codepoint))
11041
0
          return(-ix);
11042
0
      ix += 2;
11043
0
  } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11044
0
      if (ix + 3 > len) return(complete ? -ix : ix);
11045
0
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11046
0
          ((utf[ix+2] & 0xc0) != 0x80))
11047
0
        return(-ix);
11048
0
      codepoint = (utf[ix] & 0xf) << 12;
11049
0
      codepoint |= (utf[ix+1] & 0x3f) << 6;
11050
0
      codepoint |= utf[ix+2] & 0x3f;
11051
0
      if (!xmlIsCharQ(codepoint))
11052
0
          return(-ix);
11053
0
      ix += 3;
11054
0
  } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11055
0
      if (ix + 4 > len) return(complete ? -ix : ix);
11056
0
      if (((utf[ix+1] & 0xc0) != 0x80) ||
11057
0
          ((utf[ix+2] & 0xc0) != 0x80) ||
11058
0
    ((utf[ix+3] & 0xc0) != 0x80))
11059
0
        return(-ix);
11060
0
      codepoint = (utf[ix] & 0x7) << 18;
11061
0
      codepoint |= (utf[ix+1] & 0x3f) << 12;
11062
0
      codepoint |= (utf[ix+2] & 0x3f) << 6;
11063
0
      codepoint |= utf[ix+3] & 0x3f;
11064
0
      if (!xmlIsCharQ(codepoint))
11065
0
          return(-ix);
11066
0
      ix += 4;
11067
0
  } else       /* unknown encoding */
11068
0
      return(-ix);
11069
0
      }
11070
0
      return(ix);
11071
0
}
11072
11073
/**
11074
 * xmlParseTryOrFinish:
11075
 * @ctxt:  an XML parser context
11076
 * @terminate:  last chunk indicator
11077
 *
11078
 * Try to progress on parsing
11079
 *
11080
 * Returns zero if no parsing was possible
11081
 */
11082
static int
11083
0
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11084
0
    int ret = 0;
11085
0
    int tlen;
11086
0
    size_t avail;
11087
0
    xmlChar cur, next;
11088
11089
0
    if (ctxt->input == NULL)
11090
0
        return(0);
11091
11092
#ifdef DEBUG_PUSH
11093
    switch (ctxt->instate) {
11094
  case XML_PARSER_EOF:
11095
      xmlGenericError(xmlGenericErrorContext,
11096
        "PP: try EOF\n"); break;
11097
  case XML_PARSER_START:
11098
      xmlGenericError(xmlGenericErrorContext,
11099
        "PP: try START\n"); break;
11100
  case XML_PARSER_MISC:
11101
      xmlGenericError(xmlGenericErrorContext,
11102
        "PP: try MISC\n");break;
11103
  case XML_PARSER_COMMENT:
11104
      xmlGenericError(xmlGenericErrorContext,
11105
        "PP: try COMMENT\n");break;
11106
  case XML_PARSER_PROLOG:
11107
      xmlGenericError(xmlGenericErrorContext,
11108
        "PP: try PROLOG\n");break;
11109
  case XML_PARSER_START_TAG:
11110
      xmlGenericError(xmlGenericErrorContext,
11111
        "PP: try START_TAG\n");break;
11112
  case XML_PARSER_CONTENT:
11113
      xmlGenericError(xmlGenericErrorContext,
11114
        "PP: try CONTENT\n");break;
11115
  case XML_PARSER_CDATA_SECTION:
11116
      xmlGenericError(xmlGenericErrorContext,
11117
        "PP: try CDATA_SECTION\n");break;
11118
  case XML_PARSER_END_TAG:
11119
      xmlGenericError(xmlGenericErrorContext,
11120
        "PP: try END_TAG\n");break;
11121
  case XML_PARSER_ENTITY_DECL:
11122
      xmlGenericError(xmlGenericErrorContext,
11123
        "PP: try ENTITY_DECL\n");break;
11124
  case XML_PARSER_ENTITY_VALUE:
11125
      xmlGenericError(xmlGenericErrorContext,
11126
        "PP: try ENTITY_VALUE\n");break;
11127
  case XML_PARSER_ATTRIBUTE_VALUE:
11128
      xmlGenericError(xmlGenericErrorContext,
11129
        "PP: try ATTRIBUTE_VALUE\n");break;
11130
  case XML_PARSER_DTD:
11131
      xmlGenericError(xmlGenericErrorContext,
11132
        "PP: try DTD\n");break;
11133
  case XML_PARSER_EPILOG:
11134
      xmlGenericError(xmlGenericErrorContext,
11135
        "PP: try EPILOG\n");break;
11136
  case XML_PARSER_PI:
11137
      xmlGenericError(xmlGenericErrorContext,
11138
        "PP: try PI\n");break;
11139
        case XML_PARSER_IGNORE:
11140
            xmlGenericError(xmlGenericErrorContext,
11141
        "PP: try IGNORE\n");break;
11142
    }
11143
#endif
11144
11145
0
    if ((ctxt->input != NULL) &&
11146
0
        (ctxt->input->cur - ctxt->input->base > 4096)) {
11147
0
        xmlParserShrink(ctxt);
11148
0
    }
11149
11150
0
    while (ctxt->instate != XML_PARSER_EOF) {
11151
0
  if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11152
0
      return(0);
11153
11154
0
  if (ctxt->input == NULL) break;
11155
0
  if (ctxt->input->buf != NULL) {
11156
      /*
11157
       * If we are operating on converted input, try to flush
11158
       * remaining chars to avoid them stalling in the non-converted
11159
       * buffer.
11160
       */
11161
0
      if ((ctxt->input->buf->raw != NULL) &&
11162
0
    (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
11163
0
                size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11164
0
                                                 ctxt->input);
11165
0
    size_t current = ctxt->input->cur - ctxt->input->base;
11166
0
                int res;
11167
11168
0
    res = xmlParserInputBufferPush(ctxt->input->buf, 0, "");
11169
0
                xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11170
0
                                      base, current);
11171
0
                if (res < 0) {
11172
0
                    xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
11173
0
                    xmlHaltParser(ctxt);
11174
0
                    return(0);
11175
0
                }
11176
0
      }
11177
0
  }
11178
0
        avail = ctxt->input->end - ctxt->input->cur;
11179
0
        if (avail < 1)
11180
0
      goto done;
11181
0
        switch (ctxt->instate) {
11182
0
            case XML_PARSER_EOF:
11183
          /*
11184
     * Document parsing is done !
11185
     */
11186
0
          goto done;
11187
0
            case XML_PARSER_START:
11188
0
    if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11189
0
        xmlChar start[4];
11190
0
        xmlCharEncoding enc;
11191
11192
        /*
11193
         * Very first chars read from the document flow.
11194
         */
11195
0
        if (avail < 4)
11196
0
      goto done;
11197
11198
        /*
11199
         * Get the 4 first bytes and decode the charset
11200
         * if enc != XML_CHAR_ENCODING_NONE
11201
         * plug some encoding conversion routines,
11202
         * else xmlSwitchEncoding will set to (default)
11203
         * UTF8.
11204
         */
11205
0
        start[0] = RAW;
11206
0
        start[1] = NXT(1);
11207
0
        start[2] = NXT(2);
11208
0
        start[3] = NXT(3);
11209
0
        enc = xmlDetectCharEncoding(start, 4);
11210
                    /*
11211
                     * We need more bytes to detect EBCDIC code pages.
11212
                     * See xmlDetectEBCDIC.
11213
                     */
11214
0
                    if ((enc == XML_CHAR_ENCODING_EBCDIC) &&
11215
0
                        (!terminate) && (avail < 200))
11216
0
                        goto done;
11217
0
        xmlSwitchEncoding(ctxt, enc);
11218
0
        break;
11219
0
    }
11220
11221
0
    if (avail < 2)
11222
0
        goto done;
11223
0
    cur = ctxt->input->cur[0];
11224
0
    next = ctxt->input->cur[1];
11225
0
    if (cur == 0) {
11226
0
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11227
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11228
0
                  &xmlDefaultSAXLocator);
11229
0
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11230
0
        xmlHaltParser(ctxt);
11231
#ifdef DEBUG_PUSH
11232
        xmlGenericError(xmlGenericErrorContext,
11233
          "PP: entering EOF\n");
11234
#endif
11235
0
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11236
0
      ctxt->sax->endDocument(ctxt->userData);
11237
0
        goto done;
11238
0
    }
11239
0
          if ((cur == '<') && (next == '?')) {
11240
        /* PI or XML decl */
11241
0
        if (avail < 5) goto done;
11242
0
        if ((!terminate) &&
11243
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11244
0
      goto done;
11245
0
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11246
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11247
0
                  &xmlDefaultSAXLocator);
11248
0
        if ((ctxt->input->cur[2] == 'x') &&
11249
0
      (ctxt->input->cur[3] == 'm') &&
11250
0
      (ctxt->input->cur[4] == 'l') &&
11251
0
      (IS_BLANK_CH(ctxt->input->cur[5]))) {
11252
0
      ret += 5;
11253
#ifdef DEBUG_PUSH
11254
      xmlGenericError(xmlGenericErrorContext,
11255
        "PP: Parsing XML Decl\n");
11256
#endif
11257
0
      xmlParseXMLDecl(ctxt);
11258
0
      if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11259
          /*
11260
           * The XML REC instructs us to stop parsing right
11261
           * here
11262
           */
11263
0
          xmlHaltParser(ctxt);
11264
0
          return(0);
11265
0
      }
11266
0
      ctxt->standalone = ctxt->input->standalone;
11267
0
      if ((ctxt->encoding == NULL) &&
11268
0
          (ctxt->input->encoding != NULL))
11269
0
          ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11270
0
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11271
0
          (!ctxt->disableSAX))
11272
0
          ctxt->sax->startDocument(ctxt->userData);
11273
0
      ctxt->instate = XML_PARSER_MISC;
11274
#ifdef DEBUG_PUSH
11275
      xmlGenericError(xmlGenericErrorContext,
11276
        "PP: entering MISC\n");
11277
#endif
11278
0
        } else {
11279
0
      ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11280
0
      if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11281
0
          (!ctxt->disableSAX))
11282
0
          ctxt->sax->startDocument(ctxt->userData);
11283
0
      ctxt->instate = XML_PARSER_MISC;
11284
#ifdef DEBUG_PUSH
11285
      xmlGenericError(xmlGenericErrorContext,
11286
        "PP: entering MISC\n");
11287
#endif
11288
0
        }
11289
0
    } else {
11290
0
        if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11291
0
      ctxt->sax->setDocumentLocator(ctxt->userData,
11292
0
                  &xmlDefaultSAXLocator);
11293
0
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11294
0
        if (ctxt->version == NULL) {
11295
0
            xmlErrMemory(ctxt, NULL);
11296
0
      break;
11297
0
        }
11298
0
        if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11299
0
            (!ctxt->disableSAX))
11300
0
      ctxt->sax->startDocument(ctxt->userData);
11301
0
        ctxt->instate = XML_PARSER_MISC;
11302
#ifdef DEBUG_PUSH
11303
        xmlGenericError(xmlGenericErrorContext,
11304
          "PP: entering MISC\n");
11305
#endif
11306
0
    }
11307
0
    break;
11308
0
            case XML_PARSER_START_TAG: {
11309
0
          const xmlChar *name;
11310
0
    const xmlChar *prefix = NULL;
11311
0
    const xmlChar *URI = NULL;
11312
0
                int line = ctxt->input->line;
11313
0
    int nsNr = ctxt->nsNr;
11314
11315
0
    if ((avail < 2) && (ctxt->inputNr == 1))
11316
0
        goto done;
11317
0
    cur = ctxt->input->cur[0];
11318
0
          if (cur != '<') {
11319
0
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11320
0
        xmlHaltParser(ctxt);
11321
0
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11322
0
      ctxt->sax->endDocument(ctxt->userData);
11323
0
        goto done;
11324
0
    }
11325
0
    if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11326
0
                    goto done;
11327
0
    if (ctxt->spaceNr == 0)
11328
0
        spacePush(ctxt, -1);
11329
0
    else if (*ctxt->space == -2)
11330
0
        spacePush(ctxt, -1);
11331
0
    else
11332
0
        spacePush(ctxt, *ctxt->space);
11333
0
#ifdef LIBXML_SAX1_ENABLED
11334
0
    if (ctxt->sax2)
11335
0
#endif /* LIBXML_SAX1_ENABLED */
11336
0
        name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
11337
0
#ifdef LIBXML_SAX1_ENABLED
11338
0
    else
11339
0
        name = xmlParseStartTag(ctxt);
11340
0
#endif /* LIBXML_SAX1_ENABLED */
11341
0
    if (ctxt->instate == XML_PARSER_EOF)
11342
0
        goto done;
11343
0
    if (name == NULL) {
11344
0
        spacePop(ctxt);
11345
0
        xmlHaltParser(ctxt);
11346
0
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11347
0
      ctxt->sax->endDocument(ctxt->userData);
11348
0
        goto done;
11349
0
    }
11350
0
#ifdef LIBXML_VALID_ENABLED
11351
    /*
11352
     * [ VC: Root Element Type ]
11353
     * The Name in the document type declaration must match
11354
     * the element type of the root element.
11355
     */
11356
0
    if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11357
0
        ctxt->node && (ctxt->node == ctxt->myDoc->children))
11358
0
        ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11359
0
#endif /* LIBXML_VALID_ENABLED */
11360
11361
    /*
11362
     * Check for an Empty Element.
11363
     */
11364
0
    if ((RAW == '/') && (NXT(1) == '>')) {
11365
0
        SKIP(2);
11366
11367
0
        if (ctxt->sax2) {
11368
0
      if ((ctxt->sax != NULL) &&
11369
0
          (ctxt->sax->endElementNs != NULL) &&
11370
0
          (!ctxt->disableSAX))
11371
0
          ctxt->sax->endElementNs(ctxt->userData, name,
11372
0
                                  prefix, URI);
11373
0
      if (ctxt->nsNr - nsNr > 0)
11374
0
          nsPop(ctxt, ctxt->nsNr - nsNr);
11375
0
#ifdef LIBXML_SAX1_ENABLED
11376
0
        } else {
11377
0
      if ((ctxt->sax != NULL) &&
11378
0
          (ctxt->sax->endElement != NULL) &&
11379
0
          (!ctxt->disableSAX))
11380
0
          ctxt->sax->endElement(ctxt->userData, name);
11381
0
#endif /* LIBXML_SAX1_ENABLED */
11382
0
        }
11383
0
        if (ctxt->instate == XML_PARSER_EOF)
11384
0
      goto done;
11385
0
        spacePop(ctxt);
11386
0
        if (ctxt->nameNr == 0) {
11387
0
      ctxt->instate = XML_PARSER_EPILOG;
11388
0
        } else {
11389
0
      ctxt->instate = XML_PARSER_CONTENT;
11390
0
        }
11391
0
        break;
11392
0
    }
11393
0
    if (RAW == '>') {
11394
0
        NEXT;
11395
0
    } else {
11396
0
        xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11397
0
           "Couldn't find end of Start Tag %s\n",
11398
0
           name);
11399
0
        nodePop(ctxt);
11400
0
        spacePop(ctxt);
11401
0
    }
11402
0
                nameNsPush(ctxt, name, prefix, URI, line, ctxt->nsNr - nsNr);
11403
11404
0
    ctxt->instate = XML_PARSER_CONTENT;
11405
0
                break;
11406
0
      }
11407
0
            case XML_PARSER_CONTENT: {
11408
0
    if ((avail < 2) && (ctxt->inputNr == 1))
11409
0
        goto done;
11410
0
    cur = ctxt->input->cur[0];
11411
0
    next = ctxt->input->cur[1];
11412
11413
0
    if ((cur == '<') && (next == '/')) {
11414
0
        ctxt->instate = XML_PARSER_END_TAG;
11415
0
        break;
11416
0
          } else if ((cur == '<') && (next == '?')) {
11417
0
        if ((!terminate) &&
11418
0
            (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11419
0
      goto done;
11420
0
        xmlParsePI(ctxt);
11421
0
        ctxt->instate = XML_PARSER_CONTENT;
11422
0
    } else if ((cur == '<') && (next != '!')) {
11423
0
        ctxt->instate = XML_PARSER_START_TAG;
11424
0
        break;
11425
0
    } else if ((cur == '<') && (next == '!') &&
11426
0
               (ctxt->input->cur[2] == '-') &&
11427
0
         (ctxt->input->cur[3] == '-')) {
11428
0
        if ((!terminate) &&
11429
0
            (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11430
0
      goto done;
11431
0
        xmlParseComment(ctxt);
11432
0
        ctxt->instate = XML_PARSER_CONTENT;
11433
0
    } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11434
0
        (ctxt->input->cur[2] == '[') &&
11435
0
        (ctxt->input->cur[3] == 'C') &&
11436
0
        (ctxt->input->cur[4] == 'D') &&
11437
0
        (ctxt->input->cur[5] == 'A') &&
11438
0
        (ctxt->input->cur[6] == 'T') &&
11439
0
        (ctxt->input->cur[7] == 'A') &&
11440
0
        (ctxt->input->cur[8] == '[')) {
11441
0
        SKIP(9);
11442
0
        ctxt->instate = XML_PARSER_CDATA_SECTION;
11443
0
        break;
11444
0
    } else if ((cur == '<') && (next == '!') &&
11445
0
               (avail < 9)) {
11446
0
        goto done;
11447
0
    } else if (cur == '<') {
11448
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11449
0
                    "detected an error in element content\n");
11450
0
                    SKIP(1);
11451
0
    } else if (cur == '&') {
11452
0
        if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11453
0
      goto done;
11454
0
        xmlParseReference(ctxt);
11455
0
    } else {
11456
        /* TODO Avoid the extra copy, handle directly !!! */
11457
        /*
11458
         * Goal of the following test is:
11459
         *  - minimize calls to the SAX 'character' callback
11460
         *    when they are mergeable
11461
         *  - handle a problem for isBlank when we only parse
11462
         *    a sequence of blank chars and the next one is
11463
         *    not available to check against '<' presence.
11464
         *  - tries to homogenize the differences in SAX
11465
         *    callbacks between the push and pull versions
11466
         *    of the parser.
11467
         */
11468
0
        if ((ctxt->inputNr == 1) &&
11469
0
            (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11470
0
      if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11471
0
          goto done;
11472
0
                    }
11473
0
                    ctxt->checkIndex = 0;
11474
0
        xmlParseCharDataInternal(ctxt, !terminate);
11475
0
    }
11476
0
    break;
11477
0
      }
11478
0
            case XML_PARSER_END_TAG:
11479
0
    if (avail < 2)
11480
0
        goto done;
11481
0
    if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11482
0
        goto done;
11483
0
    if (ctxt->sax2) {
11484
0
              xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11485
0
        nameNsPop(ctxt);
11486
0
    }
11487
0
#ifdef LIBXML_SAX1_ENABLED
11488
0
      else
11489
0
        xmlParseEndTag1(ctxt, 0);
11490
0
#endif /* LIBXML_SAX1_ENABLED */
11491
0
    if (ctxt->instate == XML_PARSER_EOF) {
11492
        /* Nothing */
11493
0
    } else if (ctxt->nameNr == 0) {
11494
0
        ctxt->instate = XML_PARSER_EPILOG;
11495
0
    } else {
11496
0
        ctxt->instate = XML_PARSER_CONTENT;
11497
0
    }
11498
0
    break;
11499
0
            case XML_PARSER_CDATA_SECTION: {
11500
          /*
11501
     * The Push mode need to have the SAX callback for
11502
     * cdataBlock merge back contiguous callbacks.
11503
     */
11504
0
    const xmlChar *term;
11505
11506
0
                if (terminate) {
11507
                    /*
11508
                     * Don't call xmlParseLookupString. If 'terminate'
11509
                     * is set, checkIndex is invalid.
11510
                     */
11511
0
                    term = BAD_CAST strstr((const char *) ctxt->input->cur,
11512
0
                                           "]]>");
11513
0
                } else {
11514
0
        term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11515
0
                }
11516
11517
0
    if (term == NULL) {
11518
0
        int tmp, size;
11519
11520
0
                    if (terminate) {
11521
                        /* Unfinished CDATA section */
11522
0
                        size = ctxt->input->end - ctxt->input->cur;
11523
0
                    } else {
11524
0
                        if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11525
0
                            goto done;
11526
0
                        ctxt->checkIndex = 0;
11527
                        /* XXX: Why don't we pass the full buffer? */
11528
0
                        size = XML_PARSER_BIG_BUFFER_SIZE;
11529
0
                    }
11530
0
                    tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11531
0
                    if (tmp <= 0) {
11532
0
                        tmp = -tmp;
11533
0
                        ctxt->input->cur += tmp;
11534
0
                        goto encoding_error;
11535
0
                    }
11536
0
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11537
0
                        if (ctxt->sax->cdataBlock != NULL)
11538
0
                            ctxt->sax->cdataBlock(ctxt->userData,
11539
0
                                                  ctxt->input->cur, tmp);
11540
0
                        else if (ctxt->sax->characters != NULL)
11541
0
                            ctxt->sax->characters(ctxt->userData,
11542
0
                                                  ctxt->input->cur, tmp);
11543
0
                    }
11544
0
                    if (ctxt->instate == XML_PARSER_EOF)
11545
0
                        goto done;
11546
0
                    SKIPL(tmp);
11547
0
    } else {
11548
0
                    int base = term - CUR_PTR;
11549
0
        int tmp;
11550
11551
0
        tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
11552
0
        if ((tmp < 0) || (tmp != base)) {
11553
0
      tmp = -tmp;
11554
0
      ctxt->input->cur += tmp;
11555
0
      goto encoding_error;
11556
0
        }
11557
0
        if ((ctxt->sax != NULL) && (base == 0) &&
11558
0
            (ctxt->sax->cdataBlock != NULL) &&
11559
0
            (!ctxt->disableSAX)) {
11560
      /*
11561
       * Special case to provide identical behaviour
11562
       * between pull and push parsers on empty CDATA
11563
       * sections
11564
       */
11565
0
       if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11566
0
           (!strncmp((const char *)&ctxt->input->cur[-9],
11567
0
                     "<![CDATA[", 9)))
11568
0
           ctxt->sax->cdataBlock(ctxt->userData,
11569
0
                                 BAD_CAST "", 0);
11570
0
        } else if ((ctxt->sax != NULL) && (base > 0) &&
11571
0
      (!ctxt->disableSAX)) {
11572
0
      if (ctxt->sax->cdataBlock != NULL)
11573
0
          ctxt->sax->cdataBlock(ctxt->userData,
11574
0
              ctxt->input->cur, base);
11575
0
      else if (ctxt->sax->characters != NULL)
11576
0
          ctxt->sax->characters(ctxt->userData,
11577
0
              ctxt->input->cur, base);
11578
0
        }
11579
0
        if (ctxt->instate == XML_PARSER_EOF)
11580
0
      goto done;
11581
0
        SKIPL(base + 3);
11582
0
        ctxt->instate = XML_PARSER_CONTENT;
11583
#ifdef DEBUG_PUSH
11584
        xmlGenericError(xmlGenericErrorContext,
11585
          "PP: entering CONTENT\n");
11586
#endif
11587
0
    }
11588
0
    break;
11589
0
      }
11590
0
            case XML_PARSER_MISC:
11591
0
            case XML_PARSER_PROLOG:
11592
0
            case XML_PARSER_EPILOG:
11593
0
    SKIP_BLANKS;
11594
0
                avail = ctxt->input->end - ctxt->input->cur;
11595
0
    if (avail < 2)
11596
0
        goto done;
11597
0
    cur = ctxt->input->cur[0];
11598
0
    next = ctxt->input->cur[1];
11599
0
          if ((cur == '<') && (next == '?')) {
11600
0
        if ((!terminate) &&
11601
0
                        (!xmlParseLookupString(ctxt, 2, "?>", 2)))
11602
0
      goto done;
11603
#ifdef DEBUG_PUSH
11604
        xmlGenericError(xmlGenericErrorContext,
11605
          "PP: Parsing PI\n");
11606
#endif
11607
0
        xmlParsePI(ctxt);
11608
0
        if (ctxt->instate == XML_PARSER_EOF)
11609
0
      goto done;
11610
0
    } else if ((cur == '<') && (next == '!') &&
11611
0
        (ctxt->input->cur[2] == '-') &&
11612
0
        (ctxt->input->cur[3] == '-')) {
11613
0
        if ((!terminate) &&
11614
0
                        (!xmlParseLookupString(ctxt, 4, "-->", 3)))
11615
0
      goto done;
11616
#ifdef DEBUG_PUSH
11617
        xmlGenericError(xmlGenericErrorContext,
11618
          "PP: Parsing Comment\n");
11619
#endif
11620
0
        xmlParseComment(ctxt);
11621
0
        if (ctxt->instate == XML_PARSER_EOF)
11622
0
      goto done;
11623
0
    } else if ((ctxt->instate == XML_PARSER_MISC) &&
11624
0
                    (cur == '<') && (next == '!') &&
11625
0
        (ctxt->input->cur[2] == 'D') &&
11626
0
        (ctxt->input->cur[3] == 'O') &&
11627
0
        (ctxt->input->cur[4] == 'C') &&
11628
0
        (ctxt->input->cur[5] == 'T') &&
11629
0
        (ctxt->input->cur[6] == 'Y') &&
11630
0
        (ctxt->input->cur[7] == 'P') &&
11631
0
        (ctxt->input->cur[8] == 'E')) {
11632
0
        if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11633
0
                        goto done;
11634
#ifdef DEBUG_PUSH
11635
        xmlGenericError(xmlGenericErrorContext,
11636
          "PP: Parsing internal subset\n");
11637
#endif
11638
0
        ctxt->inSubset = 1;
11639
0
        xmlParseDocTypeDecl(ctxt);
11640
0
        if (ctxt->instate == XML_PARSER_EOF)
11641
0
      goto done;
11642
0
        if (RAW == '[') {
11643
0
      ctxt->instate = XML_PARSER_DTD;
11644
#ifdef DEBUG_PUSH
11645
      xmlGenericError(xmlGenericErrorContext,
11646
        "PP: entering DTD\n");
11647
#endif
11648
0
        } else {
11649
      /*
11650
       * Create and update the external subset.
11651
       */
11652
0
      ctxt->inSubset = 2;
11653
0
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11654
0
          (ctxt->sax->externalSubset != NULL))
11655
0
          ctxt->sax->externalSubset(ctxt->userData,
11656
0
            ctxt->intSubName, ctxt->extSubSystem,
11657
0
            ctxt->extSubURI);
11658
0
      ctxt->inSubset = 0;
11659
0
      xmlCleanSpecialAttr(ctxt);
11660
0
      ctxt->instate = XML_PARSER_PROLOG;
11661
#ifdef DEBUG_PUSH
11662
      xmlGenericError(xmlGenericErrorContext,
11663
        "PP: entering PROLOG\n");
11664
#endif
11665
0
        }
11666
0
    } else if ((cur == '<') && (next == '!') &&
11667
0
               (avail <
11668
0
                            (ctxt->instate == XML_PARSER_MISC ? 9 : 4))) {
11669
0
        goto done;
11670
0
    } else if (ctxt->instate == XML_PARSER_EPILOG) {
11671
0
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11672
0
        xmlHaltParser(ctxt);
11673
#ifdef DEBUG_PUSH
11674
        xmlGenericError(xmlGenericErrorContext,
11675
          "PP: entering EOF\n");
11676
#endif
11677
0
        if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11678
0
      ctxt->sax->endDocument(ctxt->userData);
11679
0
        goto done;
11680
0
                } else {
11681
0
        ctxt->instate = XML_PARSER_START_TAG;
11682
#ifdef DEBUG_PUSH
11683
        xmlGenericError(xmlGenericErrorContext,
11684
          "PP: entering START_TAG\n");
11685
#endif
11686
0
    }
11687
0
    break;
11688
0
            case XML_PARSER_DTD: {
11689
0
                if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
11690
0
                    goto done;
11691
0
    xmlParseInternalSubset(ctxt);
11692
0
    if (ctxt->instate == XML_PARSER_EOF)
11693
0
        goto done;
11694
0
    ctxt->inSubset = 2;
11695
0
    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11696
0
        (ctxt->sax->externalSubset != NULL))
11697
0
        ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11698
0
          ctxt->extSubSystem, ctxt->extSubURI);
11699
0
    ctxt->inSubset = 0;
11700
0
    xmlCleanSpecialAttr(ctxt);
11701
0
    if (ctxt->instate == XML_PARSER_EOF)
11702
0
        goto done;
11703
0
    ctxt->instate = XML_PARSER_PROLOG;
11704
#ifdef DEBUG_PUSH
11705
    xmlGenericError(xmlGenericErrorContext,
11706
      "PP: entering PROLOG\n");
11707
#endif
11708
0
                break;
11709
0
      }
11710
0
            case XML_PARSER_COMMENT:
11711
0
    xmlGenericError(xmlGenericErrorContext,
11712
0
      "PP: internal error, state == COMMENT\n");
11713
0
    ctxt->instate = XML_PARSER_CONTENT;
11714
#ifdef DEBUG_PUSH
11715
    xmlGenericError(xmlGenericErrorContext,
11716
      "PP: entering CONTENT\n");
11717
#endif
11718
0
    break;
11719
0
            case XML_PARSER_IGNORE:
11720
0
    xmlGenericError(xmlGenericErrorContext,
11721
0
      "PP: internal error, state == IGNORE");
11722
0
          ctxt->instate = XML_PARSER_DTD;
11723
#ifdef DEBUG_PUSH
11724
    xmlGenericError(xmlGenericErrorContext,
11725
      "PP: entering DTD\n");
11726
#endif
11727
0
          break;
11728
0
            case XML_PARSER_PI:
11729
0
    xmlGenericError(xmlGenericErrorContext,
11730
0
      "PP: internal error, state == PI\n");
11731
0
    ctxt->instate = XML_PARSER_CONTENT;
11732
#ifdef DEBUG_PUSH
11733
    xmlGenericError(xmlGenericErrorContext,
11734
      "PP: entering CONTENT\n");
11735
#endif
11736
0
    break;
11737
0
            case XML_PARSER_ENTITY_DECL:
11738
0
    xmlGenericError(xmlGenericErrorContext,
11739
0
      "PP: internal error, state == ENTITY_DECL\n");
11740
0
    ctxt->instate = XML_PARSER_DTD;
11741
#ifdef DEBUG_PUSH
11742
    xmlGenericError(xmlGenericErrorContext,
11743
      "PP: entering DTD\n");
11744
#endif
11745
0
    break;
11746
0
            case XML_PARSER_ENTITY_VALUE:
11747
0
    xmlGenericError(xmlGenericErrorContext,
11748
0
      "PP: internal error, state == ENTITY_VALUE\n");
11749
0
    ctxt->instate = XML_PARSER_CONTENT;
11750
#ifdef DEBUG_PUSH
11751
    xmlGenericError(xmlGenericErrorContext,
11752
      "PP: entering DTD\n");
11753
#endif
11754
0
    break;
11755
0
            case XML_PARSER_ATTRIBUTE_VALUE:
11756
0
    xmlGenericError(xmlGenericErrorContext,
11757
0
      "PP: internal error, state == ATTRIBUTE_VALUE\n");
11758
0
    ctxt->instate = XML_PARSER_START_TAG;
11759
#ifdef DEBUG_PUSH
11760
    xmlGenericError(xmlGenericErrorContext,
11761
      "PP: entering START_TAG\n");
11762
#endif
11763
0
    break;
11764
0
            case XML_PARSER_SYSTEM_LITERAL:
11765
0
    xmlGenericError(xmlGenericErrorContext,
11766
0
      "PP: internal error, state == SYSTEM_LITERAL\n");
11767
0
    ctxt->instate = XML_PARSER_START_TAG;
11768
#ifdef DEBUG_PUSH
11769
    xmlGenericError(xmlGenericErrorContext,
11770
      "PP: entering START_TAG\n");
11771
#endif
11772
0
    break;
11773
0
            case XML_PARSER_PUBLIC_LITERAL:
11774
0
    xmlGenericError(xmlGenericErrorContext,
11775
0
      "PP: internal error, state == PUBLIC_LITERAL\n");
11776
0
    ctxt->instate = XML_PARSER_START_TAG;
11777
#ifdef DEBUG_PUSH
11778
    xmlGenericError(xmlGenericErrorContext,
11779
      "PP: entering START_TAG\n");
11780
#endif
11781
0
    break;
11782
0
  }
11783
0
    }
11784
0
done:
11785
#ifdef DEBUG_PUSH
11786
    xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11787
#endif
11788
0
    return(ret);
11789
0
encoding_error:
11790
0
    if (ctxt->input->end - ctxt->input->cur < 4) {
11791
0
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11792
0
         "Input is not proper UTF-8, indicate encoding !\n",
11793
0
         NULL, NULL);
11794
0
    } else {
11795
0
        char buffer[150];
11796
11797
0
  snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11798
0
      ctxt->input->cur[0], ctxt->input->cur[1],
11799
0
      ctxt->input->cur[2], ctxt->input->cur[3]);
11800
0
  __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11801
0
         "Input is not proper UTF-8, indicate encoding !\n%s",
11802
0
         BAD_CAST buffer, NULL);
11803
0
    }
11804
0
    return(0);
11805
0
}
11806
11807
/**
11808
 * xmlParseChunk:
11809
 * @ctxt:  an XML parser context
11810
 * @chunk:  an char array
11811
 * @size:  the size in byte of the chunk
11812
 * @terminate:  last chunk indicator
11813
 *
11814
 * Parse a Chunk of memory
11815
 *
11816
 * Returns zero if no error, the xmlParserErrors otherwise.
11817
 */
11818
int
11819
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11820
0
              int terminate) {
11821
0
    int end_in_lf = 0;
11822
11823
0
    if (ctxt == NULL)
11824
0
        return(XML_ERR_INTERNAL_ERROR);
11825
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11826
0
        return(ctxt->errNo);
11827
0
    if (ctxt->instate == XML_PARSER_EOF)
11828
0
        return(-1);
11829
0
    if (ctxt->input == NULL)
11830
0
        return(-1);
11831
11832
0
    ctxt->progressive = 1;
11833
0
    if (ctxt->instate == XML_PARSER_START)
11834
0
        xmlDetectSAX2(ctxt);
11835
0
    if ((size > 0) && (chunk != NULL) && (!terminate) &&
11836
0
        (chunk[size - 1] == '\r')) {
11837
0
  end_in_lf = 1;
11838
0
  size--;
11839
0
    }
11840
11841
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11842
0
        (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF))  {
11843
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
11844
0
  size_t cur = ctxt->input->cur - ctxt->input->base;
11845
0
  int res;
11846
11847
0
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11848
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
11849
0
  if (res < 0) {
11850
0
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
11851
0
      xmlHaltParser(ctxt);
11852
0
      return(ctxt->errNo);
11853
0
  }
11854
#ifdef DEBUG_PUSH
11855
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11856
#endif
11857
11858
0
    } else if (ctxt->instate != XML_PARSER_EOF) {
11859
0
  if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11860
0
      xmlParserInputBufferPtr in = ctxt->input->buf;
11861
0
      if ((in->encoder != NULL) && (in->buffer != NULL) &&
11862
0
        (in->raw != NULL)) {
11863
0
    int nbchars;
11864
0
    size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
11865
0
    size_t current = ctxt->input->cur - ctxt->input->base;
11866
11867
0
    nbchars = xmlCharEncInput(in, terminate);
11868
0
    xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
11869
0
    if (nbchars < 0) {
11870
0
              xmlFatalErr(ctxt, in->error, NULL);
11871
0
                    xmlHaltParser(ctxt);
11872
0
        return(ctxt->errNo);
11873
0
    }
11874
0
      }
11875
0
  }
11876
0
    }
11877
11878
0
    xmlParseTryOrFinish(ctxt, terminate);
11879
0
    if (ctxt->instate == XML_PARSER_EOF)
11880
0
        return(ctxt->errNo);
11881
11882
0
    if ((ctxt->input != NULL) &&
11883
0
         (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
11884
0
         ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
11885
0
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
11886
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
11887
0
        xmlHaltParser(ctxt);
11888
0
    }
11889
0
    if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11890
0
        return(ctxt->errNo);
11891
11892
0
    if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11893
0
        (ctxt->input->buf != NULL)) {
11894
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11895
0
           ctxt->input);
11896
0
  size_t current = ctxt->input->cur - ctxt->input->base;
11897
0
        int res;
11898
11899
0
  res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11900
0
  xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11901
0
            base, current);
11902
0
        if (res < 0) {
11903
0
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
11904
0
            xmlHaltParser(ctxt);
11905
0
            return(ctxt->errNo);
11906
0
        }
11907
0
    }
11908
0
    if (terminate) {
11909
  /*
11910
   * Check for termination
11911
   */
11912
0
  if ((ctxt->instate != XML_PARSER_EOF) &&
11913
0
      (ctxt->instate != XML_PARSER_EPILOG)) {
11914
0
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11915
0
  }
11916
0
  if ((ctxt->instate == XML_PARSER_EPILOG) &&
11917
0
            (ctxt->input->cur < ctxt->input->end)) {
11918
0
      xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11919
0
  }
11920
0
  if (ctxt->instate != XML_PARSER_EOF) {
11921
0
      if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11922
0
    ctxt->sax->endDocument(ctxt->userData);
11923
0
  }
11924
0
  ctxt->instate = XML_PARSER_EOF;
11925
0
    }
11926
0
    if (ctxt->wellFormed == 0)
11927
0
  return((xmlParserErrors) ctxt->errNo);
11928
0
    else
11929
0
        return(0);
11930
0
}
11931
11932
/************************************************************************
11933
 *                  *
11934
 *    I/O front end functions to the parser     *
11935
 *                  *
11936
 ************************************************************************/
11937
11938
/**
11939
 * xmlCreatePushParserCtxt:
11940
 * @sax:  a SAX handler
11941
 * @user_data:  The user data returned on SAX callbacks
11942
 * @chunk:  a pointer to an array of chars
11943
 * @size:  number of chars in the array
11944
 * @filename:  an optional file name or URI
11945
 *
11946
 * Create a parser context for using the XML parser in push mode.
11947
 * If @buffer and @size are non-NULL, the data is used to detect
11948
 * the encoding.  The remaining characters will be parsed so they
11949
 * don't need to be fed in again through xmlParseChunk.
11950
 * To allow content encoding detection, @size should be >= 4
11951
 * The value of @filename is used for fetching external entities
11952
 * and error/warning reports.
11953
 *
11954
 * Returns the new parser context or NULL
11955
 */
11956
11957
xmlParserCtxtPtr
11958
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11959
0
                        const char *chunk, int size, const char *filename) {
11960
0
    xmlParserCtxtPtr ctxt;
11961
0
    xmlParserInputPtr inputStream;
11962
0
    xmlParserInputBufferPtr buf;
11963
11964
0
    buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
11965
0
    if (buf == NULL) return(NULL);
11966
11967
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
11968
0
    if (ctxt == NULL) {
11969
0
        xmlErrMemory(NULL, "creating parser: out of memory\n");
11970
0
  xmlFreeParserInputBuffer(buf);
11971
0
  return(NULL);
11972
0
    }
11973
0
    ctxt->dictNames = 1;
11974
0
    if (filename == NULL) {
11975
0
  ctxt->directory = NULL;
11976
0
    } else {
11977
0
        ctxt->directory = xmlParserGetDirectory(filename);
11978
0
    }
11979
11980
0
    inputStream = xmlNewInputStream(ctxt);
11981
0
    if (inputStream == NULL) {
11982
0
  xmlFreeParserCtxt(ctxt);
11983
0
  xmlFreeParserInputBuffer(buf);
11984
0
  return(NULL);
11985
0
    }
11986
11987
0
    if (filename == NULL)
11988
0
  inputStream->filename = NULL;
11989
0
    else {
11990
0
  inputStream->filename = (char *)
11991
0
      xmlCanonicPath((const xmlChar *) filename);
11992
0
  if (inputStream->filename == NULL) {
11993
0
            xmlFreeInputStream(inputStream);
11994
0
      xmlFreeParserCtxt(ctxt);
11995
0
      xmlFreeParserInputBuffer(buf);
11996
0
      return(NULL);
11997
0
  }
11998
0
    }
11999
0
    inputStream->buf = buf;
12000
0
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
12001
0
    inputPush(ctxt, inputStream);
12002
12003
    /*
12004
     * If the caller didn't provide an initial 'chunk' for determining
12005
     * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12006
     * that it can be automatically determined later
12007
     */
12008
0
    ctxt->charset = XML_CHAR_ENCODING_NONE;
12009
12010
0
    if ((size != 0) && (chunk != NULL) &&
12011
0
        (ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12012
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12013
0
  size_t cur = ctxt->input->cur - ctxt->input->base;
12014
0
        int res;
12015
12016
0
  res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12017
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
12018
0
        if (res < 0) {
12019
0
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12020
0
            xmlHaltParser(ctxt);
12021
0
        }
12022
#ifdef DEBUG_PUSH
12023
  xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12024
#endif
12025
0
    }
12026
12027
0
    return(ctxt);
12028
0
}
12029
#endif /* LIBXML_PUSH_ENABLED */
12030
12031
/**
12032
 * xmlStopParser:
12033
 * @ctxt:  an XML parser context
12034
 *
12035
 * Blocks further parser processing
12036
 */
12037
void
12038
14
xmlStopParser(xmlParserCtxtPtr ctxt) {
12039
14
    if (ctxt == NULL)
12040
0
        return;
12041
14
    xmlHaltParser(ctxt);
12042
14
    ctxt->errNo = XML_ERR_USER_STOP;
12043
14
}
12044
12045
/**
12046
 * xmlCreateIOParserCtxt:
12047
 * @sax:  a SAX handler
12048
 * @user_data:  The user data returned on SAX callbacks
12049
 * @ioread:  an I/O read function
12050
 * @ioclose:  an I/O close function
12051
 * @ioctx:  an I/O handler
12052
 * @enc:  the charset encoding if known
12053
 *
12054
 * Create a parser context for using the XML parser with an existing
12055
 * I/O stream
12056
 *
12057
 * Returns the new parser context or NULL
12058
 */
12059
xmlParserCtxtPtr
12060
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12061
  xmlInputReadCallback   ioread, xmlInputCloseCallback  ioclose,
12062
0
  void *ioctx, xmlCharEncoding enc) {
12063
0
    xmlParserCtxtPtr ctxt;
12064
0
    xmlParserInputPtr inputStream;
12065
0
    xmlParserInputBufferPtr buf;
12066
12067
0
    if (ioread == NULL) return(NULL);
12068
12069
0
    buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12070
0
    if (buf == NULL) {
12071
0
        if (ioclose != NULL)
12072
0
            ioclose(ioctx);
12073
0
        return (NULL);
12074
0
    }
12075
12076
0
    ctxt = xmlNewSAXParserCtxt(sax, user_data);
12077
0
    if (ctxt == NULL) {
12078
0
  xmlFreeParserInputBuffer(buf);
12079
0
  return(NULL);
12080
0
    }
12081
12082
0
    inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12083
0
    if (inputStream == NULL) {
12084
0
  xmlFreeParserCtxt(ctxt);
12085
0
  return(NULL);
12086
0
    }
12087
0
    inputPush(ctxt, inputStream);
12088
12089
0
    return(ctxt);
12090
0
}
12091
12092
#ifdef LIBXML_VALID_ENABLED
12093
/************************************************************************
12094
 *                  *
12095
 *    Front ends when parsing a DTD       *
12096
 *                  *
12097
 ************************************************************************/
12098
12099
/**
12100
 * xmlIOParseDTD:
12101
 * @sax:  the SAX handler block or NULL
12102
 * @input:  an Input Buffer
12103
 * @enc:  the charset encoding if known
12104
 *
12105
 * Load and parse a DTD
12106
 *
12107
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12108
 * @input will be freed by the function in any case.
12109
 */
12110
12111
xmlDtdPtr
12112
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12113
0
        xmlCharEncoding enc) {
12114
0
    xmlDtdPtr ret = NULL;
12115
0
    xmlParserCtxtPtr ctxt;
12116
0
    xmlParserInputPtr pinput = NULL;
12117
0
    xmlChar start[4];
12118
12119
0
    if (input == NULL)
12120
0
  return(NULL);
12121
12122
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12123
0
    if (ctxt == NULL) {
12124
0
        xmlFreeParserInputBuffer(input);
12125
0
  return(NULL);
12126
0
    }
12127
12128
    /* We are loading a DTD */
12129
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12130
12131
0
    xmlDetectSAX2(ctxt);
12132
12133
    /*
12134
     * generate a parser input from the I/O handler
12135
     */
12136
12137
0
    pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12138
0
    if (pinput == NULL) {
12139
0
        xmlFreeParserInputBuffer(input);
12140
0
  xmlFreeParserCtxt(ctxt);
12141
0
  return(NULL);
12142
0
    }
12143
12144
    /*
12145
     * plug some encoding conversion routines here.
12146
     */
12147
0
    if (xmlPushInput(ctxt, pinput) < 0) {
12148
0
  xmlFreeParserCtxt(ctxt);
12149
0
  return(NULL);
12150
0
    }
12151
0
    if (enc != XML_CHAR_ENCODING_NONE) {
12152
0
        xmlSwitchEncoding(ctxt, enc);
12153
0
    }
12154
12155
0
    pinput->filename = NULL;
12156
0
    pinput->line = 1;
12157
0
    pinput->col = 1;
12158
0
    pinput->base = ctxt->input->cur;
12159
0
    pinput->cur = ctxt->input->cur;
12160
0
    pinput->free = NULL;
12161
12162
    /*
12163
     * let's parse that entity knowing it's an external subset.
12164
     */
12165
0
    ctxt->inSubset = 2;
12166
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12167
0
    if (ctxt->myDoc == NULL) {
12168
0
  xmlErrMemory(ctxt, "New Doc failed");
12169
0
  return(NULL);
12170
0
    }
12171
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12172
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12173
0
                                 BAD_CAST "none", BAD_CAST "none");
12174
12175
0
    if ((enc == XML_CHAR_ENCODING_NONE) &&
12176
0
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
12177
  /*
12178
   * Get the 4 first bytes and decode the charset
12179
   * if enc != XML_CHAR_ENCODING_NONE
12180
   * plug some encoding conversion routines.
12181
   */
12182
0
  start[0] = RAW;
12183
0
  start[1] = NXT(1);
12184
0
  start[2] = NXT(2);
12185
0
  start[3] = NXT(3);
12186
0
  enc = xmlDetectCharEncoding(start, 4);
12187
0
  if (enc != XML_CHAR_ENCODING_NONE) {
12188
0
      xmlSwitchEncoding(ctxt, enc);
12189
0
  }
12190
0
    }
12191
12192
0
    xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12193
12194
0
    if (ctxt->myDoc != NULL) {
12195
0
  if (ctxt->wellFormed) {
12196
0
      ret = ctxt->myDoc->extSubset;
12197
0
      ctxt->myDoc->extSubset = NULL;
12198
0
      if (ret != NULL) {
12199
0
    xmlNodePtr tmp;
12200
12201
0
    ret->doc = NULL;
12202
0
    tmp = ret->children;
12203
0
    while (tmp != NULL) {
12204
0
        tmp->doc = NULL;
12205
0
        tmp = tmp->next;
12206
0
    }
12207
0
      }
12208
0
  } else {
12209
0
      ret = NULL;
12210
0
  }
12211
0
        xmlFreeDoc(ctxt->myDoc);
12212
0
        ctxt->myDoc = NULL;
12213
0
    }
12214
0
    xmlFreeParserCtxt(ctxt);
12215
12216
0
    return(ret);
12217
0
}
12218
12219
/**
12220
 * xmlSAXParseDTD:
12221
 * @sax:  the SAX handler block
12222
 * @ExternalID:  a NAME* containing the External ID of the DTD
12223
 * @SystemID:  a NAME* containing the URL to the DTD
12224
 *
12225
 * DEPRECATED: Don't use.
12226
 *
12227
 * Load and parse an external subset.
12228
 *
12229
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12230
 */
12231
12232
xmlDtdPtr
12233
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12234
0
                          const xmlChar *SystemID) {
12235
0
    xmlDtdPtr ret = NULL;
12236
0
    xmlParserCtxtPtr ctxt;
12237
0
    xmlParserInputPtr input = NULL;
12238
0
    xmlCharEncoding enc;
12239
0
    xmlChar* systemIdCanonic;
12240
12241
0
    if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12242
12243
0
    ctxt = xmlNewSAXParserCtxt(sax, NULL);
12244
0
    if (ctxt == NULL) {
12245
0
  return(NULL);
12246
0
    }
12247
12248
    /* We are loading a DTD */
12249
0
    ctxt->options |= XML_PARSE_DTDLOAD;
12250
12251
    /*
12252
     * Canonicalise the system ID
12253
     */
12254
0
    systemIdCanonic = xmlCanonicPath(SystemID);
12255
0
    if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12256
0
  xmlFreeParserCtxt(ctxt);
12257
0
  return(NULL);
12258
0
    }
12259
12260
    /*
12261
     * Ask the Entity resolver to load the damn thing
12262
     */
12263
12264
0
    if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12265
0
  input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12266
0
                                   systemIdCanonic);
12267
0
    if (input == NULL) {
12268
0
  xmlFreeParserCtxt(ctxt);
12269
0
  if (systemIdCanonic != NULL)
12270
0
      xmlFree(systemIdCanonic);
12271
0
  return(NULL);
12272
0
    }
12273
12274
    /*
12275
     * plug some encoding conversion routines here.
12276
     */
12277
0
    if (xmlPushInput(ctxt, input) < 0) {
12278
0
  xmlFreeParserCtxt(ctxt);
12279
0
  if (systemIdCanonic != NULL)
12280
0
      xmlFree(systemIdCanonic);
12281
0
  return(NULL);
12282
0
    }
12283
0
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12284
0
  enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12285
0
  xmlSwitchEncoding(ctxt, enc);
12286
0
    }
12287
12288
0
    if (input->filename == NULL)
12289
0
  input->filename = (char *) systemIdCanonic;
12290
0
    else
12291
0
  xmlFree(systemIdCanonic);
12292
0
    input->line = 1;
12293
0
    input->col = 1;
12294
0
    input->base = ctxt->input->cur;
12295
0
    input->cur = ctxt->input->cur;
12296
0
    input->free = NULL;
12297
12298
    /*
12299
     * let's parse that entity knowing it's an external subset.
12300
     */
12301
0
    ctxt->inSubset = 2;
12302
0
    ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12303
0
    if (ctxt->myDoc == NULL) {
12304
0
  xmlErrMemory(ctxt, "New Doc failed");
12305
0
  xmlFreeParserCtxt(ctxt);
12306
0
  return(NULL);
12307
0
    }
12308
0
    ctxt->myDoc->properties = XML_DOC_INTERNAL;
12309
0
    ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12310
0
                                 ExternalID, SystemID);
12311
0
    xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12312
12313
0
    if (ctxt->myDoc != NULL) {
12314
0
  if (ctxt->wellFormed) {
12315
0
      ret = ctxt->myDoc->extSubset;
12316
0
      ctxt->myDoc->extSubset = NULL;
12317
0
      if (ret != NULL) {
12318
0
    xmlNodePtr tmp;
12319
12320
0
    ret->doc = NULL;
12321
0
    tmp = ret->children;
12322
0
    while (tmp != NULL) {
12323
0
        tmp->doc = NULL;
12324
0
        tmp = tmp->next;
12325
0
    }
12326
0
      }
12327
0
  } else {
12328
0
      ret = NULL;
12329
0
  }
12330
0
        xmlFreeDoc(ctxt->myDoc);
12331
0
        ctxt->myDoc = NULL;
12332
0
    }
12333
0
    xmlFreeParserCtxt(ctxt);
12334
12335
0
    return(ret);
12336
0
}
12337
12338
12339
/**
12340
 * xmlParseDTD:
12341
 * @ExternalID:  a NAME* containing the External ID of the DTD
12342
 * @SystemID:  a NAME* containing the URL to the DTD
12343
 *
12344
 * Load and parse an external subset.
12345
 *
12346
 * Returns the resulting xmlDtdPtr or NULL in case of error.
12347
 */
12348
12349
xmlDtdPtr
12350
0
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12351
0
    return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12352
0
}
12353
#endif /* LIBXML_VALID_ENABLED */
12354
12355
/************************************************************************
12356
 *                  *
12357
 *    Front ends when parsing an Entity     *
12358
 *                  *
12359
 ************************************************************************/
12360
12361
/**
12362
 * xmlParseCtxtExternalEntity:
12363
 * @ctx:  the existing parsing context
12364
 * @URL:  the URL for the entity to load
12365
 * @ID:  the System ID for the entity to load
12366
 * @lst:  the return value for the set of parsed nodes
12367
 *
12368
 * Parse an external general entity within an existing parsing context
12369
 * An external general parsed entity is well-formed if it matches the
12370
 * production labeled extParsedEnt.
12371
 *
12372
 * [78] extParsedEnt ::= TextDecl? content
12373
 *
12374
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12375
 *    the parser error code otherwise
12376
 */
12377
12378
int
12379
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12380
0
                 const xmlChar *ID, xmlNodePtr *lst) {
12381
0
    void *userData;
12382
12383
0
    if (ctx == NULL) return(-1);
12384
    /*
12385
     * If the user provided their own SAX callbacks, then reuse the
12386
     * userData callback field, otherwise the expected setup in a
12387
     * DOM builder is to have userData == ctxt
12388
     */
12389
0
    if (ctx->userData == ctx)
12390
0
        userData = NULL;
12391
0
    else
12392
0
        userData = ctx->userData;
12393
0
    return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12394
0
                                         userData, ctx->depth + 1,
12395
0
                                         URL, ID, lst);
12396
0
}
12397
12398
/**
12399
 * xmlParseExternalEntityPrivate:
12400
 * @doc:  the document the chunk pertains to
12401
 * @oldctxt:  the previous parser context if available
12402
 * @sax:  the SAX handler block (possibly NULL)
12403
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12404
 * @depth:  Used for loop detection, use 0
12405
 * @URL:  the URL for the entity to load
12406
 * @ID:  the System ID for the entity to load
12407
 * @list:  the return value for the set of parsed nodes
12408
 *
12409
 * Private version of xmlParseExternalEntity()
12410
 *
12411
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12412
 *    the parser error code otherwise
12413
 */
12414
12415
static xmlParserErrors
12416
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12417
                xmlSAXHandlerPtr sax,
12418
          void *user_data, int depth, const xmlChar *URL,
12419
19.6k
          const xmlChar *ID, xmlNodePtr *list) {
12420
19.6k
    xmlParserCtxtPtr ctxt;
12421
19.6k
    xmlDocPtr newDoc;
12422
19.6k
    xmlNodePtr newRoot;
12423
19.6k
    xmlParserErrors ret = XML_ERR_OK;
12424
19.6k
    xmlChar start[4];
12425
19.6k
    xmlCharEncoding enc;
12426
12427
19.6k
    if (((depth > 40) &&
12428
19.6k
  ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12429
19.6k
  (depth > 100)) {
12430
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12431
0
                       "Maximum entity nesting depth exceeded");
12432
0
        return(XML_ERR_ENTITY_LOOP);
12433
0
    }
12434
12435
19.6k
    if (list != NULL)
12436
1.73k
        *list = NULL;
12437
19.6k
    if ((URL == NULL) && (ID == NULL))
12438
0
  return(XML_ERR_INTERNAL_ERROR);
12439
19.6k
    if (doc == NULL)
12440
0
  return(XML_ERR_INTERNAL_ERROR);
12441
12442
19.6k
    ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12443
19.6k
                                             oldctxt);
12444
19.6k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12445
7.52k
    if (oldctxt != NULL) {
12446
7.52k
        ctxt->nbErrors = oldctxt->nbErrors;
12447
7.52k
        ctxt->nbWarnings = oldctxt->nbWarnings;
12448
7.52k
    }
12449
7.52k
    xmlDetectSAX2(ctxt);
12450
12451
7.52k
    newDoc = xmlNewDoc(BAD_CAST "1.0");
12452
7.52k
    if (newDoc == NULL) {
12453
0
  xmlFreeParserCtxt(ctxt);
12454
0
  return(XML_ERR_INTERNAL_ERROR);
12455
0
    }
12456
7.52k
    newDoc->properties = XML_DOC_INTERNAL;
12457
7.52k
    if (doc) {
12458
7.52k
        newDoc->intSubset = doc->intSubset;
12459
7.52k
        newDoc->extSubset = doc->extSubset;
12460
7.52k
        if (doc->dict) {
12461
7.52k
            newDoc->dict = doc->dict;
12462
7.52k
            xmlDictReference(newDoc->dict);
12463
7.52k
        }
12464
7.52k
        if (doc->URL != NULL) {
12465
7.52k
            newDoc->URL = xmlStrdup(doc->URL);
12466
7.52k
        }
12467
7.52k
    }
12468
7.52k
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12469
7.52k
    if (newRoot == NULL) {
12470
0
  if (sax != NULL)
12471
0
  xmlFreeParserCtxt(ctxt);
12472
0
  newDoc->intSubset = NULL;
12473
0
  newDoc->extSubset = NULL;
12474
0
        xmlFreeDoc(newDoc);
12475
0
  return(XML_ERR_INTERNAL_ERROR);
12476
0
    }
12477
7.52k
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
12478
7.52k
    nodePush(ctxt, newDoc->children);
12479
7.52k
    if (doc == NULL) {
12480
0
        ctxt->myDoc = newDoc;
12481
7.52k
    } else {
12482
7.52k
        ctxt->myDoc = doc;
12483
7.52k
        newRoot->doc = doc;
12484
7.52k
    }
12485
12486
    /*
12487
     * Get the 4 first bytes and decode the charset
12488
     * if enc != XML_CHAR_ENCODING_NONE
12489
     * plug some encoding conversion routines.
12490
     */
12491
7.52k
    GROW;
12492
7.52k
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12493
7.27k
  start[0] = RAW;
12494
7.27k
  start[1] = NXT(1);
12495
7.27k
  start[2] = NXT(2);
12496
7.27k
  start[3] = NXT(3);
12497
7.27k
  enc = xmlDetectCharEncoding(start, 4);
12498
7.27k
  if (enc != XML_CHAR_ENCODING_NONE) {
12499
5.56k
      xmlSwitchEncoding(ctxt, enc);
12500
5.56k
  }
12501
7.27k
    }
12502
12503
    /*
12504
     * Parse a possible text declaration first
12505
     */
12506
7.52k
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12507
3.46k
  xmlParseTextDecl(ctxt);
12508
        /*
12509
         * An XML-1.0 document can't reference an entity not XML-1.0
12510
         */
12511
3.46k
        if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12512
3.46k
            (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12513
1
            xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12514
1
                           "Version mismatch between document and entity\n");
12515
1
        }
12516
3.46k
    }
12517
12518
7.52k
    ctxt->instate = XML_PARSER_CONTENT;
12519
7.52k
    ctxt->depth = depth;
12520
7.52k
    if (oldctxt != NULL) {
12521
7.52k
  ctxt->_private = oldctxt->_private;
12522
7.52k
  ctxt->loadsubset = oldctxt->loadsubset;
12523
7.52k
  ctxt->validate = oldctxt->validate;
12524
7.52k
  ctxt->valid = oldctxt->valid;
12525
7.52k
  ctxt->replaceEntities = oldctxt->replaceEntities;
12526
7.52k
        if (oldctxt->validate) {
12527
0
            ctxt->vctxt.error = oldctxt->vctxt.error;
12528
0
            ctxt->vctxt.warning = oldctxt->vctxt.warning;
12529
0
            ctxt->vctxt.userData = oldctxt->vctxt.userData;
12530
0
            ctxt->vctxt.flags = oldctxt->vctxt.flags;
12531
0
        }
12532
7.52k
  ctxt->external = oldctxt->external;
12533
7.52k
        if (ctxt->dict) xmlDictFree(ctxt->dict);
12534
7.52k
        ctxt->dict = oldctxt->dict;
12535
7.52k
        ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12536
7.52k
        ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12537
7.52k
        ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12538
7.52k
        ctxt->dictNames = oldctxt->dictNames;
12539
7.52k
        ctxt->attsDefault = oldctxt->attsDefault;
12540
7.52k
        ctxt->attsSpecial = oldctxt->attsSpecial;
12541
7.52k
        ctxt->linenumbers = oldctxt->linenumbers;
12542
7.52k
  ctxt->record_info = oldctxt->record_info;
12543
7.52k
  ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12544
7.52k
  ctxt->node_seq.length = oldctxt->node_seq.length;
12545
7.52k
  ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12546
7.52k
    } else {
12547
  /*
12548
   * Doing validity checking on chunk without context
12549
   * doesn't make sense
12550
   */
12551
0
  ctxt->_private = NULL;
12552
0
  ctxt->validate = 0;
12553
0
  ctxt->external = 2;
12554
0
  ctxt->loadsubset = 0;
12555
0
    }
12556
12557
7.52k
    xmlParseContent(ctxt);
12558
12559
7.52k
    if ((RAW == '<') && (NXT(1) == '/')) {
12560
15
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12561
7.50k
    } else if (RAW != 0) {
12562
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12563
0
    }
12564
7.52k
    if (ctxt->node != newDoc->children) {
12565
611
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12566
611
    }
12567
12568
7.52k
    if (!ctxt->wellFormed) {
12569
880
  ret = (xmlParserErrors)ctxt->errNo;
12570
880
        if (oldctxt != NULL) {
12571
880
            oldctxt->errNo = ctxt->errNo;
12572
880
            oldctxt->wellFormed = 0;
12573
880
            xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12574
880
        }
12575
6.64k
    } else {
12576
6.64k
  if (list != NULL) {
12577
251
      xmlNodePtr cur;
12578
12579
      /*
12580
       * Return the newly created nodeset after unlinking it from
12581
       * they pseudo parent.
12582
       */
12583
251
      cur = newDoc->children->children;
12584
251
      *list = cur;
12585
994
      while (cur != NULL) {
12586
743
    cur->parent = NULL;
12587
743
    cur = cur->next;
12588
743
      }
12589
251
            newDoc->children->children = NULL;
12590
251
  }
12591
6.64k
  ret = XML_ERR_OK;
12592
6.64k
    }
12593
12594
    /*
12595
     * Also record the size of the entity parsed
12596
     */
12597
7.52k
    if (ctxt->input != NULL && oldctxt != NULL) {
12598
7.52k
        unsigned long consumed = ctxt->input->consumed;
12599
12600
7.52k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12601
12602
7.52k
        xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12603
7.52k
        xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12604
12605
7.52k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12606
7.52k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12607
7.52k
    }
12608
12609
7.52k
    if (oldctxt != NULL) {
12610
7.52k
        ctxt->dict = NULL;
12611
7.52k
        ctxt->attsDefault = NULL;
12612
7.52k
        ctxt->attsSpecial = NULL;
12613
7.52k
        oldctxt->nbErrors = ctxt->nbErrors;
12614
7.52k
        oldctxt->nbWarnings = ctxt->nbWarnings;
12615
7.52k
        oldctxt->validate = ctxt->validate;
12616
7.52k
        oldctxt->valid = ctxt->valid;
12617
7.52k
        oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12618
7.52k
        oldctxt->node_seq.length = ctxt->node_seq.length;
12619
7.52k
        oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12620
7.52k
    }
12621
7.52k
    ctxt->node_seq.maximum = 0;
12622
7.52k
    ctxt->node_seq.length = 0;
12623
7.52k
    ctxt->node_seq.buffer = NULL;
12624
7.52k
    xmlFreeParserCtxt(ctxt);
12625
7.52k
    newDoc->intSubset = NULL;
12626
7.52k
    newDoc->extSubset = NULL;
12627
7.52k
    xmlFreeDoc(newDoc);
12628
12629
7.52k
    return(ret);
12630
7.52k
}
12631
12632
#ifdef LIBXML_SAX1_ENABLED
12633
/**
12634
 * xmlParseExternalEntity:
12635
 * @doc:  the document the chunk pertains to
12636
 * @sax:  the SAX handler block (possibly NULL)
12637
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12638
 * @depth:  Used for loop detection, use 0
12639
 * @URL:  the URL for the entity to load
12640
 * @ID:  the System ID for the entity to load
12641
 * @lst:  the return value for the set of parsed nodes
12642
 *
12643
 * Parse an external general entity
12644
 * An external general parsed entity is well-formed if it matches the
12645
 * production labeled extParsedEnt.
12646
 *
12647
 * [78] extParsedEnt ::= TextDecl? content
12648
 *
12649
 * Returns 0 if the entity is well formed, -1 in case of args problem and
12650
 *    the parser error code otherwise
12651
 */
12652
12653
int
12654
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12655
0
    int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12656
0
    return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12657
0
                           ID, lst));
12658
0
}
12659
12660
/**
12661
 * xmlParseBalancedChunkMemory:
12662
 * @doc:  the document the chunk pertains to (must not be NULL)
12663
 * @sax:  the SAX handler block (possibly NULL)
12664
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
12665
 * @depth:  Used for loop detection, use 0
12666
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12667
 * @lst:  the return value for the set of parsed nodes
12668
 *
12669
 * Parse a well-balanced chunk of an XML document
12670
 * called by the parser
12671
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12672
 * the content production in the XML grammar:
12673
 *
12674
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12675
 *
12676
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12677
 *    the parser error code otherwise
12678
 */
12679
12680
int
12681
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12682
0
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12683
0
    return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12684
0
                                                depth, string, lst, 0 );
12685
0
}
12686
#endif /* LIBXML_SAX1_ENABLED */
12687
12688
/**
12689
 * xmlParseBalancedChunkMemoryInternal:
12690
 * @oldctxt:  the existing parsing context
12691
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
12692
 * @user_data:  the user data field for the parser context
12693
 * @lst:  the return value for the set of parsed nodes
12694
 *
12695
 *
12696
 * Parse a well-balanced chunk of an XML document
12697
 * called by the parser
12698
 * The allowed sequence for the Well Balanced Chunk is the one defined by
12699
 * the content production in the XML grammar:
12700
 *
12701
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12702
 *
12703
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12704
 * error code otherwise
12705
 *
12706
 * In case recover is set to 1, the nodelist will not be empty even if
12707
 * the parsed chunk is not well balanced.
12708
 */
12709
static xmlParserErrors
12710
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12711
4.03k
  const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12712
4.03k
    xmlParserCtxtPtr ctxt;
12713
4.03k
    xmlDocPtr newDoc = NULL;
12714
4.03k
    xmlNodePtr newRoot;
12715
4.03k
    xmlSAXHandlerPtr oldsax = NULL;
12716
4.03k
    xmlNodePtr content = NULL;
12717
4.03k
    xmlNodePtr last = NULL;
12718
4.03k
    int size;
12719
4.03k
    xmlParserErrors ret = XML_ERR_OK;
12720
4.03k
#ifdef SAX2
12721
4.03k
    int i;
12722
4.03k
#endif
12723
12724
4.03k
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12725
4.03k
        (oldctxt->depth >  100)) {
12726
0
  xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12727
0
                       "Maximum entity nesting depth exceeded");
12728
0
  return(XML_ERR_ENTITY_LOOP);
12729
0
    }
12730
12731
12732
4.03k
    if (lst != NULL)
12733
359
        *lst = NULL;
12734
4.03k
    if (string == NULL)
12735
0
        return(XML_ERR_INTERNAL_ERROR);
12736
12737
4.03k
    size = xmlStrlen(string);
12738
12739
4.03k
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12740
4.03k
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12741
3.92k
    ctxt->nbErrors = oldctxt->nbErrors;
12742
3.92k
    ctxt->nbWarnings = oldctxt->nbWarnings;
12743
3.92k
    if (user_data != NULL)
12744
0
  ctxt->userData = user_data;
12745
3.92k
    else
12746
3.92k
  ctxt->userData = ctxt;
12747
3.92k
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12748
3.92k
    ctxt->dict = oldctxt->dict;
12749
3.92k
    ctxt->input_id = oldctxt->input_id;
12750
3.92k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12751
3.92k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12752
3.92k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12753
12754
3.92k
#ifdef SAX2
12755
    /* propagate namespaces down the entity */
12756
7.63k
    for (i = 0;i < oldctxt->nsNr;i += 2) {
12757
3.70k
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12758
3.70k
    }
12759
3.92k
#endif
12760
12761
3.92k
    oldsax = ctxt->sax;
12762
3.92k
    ctxt->sax = oldctxt->sax;
12763
3.92k
    xmlDetectSAX2(ctxt);
12764
3.92k
    ctxt->replaceEntities = oldctxt->replaceEntities;
12765
3.92k
    ctxt->options = oldctxt->options;
12766
12767
3.92k
    ctxt->_private = oldctxt->_private;
12768
3.92k
    if (oldctxt->myDoc == NULL) {
12769
0
  newDoc = xmlNewDoc(BAD_CAST "1.0");
12770
0
  if (newDoc == NULL) {
12771
0
      ctxt->sax = oldsax;
12772
0
      ctxt->dict = NULL;
12773
0
      xmlFreeParserCtxt(ctxt);
12774
0
      return(XML_ERR_INTERNAL_ERROR);
12775
0
  }
12776
0
  newDoc->properties = XML_DOC_INTERNAL;
12777
0
  newDoc->dict = ctxt->dict;
12778
0
  xmlDictReference(newDoc->dict);
12779
0
  ctxt->myDoc = newDoc;
12780
3.92k
    } else {
12781
3.92k
  ctxt->myDoc = oldctxt->myDoc;
12782
3.92k
        content = ctxt->myDoc->children;
12783
3.92k
  last = ctxt->myDoc->last;
12784
3.92k
    }
12785
3.92k
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12786
3.92k
    if (newRoot == NULL) {
12787
0
  ctxt->sax = oldsax;
12788
0
  ctxt->dict = NULL;
12789
0
  xmlFreeParserCtxt(ctxt);
12790
0
  if (newDoc != NULL) {
12791
0
      xmlFreeDoc(newDoc);
12792
0
  }
12793
0
  return(XML_ERR_INTERNAL_ERROR);
12794
0
    }
12795
3.92k
    ctxt->myDoc->children = NULL;
12796
3.92k
    ctxt->myDoc->last = NULL;
12797
3.92k
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
12798
3.92k
    nodePush(ctxt, ctxt->myDoc->children);
12799
3.92k
    ctxt->instate = XML_PARSER_CONTENT;
12800
3.92k
    ctxt->depth = oldctxt->depth;
12801
12802
3.92k
    ctxt->validate = 0;
12803
3.92k
    ctxt->loadsubset = oldctxt->loadsubset;
12804
3.92k
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12805
  /*
12806
   * ID/IDREF registration will be done in xmlValidateElement below
12807
   */
12808
3.92k
  ctxt->loadsubset |= XML_SKIP_IDS;
12809
3.92k
    }
12810
3.92k
    ctxt->dictNames = oldctxt->dictNames;
12811
3.92k
    ctxt->attsDefault = oldctxt->attsDefault;
12812
3.92k
    ctxt->attsSpecial = oldctxt->attsSpecial;
12813
12814
3.92k
    xmlParseContent(ctxt);
12815
3.92k
    if ((RAW == '<') && (NXT(1) == '/')) {
12816
7
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12817
3.92k
    } else if (RAW != 0) {
12818
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12819
0
    }
12820
3.92k
    if (ctxt->node != ctxt->myDoc->children) {
12821
90
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12822
90
    }
12823
12824
3.92k
    if (!ctxt->wellFormed) {
12825
132
  ret = (xmlParserErrors)ctxt->errNo;
12826
132
        oldctxt->errNo = ctxt->errNo;
12827
132
        oldctxt->wellFormed = 0;
12828
132
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12829
3.79k
    } else {
12830
3.79k
        ret = XML_ERR_OK;
12831
3.79k
    }
12832
12833
3.92k
    if ((lst != NULL) && (ret == XML_ERR_OK)) {
12834
211
  xmlNodePtr cur;
12835
12836
  /*
12837
   * Return the newly created nodeset after unlinking it from
12838
   * they pseudo parent.
12839
   */
12840
211
  cur = ctxt->myDoc->children->children;
12841
211
  *lst = cur;
12842
3.47k
  while (cur != NULL) {
12843
3.25k
#ifdef LIBXML_VALID_ENABLED
12844
3.25k
      if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12845
3.25k
    (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12846
3.25k
    (cur->type == XML_ELEMENT_NODE)) {
12847
0
    oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12848
0
      oldctxt->myDoc, cur);
12849
0
      }
12850
3.25k
#endif /* LIBXML_VALID_ENABLED */
12851
3.25k
      cur->parent = NULL;
12852
3.25k
      cur = cur->next;
12853
3.25k
  }
12854
211
  ctxt->myDoc->children->children = NULL;
12855
211
    }
12856
3.92k
    if (ctxt->myDoc != NULL) {
12857
3.92k
  xmlFreeNode(ctxt->myDoc->children);
12858
3.92k
        ctxt->myDoc->children = content;
12859
3.92k
        ctxt->myDoc->last = last;
12860
3.92k
    }
12861
12862
    /*
12863
     * Also record the size of the entity parsed
12864
     */
12865
3.92k
    if (ctxt->input != NULL && oldctxt != NULL) {
12866
3.92k
        unsigned long consumed = ctxt->input->consumed;
12867
12868
3.92k
        xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12869
12870
3.92k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12871
3.92k
        xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12872
3.92k
    }
12873
12874
3.92k
    oldctxt->nbErrors = ctxt->nbErrors;
12875
3.92k
    oldctxt->nbWarnings = ctxt->nbWarnings;
12876
3.92k
    ctxt->sax = oldsax;
12877
3.92k
    ctxt->dict = NULL;
12878
3.92k
    ctxt->attsDefault = NULL;
12879
3.92k
    ctxt->attsSpecial = NULL;
12880
3.92k
    xmlFreeParserCtxt(ctxt);
12881
3.92k
    if (newDoc != NULL) {
12882
0
  xmlFreeDoc(newDoc);
12883
0
    }
12884
12885
3.92k
    return(ret);
12886
3.92k
}
12887
12888
/**
12889
 * xmlParseInNodeContext:
12890
 * @node:  the context node
12891
 * @data:  the input string
12892
 * @datalen:  the input string length in bytes
12893
 * @options:  a combination of xmlParserOption
12894
 * @lst:  the return value for the set of parsed nodes
12895
 *
12896
 * Parse a well-balanced chunk of an XML document
12897
 * within the context (DTD, namespaces, etc ...) of the given node.
12898
 *
12899
 * The allowed sequence for the data is a Well Balanced Chunk defined by
12900
 * the content production in the XML grammar:
12901
 *
12902
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12903
 *
12904
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12905
 * error code otherwise
12906
 */
12907
xmlParserErrors
12908
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12909
0
                      int options, xmlNodePtr *lst) {
12910
0
#ifdef SAX2
12911
0
    xmlParserCtxtPtr ctxt;
12912
0
    xmlDocPtr doc = NULL;
12913
0
    xmlNodePtr fake, cur;
12914
0
    int nsnr = 0;
12915
12916
0
    xmlParserErrors ret = XML_ERR_OK;
12917
12918
    /*
12919
     * check all input parameters, grab the document
12920
     */
12921
0
    if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12922
0
        return(XML_ERR_INTERNAL_ERROR);
12923
0
    switch (node->type) {
12924
0
        case XML_ELEMENT_NODE:
12925
0
        case XML_ATTRIBUTE_NODE:
12926
0
        case XML_TEXT_NODE:
12927
0
        case XML_CDATA_SECTION_NODE:
12928
0
        case XML_ENTITY_REF_NODE:
12929
0
        case XML_PI_NODE:
12930
0
        case XML_COMMENT_NODE:
12931
0
        case XML_DOCUMENT_NODE:
12932
0
        case XML_HTML_DOCUMENT_NODE:
12933
0
      break;
12934
0
  default:
12935
0
      return(XML_ERR_INTERNAL_ERROR);
12936
12937
0
    }
12938
0
    while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12939
0
           (node->type != XML_DOCUMENT_NODE) &&
12940
0
     (node->type != XML_HTML_DOCUMENT_NODE))
12941
0
  node = node->parent;
12942
0
    if (node == NULL)
12943
0
  return(XML_ERR_INTERNAL_ERROR);
12944
0
    if (node->type == XML_ELEMENT_NODE)
12945
0
  doc = node->doc;
12946
0
    else
12947
0
        doc = (xmlDocPtr) node;
12948
0
    if (doc == NULL)
12949
0
  return(XML_ERR_INTERNAL_ERROR);
12950
12951
    /*
12952
     * allocate a context and set-up everything not related to the
12953
     * node position in the tree
12954
     */
12955
0
    if (doc->type == XML_DOCUMENT_NODE)
12956
0
  ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12957
0
#ifdef LIBXML_HTML_ENABLED
12958
0
    else if (doc->type == XML_HTML_DOCUMENT_NODE) {
12959
0
  ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12960
        /*
12961
         * When parsing in context, it makes no sense to add implied
12962
         * elements like html/body/etc...
12963
         */
12964
0
        options |= HTML_PARSE_NOIMPLIED;
12965
0
    }
12966
0
#endif
12967
0
    else
12968
0
        return(XML_ERR_INTERNAL_ERROR);
12969
12970
0
    if (ctxt == NULL)
12971
0
        return(XML_ERR_NO_MEMORY);
12972
12973
    /*
12974
     * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12975
     * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12976
     * we must wait until the last moment to free the original one.
12977
     */
12978
0
    if (doc->dict != NULL) {
12979
0
        if (ctxt->dict != NULL)
12980
0
      xmlDictFree(ctxt->dict);
12981
0
  ctxt->dict = doc->dict;
12982
0
    } else
12983
0
        options |= XML_PARSE_NODICT;
12984
12985
0
    if (doc->encoding != NULL) {
12986
0
        xmlCharEncodingHandlerPtr hdlr;
12987
12988
0
        if (ctxt->encoding != NULL)
12989
0
      xmlFree((xmlChar *) ctxt->encoding);
12990
0
        ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
12991
12992
0
        hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
12993
0
        if (hdlr != NULL) {
12994
0
            xmlSwitchToEncoding(ctxt, hdlr);
12995
0
  } else {
12996
0
            return(XML_ERR_UNSUPPORTED_ENCODING);
12997
0
        }
12998
0
    }
12999
13000
0
    xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13001
0
    xmlDetectSAX2(ctxt);
13002
0
    ctxt->myDoc = doc;
13003
    /* parsing in context, i.e. as within existing content */
13004
0
    ctxt->input_id = 2;
13005
0
    ctxt->instate = XML_PARSER_CONTENT;
13006
13007
0
    fake = xmlNewDocComment(node->doc, NULL);
13008
0
    if (fake == NULL) {
13009
0
        xmlFreeParserCtxt(ctxt);
13010
0
  return(XML_ERR_NO_MEMORY);
13011
0
    }
13012
0
    xmlAddChild(node, fake);
13013
13014
0
    if (node->type == XML_ELEMENT_NODE) {
13015
0
  nodePush(ctxt, node);
13016
  /*
13017
   * initialize the SAX2 namespaces stack
13018
   */
13019
0
  cur = node;
13020
0
  while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13021
0
      xmlNsPtr ns = cur->nsDef;
13022
0
      const xmlChar *iprefix, *ihref;
13023
13024
0
      while (ns != NULL) {
13025
0
    if (ctxt->dict) {
13026
0
        iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13027
0
        ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13028
0
    } else {
13029
0
        iprefix = ns->prefix;
13030
0
        ihref = ns->href;
13031
0
    }
13032
13033
0
          if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13034
0
        nsPush(ctxt, iprefix, ihref);
13035
0
        nsnr++;
13036
0
    }
13037
0
    ns = ns->next;
13038
0
      }
13039
0
      cur = cur->parent;
13040
0
  }
13041
0
    }
13042
13043
0
    if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13044
  /*
13045
   * ID/IDREF registration will be done in xmlValidateElement below
13046
   */
13047
0
  ctxt->loadsubset |= XML_SKIP_IDS;
13048
0
    }
13049
13050
0
#ifdef LIBXML_HTML_ENABLED
13051
0
    if (doc->type == XML_HTML_DOCUMENT_NODE)
13052
0
        __htmlParseContent(ctxt);
13053
0
    else
13054
0
#endif
13055
0
  xmlParseContent(ctxt);
13056
13057
0
    nsPop(ctxt, nsnr);
13058
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13059
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13060
0
    } else if (RAW != 0) {
13061
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13062
0
    }
13063
0
    if ((ctxt->node != NULL) && (ctxt->node != node)) {
13064
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13065
0
  ctxt->wellFormed = 0;
13066
0
    }
13067
13068
0
    if (!ctxt->wellFormed) {
13069
0
        if (ctxt->errNo == 0)
13070
0
      ret = XML_ERR_INTERNAL_ERROR;
13071
0
  else
13072
0
      ret = (xmlParserErrors)ctxt->errNo;
13073
0
    } else {
13074
0
        ret = XML_ERR_OK;
13075
0
    }
13076
13077
    /*
13078
     * Return the newly created nodeset after unlinking it from
13079
     * the pseudo sibling.
13080
     */
13081
13082
0
    cur = fake->next;
13083
0
    fake->next = NULL;
13084
0
    node->last = fake;
13085
13086
0
    if (cur != NULL) {
13087
0
  cur->prev = NULL;
13088
0
    }
13089
13090
0
    *lst = cur;
13091
13092
0
    while (cur != NULL) {
13093
0
  cur->parent = NULL;
13094
0
  cur = cur->next;
13095
0
    }
13096
13097
0
    xmlUnlinkNode(fake);
13098
0
    xmlFreeNode(fake);
13099
13100
13101
0
    if (ret != XML_ERR_OK) {
13102
0
        xmlFreeNodeList(*lst);
13103
0
  *lst = NULL;
13104
0
    }
13105
13106
0
    if (doc->dict != NULL)
13107
0
        ctxt->dict = NULL;
13108
0
    xmlFreeParserCtxt(ctxt);
13109
13110
0
    return(ret);
13111
#else /* !SAX2 */
13112
    return(XML_ERR_INTERNAL_ERROR);
13113
#endif
13114
0
}
13115
13116
#ifdef LIBXML_SAX1_ENABLED
13117
/**
13118
 * xmlParseBalancedChunkMemoryRecover:
13119
 * @doc:  the document the chunk pertains to (must not be NULL)
13120
 * @sax:  the SAX handler block (possibly NULL)
13121
 * @user_data:  The user data returned on SAX callbacks (possibly NULL)
13122
 * @depth:  Used for loop detection, use 0
13123
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
13124
 * @lst:  the return value for the set of parsed nodes
13125
 * @recover: return nodes even if the data is broken (use 0)
13126
 *
13127
 *
13128
 * Parse a well-balanced chunk of an XML document
13129
 * called by the parser
13130
 * The allowed sequence for the Well Balanced Chunk is the one defined by
13131
 * the content production in the XML grammar:
13132
 *
13133
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13134
 *
13135
 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13136
 *    the parser error code otherwise
13137
 *
13138
 * In case recover is set to 1, the nodelist will not be empty even if
13139
 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13140
 * some extent.
13141
 */
13142
int
13143
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13144
     void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13145
0
     int recover) {
13146
0
    xmlParserCtxtPtr ctxt;
13147
0
    xmlDocPtr newDoc;
13148
0
    xmlSAXHandlerPtr oldsax = NULL;
13149
0
    xmlNodePtr content, newRoot;
13150
0
    int size;
13151
0
    int ret = 0;
13152
13153
0
    if (depth > 40) {
13154
0
  return(XML_ERR_ENTITY_LOOP);
13155
0
    }
13156
13157
13158
0
    if (lst != NULL)
13159
0
        *lst = NULL;
13160
0
    if (string == NULL)
13161
0
        return(-1);
13162
13163
0
    size = xmlStrlen(string);
13164
13165
0
    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13166
0
    if (ctxt == NULL) return(-1);
13167
0
    ctxt->userData = ctxt;
13168
0
    if (sax != NULL) {
13169
0
  oldsax = ctxt->sax;
13170
0
        ctxt->sax = sax;
13171
0
  if (user_data != NULL)
13172
0
      ctxt->userData = user_data;
13173
0
    }
13174
0
    newDoc = xmlNewDoc(BAD_CAST "1.0");
13175
0
    if (newDoc == NULL) {
13176
0
  xmlFreeParserCtxt(ctxt);
13177
0
  return(-1);
13178
0
    }
13179
0
    newDoc->properties = XML_DOC_INTERNAL;
13180
0
    if ((doc != NULL) && (doc->dict != NULL)) {
13181
0
        xmlDictFree(ctxt->dict);
13182
0
  ctxt->dict = doc->dict;
13183
0
  xmlDictReference(ctxt->dict);
13184
0
  ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13185
0
  ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13186
0
  ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13187
0
  ctxt->dictNames = 1;
13188
0
    } else {
13189
0
  xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
13190
0
    }
13191
    /* doc == NULL is only supported for historic reasons */
13192
0
    if (doc != NULL) {
13193
0
  newDoc->intSubset = doc->intSubset;
13194
0
  newDoc->extSubset = doc->extSubset;
13195
0
    }
13196
0
    newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13197
0
    if (newRoot == NULL) {
13198
0
  if (sax != NULL)
13199
0
      ctxt->sax = oldsax;
13200
0
  xmlFreeParserCtxt(ctxt);
13201
0
  newDoc->intSubset = NULL;
13202
0
  newDoc->extSubset = NULL;
13203
0
        xmlFreeDoc(newDoc);
13204
0
  return(-1);
13205
0
    }
13206
0
    xmlAddChild((xmlNodePtr) newDoc, newRoot);
13207
0
    nodePush(ctxt, newRoot);
13208
    /* doc == NULL is only supported for historic reasons */
13209
0
    if (doc == NULL) {
13210
0
  ctxt->myDoc = newDoc;
13211
0
    } else {
13212
0
  ctxt->myDoc = newDoc;
13213
0
  newDoc->children->doc = doc;
13214
  /* Ensure that doc has XML spec namespace */
13215
0
  xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13216
0
  newDoc->oldNs = doc->oldNs;
13217
0
    }
13218
0
    ctxt->instate = XML_PARSER_CONTENT;
13219
0
    ctxt->input_id = 2;
13220
0
    ctxt->depth = depth;
13221
13222
    /*
13223
     * Doing validity checking on chunk doesn't make sense
13224
     */
13225
0
    ctxt->validate = 0;
13226
0
    ctxt->loadsubset = 0;
13227
0
    xmlDetectSAX2(ctxt);
13228
13229
0
    if ( doc != NULL ){
13230
0
        content = doc->children;
13231
0
        doc->children = NULL;
13232
0
        xmlParseContent(ctxt);
13233
0
        doc->children = content;
13234
0
    }
13235
0
    else {
13236
0
        xmlParseContent(ctxt);
13237
0
    }
13238
0
    if ((RAW == '<') && (NXT(1) == '/')) {
13239
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13240
0
    } else if (RAW != 0) {
13241
0
  xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13242
0
    }
13243
0
    if (ctxt->node != newDoc->children) {
13244
0
  xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13245
0
    }
13246
13247
0
    if (!ctxt->wellFormed) {
13248
0
        if (ctxt->errNo == 0)
13249
0
      ret = 1;
13250
0
  else
13251
0
      ret = ctxt->errNo;
13252
0
    } else {
13253
0
      ret = 0;
13254
0
    }
13255
13256
0
    if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13257
0
  xmlNodePtr cur;
13258
13259
  /*
13260
   * Return the newly created nodeset after unlinking it from
13261
   * they pseudo parent.
13262
   */
13263
0
  cur = newDoc->children->children;
13264
0
  *lst = cur;
13265
0
  while (cur != NULL) {
13266
0
      xmlSetTreeDoc(cur, doc);
13267
0
      cur->parent = NULL;
13268
0
      cur = cur->next;
13269
0
  }
13270
0
  newDoc->children->children = NULL;
13271
0
    }
13272
13273
0
    if (sax != NULL)
13274
0
  ctxt->sax = oldsax;
13275
0
    xmlFreeParserCtxt(ctxt);
13276
0
    newDoc->intSubset = NULL;
13277
0
    newDoc->extSubset = NULL;
13278
    /* This leaks the namespace list if doc == NULL */
13279
0
    newDoc->oldNs = NULL;
13280
0
    xmlFreeDoc(newDoc);
13281
13282
0
    return(ret);
13283
0
}
13284
13285
/**
13286
 * xmlSAXParseEntity:
13287
 * @sax:  the SAX handler block
13288
 * @filename:  the filename
13289
 *
13290
 * DEPRECATED: Don't use.
13291
 *
13292
 * parse an XML external entity out of context and build a tree.
13293
 * It use the given SAX function block to handle the parsing callback.
13294
 * If sax is NULL, fallback to the default DOM tree building routines.
13295
 *
13296
 * [78] extParsedEnt ::= TextDecl? content
13297
 *
13298
 * This correspond to a "Well Balanced" chunk
13299
 *
13300
 * Returns the resulting document tree
13301
 */
13302
13303
xmlDocPtr
13304
0
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13305
0
    xmlDocPtr ret;
13306
0
    xmlParserCtxtPtr ctxt;
13307
13308
0
    ctxt = xmlCreateFileParserCtxt(filename);
13309
0
    if (ctxt == NULL) {
13310
0
  return(NULL);
13311
0
    }
13312
0
    if (sax != NULL) {
13313
0
  if (ctxt->sax != NULL)
13314
0
      xmlFree(ctxt->sax);
13315
0
        ctxt->sax = sax;
13316
0
        ctxt->userData = NULL;
13317
0
    }
13318
13319
0
    xmlParseExtParsedEnt(ctxt);
13320
13321
0
    if (ctxt->wellFormed)
13322
0
  ret = ctxt->myDoc;
13323
0
    else {
13324
0
        ret = NULL;
13325
0
        xmlFreeDoc(ctxt->myDoc);
13326
0
        ctxt->myDoc = NULL;
13327
0
    }
13328
0
    if (sax != NULL)
13329
0
        ctxt->sax = NULL;
13330
0
    xmlFreeParserCtxt(ctxt);
13331
13332
0
    return(ret);
13333
0
}
13334
13335
/**
13336
 * xmlParseEntity:
13337
 * @filename:  the filename
13338
 *
13339
 * parse an XML external entity out of context and build a tree.
13340
 *
13341
 * [78] extParsedEnt ::= TextDecl? content
13342
 *
13343
 * This correspond to a "Well Balanced" chunk
13344
 *
13345
 * Returns the resulting document tree
13346
 */
13347
13348
xmlDocPtr
13349
0
xmlParseEntity(const char *filename) {
13350
0
    return(xmlSAXParseEntity(NULL, filename));
13351
0
}
13352
#endif /* LIBXML_SAX1_ENABLED */
13353
13354
/**
13355
 * xmlCreateEntityParserCtxtInternal:
13356
 * @URL:  the entity URL
13357
 * @ID:  the entity PUBLIC ID
13358
 * @base:  a possible base for the target URI
13359
 * @pctx:  parser context used to set options on new context
13360
 *
13361
 * Create a parser context for an external entity
13362
 * Automatic support for ZLIB/Compress compressed document is provided
13363
 * by default if found at compile-time.
13364
 *
13365
 * Returns the new parser context or NULL
13366
 */
13367
static xmlParserCtxtPtr
13368
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13369
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13370
19.6k
        xmlParserCtxtPtr pctx) {
13371
19.6k
    xmlParserCtxtPtr ctxt;
13372
19.6k
    xmlParserInputPtr inputStream;
13373
19.6k
    char *directory = NULL;
13374
19.6k
    xmlChar *uri;
13375
13376
19.6k
    ctxt = xmlNewSAXParserCtxt(sax, userData);
13377
19.6k
    if (ctxt == NULL) {
13378
0
  return(NULL);
13379
0
    }
13380
13381
19.6k
    if (pctx != NULL) {
13382
19.6k
        ctxt->options = pctx->options;
13383
19.6k
        ctxt->_private = pctx->_private;
13384
19.6k
  ctxt->input_id = pctx->input_id;
13385
19.6k
    }
13386
13387
    /* Don't read from stdin. */
13388
19.6k
    if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13389
69
        URL = BAD_CAST "./-";
13390
13391
19.6k
    uri = xmlBuildURI(URL, base);
13392
13393
19.6k
    if (uri == NULL) {
13394
7.24k
  inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13395
7.24k
  if (inputStream == NULL) {
13396
2.75k
      xmlFreeParserCtxt(ctxt);
13397
2.75k
      return(NULL);
13398
2.75k
  }
13399
13400
4.49k
  inputPush(ctxt, inputStream);
13401
13402
4.49k
  if ((ctxt->directory == NULL) && (directory == NULL))
13403
4.49k
      directory = xmlParserGetDirectory((char *)URL);
13404
4.49k
  if ((ctxt->directory == NULL) && (directory != NULL))
13405
4.49k
      ctxt->directory = directory;
13406
12.3k
    } else {
13407
12.3k
  inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13408
12.3k
  if (inputStream == NULL) {
13409
9.35k
      xmlFree(uri);
13410
9.35k
      xmlFreeParserCtxt(ctxt);
13411
9.35k
      return(NULL);
13412
9.35k
  }
13413
13414
3.03k
  inputPush(ctxt, inputStream);
13415
13416
3.03k
  if ((ctxt->directory == NULL) && (directory == NULL))
13417
3.03k
      directory = xmlParserGetDirectory((char *)uri);
13418
3.03k
  if ((ctxt->directory == NULL) && (directory != NULL))
13419
3.03k
      ctxt->directory = directory;
13420
3.03k
  xmlFree(uri);
13421
3.03k
    }
13422
7.52k
    return(ctxt);
13423
19.6k
}
13424
13425
/**
13426
 * xmlCreateEntityParserCtxt:
13427
 * @URL:  the entity URL
13428
 * @ID:  the entity PUBLIC ID
13429
 * @base:  a possible base for the target URI
13430
 *
13431
 * Create a parser context for an external entity
13432
 * Automatic support for ZLIB/Compress compressed document is provided
13433
 * by default if found at compile-time.
13434
 *
13435
 * Returns the new parser context or NULL
13436
 */
13437
xmlParserCtxtPtr
13438
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13439
0
                    const xmlChar *base) {
13440
0
    return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13441
13442
0
}
13443
13444
/************************************************************************
13445
 *                  *
13446
 *    Front ends when parsing from a file     *
13447
 *                  *
13448
 ************************************************************************/
13449
13450
/**
13451
 * xmlCreateURLParserCtxt:
13452
 * @filename:  the filename or URL
13453
 * @options:  a combination of xmlParserOption
13454
 *
13455
 * Create a parser context for a file or URL content.
13456
 * Automatic support for ZLIB/Compress compressed document is provided
13457
 * by default if found at compile-time and for file accesses
13458
 *
13459
 * Returns the new parser context or NULL
13460
 */
13461
xmlParserCtxtPtr
13462
xmlCreateURLParserCtxt(const char *filename, int options)
13463
0
{
13464
0
    xmlParserCtxtPtr ctxt;
13465
0
    xmlParserInputPtr inputStream;
13466
0
    char *directory = NULL;
13467
13468
0
    ctxt = xmlNewParserCtxt();
13469
0
    if (ctxt == NULL) {
13470
0
  xmlErrMemory(NULL, "cannot allocate parser context");
13471
0
  return(NULL);
13472
0
    }
13473
13474
0
    if (options)
13475
0
  xmlCtxtUseOptionsInternal(ctxt, options, NULL);
13476
0
    ctxt->linenumbers = 1;
13477
13478
0
    inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13479
0
    if (inputStream == NULL) {
13480
0
  xmlFreeParserCtxt(ctxt);
13481
0
  return(NULL);
13482
0
    }
13483
13484
0
    inputPush(ctxt, inputStream);
13485
0
    if ((ctxt->directory == NULL) && (directory == NULL))
13486
0
        directory = xmlParserGetDirectory(filename);
13487
0
    if ((ctxt->directory == NULL) && (directory != NULL))
13488
0
        ctxt->directory = directory;
13489
13490
0
    return(ctxt);
13491
0
}
13492
13493
/**
13494
 * xmlCreateFileParserCtxt:
13495
 * @filename:  the filename
13496
 *
13497
 * Create a parser context for a file content.
13498
 * Automatic support for ZLIB/Compress compressed document is provided
13499
 * by default if found at compile-time.
13500
 *
13501
 * Returns the new parser context or NULL
13502
 */
13503
xmlParserCtxtPtr
13504
xmlCreateFileParserCtxt(const char *filename)
13505
0
{
13506
0
    return(xmlCreateURLParserCtxt(filename, 0));
13507
0
}
13508
13509
#ifdef LIBXML_SAX1_ENABLED
13510
/**
13511
 * xmlSAXParseFileWithData:
13512
 * @sax:  the SAX handler block
13513
 * @filename:  the filename
13514
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13515
 *             documents
13516
 * @data:  the userdata
13517
 *
13518
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13519
 *
13520
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13521
 * compressed document is provided by default if found at compile-time.
13522
 * It use the given SAX function block to handle the parsing callback.
13523
 * If sax is NULL, fallback to the default DOM tree building routines.
13524
 *
13525
 * User data (void *) is stored within the parser context in the
13526
 * context's _private member, so it is available nearly everywhere in libxml
13527
 *
13528
 * Returns the resulting document tree
13529
 */
13530
13531
xmlDocPtr
13532
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13533
0
                        int recovery, void *data) {
13534
0
    xmlDocPtr ret;
13535
0
    xmlParserCtxtPtr ctxt;
13536
13537
0
    xmlInitParser();
13538
13539
0
    ctxt = xmlCreateFileParserCtxt(filename);
13540
0
    if (ctxt == NULL) {
13541
0
  return(NULL);
13542
0
    }
13543
0
    if (sax != NULL) {
13544
0
  if (ctxt->sax != NULL)
13545
0
      xmlFree(ctxt->sax);
13546
0
        ctxt->sax = sax;
13547
0
    }
13548
0
    xmlDetectSAX2(ctxt);
13549
0
    if (data!=NULL) {
13550
0
  ctxt->_private = data;
13551
0
    }
13552
13553
0
    if (ctxt->directory == NULL)
13554
0
        ctxt->directory = xmlParserGetDirectory(filename);
13555
13556
0
    ctxt->recovery = recovery;
13557
13558
0
    xmlParseDocument(ctxt);
13559
13560
0
    if ((ctxt->wellFormed) || recovery) {
13561
0
        ret = ctxt->myDoc;
13562
0
  if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13563
0
      if (ctxt->input->buf->compressed > 0)
13564
0
    ret->compression = 9;
13565
0
      else
13566
0
    ret->compression = ctxt->input->buf->compressed;
13567
0
  }
13568
0
    }
13569
0
    else {
13570
0
       ret = NULL;
13571
0
       xmlFreeDoc(ctxt->myDoc);
13572
0
       ctxt->myDoc = NULL;
13573
0
    }
13574
0
    if (sax != NULL)
13575
0
        ctxt->sax = NULL;
13576
0
    xmlFreeParserCtxt(ctxt);
13577
13578
0
    return(ret);
13579
0
}
13580
13581
/**
13582
 * xmlSAXParseFile:
13583
 * @sax:  the SAX handler block
13584
 * @filename:  the filename
13585
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13586
 *             documents
13587
 *
13588
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13589
 *
13590
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13591
 * compressed document is provided by default if found at compile-time.
13592
 * It use the given SAX function block to handle the parsing callback.
13593
 * If sax is NULL, fallback to the default DOM tree building routines.
13594
 *
13595
 * Returns the resulting document tree
13596
 */
13597
13598
xmlDocPtr
13599
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13600
0
                          int recovery) {
13601
0
    return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13602
0
}
13603
13604
/**
13605
 * xmlRecoverDoc:
13606
 * @cur:  a pointer to an array of xmlChar
13607
 *
13608
 * DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13609
 *
13610
 * parse an XML in-memory document and build a tree.
13611
 * In the case the document is not Well Formed, a attempt to build a
13612
 * tree is tried anyway
13613
 *
13614
 * Returns the resulting document tree or NULL in case of failure
13615
 */
13616
13617
xmlDocPtr
13618
0
xmlRecoverDoc(const xmlChar *cur) {
13619
0
    return(xmlSAXParseDoc(NULL, cur, 1));
13620
0
}
13621
13622
/**
13623
 * xmlParseFile:
13624
 * @filename:  the filename
13625
 *
13626
 * DEPRECATED: Use xmlReadFile.
13627
 *
13628
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13629
 * compressed document is provided by default if found at compile-time.
13630
 *
13631
 * Returns the resulting document tree if the file was wellformed,
13632
 * NULL otherwise.
13633
 */
13634
13635
xmlDocPtr
13636
0
xmlParseFile(const char *filename) {
13637
0
    return(xmlSAXParseFile(NULL, filename, 0));
13638
0
}
13639
13640
/**
13641
 * xmlRecoverFile:
13642
 * @filename:  the filename
13643
 *
13644
 * DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13645
 *
13646
 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13647
 * compressed document is provided by default if found at compile-time.
13648
 * In the case the document is not Well Formed, it attempts to build
13649
 * a tree anyway
13650
 *
13651
 * Returns the resulting document tree or NULL in case of failure
13652
 */
13653
13654
xmlDocPtr
13655
0
xmlRecoverFile(const char *filename) {
13656
0
    return(xmlSAXParseFile(NULL, filename, 1));
13657
0
}
13658
13659
13660
/**
13661
 * xmlSetupParserForBuffer:
13662
 * @ctxt:  an XML parser context
13663
 * @buffer:  a xmlChar * buffer
13664
 * @filename:  a file name
13665
 *
13666
 * DEPRECATED: Don't use.
13667
 *
13668
 * Setup the parser context to parse a new buffer; Clears any prior
13669
 * contents from the parser context. The buffer parameter must not be
13670
 * NULL, but the filename parameter can be
13671
 */
13672
void
13673
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13674
                             const char* filename)
13675
0
{
13676
0
    xmlParserInputPtr input;
13677
13678
0
    if ((ctxt == NULL) || (buffer == NULL))
13679
0
        return;
13680
13681
0
    input = xmlNewInputStream(ctxt);
13682
0
    if (input == NULL) {
13683
0
        xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13684
0
        xmlClearParserCtxt(ctxt);
13685
0
        return;
13686
0
    }
13687
13688
0
    xmlClearParserCtxt(ctxt);
13689
0
    if (filename != NULL)
13690
0
        input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
13691
0
    input->base = buffer;
13692
0
    input->cur = buffer;
13693
0
    input->end = &buffer[xmlStrlen(buffer)];
13694
0
    inputPush(ctxt, input);
13695
0
}
13696
13697
/**
13698
 * xmlSAXUserParseFile:
13699
 * @sax:  a SAX handler
13700
 * @user_data:  The user data returned on SAX callbacks
13701
 * @filename:  a file name
13702
 *
13703
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13704
 *
13705
 * parse an XML file and call the given SAX handler routines.
13706
 * Automatic support for ZLIB/Compress compressed document is provided
13707
 *
13708
 * Returns 0 in case of success or a error number otherwise
13709
 */
13710
int
13711
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13712
0
                    const char *filename) {
13713
0
    int ret = 0;
13714
0
    xmlParserCtxtPtr ctxt;
13715
13716
0
    ctxt = xmlCreateFileParserCtxt(filename);
13717
0
    if (ctxt == NULL) return -1;
13718
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13719
0
  xmlFree(ctxt->sax);
13720
0
    ctxt->sax = sax;
13721
0
    xmlDetectSAX2(ctxt);
13722
13723
0
    if (user_data != NULL)
13724
0
  ctxt->userData = user_data;
13725
13726
0
    xmlParseDocument(ctxt);
13727
13728
0
    if (ctxt->wellFormed)
13729
0
  ret = 0;
13730
0
    else {
13731
0
        if (ctxt->errNo != 0)
13732
0
      ret = ctxt->errNo;
13733
0
  else
13734
0
      ret = -1;
13735
0
    }
13736
0
    if (sax != NULL)
13737
0
  ctxt->sax = NULL;
13738
0
    if (ctxt->myDoc != NULL) {
13739
0
        xmlFreeDoc(ctxt->myDoc);
13740
0
  ctxt->myDoc = NULL;
13741
0
    }
13742
0
    xmlFreeParserCtxt(ctxt);
13743
13744
0
    return ret;
13745
0
}
13746
#endif /* LIBXML_SAX1_ENABLED */
13747
13748
/************************************************************************
13749
 *                  *
13750
 *    Front ends when parsing from memory     *
13751
 *                  *
13752
 ************************************************************************/
13753
13754
/**
13755
 * xmlCreateMemoryParserCtxt:
13756
 * @buffer:  a pointer to a char array
13757
 * @size:  the size of the array
13758
 *
13759
 * Create a parser context for an XML in-memory document.
13760
 *
13761
 * Returns the new parser context or NULL
13762
 */
13763
xmlParserCtxtPtr
13764
4.03k
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
13765
4.03k
    xmlParserCtxtPtr ctxt;
13766
4.03k
    xmlParserInputPtr input;
13767
4.03k
    xmlParserInputBufferPtr buf;
13768
13769
4.03k
    if (buffer == NULL)
13770
0
  return(NULL);
13771
4.03k
    if (size <= 0)
13772
111
  return(NULL);
13773
13774
3.92k
    ctxt = xmlNewParserCtxt();
13775
3.92k
    if (ctxt == NULL)
13776
0
  return(NULL);
13777
13778
3.92k
    buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
13779
3.92k
    if (buf == NULL) {
13780
0
  xmlFreeParserCtxt(ctxt);
13781
0
  return(NULL);
13782
0
    }
13783
13784
3.92k
    input = xmlNewInputStream(ctxt);
13785
3.92k
    if (input == NULL) {
13786
0
  xmlFreeParserInputBuffer(buf);
13787
0
  xmlFreeParserCtxt(ctxt);
13788
0
  return(NULL);
13789
0
    }
13790
13791
3.92k
    input->filename = NULL;
13792
3.92k
    input->buf = buf;
13793
3.92k
    xmlBufResetInput(input->buf->buffer, input);
13794
13795
3.92k
    inputPush(ctxt, input);
13796
3.92k
    return(ctxt);
13797
3.92k
}
13798
13799
#ifdef LIBXML_SAX1_ENABLED
13800
/**
13801
 * xmlSAXParseMemoryWithData:
13802
 * @sax:  the SAX handler block
13803
 * @buffer:  an pointer to a char array
13804
 * @size:  the size of the array
13805
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13806
 *             documents
13807
 * @data:  the userdata
13808
 *
13809
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13810
 *
13811
 * parse an XML in-memory block and use the given SAX function block
13812
 * to handle the parsing callback. If sax is NULL, fallback to the default
13813
 * DOM tree building routines.
13814
 *
13815
 * User data (void *) is stored within the parser context in the
13816
 * context's _private member, so it is available nearly everywhere in libxml
13817
 *
13818
 * Returns the resulting document tree
13819
 */
13820
13821
xmlDocPtr
13822
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13823
0
            int size, int recovery, void *data) {
13824
0
    xmlDocPtr ret;
13825
0
    xmlParserCtxtPtr ctxt;
13826
13827
0
    xmlInitParser();
13828
13829
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13830
0
    if (ctxt == NULL) return(NULL);
13831
0
    if (sax != NULL) {
13832
0
  if (ctxt->sax != NULL)
13833
0
      xmlFree(ctxt->sax);
13834
0
        ctxt->sax = sax;
13835
0
    }
13836
0
    xmlDetectSAX2(ctxt);
13837
0
    if (data!=NULL) {
13838
0
  ctxt->_private=data;
13839
0
    }
13840
13841
0
    ctxt->recovery = recovery;
13842
13843
0
    xmlParseDocument(ctxt);
13844
13845
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13846
0
    else {
13847
0
       ret = NULL;
13848
0
       xmlFreeDoc(ctxt->myDoc);
13849
0
       ctxt->myDoc = NULL;
13850
0
    }
13851
0
    if (sax != NULL)
13852
0
  ctxt->sax = NULL;
13853
0
    xmlFreeParserCtxt(ctxt);
13854
13855
0
    return(ret);
13856
0
}
13857
13858
/**
13859
 * xmlSAXParseMemory:
13860
 * @sax:  the SAX handler block
13861
 * @buffer:  an pointer to a char array
13862
 * @size:  the size of the array
13863
 * @recovery:  work in recovery mode, i.e. tries to read not Well Formed
13864
 *             documents
13865
 *
13866
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13867
 *
13868
 * parse an XML in-memory block and use the given SAX function block
13869
 * to handle the parsing callback. If sax is NULL, fallback to the default
13870
 * DOM tree building routines.
13871
 *
13872
 * Returns the resulting document tree
13873
 */
13874
xmlDocPtr
13875
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13876
0
            int size, int recovery) {
13877
0
    return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
13878
0
}
13879
13880
/**
13881
 * xmlParseMemory:
13882
 * @buffer:  an pointer to a char array
13883
 * @size:  the size of the array
13884
 *
13885
 * DEPRECATED: Use xmlReadMemory.
13886
 *
13887
 * parse an XML in-memory block and build a tree.
13888
 *
13889
 * Returns the resulting document tree
13890
 */
13891
13892
0
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
13893
0
   return(xmlSAXParseMemory(NULL, buffer, size, 0));
13894
0
}
13895
13896
/**
13897
 * xmlRecoverMemory:
13898
 * @buffer:  an pointer to a char array
13899
 * @size:  the size of the array
13900
 *
13901
 * DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
13902
 *
13903
 * parse an XML in-memory block and build a tree.
13904
 * In the case the document is not Well Formed, an attempt to
13905
 * build a tree is tried anyway
13906
 *
13907
 * Returns the resulting document tree or NULL in case of error
13908
 */
13909
13910
0
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
13911
0
   return(xmlSAXParseMemory(NULL, buffer, size, 1));
13912
0
}
13913
13914
/**
13915
 * xmlSAXUserParseMemory:
13916
 * @sax:  a SAX handler
13917
 * @user_data:  The user data returned on SAX callbacks
13918
 * @buffer:  an in-memory XML document input
13919
 * @size:  the length of the XML document in bytes
13920
 *
13921
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
13922
 *
13923
 * parse an XML in-memory buffer and call the given SAX handler routines.
13924
 *
13925
 * Returns 0 in case of success or a error number otherwise
13926
 */
13927
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
13928
0
        const char *buffer, int size) {
13929
0
    int ret = 0;
13930
0
    xmlParserCtxtPtr ctxt;
13931
13932
0
    xmlInitParser();
13933
13934
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13935
0
    if (ctxt == NULL) return -1;
13936
0
    if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13937
0
        xmlFree(ctxt->sax);
13938
0
    ctxt->sax = sax;
13939
0
    xmlDetectSAX2(ctxt);
13940
13941
0
    if (user_data != NULL)
13942
0
  ctxt->userData = user_data;
13943
13944
0
    xmlParseDocument(ctxt);
13945
13946
0
    if (ctxt->wellFormed)
13947
0
  ret = 0;
13948
0
    else {
13949
0
        if (ctxt->errNo != 0)
13950
0
      ret = ctxt->errNo;
13951
0
  else
13952
0
      ret = -1;
13953
0
    }
13954
0
    if (sax != NULL)
13955
0
        ctxt->sax = NULL;
13956
0
    if (ctxt->myDoc != NULL) {
13957
0
        xmlFreeDoc(ctxt->myDoc);
13958
0
  ctxt->myDoc = NULL;
13959
0
    }
13960
0
    xmlFreeParserCtxt(ctxt);
13961
13962
0
    return ret;
13963
0
}
13964
#endif /* LIBXML_SAX1_ENABLED */
13965
13966
/**
13967
 * xmlCreateDocParserCtxt:
13968
 * @cur:  a pointer to an array of xmlChar
13969
 *
13970
 * Creates a parser context for an XML in-memory document.
13971
 *
13972
 * Returns the new parser context or NULL
13973
 */
13974
xmlParserCtxtPtr
13975
0
xmlCreateDocParserCtxt(const xmlChar *cur) {
13976
0
    int len;
13977
13978
0
    if (cur == NULL)
13979
0
  return(NULL);
13980
0
    len = xmlStrlen(cur);
13981
0
    return(xmlCreateMemoryParserCtxt((const char *)cur, len));
13982
0
}
13983
13984
#ifdef LIBXML_SAX1_ENABLED
13985
/**
13986
 * xmlSAXParseDoc:
13987
 * @sax:  the SAX handler block
13988
 * @cur:  a pointer to an array of xmlChar
13989
 * @recovery:  work in recovery mode, i.e. tries to read no Well Formed
13990
 *             documents
13991
 *
13992
 * DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
13993
 *
13994
 * parse an XML in-memory document and build a tree.
13995
 * It use the given SAX function block to handle the parsing callback.
13996
 * If sax is NULL, fallback to the default DOM tree building routines.
13997
 *
13998
 * Returns the resulting document tree
13999
 */
14000
14001
xmlDocPtr
14002
0
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14003
0
    xmlDocPtr ret;
14004
0
    xmlParserCtxtPtr ctxt;
14005
0
    xmlSAXHandlerPtr oldsax = NULL;
14006
14007
0
    if (cur == NULL) return(NULL);
14008
14009
14010
0
    ctxt = xmlCreateDocParserCtxt(cur);
14011
0
    if (ctxt == NULL) return(NULL);
14012
0
    if (sax != NULL) {
14013
0
        oldsax = ctxt->sax;
14014
0
        ctxt->sax = sax;
14015
0
        ctxt->userData = NULL;
14016
0
    }
14017
0
    xmlDetectSAX2(ctxt);
14018
14019
0
    xmlParseDocument(ctxt);
14020
0
    if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14021
0
    else {
14022
0
       ret = NULL;
14023
0
       xmlFreeDoc(ctxt->myDoc);
14024
0
       ctxt->myDoc = NULL;
14025
0
    }
14026
0
    if (sax != NULL)
14027
0
  ctxt->sax = oldsax;
14028
0
    xmlFreeParserCtxt(ctxt);
14029
14030
0
    return(ret);
14031
0
}
14032
14033
/**
14034
 * xmlParseDoc:
14035
 * @cur:  a pointer to an array of xmlChar
14036
 *
14037
 * DEPRECATED: Use xmlReadDoc.
14038
 *
14039
 * parse an XML in-memory document and build a tree.
14040
 *
14041
 * Returns the resulting document tree
14042
 */
14043
14044
xmlDocPtr
14045
0
xmlParseDoc(const xmlChar *cur) {
14046
0
    return(xmlSAXParseDoc(NULL, cur, 0));
14047
0
}
14048
#endif /* LIBXML_SAX1_ENABLED */
14049
14050
#ifdef LIBXML_LEGACY_ENABLED
14051
/************************************************************************
14052
 *                  *
14053
 *  Specific function to keep track of entities references    *
14054
 *  and used by the XSLT debugger         *
14055
 *                  *
14056
 ************************************************************************/
14057
14058
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14059
14060
/**
14061
 * xmlAddEntityReference:
14062
 * @ent : A valid entity
14063
 * @firstNode : A valid first node for children of entity
14064
 * @lastNode : A valid last node of children entity
14065
 *
14066
 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14067
 */
14068
static void
14069
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14070
                      xmlNodePtr lastNode)
14071
{
14072
    if (xmlEntityRefFunc != NULL) {
14073
        (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14074
    }
14075
}
14076
14077
14078
/**
14079
 * xmlSetEntityReferenceFunc:
14080
 * @func: A valid function
14081
 *
14082
 * Set the function to call call back when a xml reference has been made
14083
 */
14084
void
14085
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14086
{
14087
    xmlEntityRefFunc = func;
14088
}
14089
#endif /* LIBXML_LEGACY_ENABLED */
14090
14091
/************************************************************************
14092
 *                  *
14093
 *        Miscellaneous       *
14094
 *                  *
14095
 ************************************************************************/
14096
14097
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calls made after the library is initialized return immediately.
 */

void
xmlInitParser(void) {
    /*
     * Note that the initialization code must not make memory allocations.
     */
    if (xmlParserInitialized)
        return;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexLock();
    /* Check the flag again now that the global init mutex is held. */
    if (xmlParserInitialized != 0) {
        __xmlGlobalInitMutexUnlock();
        return;
    }
#endif

#if defined(_WIN32) && (!defined(LIBXML_STATIC) || defined(LIBXML_STATIC_FOR_DLL))
    if (xmlFree == free)
        atexit(xmlCleanupParser);
#endif

    xmlInitThreadsInternal();
    xmlInitGlobalsInternal();
    xmlInitMemoryInternal();
    __xmlInitializeDict();
    xmlInitEncodingInternal();
    xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED)
    xmlInitXPathInternal();
#endif
    xmlParserInitialized = 1;

#ifdef LIBXML_THREAD_ENABLED
    __xmlGlobalInitMutexUnlock();
#endif
}
14142
14143
/**
14144
 * xmlCleanupParser:
14145
 *
14146
 * This function name is somewhat misleading. It does not clean up
14147
 * parser state, it cleans up memory allocated by the library itself.
14148
 * It is a cleanup function for the XML library. It tries to reclaim all
14149
 * related global memory allocated for the library processing.
14150
 * It doesn't deallocate any document related memory. One should
14151
 * call xmlCleanupParser() only when the process has finished using
14152
 * the library and all XML/HTML documents built with it.
14153
 * See also xmlInitParser() which has the opposite function of preparing
14154
 * the library for operations.
14155
 *
14156
 * WARNING: if your application is multithreaded or has plugin support
14157
 *          calling this may crash the application if another thread or
14158
 *          a plugin is still using libxml2. It's sometimes very hard to
14159
 *          guess if libxml2 is in use in the application, some libraries
14160
 *          or plugins may use it without notice. In case of doubt abstain
14161
 *          from calling this function or do it just before calling exit()
14162
 *          to avoid leak reports from valgrind !
14163
 */
14164
14165
void
14166
0
xmlCleanupParser(void) {
14167
0
    if (!xmlParserInitialized)
14168
0
  return;
14169
14170
0
    xmlCleanupCharEncodingHandlers();
14171
0
#ifdef LIBXML_CATALOG_ENABLED
14172
0
    xmlCatalogCleanup();
14173
0
#endif
14174
0
    xmlCleanupDictInternal();
14175
0
    xmlCleanupInputCallbacks();
14176
0
#ifdef LIBXML_OUTPUT_ENABLED
14177
0
    xmlCleanupOutputCallbacks();
14178
0
#endif
14179
0
#ifdef LIBXML_SCHEMAS_ENABLED
14180
0
    xmlSchemaCleanupTypes();
14181
0
    xmlRelaxNGCleanupTypes();
14182
0
#endif
14183
0
    xmlCleanupGlobalsInternal();
14184
0
    xmlCleanupThreadsInternal();
14185
0
    xmlCleanupMemoryInternal();
14186
0
    xmlParserInitialized = 0;
14187
0
}
14188
14189
#if defined(HAVE_ATTRIBUTE_DESTRUCTOR) && !defined(LIBXML_STATIC) && \
14190
    !defined(_WIN32)
14191
static void
14192
ATTRIBUTE_DESTRUCTOR
14193
0
xmlDestructor(void) {
14194
    /*
14195
     * Calling custom deallocation functions in a destructor can cause
14196
     * problems, for example with Nokogiri.
14197
     */
14198
0
    if (xmlFree == free)
14199
0
        xmlCleanupParser();
14200
0
}
14201
#endif
14202
14203
/************************************************************************
14204
 *                  *
14205
 *  New set (2.6.0) of simpler and more flexible APIs   *
14206
 *                  *
14207
 ************************************************************************/
14208
14209
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * The expansion is wrapped in do { } while (0) so that it is a single
 * statement: "DICT_FREE(x);" can be used as the body of an if/else
 * without changing which "if" a following "else" binds to.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14220
14221
/**
14222
 * xmlCtxtReset:
14223
 * @ctxt: an XML parser context
14224
 *
14225
 * Reset a parser context
14226
 */
14227
void
14228
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14229
30.9k
{
14230
30.9k
    xmlParserInputPtr input;
14231
30.9k
    xmlDictPtr dict;
14232
14233
30.9k
    if (ctxt == NULL)
14234
0
        return;
14235
14236
30.9k
    dict = ctxt->dict;
14237
14238
30.9k
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14239
0
        xmlFreeInputStream(input);
14240
0
    }
14241
30.9k
    ctxt->inputNr = 0;
14242
30.9k
    ctxt->input = NULL;
14243
14244
30.9k
    ctxt->spaceNr = 0;
14245
30.9k
    if (ctxt->spaceTab != NULL) {
14246
30.9k
  ctxt->spaceTab[0] = -1;
14247
30.9k
  ctxt->space = &ctxt->spaceTab[0];
14248
30.9k
    } else {
14249
0
        ctxt->space = NULL;
14250
0
    }
14251
14252
14253
30.9k
    ctxt->nodeNr = 0;
14254
30.9k
    ctxt->node = NULL;
14255
14256
30.9k
    ctxt->nameNr = 0;
14257
30.9k
    ctxt->name = NULL;
14258
14259
30.9k
    ctxt->nsNr = 0;
14260
14261
30.9k
    DICT_FREE(ctxt->version);
14262
30.9k
    ctxt->version = NULL;
14263
30.9k
    DICT_FREE(ctxt->encoding);
14264
30.9k
    ctxt->encoding = NULL;
14265
30.9k
    DICT_FREE(ctxt->directory);
14266
30.9k
    ctxt->directory = NULL;
14267
30.9k
    DICT_FREE(ctxt->extSubURI);
14268
30.9k
    ctxt->extSubURI = NULL;
14269
30.9k
    DICT_FREE(ctxt->extSubSystem);
14270
30.9k
    ctxt->extSubSystem = NULL;
14271
30.9k
    if (ctxt->myDoc != NULL)
14272
0
        xmlFreeDoc(ctxt->myDoc);
14273
30.9k
    ctxt->myDoc = NULL;
14274
14275
30.9k
    ctxt->standalone = -1;
14276
30.9k
    ctxt->hasExternalSubset = 0;
14277
30.9k
    ctxt->hasPErefs = 0;
14278
30.9k
    ctxt->html = 0;
14279
30.9k
    ctxt->external = 0;
14280
30.9k
    ctxt->instate = XML_PARSER_START;
14281
30.9k
    ctxt->token = 0;
14282
14283
30.9k
    ctxt->wellFormed = 1;
14284
30.9k
    ctxt->nsWellFormed = 1;
14285
30.9k
    ctxt->disableSAX = 0;
14286
30.9k
    ctxt->valid = 1;
14287
#if 0
14288
    ctxt->vctxt.userData = ctxt;
14289
    ctxt->vctxt.error = xmlParserValidityError;
14290
    ctxt->vctxt.warning = xmlParserValidityWarning;
14291
#endif
14292
30.9k
    ctxt->record_info = 0;
14293
30.9k
    ctxt->checkIndex = 0;
14294
30.9k
    ctxt->endCheckState = 0;
14295
30.9k
    ctxt->inSubset = 0;
14296
30.9k
    ctxt->errNo = XML_ERR_OK;
14297
30.9k
    ctxt->depth = 0;
14298
30.9k
    ctxt->charset = XML_CHAR_ENCODING_UTF8;
14299
30.9k
    ctxt->catalogs = NULL;
14300
30.9k
    ctxt->sizeentities = 0;
14301
30.9k
    ctxt->sizeentcopy = 0;
14302
30.9k
    xmlInitNodeInfoSeq(&ctxt->node_seq);
14303
14304
30.9k
    if (ctxt->attsDefault != NULL) {
14305
0
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14306
0
        ctxt->attsDefault = NULL;
14307
0
    }
14308
30.9k
    if (ctxt->attsSpecial != NULL) {
14309
0
        xmlHashFree(ctxt->attsSpecial, NULL);
14310
0
        ctxt->attsSpecial = NULL;
14311
0
    }
14312
14313
30.9k
#ifdef LIBXML_CATALOG_ENABLED
14314
30.9k
    if (ctxt->catalogs != NULL)
14315
0
  xmlCatalogFreeLocal(ctxt->catalogs);
14316
30.9k
#endif
14317
30.9k
    ctxt->nbErrors = 0;
14318
30.9k
    ctxt->nbWarnings = 0;
14319
30.9k
    if (ctxt->lastError.code != XML_ERR_OK)
14320
0
        xmlResetError(&ctxt->lastError);
14321
30.9k
}
14322
14323
/**
14324
 * xmlCtxtResetPush:
14325
 * @ctxt: an XML parser context
14326
 * @chunk:  a pointer to an array of chars
14327
 * @size:  number of chars in the array
14328
 * @filename:  an optional file name or URI
14329
 * @encoding:  the document encoding, or NULL
14330
 *
14331
 * Reset a push parser context
14332
 *
14333
 * Returns 0 in case of success and 1 in case of error
14334
 */
14335
int
14336
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14337
                 int size, const char *filename, const char *encoding)
14338
0
{
14339
0
    xmlParserInputPtr inputStream;
14340
0
    xmlParserInputBufferPtr buf;
14341
0
    xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14342
14343
0
    if (ctxt == NULL)
14344
0
        return(1);
14345
14346
0
    if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14347
0
        enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14348
14349
0
    buf = xmlAllocParserInputBuffer(enc);
14350
0
    if (buf == NULL)
14351
0
        return(1);
14352
14353
0
    if (ctxt == NULL) {
14354
0
        xmlFreeParserInputBuffer(buf);
14355
0
        return(1);
14356
0
    }
14357
14358
0
    xmlCtxtReset(ctxt);
14359
14360
0
    if (filename == NULL) {
14361
0
        ctxt->directory = NULL;
14362
0
    } else {
14363
0
        ctxt->directory = xmlParserGetDirectory(filename);
14364
0
    }
14365
14366
0
    inputStream = xmlNewInputStream(ctxt);
14367
0
    if (inputStream == NULL) {
14368
0
        xmlFreeParserInputBuffer(buf);
14369
0
        return(1);
14370
0
    }
14371
14372
0
    if (filename == NULL)
14373
0
        inputStream->filename = NULL;
14374
0
    else
14375
0
        inputStream->filename = (char *)
14376
0
            xmlCanonicPath((const xmlChar *) filename);
14377
0
    inputStream->buf = buf;
14378
0
    xmlBufResetInput(buf->buffer, inputStream);
14379
14380
0
    inputPush(ctxt, inputStream);
14381
14382
0
    if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14383
0
        (ctxt->input->buf != NULL)) {
14384
0
  size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14385
0
        size_t cur = ctxt->input->cur - ctxt->input->base;
14386
0
        int res;
14387
14388
0
        res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14389
0
        xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
14390
0
        if (res < 0) {
14391
0
            xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
14392
0
            xmlHaltParser(ctxt);
14393
0
            return(1);
14394
0
        }
14395
#ifdef DEBUG_PUSH
14396
        xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14397
#endif
14398
0
    }
14399
14400
0
    if (encoding != NULL) {
14401
0
        xmlCharEncodingHandlerPtr hdlr;
14402
14403
0
        if (ctxt->encoding != NULL)
14404
0
      xmlFree((xmlChar *) ctxt->encoding);
14405
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14406
14407
0
        hdlr = xmlFindCharEncodingHandler(encoding);
14408
0
        if (hdlr != NULL) {
14409
0
            xmlSwitchToEncoding(ctxt, hdlr);
14410
0
  } else {
14411
0
      xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14412
0
            "Unsupported encoding %s\n", BAD_CAST encoding);
14413
0
        }
14414
0
    } else if (enc != XML_CHAR_ENCODING_NONE) {
14415
0
        xmlSwitchEncoding(ctxt, enc);
14416
0
    }
14417
14418
0
    return(0);
14419
0
}
14420
14421
14422
/**
14423
 * xmlCtxtUseOptionsInternal:
14424
 * @ctxt: an XML parser context
14425
 * @options:  a combination of xmlParserOption
14426
 * @encoding:  the user provided encoding to use
14427
 *
14428
 * Applies the options to the parser context
14429
 *
14430
 * Returns 0 in case of success, the set of unknown or unimplemented options
14431
 *         in case of error.
14432
 */
14433
static int
14434
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
14435
30.6k
{
14436
30.6k
    if (ctxt == NULL)
14437
0
        return(-1);
14438
30.6k
    if (encoding != NULL) {
14439
0
        if (ctxt->encoding != NULL)
14440
0
      xmlFree((xmlChar *) ctxt->encoding);
14441
0
        ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14442
0
    }
14443
30.6k
    if (options & XML_PARSE_RECOVER) {
14444
0
        ctxt->recovery = 1;
14445
0
        options -= XML_PARSE_RECOVER;
14446
0
  ctxt->options |= XML_PARSE_RECOVER;
14447
0
    } else
14448
30.6k
        ctxt->recovery = 0;
14449
30.6k
    if (options & XML_PARSE_DTDLOAD) {
14450
0
        ctxt->loadsubset = XML_DETECT_IDS;
14451
0
        options -= XML_PARSE_DTDLOAD;
14452
0
  ctxt->options |= XML_PARSE_DTDLOAD;
14453
0
    } else
14454
30.6k
        ctxt->loadsubset = 0;
14455
30.6k
    if (options & XML_PARSE_DTDATTR) {
14456
0
        ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14457
0
        options -= XML_PARSE_DTDATTR;
14458
0
  ctxt->options |= XML_PARSE_DTDATTR;
14459
0
    }
14460
30.6k
    if (options & XML_PARSE_NOENT) {
14461
30.6k
        ctxt->replaceEntities = 1;
14462
        /* ctxt->loadsubset |= XML_DETECT_IDS; */
14463
30.6k
        options -= XML_PARSE_NOENT;
14464
30.6k
  ctxt->options |= XML_PARSE_NOENT;
14465
30.6k
    } else
14466
0
        ctxt->replaceEntities = 0;
14467
30.6k
    if (options & XML_PARSE_PEDANTIC) {
14468
0
        ctxt->pedantic = 1;
14469
0
        options -= XML_PARSE_PEDANTIC;
14470
0
  ctxt->options |= XML_PARSE_PEDANTIC;
14471
0
    } else
14472
30.6k
        ctxt->pedantic = 0;
14473
30.6k
    if (options & XML_PARSE_NOBLANKS) {
14474
0
        ctxt->keepBlanks = 0;
14475
0
        ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14476
0
        options -= XML_PARSE_NOBLANKS;
14477
0
  ctxt->options |= XML_PARSE_NOBLANKS;
14478
0
    } else
14479
30.6k
        ctxt->keepBlanks = 1;
14480
30.6k
    if (options & XML_PARSE_DTDVALID) {
14481
0
        ctxt->validate = 1;
14482
0
        if (options & XML_PARSE_NOWARNING)
14483
0
            ctxt->vctxt.warning = NULL;
14484
0
        if (options & XML_PARSE_NOERROR)
14485
0
            ctxt->vctxt.error = NULL;
14486
0
        options -= XML_PARSE_DTDVALID;
14487
0
  ctxt->options |= XML_PARSE_DTDVALID;
14488
0
    } else
14489
30.6k
        ctxt->validate = 0;
14490
30.6k
    if (options & XML_PARSE_NOWARNING) {
14491
0
        ctxt->sax->warning = NULL;
14492
0
        options -= XML_PARSE_NOWARNING;
14493
0
    }
14494
30.6k
    if (options & XML_PARSE_NOERROR) {
14495
0
        ctxt->sax->error = NULL;
14496
0
        ctxt->sax->fatalError = NULL;
14497
0
        options -= XML_PARSE_NOERROR;
14498
0
    }
14499
30.6k
#ifdef LIBXML_SAX1_ENABLED
14500
30.6k
    if (options & XML_PARSE_SAX1) {
14501
0
        ctxt->sax->startElementNs = NULL;
14502
0
        ctxt->sax->endElementNs = NULL;
14503
0
        ctxt->sax->initialized = 1;
14504
0
        options -= XML_PARSE_SAX1;
14505
0
  ctxt->options |= XML_PARSE_SAX1;
14506
0
    }
14507
30.6k
#endif /* LIBXML_SAX1_ENABLED */
14508
30.6k
    if (options & XML_PARSE_NODICT) {
14509
0
        ctxt->dictNames = 0;
14510
0
        options -= XML_PARSE_NODICT;
14511
0
  ctxt->options |= XML_PARSE_NODICT;
14512
30.6k
    } else {
14513
30.6k
        ctxt->dictNames = 1;
14514
30.6k
    }
14515
30.6k
    if (options & XML_PARSE_NOCDATA) {
14516
0
        ctxt->sax->cdataBlock = NULL;
14517
0
        options -= XML_PARSE_NOCDATA;
14518
0
  ctxt->options |= XML_PARSE_NOCDATA;
14519
0
    }
14520
30.6k
    if (options & XML_PARSE_NSCLEAN) {
14521
0
  ctxt->options |= XML_PARSE_NSCLEAN;
14522
0
        options -= XML_PARSE_NSCLEAN;
14523
0
    }
14524
30.6k
    if (options & XML_PARSE_NONET) {
14525
0
  ctxt->options |= XML_PARSE_NONET;
14526
0
        options -= XML_PARSE_NONET;
14527
0
    }
14528
30.6k
    if (options & XML_PARSE_COMPACT) {
14529
0
  ctxt->options |= XML_PARSE_COMPACT;
14530
0
        options -= XML_PARSE_COMPACT;
14531
0
    }
14532
30.6k
    if (options & XML_PARSE_OLD10) {
14533
0
  ctxt->options |= XML_PARSE_OLD10;
14534
0
        options -= XML_PARSE_OLD10;
14535
0
    }
14536
30.6k
    if (options & XML_PARSE_NOBASEFIX) {
14537
0
  ctxt->options |= XML_PARSE_NOBASEFIX;
14538
0
        options -= XML_PARSE_NOBASEFIX;
14539
0
    }
14540
30.6k
    if (options & XML_PARSE_HUGE) {
14541
0
  ctxt->options |= XML_PARSE_HUGE;
14542
0
        options -= XML_PARSE_HUGE;
14543
0
        if (ctxt->dict != NULL)
14544
0
            xmlDictSetLimit(ctxt->dict, 0);
14545
0
    }
14546
30.6k
    if (options & XML_PARSE_OLDSAX) {
14547
0
  ctxt->options |= XML_PARSE_OLDSAX;
14548
0
        options -= XML_PARSE_OLDSAX;
14549
0
    }
14550
30.6k
    if (options & XML_PARSE_IGNORE_ENC) {
14551
0
  ctxt->options |= XML_PARSE_IGNORE_ENC;
14552
0
        options -= XML_PARSE_IGNORE_ENC;
14553
0
    }
14554
30.6k
    if (options & XML_PARSE_BIG_LINES) {
14555
0
  ctxt->options |= XML_PARSE_BIG_LINES;
14556
0
        options -= XML_PARSE_BIG_LINES;
14557
0
    }
14558
30.6k
    ctxt->linenumbers = 1;
14559
30.6k
    return (options);
14560
30.6k
}
14561
14562
/**
14563
 * xmlCtxtUseOptions:
14564
 * @ctxt: an XML parser context
14565
 * @options:  a combination of xmlParserOption
14566
 *
14567
 * Applies the options to the parser context
14568
 *
14569
 * Returns 0 in case of success, the set of unknown or unimplemented options
14570
 *         in case of error.
14571
 */
14572
int
14573
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14574
0
{
14575
0
   return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14576
0
}
14577
14578
/**
14579
 * xmlDoRead:
14580
 * @ctxt:  an XML parser context
14581
 * @URL:  the base URL to use for the document
14582
 * @encoding:  the document encoding, or NULL
14583
 * @options:  a combination of xmlParserOption
14584
 * @reuse:  keep the context for reuse
14585
 *
14586
 * Common front-end for the xmlRead functions
14587
 *
14588
 * Returns the resulting document tree or NULL
14589
 */
14590
static xmlDocPtr
14591
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14592
          int options, int reuse)
14593
30.6k
{
14594
30.6k
    xmlDocPtr ret;
14595
14596
30.6k
    xmlCtxtUseOptionsInternal(ctxt, options, encoding);
14597
30.6k
    if (encoding != NULL) {
14598
0
        xmlCharEncodingHandlerPtr hdlr;
14599
14600
        /*
14601
         * TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14602
         * caller provided an encoding. Otherwise, we might switch to
14603
         * the encoding from the XML declaration which is likely to
14604
         * break things. Also see xmlSwitchInputEncoding.
14605
         */
14606
0
  hdlr = xmlFindCharEncodingHandler(encoding);
14607
0
  if (hdlr != NULL)
14608
0
      xmlSwitchToEncoding(ctxt, hdlr);
14609
0
    }
14610
30.6k
    if ((URL != NULL) && (ctxt->input != NULL) &&
14611
30.6k
        (ctxt->input->filename == NULL))
14612
0
        ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14613
30.6k
    xmlParseDocument(ctxt);
14614
30.6k
    if ((ctxt->wellFormed) || ctxt->recovery)
14615
14.2k
        ret = ctxt->myDoc;
14616
16.3k
    else {
14617
16.3k
        ret = NULL;
14618
16.3k
  if (ctxt->myDoc != NULL) {
14619
14.7k
      xmlFreeDoc(ctxt->myDoc);
14620
14.7k
  }
14621
16.3k
    }
14622
30.6k
    ctxt->myDoc = NULL;
14623
30.6k
    if (!reuse) {
14624
0
  xmlFreeParserCtxt(ctxt);
14625
0
    }
14626
14627
30.6k
    return (ret);
14628
30.6k
}
14629
14630
/**
14631
 * xmlReadDoc:
14632
 * @cur:  a pointer to a zero terminated string
14633
 * @URL:  the base URL to use for the document
14634
 * @encoding:  the document encoding, or NULL
14635
 * @options:  a combination of xmlParserOption
14636
 *
14637
 * parse an XML in-memory document and build a tree.
14638
 *
14639
 * Returns the resulting document tree
14640
 */
14641
xmlDocPtr
14642
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14643
0
{
14644
0
    xmlParserCtxtPtr ctxt;
14645
14646
0
    if (cur == NULL)
14647
0
        return (NULL);
14648
0
    xmlInitParser();
14649
14650
0
    ctxt = xmlCreateDocParserCtxt(cur);
14651
0
    if (ctxt == NULL)
14652
0
        return (NULL);
14653
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14654
0
}
14655
14656
/**
14657
 * xmlReadFile:
14658
 * @filename:  a file or URL
14659
 * @encoding:  the document encoding, or NULL
14660
 * @options:  a combination of xmlParserOption
14661
 *
14662
 * parse an XML file from the filesystem or the network.
14663
 *
14664
 * Returns the resulting document tree
14665
 */
14666
xmlDocPtr
14667
xmlReadFile(const char *filename, const char *encoding, int options)
14668
0
{
14669
0
    xmlParserCtxtPtr ctxt;
14670
14671
0
    xmlInitParser();
14672
0
    ctxt = xmlCreateURLParserCtxt(filename, options);
14673
0
    if (ctxt == NULL)
14674
0
        return (NULL);
14675
0
    return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14676
0
}
14677
14678
/**
14679
 * xmlReadMemory:
14680
 * @buffer:  a pointer to a char array
14681
 * @size:  the size of the array
14682
 * @URL:  the base URL to use for the document
14683
 * @encoding:  the document encoding, or NULL
14684
 * @options:  a combination of xmlParserOption
14685
 *
14686
 * parse an XML in-memory document and build a tree.
14687
 *
14688
 * Returns the resulting document tree
14689
 */
14690
xmlDocPtr
14691
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14692
0
{
14693
0
    xmlParserCtxtPtr ctxt;
14694
14695
0
    xmlInitParser();
14696
0
    ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14697
0
    if (ctxt == NULL)
14698
0
        return (NULL);
14699
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14700
0
}
14701
14702
/**
14703
 * xmlReadFd:
14704
 * @fd:  an open file descriptor
14705
 * @URL:  the base URL to use for the document
14706
 * @encoding:  the document encoding, or NULL
14707
 * @options:  a combination of xmlParserOption
14708
 *
14709
 * parse an XML from a file descriptor and build a tree.
14710
 * NOTE that the file descriptor will not be closed when the
14711
 *      reader is closed or reset.
14712
 *
14713
 * Returns the resulting document tree
14714
 */
14715
xmlDocPtr
14716
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14717
0
{
14718
0
    xmlParserCtxtPtr ctxt;
14719
0
    xmlParserInputBufferPtr input;
14720
0
    xmlParserInputPtr stream;
14721
14722
0
    if (fd < 0)
14723
0
        return (NULL);
14724
0
    xmlInitParser();
14725
14726
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14727
0
    if (input == NULL)
14728
0
        return (NULL);
14729
0
    input->closecallback = NULL;
14730
0
    ctxt = xmlNewParserCtxt();
14731
0
    if (ctxt == NULL) {
14732
0
        xmlFreeParserInputBuffer(input);
14733
0
        return (NULL);
14734
0
    }
14735
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14736
0
    if (stream == NULL) {
14737
0
        xmlFreeParserInputBuffer(input);
14738
0
  xmlFreeParserCtxt(ctxt);
14739
0
        return (NULL);
14740
0
    }
14741
0
    inputPush(ctxt, stream);
14742
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14743
0
}
14744
14745
/**
14746
 * xmlReadIO:
14747
 * @ioread:  an I/O read function
14748
 * @ioclose:  an I/O close function
14749
 * @ioctx:  an I/O handler
14750
 * @URL:  the base URL to use for the document
14751
 * @encoding:  the document encoding, or NULL
14752
 * @options:  a combination of xmlParserOption
14753
 *
14754
 * parse an XML document from I/O functions and source and build a tree.
14755
 *
14756
 * Returns the resulting document tree
14757
 */
14758
xmlDocPtr
14759
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14760
          void *ioctx, const char *URL, const char *encoding, int options)
14761
0
{
14762
0
    xmlParserCtxtPtr ctxt;
14763
0
    xmlParserInputBufferPtr input;
14764
0
    xmlParserInputPtr stream;
14765
14766
0
    if (ioread == NULL)
14767
0
        return (NULL);
14768
0
    xmlInitParser();
14769
14770
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14771
0
                                         XML_CHAR_ENCODING_NONE);
14772
0
    if (input == NULL) {
14773
0
        if (ioclose != NULL)
14774
0
            ioclose(ioctx);
14775
0
        return (NULL);
14776
0
    }
14777
0
    ctxt = xmlNewParserCtxt();
14778
0
    if (ctxt == NULL) {
14779
0
        xmlFreeParserInputBuffer(input);
14780
0
        return (NULL);
14781
0
    }
14782
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14783
0
    if (stream == NULL) {
14784
0
        xmlFreeParserInputBuffer(input);
14785
0
  xmlFreeParserCtxt(ctxt);
14786
0
        return (NULL);
14787
0
    }
14788
0
    inputPush(ctxt, stream);
14789
0
    return (xmlDoRead(ctxt, URL, encoding, options, 0));
14790
0
}
14791
14792
/**
14793
 * xmlCtxtReadDoc:
14794
 * @ctxt:  an XML parser context
14795
 * @cur:  a pointer to a zero terminated string
14796
 * @URL:  the base URL to use for the document
14797
 * @encoding:  the document encoding, or NULL
14798
 * @options:  a combination of xmlParserOption
14799
 *
14800
 * parse an XML in-memory document and build a tree.
14801
 * This reuses the existing @ctxt parser context
14802
 *
14803
 * Returns the resulting document tree
14804
 */
14805
xmlDocPtr
14806
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
14807
               const char *URL, const char *encoding, int options)
14808
0
{
14809
0
    if (cur == NULL)
14810
0
        return (NULL);
14811
0
    return (xmlCtxtReadMemory(ctxt, (const char *) cur, xmlStrlen(cur), URL,
14812
0
                              encoding, options));
14813
0
}
14814
14815
/**
14816
 * xmlCtxtReadFile:
14817
 * @ctxt:  an XML parser context
14818
 * @filename:  a file or URL
14819
 * @encoding:  the document encoding, or NULL
14820
 * @options:  a combination of xmlParserOption
14821
 *
14822
 * parse an XML file from the filesystem or the network.
14823
 * This reuses the existing @ctxt parser context
14824
 *
14825
 * Returns the resulting document tree
14826
 */
14827
xmlDocPtr
14828
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14829
                const char *encoding, int options)
14830
30.9k
{
14831
30.9k
    xmlParserInputPtr stream;
14832
14833
30.9k
    if (filename == NULL)
14834
0
        return (NULL);
14835
30.9k
    if (ctxt == NULL)
14836
0
        return (NULL);
14837
30.9k
    xmlInitParser();
14838
14839
30.9k
    xmlCtxtReset(ctxt);
14840
14841
30.9k
    stream = xmlLoadExternalEntity(filename, NULL, ctxt);
14842
30.9k
    if (stream == NULL) {
14843
250
        return (NULL);
14844
250
    }
14845
30.6k
    inputPush(ctxt, stream);
14846
30.6k
    return (xmlDoRead(ctxt, NULL, encoding, options, 1));
14847
30.9k
}
14848
14849
/**
14850
 * xmlCtxtReadMemory:
14851
 * @ctxt:  an XML parser context
14852
 * @buffer:  a pointer to a char array
14853
 * @size:  the size of the array
14854
 * @URL:  the base URL to use for the document
14855
 * @encoding:  the document encoding, or NULL
14856
 * @options:  a combination of xmlParserOption
14857
 *
14858
 * parse an XML in-memory document and build a tree.
14859
 * This reuses the existing @ctxt parser context
14860
 *
14861
 * Returns the resulting document tree
14862
 */
14863
xmlDocPtr
14864
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
14865
                  const char *URL, const char *encoding, int options)
14866
0
{
14867
0
    xmlParserInputBufferPtr input;
14868
0
    xmlParserInputPtr stream;
14869
14870
0
    if (ctxt == NULL)
14871
0
        return (NULL);
14872
0
    if (buffer == NULL)
14873
0
        return (NULL);
14874
0
    xmlInitParser();
14875
14876
0
    xmlCtxtReset(ctxt);
14877
14878
0
    input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14879
0
    if (input == NULL) {
14880
0
  return(NULL);
14881
0
    }
14882
14883
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14884
0
    if (stream == NULL) {
14885
0
  xmlFreeParserInputBuffer(input);
14886
0
  return(NULL);
14887
0
    }
14888
14889
0
    inputPush(ctxt, stream);
14890
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
14891
0
}
14892
14893
/**
14894
 * xmlCtxtReadFd:
14895
 * @ctxt:  an XML parser context
14896
 * @fd:  an open file descriptor
14897
 * @URL:  the base URL to use for the document
14898
 * @encoding:  the document encoding, or NULL
14899
 * @options:  a combination of xmlParserOption
14900
 *
14901
 * parse an XML from a file descriptor and build a tree.
14902
 * This reuses the existing @ctxt parser context
14903
 * NOTE that the file descriptor will not be closed when the
14904
 *      reader is closed or reset.
14905
 *
14906
 * Returns the resulting document tree
14907
 */
14908
xmlDocPtr
14909
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14910
              const char *URL, const char *encoding, int options)
14911
0
{
14912
0
    xmlParserInputBufferPtr input;
14913
0
    xmlParserInputPtr stream;
14914
14915
0
    if (fd < 0)
14916
0
        return (NULL);
14917
0
    if (ctxt == NULL)
14918
0
        return (NULL);
14919
0
    xmlInitParser();
14920
14921
0
    xmlCtxtReset(ctxt);
14922
14923
14924
0
    input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14925
0
    if (input == NULL)
14926
0
        return (NULL);
14927
0
    input->closecallback = NULL;
14928
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14929
0
    if (stream == NULL) {
14930
0
        xmlFreeParserInputBuffer(input);
14931
0
        return (NULL);
14932
0
    }
14933
0
    inputPush(ctxt, stream);
14934
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
14935
0
}
14936
14937
/**
14938
 * xmlCtxtReadIO:
14939
 * @ctxt:  an XML parser context
14940
 * @ioread:  an I/O read function
14941
 * @ioclose:  an I/O close function
14942
 * @ioctx:  an I/O handler
14943
 * @URL:  the base URL to use for the document
14944
 * @encoding:  the document encoding, or NULL
14945
 * @options:  a combination of xmlParserOption
14946
 *
14947
 * parse an XML document from I/O functions and source and build a tree.
14948
 * This reuses the existing @ctxt parser context
14949
 *
14950
 * Returns the resulting document tree
14951
 */
14952
xmlDocPtr
14953
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14954
              xmlInputCloseCallback ioclose, void *ioctx,
14955
        const char *URL,
14956
              const char *encoding, int options)
14957
0
{
14958
0
    xmlParserInputBufferPtr input;
14959
0
    xmlParserInputPtr stream;
14960
14961
0
    if (ioread == NULL)
14962
0
        return (NULL);
14963
0
    if (ctxt == NULL)
14964
0
        return (NULL);
14965
0
    xmlInitParser();
14966
14967
0
    xmlCtxtReset(ctxt);
14968
14969
0
    input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14970
0
                                         XML_CHAR_ENCODING_NONE);
14971
0
    if (input == NULL) {
14972
0
        if (ioclose != NULL)
14973
0
            ioclose(ioctx);
14974
0
        return (NULL);
14975
0
    }
14976
0
    stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14977
0
    if (stream == NULL) {
14978
0
        xmlFreeParserInputBuffer(input);
14979
0
        return (NULL);
14980
0
    }
14981
0
    inputPush(ctxt, stream);
14982
0
    return (xmlDoRead(ctxt, URL, encoding, options, 1));
14983
0
}
14984