Coverage Report

Created: 2023-11-19 07:09

/src/libprotobuf-mutator/build/examples/libxml2/external.libxml2/src/external.libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
59.6k
#define NS_INDEX_EMPTY  INT_MAX
78
29.4k
#define NS_INDEX_XML    (INT_MAX - 1)
79
15.1k
#define URI_HASH_EMPTY  0xD943A04E
80
2.17k
#define URI_HASH_XML    0xF0451F02
81
82
struct _xmlStartTag {
83
    const xmlChar *prefix;
84
    const xmlChar *URI;
85
    int line;
86
    int nsNr;
87
};
88
89
typedef struct {
90
    void *saxData;
91
    unsigned prefixHashValue;
92
    unsigned uriHashValue;
93
    unsigned elementId;
94
    int oldIndex;
95
} xmlParserNsExtra;
96
97
typedef struct {
98
    unsigned hashValue;
99
    int index;
100
} xmlParserNsBucket;
101
102
struct _xmlParserNsData {
103
    xmlParserNsExtra *extra;
104
105
    unsigned hashSize;
106
    unsigned hashElems;
107
    xmlParserNsBucket *hash;
108
109
    unsigned elementId;
110
    int defaultNsIndex;
111
};
112
113
struct _xmlAttrHashBucket {
114
    int index;
115
};
116
117
static xmlParserCtxtPtr
118
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
119
        const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
120
        xmlParserCtxtPtr pctx);
121
122
static int
123
xmlParseElementStart(xmlParserCtxtPtr ctxt);
124
125
static void
126
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
127
128
/************************************************************************
129
 *                  *
130
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
131
 *                  *
132
 ************************************************************************/
133
134
#define XML_PARSER_BIG_ENTITY 1000
135
#define XML_PARSER_LOT_ENTITY 5000
136
137
/*
138
 * Constants for protection against abusive entity expansion
139
 * ("billion laughs").
140
 */
141
142
/*
143
 * A certain amount of entity expansion which is always allowed.
144
 */
145
212k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
146
147
/*
148
 * Fixed cost for each entity reference. This crudely models processing time
149
 * as well to protect, for example, against exponential expansion of empty
150
 * or very short entities.
151
 */
152
212k
#define XML_ENT_FIXED_COST 20
153
154
/**
155
 * xmlParserMaxDepth:
156
 *
157
 * arbitrary depth limit for the XML documents that we allow to
158
 * process. This is not a limitation of the parser but a safety
159
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
160
 * parser option.
161
 */
162
unsigned int xmlParserMaxDepth = 256;
163
164
165
166
509k
#define XML_PARSER_BIG_BUFFER_SIZE 300
167
147M
#define XML_PARSER_BUFFER_SIZE 100
168
20.1k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
169
170
/**
171
 * XML_PARSER_CHUNK_SIZE
172
 *
173
 * When calling GROW that's the minimal amount of data
174
 * the parser expects to have received. It is not a hard
175
 * limit but an optimization when reading strings like Names
176
 * It is not strictly needed as long as the input's available characters
177
 * are followed by 0, which should be provided by the I/O level
178
 */
179
#define XML_PARSER_CHUNK_SIZE 100
180
181
/**
182
 * xmlParserVersion:
183
 *
184
 * Constant string describing the internal version of the library
185
 */
186
const char *const
187
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
188
189
/*
190
 * List of XML prefixed PI allowed by W3C specs
191
 */
192
193
static const char* const xmlW3CPIs[] = {
194
    "xml-stylesheet",
195
    "xml-model",
196
    NULL
197
};
198
199
200
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
201
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
202
                                              const xmlChar **str);
203
204
static xmlParserErrors
205
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
206
                xmlSAXHandlerPtr sax,
207
          void *user_data, int depth, const xmlChar *URL,
208
          const xmlChar *ID, xmlNodePtr *list);
209
210
static int
211
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
212
                          const char *encoding);
213
#ifdef LIBXML_LEGACY_ENABLED
214
static void
215
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
216
                      xmlNodePtr lastNode);
217
#endif /* LIBXML_LEGACY_ENABLED */
218
219
static xmlParserErrors
220
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
221
          const xmlChar *string, void *user_data, xmlNodePtr *lst);
222
223
static int
224
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
225
226
/************************************************************************
227
 *                  *
228
 *    Some factorized error routines        *
229
 *                  *
230
 ************************************************************************/
231
232
/**
233
 * xmlErrAttributeDup:
234
 * @ctxt:  an XML parser context
235
 * @prefix:  the attribute prefix
236
 * @localname:  the attribute localname
237
 *
238
 * Handle a redefinition of attribute error
239
 */
240
static void
241
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
242
                   const xmlChar * localname)
243
6.38k
{
244
6.38k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
245
6.38k
        (ctxt->instate == XML_PARSER_EOF))
246
89
  return;
247
6.29k
    if (ctxt != NULL)
248
6.29k
  ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
249
250
6.29k
    if (prefix == NULL)
251
5.74k
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
252
5.74k
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
253
5.74k
                        (const char *) localname, NULL, NULL, 0, 0,
254
5.74k
                        "Attribute %s redefined\n", localname);
255
544
    else
256
544
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
257
544
                        XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
258
544
                        (const char *) prefix, (const char *) localname,
259
544
                        NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
260
544
                        localname);
261
6.29k
    if (ctxt != NULL) {
262
6.29k
  ctxt->wellFormed = 0;
263
6.29k
  if (ctxt->recovery == 0)
264
3.14k
      ctxt->disableSAX = 1;
265
6.29k
    }
266
6.29k
}
267
268
/**
269
 * xmlFatalErrMsg:
270
 * @ctxt:  an XML parser context
271
 * @error:  the error number
272
 * @msg:  the error message
273
 *
274
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
275
 */
276
static void LIBXML_ATTR_FORMAT(3,0)
277
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
278
               const char *msg)
279
333k
{
280
333k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
281
333k
        (ctxt->instate == XML_PARSER_EOF))
282
263
  return;
283
332k
    if (ctxt != NULL)
284
332k
  ctxt->errNo = error;
285
332k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
286
332k
                    XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
287
332k
    if (ctxt != NULL) {
288
332k
  ctxt->wellFormed = 0;
289
332k
  if (ctxt->recovery == 0)
290
61.7k
      ctxt->disableSAX = 1;
291
332k
    }
292
332k
}
293
294
/**
295
 * xmlWarningMsg:
296
 * @ctxt:  an XML parser context
297
 * @error:  the error number
298
 * @msg:  the error message
299
 * @str1:  extra data
300
 * @str2:  extra data
301
 *
302
 * Handle a warning.
303
 */
304
void LIBXML_ATTR_FORMAT(3,0)
305
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
306
              const char *msg, const xmlChar *str1, const xmlChar *str2)
307
5.77k
{
308
5.77k
    xmlStructuredErrorFunc schannel = NULL;
309
310
5.77k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
311
5.77k
        (ctxt->instate == XML_PARSER_EOF))
312
0
  return;
313
5.77k
    if ((ctxt != NULL) && (ctxt->sax != NULL) &&
314
5.77k
        (ctxt->sax->initialized == XML_SAX2_MAGIC))
315
3.71k
        schannel = ctxt->sax->serror;
316
5.77k
    if (ctxt != NULL) {
317
5.77k
        __xmlRaiseError(schannel,
318
5.77k
                    (ctxt->sax) ? ctxt->sax->warning : NULL,
319
5.77k
                    ctxt->userData,
320
5.77k
                    ctxt, NULL, XML_FROM_PARSER, error,
321
5.77k
                    XML_ERR_WARNING, NULL, 0,
322
5.77k
        (const char *) str1, (const char *) str2, NULL, 0, 0,
323
5.77k
        msg, (const char *) str1, (const char *) str2);
324
5.77k
    } else {
325
0
        __xmlRaiseError(schannel, NULL, NULL,
326
0
                    ctxt, NULL, XML_FROM_PARSER, error,
327
0
                    XML_ERR_WARNING, NULL, 0,
328
0
        (const char *) str1, (const char *) str2, NULL, 0, 0,
329
0
        msg, (const char *) str1, (const char *) str2);
330
0
    }
331
5.77k
}
332
333
/**
334
 * xmlValidityError:
335
 * @ctxt:  an XML parser context
336
 * @error:  the error number
337
 * @msg:  the error message
338
 * @str1:  extra data
339
 *
340
 * Handle a validity error.
341
 */
342
static void LIBXML_ATTR_FORMAT(3,0)
343
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
344
              const char *msg, const xmlChar *str1, const xmlChar *str2)
345
1.88k
{
346
1.88k
    xmlStructuredErrorFunc schannel = NULL;
347
348
1.88k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
349
1.88k
        (ctxt->instate == XML_PARSER_EOF))
350
0
  return;
351
1.88k
    if (ctxt != NULL) {
352
1.88k
  ctxt->errNo = error;
353
1.88k
  if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
354
1.69k
      schannel = ctxt->sax->serror;
355
1.88k
    }
356
1.88k
    if (ctxt != NULL) {
357
1.88k
        __xmlRaiseError(schannel,
358
1.88k
                    ctxt->vctxt.error, ctxt->vctxt.userData,
359
1.88k
                    ctxt, NULL, XML_FROM_DTD, error,
360
1.88k
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
361
1.88k
        (const char *) str2, NULL, 0, 0,
362
1.88k
        msg, (const char *) str1, (const char *) str2);
363
1.88k
  ctxt->valid = 0;
364
1.88k
    } else {
365
0
        __xmlRaiseError(schannel, NULL, NULL,
366
0
                    ctxt, NULL, XML_FROM_DTD, error,
367
0
                    XML_ERR_ERROR, NULL, 0, (const char *) str1,
368
0
        (const char *) str2, NULL, 0, 0,
369
0
        msg, (const char *) str1, (const char *) str2);
370
0
    }
371
1.88k
}
372
373
/**
374
 * xmlFatalErrMsgInt:
375
 * @ctxt:  an XML parser context
376
 * @error:  the error number
377
 * @msg:  the error message
378
 * @val:  an integer value
379
 *
380
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
381
 */
382
static void LIBXML_ATTR_FORMAT(3,0)
383
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
384
                  const char *msg, int val)
385
14.5k
{
386
14.5k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
387
14.5k
        (ctxt->instate == XML_PARSER_EOF))
388
0
  return;
389
14.5k
    if (ctxt != NULL)
390
14.5k
  ctxt->errNo = error;
391
14.5k
    __xmlRaiseError(NULL, NULL, NULL,
392
14.5k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
393
14.5k
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
394
14.5k
    if (ctxt != NULL) {
395
14.5k
  ctxt->wellFormed = 0;
396
14.5k
  if (ctxt->recovery == 0)
397
7.30k
      ctxt->disableSAX = 1;
398
14.5k
    }
399
14.5k
}
400
401
/**
402
 * xmlFatalErrMsgStrIntStr:
403
 * @ctxt:  an XML parser context
404
 * @error:  the error number
405
 * @msg:  the error message
406
 * @str1:  an string info
407
 * @val:  an integer value
408
 * @str2:  an string info
409
 *
410
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
411
 */
412
static void LIBXML_ATTR_FORMAT(3,0)
413
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
414
                  const char *msg, const xmlChar *str1, int val,
415
      const xmlChar *str2)
416
117k
{
417
117k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
418
117k
        (ctxt->instate == XML_PARSER_EOF))
419
0
  return;
420
117k
    if (ctxt != NULL)
421
117k
  ctxt->errNo = error;
422
117k
    __xmlRaiseError(NULL, NULL, NULL,
423
117k
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
424
117k
                    NULL, 0, (const char *) str1, (const char *) str2,
425
117k
        NULL, val, 0, msg, str1, val, str2);
426
117k
    if (ctxt != NULL) {
427
117k
  ctxt->wellFormed = 0;
428
117k
  if (ctxt->recovery == 0)
429
33.6k
      ctxt->disableSAX = 1;
430
117k
    }
431
117k
}
432
433
/**
434
 * xmlFatalErrMsgStr:
435
 * @ctxt:  an XML parser context
436
 * @error:  the error number
437
 * @msg:  the error message
438
 * @val:  a string value
439
 *
440
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
441
 */
442
static void LIBXML_ATTR_FORMAT(3,0)
443
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
444
                  const char *msg, const xmlChar * val)
445
181k
{
446
181k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
447
181k
        (ctxt->instate == XML_PARSER_EOF))
448
1
  return;
449
181k
    if (ctxt != NULL)
450
181k
  ctxt->errNo = error;
451
181k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
452
181k
                    XML_FROM_PARSER, error, XML_ERR_FATAL,
453
181k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
454
181k
                    val);
455
181k
    if (ctxt != NULL) {
456
181k
  ctxt->wellFormed = 0;
457
181k
  if (ctxt->recovery == 0)
458
10.1k
      ctxt->disableSAX = 1;
459
181k
    }
460
181k
}
461
462
/**
463
 * xmlErrMsgStr:
464
 * @ctxt:  an XML parser context
465
 * @error:  the error number
466
 * @msg:  the error message
467
 * @val:  a string value
468
 *
469
 * Handle a non fatal parser error
470
 */
471
static void LIBXML_ATTR_FORMAT(3,0)
472
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
473
                  const char *msg, const xmlChar * val)
474
100k
{
475
100k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
476
100k
        (ctxt->instate == XML_PARSER_EOF))
477
0
  return;
478
100k
    if (ctxt != NULL)
479
100k
  ctxt->errNo = error;
480
100k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
481
100k
                    XML_FROM_PARSER, error, XML_ERR_ERROR,
482
100k
                    NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
483
100k
                    val);
484
100k
}
485
486
/**
487
 * xmlNsErr:
488
 * @ctxt:  an XML parser context
489
 * @error:  the error number
490
 * @msg:  the message
491
 * @info1:  extra information string
492
 * @info2:  extra information string
493
 *
494
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
495
 */
496
static void LIBXML_ATTR_FORMAT(3,0)
497
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
498
         const char *msg,
499
         const xmlChar * info1, const xmlChar * info2,
500
         const xmlChar * info3)
501
54.8k
{
502
54.8k
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
503
54.8k
        (ctxt->instate == XML_PARSER_EOF))
504
325
  return;
505
54.5k
    if (ctxt != NULL)
506
54.5k
  ctxt->errNo = error;
507
54.5k
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
508
54.5k
                    XML_ERR_ERROR, NULL, 0, (const char *) info1,
509
54.5k
                    (const char *) info2, (const char *) info3, 0, 0, msg,
510
54.5k
                    info1, info2, info3);
511
54.5k
    if (ctxt != NULL)
512
54.5k
  ctxt->nsWellFormed = 0;
513
54.5k
}
514
515
/**
516
 * xmlNsWarn
517
 * @ctxt:  an XML parser context
518
 * @error:  the error number
519
 * @msg:  the message
520
 * @info1:  extra information string
521
 * @info2:  extra information string
522
 *
523
 * Handle a namespace warning error
524
 */
525
static void LIBXML_ATTR_FORMAT(3,0)
526
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
527
         const char *msg,
528
         const xmlChar * info1, const xmlChar * info2,
529
         const xmlChar * info3)
530
870
{
531
870
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
532
870
        (ctxt->instate == XML_PARSER_EOF))
533
0
  return;
534
870
    __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
535
870
                    XML_ERR_WARNING, NULL, 0, (const char *) info1,
536
870
                    (const char *) info2, (const char *) info3, 0, 0, msg,
537
870
                    info1, info2, info3);
538
870
}
539
540
/*
 * Add val to *dst using saturation arithmetic: if the sum would exceed
 * ULONG_MAX, *dst is clamped to ULONG_MAX instead of wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest increment that still fits without overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val <= room) ? (*dst + val) : ULONG_MAX;
}
547
548
/*
 * Add a size_t quantity to *dst using saturation arithmetic: if the sum
 * would exceed ULONG_MAX, *dst is clamped to ULONG_MAX.
 *
 * The value is taken as size_t so that, on platforms where size_t is wider
 * than unsigned long, a large value is compared against the remaining room
 * at full width instead of being truncated at the call boundary.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val fits into the remaining room, so the cast is lossless. */
        *dst += (unsigned long) val;
}
555
556
/**
557
 * xmlParserEntityCheck:
558
 * @ctxt:  parser context
559
 * @extra:  sum of unexpanded entity sizes
560
 *
561
 * Check for non-linear entity expansion behaviour.
562
 *
563
 * In some cases like xmlStringDecodeEntities, this function is called
564
 * for each, possibly nested entity and its unexpanded content length.
565
 *
566
 * In other cases like xmlParseReference, it's only called for each
567
 * top-level entity with its unexpanded content length plus the sum of
568
 * the unexpanded content lengths (plus fixed cost) of all nested
569
 * entities.
570
 *
571
 * Summing the unexpanded lengths also adds the length of the reference.
572
 * This is by design. Taking the length of the entity name into account
573
 * discourages attacks that try to waste CPU time with abusively long
574
 * entity names. See test/recurse/lol6.xml for example. Each call also
575
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
576
 * short entities.
577
 *
578
 * Returns 1 on error, 0 on success.
579
 */
580
static int
581
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
582
212k
{
583
212k
    unsigned long consumed;
584
212k
    xmlParserInputPtr input = ctxt->input;
585
212k
    xmlEntityPtr entity = input->entity;
586
587
    /*
588
     * Compute total consumed bytes so far, including input streams of
589
     * external entities.
590
     */
591
212k
    consumed = input->parentConsumed;
592
212k
    if ((entity == NULL) ||
593
212k
        ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
594
142k
         ((entity->flags & XML_ENT_PARSED) == 0))) {
595
69.3k
        xmlSaturatedAdd(&consumed, input->consumed);
596
69.3k
        xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
597
69.3k
    }
598
212k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
599
600
    /*
601
     * Add extra cost and some fixed cost.
602
     */
603
212k
    xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
604
212k
    xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
605
606
    /*
607
     * It's important to always use saturation arithmetic when tracking
608
     * entity sizes to make the size checks reliable. If "sizeentcopy"
609
     * overflows, we have to abort.
610
     */
611
212k
    if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
612
212k
        ((ctxt->sizeentcopy >= ULONG_MAX) ||
613
32
         (ctxt->sizeentcopy / ctxt->maxAmpl > consumed))) {
614
32
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
615
32
                       "Maximum entity amplification factor exceeded, see "
616
32
                       "xmlCtxtSetMaxAmplification.\n");
617
32
        xmlHaltParser(ctxt);
618
32
        return(1);
619
32
    }
620
621
212k
    return(0);
622
212k
}
623
624
/************************************************************************
625
 *                  *
626
 *    Library wide options          *
627
 *                  *
628
 ************************************************************************/
629
630
/**
631
  * xmlHasFeature:
632
  * @feature: the feature to be examined
633
  *
634
  * Examines if the library has been compiled with a given feature.
635
  *
636
  * Returns a non-zero value if the feature exist, otherwise zero.
637
  * Returns zero (0) if the feature does not exist or an unknown
638
  * unknown feature is requested, non-zero otherwise.
639
  */
640
int
641
xmlHasFeature(xmlFeature feature)
642
0
{
643
0
    switch (feature) {
644
0
  case XML_WITH_THREAD:
645
0
#ifdef LIBXML_THREAD_ENABLED
646
0
      return(1);
647
#else
648
      return(0);
649
#endif
650
0
        case XML_WITH_TREE:
651
0
#ifdef LIBXML_TREE_ENABLED
652
0
            return(1);
653
#else
654
            return(0);
655
#endif
656
0
        case XML_WITH_OUTPUT:
657
0
#ifdef LIBXML_OUTPUT_ENABLED
658
0
            return(1);
659
#else
660
            return(0);
661
#endif
662
0
        case XML_WITH_PUSH:
663
0
#ifdef LIBXML_PUSH_ENABLED
664
0
            return(1);
665
#else
666
            return(0);
667
#endif
668
0
        case XML_WITH_READER:
669
0
#ifdef LIBXML_READER_ENABLED
670
0
            return(1);
671
#else
672
            return(0);
673
#endif
674
0
        case XML_WITH_PATTERN:
675
0
#ifdef LIBXML_PATTERN_ENABLED
676
0
            return(1);
677
#else
678
            return(0);
679
#endif
680
0
        case XML_WITH_WRITER:
681
0
#ifdef LIBXML_WRITER_ENABLED
682
0
            return(1);
683
#else
684
            return(0);
685
#endif
686
0
        case XML_WITH_SAX1:
687
0
#ifdef LIBXML_SAX1_ENABLED
688
0
            return(1);
689
#else
690
            return(0);
691
#endif
692
0
        case XML_WITH_FTP:
693
#ifdef LIBXML_FTP_ENABLED
694
            return(1);
695
#else
696
0
            return(0);
697
0
#endif
698
0
        case XML_WITH_HTTP:
699
0
#ifdef LIBXML_HTTP_ENABLED
700
0
            return(1);
701
#else
702
            return(0);
703
#endif
704
0
        case XML_WITH_VALID:
705
0
#ifdef LIBXML_VALID_ENABLED
706
0
            return(1);
707
#else
708
            return(0);
709
#endif
710
0
        case XML_WITH_HTML:
711
0
#ifdef LIBXML_HTML_ENABLED
712
0
            return(1);
713
#else
714
            return(0);
715
#endif
716
0
        case XML_WITH_LEGACY:
717
#ifdef LIBXML_LEGACY_ENABLED
718
            return(1);
719
#else
720
0
            return(0);
721
0
#endif
722
0
        case XML_WITH_C14N:
723
0
#ifdef LIBXML_C14N_ENABLED
724
0
            return(1);
725
#else
726
            return(0);
727
#endif
728
0
        case XML_WITH_CATALOG:
729
0
#ifdef LIBXML_CATALOG_ENABLED
730
0
            return(1);
731
#else
732
            return(0);
733
#endif
734
0
        case XML_WITH_XPATH:
735
0
#ifdef LIBXML_XPATH_ENABLED
736
0
            return(1);
737
#else
738
            return(0);
739
#endif
740
0
        case XML_WITH_XPTR:
741
0
#ifdef LIBXML_XPTR_ENABLED
742
0
            return(1);
743
#else
744
            return(0);
745
#endif
746
0
        case XML_WITH_XINCLUDE:
747
0
#ifdef LIBXML_XINCLUDE_ENABLED
748
0
            return(1);
749
#else
750
            return(0);
751
#endif
752
0
        case XML_WITH_ICONV:
753
0
#ifdef LIBXML_ICONV_ENABLED
754
0
            return(1);
755
#else
756
            return(0);
757
#endif
758
0
        case XML_WITH_ISO8859X:
759
0
#ifdef LIBXML_ISO8859X_ENABLED
760
0
            return(1);
761
#else
762
            return(0);
763
#endif
764
0
        case XML_WITH_UNICODE:
765
0
#ifdef LIBXML_UNICODE_ENABLED
766
0
            return(1);
767
#else
768
            return(0);
769
#endif
770
0
        case XML_WITH_REGEXP:
771
0
#ifdef LIBXML_REGEXP_ENABLED
772
0
            return(1);
773
#else
774
            return(0);
775
#endif
776
0
        case XML_WITH_AUTOMATA:
777
0
#ifdef LIBXML_AUTOMATA_ENABLED
778
0
            return(1);
779
#else
780
            return(0);
781
#endif
782
0
        case XML_WITH_EXPR:
783
#ifdef LIBXML_EXPR_ENABLED
784
            return(1);
785
#else
786
0
            return(0);
787
0
#endif
788
0
        case XML_WITH_SCHEMAS:
789
0
#ifdef LIBXML_SCHEMAS_ENABLED
790
0
            return(1);
791
#else
792
            return(0);
793
#endif
794
0
        case XML_WITH_SCHEMATRON:
795
0
#ifdef LIBXML_SCHEMATRON_ENABLED
796
0
            return(1);
797
#else
798
            return(0);
799
#endif
800
0
        case XML_WITH_MODULES:
801
0
#ifdef LIBXML_MODULES_ENABLED
802
0
            return(1);
803
#else
804
            return(0);
805
#endif
806
0
        case XML_WITH_DEBUG:
807
0
#ifdef LIBXML_DEBUG_ENABLED
808
0
            return(1);
809
#else
810
            return(0);
811
#endif
812
0
        case XML_WITH_DEBUG_MEM:
813
#ifdef DEBUG_MEMORY_LOCATION
814
            return(1);
815
#else
816
0
            return(0);
817
0
#endif
818
0
        case XML_WITH_DEBUG_RUN:
819
0
            return(0);
820
0
        case XML_WITH_ZLIB:
821
0
#ifdef LIBXML_ZLIB_ENABLED
822
0
            return(1);
823
#else
824
            return(0);
825
#endif
826
0
        case XML_WITH_LZMA:
827
0
#ifdef LIBXML_LZMA_ENABLED
828
0
            return(1);
829
#else
830
            return(0);
831
#endif
832
0
        case XML_WITH_ICU:
833
#ifdef LIBXML_ICU_ENABLED
834
            return(1);
835
#else
836
0
            return(0);
837
0
#endif
838
0
        default:
839
0
      break;
840
0
     }
841
0
     return(0);
842
0
}
843
844
/************************************************************************
845
 *                  *
846
 *    SAX2 defaulted attributes handling      *
847
 *                  *
848
 ************************************************************************/
849
850
/**
851
 * xmlDetectSAX2:
852
 * @ctxt:  an XML parser context
853
 *
854
 * Do the SAX2 detection and specific initialization
855
 */
856
static void
857
28.4k
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
858
28.4k
    xmlSAXHandlerPtr sax;
859
860
    /* Avoid unused variable warning if features are disabled. */
861
28.4k
    (void) sax;
862
863
28.4k
    if (ctxt == NULL) return;
864
28.4k
    sax = ctxt->sax;
865
28.4k
#ifdef LIBXML_SAX1_ENABLED
866
28.4k
    if ((sax) && (sax->initialized == XML_SAX2_MAGIC))
867
23.9k
        ctxt->sax2 = 1;
868
#else
869
    ctxt->sax2 = 1;
870
#endif /* LIBXML_SAX1_ENABLED */
871
872
28.4k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
873
28.4k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
874
28.4k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
875
28.4k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
876
28.4k
    (ctxt->str_xml_ns == NULL)) {
877
0
        xmlErrMemory(ctxt, NULL);
878
0
    }
879
28.4k
}
880
881
typedef struct {
882
    xmlHashedString prefix;
883
    xmlHashedString name;
884
    xmlHashedString value;
885
    const xmlChar *valueEnd;
886
    int external;
887
    int expandedSize;
888
} xmlDefAttr;
889
890
typedef struct _xmlDefAttrs xmlDefAttrs;
891
typedef xmlDefAttrs *xmlDefAttrsPtr;
892
struct _xmlDefAttrs {
893
    int nbAttrs;  /* number of defaulted attributes on that element */
894
    int maxAttrs;       /* the size of the array */
895
#if __STDC_VERSION__ >= 199901L
896
    /* Using a C99 flexible array member avoids UBSan errors. */
897
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
898
#else
899
    xmlDefAttr attrs[1];
900
#endif
901
};
902
903
/**
904
 * xmlAttrNormalizeSpace:
905
 * @src: the source string
906
 * @dst: the target string
907
 *
908
 * Normalize the space in non CDATA attribute values:
909
 * If the attribute type is not CDATA, then the XML processor MUST further
910
 * process the normalized attribute value by discarding any leading and
911
 * trailing space (#x20) characters, and by replacing sequences of space
912
 * (#x20) characters by a single space (#x20) character.
913
 * Note that the size of dst need to be at least src, and if one doesn't need
914
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
915
 * passing src as dst is just fine.
916
 *
917
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
918
 *         is needed.
919
 */
920
static xmlChar *
921
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
922
28.9k
{
923
28.9k
    if ((src == NULL) || (dst == NULL))
924
0
        return(NULL);
925
926
40.0k
    while (*src == 0x20) src++;
927
330k
    while (*src != 0) {
928
301k
  if (*src == 0x20) {
929
45.7k
      while (*src == 0x20) src++;
930
16.2k
      if (*src != 0)
931
14.9k
    *dst++ = 0x20;
932
284k
  } else {
933
284k
      *dst++ = *src++;
934
284k
  }
935
301k
    }
936
28.9k
    *dst = 0;
937
28.9k
    if (dst == src)
938
17.4k
       return(NULL);
939
11.5k
    return(dst);
940
28.9k
}
941
942
/**
943
 * xmlAttrNormalizeSpace2:
944
 * @src: the source string
945
 *
946
 * Normalize the space in non CDATA attribute values, a slightly more complex
947
 * front end to avoid allocation problems when running on attribute values
948
 * coming from the input.
949
 *
950
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
951
 *         is needed.
952
 */
953
static const xmlChar *
954
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
955
3.24k
{
956
3.24k
    int i;
957
3.24k
    int remove_head = 0;
958
3.24k
    int need_realloc = 0;
959
3.24k
    const xmlChar *cur;
960
961
3.24k
    if ((ctxt == NULL) || (src == NULL) || (len == NULL))
962
0
        return(NULL);
963
3.24k
    i = *len;
964
3.24k
    if (i <= 0)
965
1.40k
        return(NULL);
966
967
1.84k
    cur = src;
968
2.12k
    while (*cur == 0x20) {
969
283
        cur++;
970
283
  remove_head++;
971
283
    }
972
17.9k
    while (*cur != 0) {
973
16.4k
  if (*cur == 0x20) {
974
1.06k
      cur++;
975
1.06k
      if ((*cur == 0x20) || (*cur == 0)) {
976
387
          need_realloc = 1;
977
387
    break;
978
387
      }
979
1.06k
  } else
980
15.3k
      cur++;
981
16.4k
    }
982
1.84k
    if (need_realloc) {
983
387
        xmlChar *ret;
984
985
387
  ret = xmlStrndup(src + remove_head, i - remove_head + 1);
986
387
  if (ret == NULL) {
987
0
      xmlErrMemory(ctxt, NULL);
988
0
      return(NULL);
989
0
  }
990
387
  xmlAttrNormalizeSpace(ret, ret);
991
387
  *len = strlen((const char *)ret);
992
387
        return(ret);
993
1.45k
    } else if (remove_head) {
994
85
        *len -= remove_head;
995
85
        memmove(src, src + remove_head, 1 + *len);
996
85
  return(src);
997
85
    }
998
1.37k
    return(NULL);
999
1.84k
}
1000
1001
/**
1002
 * xmlAddDefAttrs:
1003
 * @ctxt:  an XML parser context
1004
 * @fullname:  the element fullname
1005
 * @fullattr:  the attribute fullname
1006
 * @value:  the attribute value
1007
 *
1008
 * Add a defaulted attribute for an element
1009
 */
1010
static void
1011
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1012
               const xmlChar *fullname,
1013
               const xmlChar *fullattr,
1014
21.2k
               const xmlChar *value) {
1015
21.2k
    xmlDefAttrsPtr defaults;
1016
21.2k
    xmlDefAttr *attr;
1017
21.2k
    int len, expandedSize;
1018
21.2k
    xmlHashedString name;
1019
21.2k
    xmlHashedString prefix;
1020
21.2k
    xmlHashedString hvalue;
1021
21.2k
    const xmlChar *localname;
1022
1023
    /*
1024
     * Allows to detect attribute redefinitions
1025
     */
1026
21.2k
    if (ctxt->attsSpecial != NULL) {
1027
18.5k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1028
14.7k
      return;
1029
18.5k
    }
1030
1031
6.46k
    if (ctxt->attsDefault == NULL) {
1032
2.63k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1033
2.63k
  if (ctxt->attsDefault == NULL)
1034
0
      goto mem_error;
1035
2.63k
    }
1036
1037
    /*
1038
     * split the element name into prefix:localname , the string found
1039
     * are within the DTD and then not associated to namespace names.
1040
     */
1041
6.46k
    localname = xmlSplitQName3(fullname, &len);
1042
6.46k
    if (localname == NULL) {
1043
6.01k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1044
6.01k
  prefix.name = NULL;
1045
6.01k
    } else {
1046
456
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1047
456
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1048
456
        if (prefix.name == NULL)
1049
0
            goto mem_error;
1050
456
    }
1051
6.46k
    if (name.name == NULL)
1052
0
        goto mem_error;
1053
1054
    /*
1055
     * make sure there is some storage
1056
     */
1057
6.46k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1058
6.46k
    if ((defaults == NULL) ||
1059
6.46k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1060
3.29k
        xmlDefAttrsPtr temp;
1061
3.29k
        int newSize;
1062
1063
3.29k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1064
3.29k
        temp = xmlRealloc(defaults,
1065
3.29k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1066
3.29k
  if (temp == NULL)
1067
0
      goto mem_error;
1068
3.29k
        if (defaults == NULL)
1069
2.83k
            temp->nbAttrs = 0;
1070
3.29k
  temp->maxAttrs = newSize;
1071
3.29k
        defaults = temp;
1072
3.29k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1073
3.29k
                          defaults, NULL) < 0) {
1074
0
      xmlFree(defaults);
1075
0
      goto mem_error;
1076
0
  }
1077
3.29k
    }
1078
1079
    /*
1080
     * Split the attribute name into prefix:localname , the string found
1081
     * are within the DTD and hen not associated to namespace names.
1082
     */
1083
6.46k
    localname = xmlSplitQName3(fullattr, &len);
1084
6.46k
    if (localname == NULL) {
1085
5.10k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1086
5.10k
  prefix.name = NULL;
1087
5.10k
    } else {
1088
1.36k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1089
1.36k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1090
1.36k
        if (prefix.name == NULL)
1091
0
            goto mem_error;
1092
1.36k
    }
1093
6.46k
    if (name.name == NULL)
1094
0
        goto mem_error;
1095
1096
    /* intern the string and precompute the end */
1097
6.46k
    len = strlen((const char *) value);
1098
6.46k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1099
6.46k
    if (hvalue.name == NULL)
1100
0
        goto mem_error;
1101
1102
6.46k
    expandedSize = strlen((const char *) name.name);
1103
6.46k
    if (prefix.name != NULL)
1104
1.36k
        expandedSize += strlen((const char *) prefix.name);
1105
6.46k
    expandedSize += len;
1106
1107
6.46k
    attr = &defaults->attrs[defaults->nbAttrs++];
1108
6.46k
    attr->name = name;
1109
6.46k
    attr->prefix = prefix;
1110
6.46k
    attr->value = hvalue;
1111
6.46k
    attr->valueEnd = hvalue.name + len;
1112
6.46k
    attr->external = ctxt->external;
1113
6.46k
    attr->expandedSize = expandedSize;
1114
1115
6.46k
    return;
1116
1117
0
mem_error:
1118
0
    xmlErrMemory(ctxt, NULL);
1119
0
    return;
1120
6.46k
}
1121
1122
/**
1123
 * xmlAddSpecialAttr:
1124
 * @ctxt:  an XML parser context
1125
 * @fullname:  the element fullname
1126
 * @fullattr:  the attribute fullname
1127
 * @type:  the attribute type
1128
 *
1129
 * Register this attribute type
1130
 */
1131
static void
1132
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1133
      const xmlChar *fullname,
1134
      const xmlChar *fullattr,
1135
      int type)
1136
22.9k
{
1137
22.9k
    if (ctxt->attsSpecial == NULL) {
1138
3.20k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1139
3.20k
  if (ctxt->attsSpecial == NULL)
1140
0
      goto mem_error;
1141
3.20k
    }
1142
1143
22.9k
    if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1144
15.7k
        return;
1145
1146
7.18k
    xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1147
7.18k
                     (void *) (ptrdiff_t) type);
1148
7.18k
    return;
1149
1150
0
mem_error:
1151
0
    xmlErrMemory(ctxt, NULL);
1152
0
    return;
1153
22.9k
}
1154
1155
/**
1156
 * xmlCleanSpecialAttrCallback:
1157
 *
1158
 * Removes CDATA attributes from the special attribute table
1159
 */
1160
static void
1161
xmlCleanSpecialAttrCallback(void *payload, void *data,
1162
                            const xmlChar *fullname, const xmlChar *fullattr,
1163
3.68k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1164
3.68k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1165
1166
3.68k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1167
626
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1168
626
    }
1169
3.68k
}
1170
1171
/**
1172
 * xmlCleanSpecialAttr:
1173
 * @ctxt:  an XML parser context
1174
 *
1175
 * Trim the list of attributes defined to remove all those of type
1176
 * CDATA as they are not special. This call should be done when finishing
1177
 * to parse the DTD and before starting to parse the document root.
1178
 */
1179
static void
1180
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1181
6.40k
{
1182
6.40k
    if (ctxt->attsSpecial == NULL)
1183
4.37k
        return;
1184
1185
2.02k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1186
1187
2.02k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1188
204
        xmlHashFree(ctxt->attsSpecial, NULL);
1189
204
        ctxt->attsSpecial = NULL;
1190
204
    }
1191
2.02k
    return;
1192
6.40k
}
1193
1194
/**
1195
 * xmlCheckLanguageID:
1196
 * @lang:  pointer to the string value
1197
 *
1198
 * DEPRECATED: Internal function, do not use.
1199
 *
1200
 * Checks that the value conforms to the LanguageID production:
1201
 *
1202
 * NOTE: this is somewhat deprecated, those productions were removed from
1203
 *       the XML Second edition.
1204
 *
1205
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1206
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1207
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1208
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1209
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1210
 * [38] Subcode ::= ([a-z] | [A-Z])+
1211
 *
1212
 * The current REC reference the successors of RFC 1766, currently 5646
1213
 *
1214
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1215
 * langtag       = language
1216
 *                 ["-" script]
1217
 *                 ["-" region]
1218
 *                 *("-" variant)
1219
 *                 *("-" extension)
1220
 *                 ["-" privateuse]
1221
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1222
 *                 ["-" extlang]       ; sometimes followed by
1223
 *                                     ; extended language subtags
1224
 *               / 4ALPHA              ; or reserved for future use
1225
 *               / 5*8ALPHA            ; or registered language subtag
1226
 *
1227
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1228
 *                 *2("-" 3ALPHA)      ; permanently reserved
1229
 *
1230
 * script        = 4ALPHA              ; ISO 15924 code
1231
 *
1232
 * region        = 2ALPHA              ; ISO 3166-1 code
1233
 *               / 3DIGIT              ; UN M.49 code
1234
 *
1235
 * variant       = 5*8alphanum         ; registered variants
1236
 *               / (DIGIT 3alphanum)
1237
 *
1238
 * extension     = singleton 1*("-" (2*8alphanum))
1239
 *
1240
 *                                     ; Single alphanumerics
1241
 *                                     ; "x" reserved for private use
1242
 * singleton     = DIGIT               ; 0 - 9
1243
 *               / %x41-57             ; A - W
1244
 *               / %x59-5A             ; Y - Z
1245
 *               / %x61-77             ; a - w
1246
 *               / %x79-7A             ; y - z
1247
 *
1248
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1249
 * The parser below doesn't try to cope with extension or privateuse
1250
 * that could be added but that's not interoperable anyway
1251
 *
1252
 * Returns 1 if correct 0 otherwise
1253
 **/
1254
int
1255
xmlCheckLanguageID(const xmlChar * lang)
1256
3.05k
{
1257
3.05k
    const xmlChar *cur = lang, *nxt;
1258
1259
3.05k
    if (cur == NULL)
1260
72
        return (0);
1261
2.98k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1262
2.98k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1263
2.98k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1264
2.98k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1265
        /*
1266
         * Still allow IANA code and user code which were coming
1267
         * from the previous version of the XML-1.0 specification
1268
         * it's deprecated but we should not fail
1269
         */
1270
299
        cur += 2;
1271
828
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1272
828
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1273
529
            cur++;
1274
299
        return(cur[0] == 0);
1275
299
    }
1276
2.68k
    nxt = cur;
1277
9.18k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1278
9.18k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1279
6.50k
           nxt++;
1280
2.68k
    if (nxt - cur >= 4) {
1281
        /*
1282
         * Reserved
1283
         */
1284
213
        if ((nxt - cur > 8) || (nxt[0] != 0))
1285
147
            return(0);
1286
66
        return(1);
1287
213
    }
1288
2.47k
    if (nxt - cur < 2)
1289
228
        return(0);
1290
    /* we got an ISO 639 code */
1291
2.24k
    if (nxt[0] == 0)
1292
102
        return(1);
1293
2.14k
    if (nxt[0] != '-')
1294
86
        return(0);
1295
1296
2.05k
    nxt++;
1297
2.05k
    cur = nxt;
1298
    /* now we can have extlang or script or region or variant */
1299
2.05k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1300
236
        goto region_m49;
1301
1302
8.38k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1303
8.38k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1304
6.56k
           nxt++;
1305
1.82k
    if (nxt - cur == 4)
1306
528
        goto script;
1307
1.29k
    if (nxt - cur == 2)
1308
307
        goto region;
1309
985
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1310
149
        goto variant;
1311
836
    if (nxt - cur != 3)
1312
200
        return(0);
1313
    /* we parsed an extlang */
1314
636
    if (nxt[0] == 0)
1315
67
        return(1);
1316
569
    if (nxt[0] != '-')
1317
67
        return(0);
1318
1319
502
    nxt++;
1320
502
    cur = nxt;
1321
    /* now we can have script or region or variant */
1322
502
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1323
114
        goto region_m49;
1324
1325
2.01k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1326
2.01k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1327
1.62k
           nxt++;
1328
388
    if (nxt - cur == 2)
1329
77
        goto region;
1330
311
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1331
79
        goto variant;
1332
232
    if (nxt - cur != 4)
1333
138
        return(0);
1334
    /* we parsed a script */
1335
622
script:
1336
622
    if (nxt[0] == 0)
1337
73
        return(1);
1338
549
    if (nxt[0] != '-')
1339
117
        return(0);
1340
1341
432
    nxt++;
1342
432
    cur = nxt;
1343
    /* now we can have region or variant */
1344
432
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1345
67
        goto region_m49;
1346
1347
2.02k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1348
2.02k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1349
1.65k
           nxt++;
1350
1351
365
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1352
100
        goto variant;
1353
265
    if (nxt - cur != 2)
1354
190
        return(0);
1355
    /* we parsed a region */
1356
531
region:
1357
531
    if (nxt[0] == 0)
1358
93
        return(1);
1359
438
    if (nxt[0] != '-')
1360
181
        return(0);
1361
1362
257
    nxt++;
1363
257
    cur = nxt;
1364
    /* now we can just have a variant */
1365
1.85k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1366
1.85k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1367
1.59k
           nxt++;
1368
1369
257
    if ((nxt - cur < 5) || (nxt - cur > 8))
1370
174
        return(0);
1371
1372
    /* we parsed a variant */
1373
411
variant:
1374
411
    if (nxt[0] == 0)
1375
167
        return(1);
1376
244
    if (nxt[0] != '-')
1377
159
        return(0);
1378
    /* extensions and private use subtags not checked */
1379
85
    return (1);
1380
1381
417
region_m49:
1382
417
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1383
417
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1384
72
        nxt += 3;
1385
72
        goto region;
1386
72
    }
1387
345
    return(0);
1388
417
}
1389
1390
/************************************************************************
1391
 *                  *
1392
 *    Parser stacks related functions and macros    *
1393
 *                  *
1394
 ************************************************************************/
1395
1396
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1397
                                            const xmlChar ** str);
1398
1399
/**
1400
 * xmlParserNsCreate:
1401
 *
1402
 * Create a new namespace database.
1403
 *
1404
 * Returns the new obejct.
1405
 */
1406
xmlParserNsData *
1407
28.4k
xmlParserNsCreate(void) {
1408
28.4k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1409
1410
28.4k
    if (nsdb == NULL)
1411
0
        return(NULL);
1412
28.4k
    memset(nsdb, 0, sizeof(*nsdb));
1413
28.4k
    nsdb->defaultNsIndex = INT_MAX;
1414
1415
28.4k
    return(nsdb);
1416
28.4k
}
1417
1418
/**
1419
 * xmlParserNsFree:
1420
 * @nsdb: namespace database
1421
 *
1422
 * Free a namespace database.
1423
 */
1424
void
1425
28.4k
xmlParserNsFree(xmlParserNsData *nsdb) {
1426
28.4k
    if (nsdb == NULL)
1427
0
        return;
1428
1429
28.4k
    xmlFree(nsdb->extra);
1430
28.4k
    xmlFree(nsdb->hash);
1431
28.4k
    xmlFree(nsdb);
1432
28.4k
}
1433
1434
/**
1435
 * xmlParserNsReset:
1436
 * @nsdb: namespace database
1437
 *
1438
 * Reset a namespace database.
1439
 */
1440
static void
1441
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1442
0
    if (nsdb == NULL)
1443
0
        return;
1444
1445
0
    nsdb->hashElems = 0;
1446
0
    nsdb->elementId = 0;
1447
0
    nsdb->defaultNsIndex = INT_MAX;
1448
1449
0
    if (nsdb->hash)
1450
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1451
0
}
1452
1453
/**
1454
 * xmlParserStartElement:
1455
 * @nsdb: namespace database
1456
 *
1457
 * Signal that a new element has started.
1458
 *
1459
 * Returns 0 on success, -1 if the element counter overflowed.
1460
 */
1461
static int
1462
102k
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1463
102k
    if (nsdb->elementId == UINT_MAX)
1464
0
        return(-1);
1465
102k
    nsdb->elementId++;
1466
1467
102k
    return(0);
1468
102k
}
1469
1470
/**
1471
 * xmlParserNsLookup:
1472
 * @ctxt: parser context
1473
 * @prefix: namespace prefix
1474
 * @bucketPtr: optional bucket (return value)
1475
 *
1476
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1477
 * be set to the matching bucket, or the first empty bucket if no match
1478
 * was found.
1479
 *
1480
 * Returns the namespace index on success, INT_MAX if no namespace was
1481
 * found.
1482
 */
1483
static int
1484
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1485
152k
                  xmlParserNsBucket **bucketPtr) {
1486
152k
    xmlParserNsBucket *bucket;
1487
152k
    unsigned index, hashValue;
1488
1489
152k
    if (prefix->name == NULL)
1490
99.1k
        return(ctxt->nsdb->defaultNsIndex);
1491
1492
53.2k
    if (ctxt->nsdb->hashSize == 0)
1493
8.91k
        return(INT_MAX);
1494
1495
44.3k
    hashValue = prefix->hashValue;
1496
44.3k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1497
44.3k
    bucket = &ctxt->nsdb->hash[index];
1498
1499
4.24M
    while (bucket->hashValue) {
1500
4.22M
        if ((bucket->hashValue == hashValue) &&
1501
4.22M
            (bucket->index != INT_MAX)) {
1502
23.9k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1503
23.9k
                if (bucketPtr != NULL)
1504
18.5k
                    *bucketPtr = bucket;
1505
23.9k
                return(bucket->index);
1506
23.9k
            }
1507
23.9k
        }
1508
1509
4.19M
        index++;
1510
4.19M
        bucket++;
1511
4.19M
        if (index == ctxt->nsdb->hashSize) {
1512
10.1k
            index = 0;
1513
10.1k
            bucket = ctxt->nsdb->hash;
1514
10.1k
        }
1515
4.19M
    }
1516
1517
20.3k
    if (bucketPtr != NULL)
1518
17.4k
        *bucketPtr = bucket;
1519
20.3k
    return(INT_MAX);
1520
44.3k
}
1521
1522
/**
1523
 * xmlParserNsLookupUri:
1524
 * @ctxt: parser context
1525
 * @prefix: namespace prefix
1526
 *
1527
 * Lookup namespace URI with given prefix.
1528
 *
1529
 * Returns the namespace URI on success, NULL if no namespace was found.
1530
 */
1531
static const xmlChar *
1532
90.3k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1533
90.3k
    const xmlChar *ret;
1534
90.3k
    int nsIndex;
1535
1536
90.3k
    if (prefix->name == ctxt->str_xml)
1537
657
        return(ctxt->str_xml_ns);
1538
1539
89.6k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1540
89.6k
    if (nsIndex == INT_MAX)
1541
70.2k
        return(NULL);
1542
1543
19.4k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1544
19.4k
    if (ret[0] == 0)
1545
1.01k
        ret = NULL;
1546
19.4k
    return(ret);
1547
89.6k
}
1548
1549
/**
1550
 * xmlParserNsLookupSax:
1551
 * @ctxt: parser context
1552
 * @prefix: namespace prefix
1553
 *
1554
 * Lookup extra data for the given prefix. This returns data stored
1555
 * with xmlParserNsUdpateSax.
1556
 *
1557
 * Returns the data on success, NULL if no namespace was found.
1558
 */
1559
void *
1560
6.77k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1561
6.77k
    xmlHashedString hprefix;
1562
6.77k
    int nsIndex;
1563
1564
6.77k
    if (prefix == ctxt->str_xml)
1565
4.50k
        return(NULL);
1566
1567
2.27k
    hprefix.name = prefix;
1568
2.27k
    if (prefix != NULL)
1569
715
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1570
1.55k
    else
1571
1.55k
        hprefix.hashValue = 0;
1572
2.27k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1573
2.27k
    if (nsIndex == INT_MAX)
1574
0
        return(NULL);
1575
1576
2.27k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1577
2.27k
}
1578
1579
/**
1580
 * xmlParserNsUpdateSax:
1581
 * @ctxt: parser context
1582
 * @prefix: namespace prefix
1583
 * @saxData: extra data for SAX handler
1584
 *
1585
 * Sets or updates extra data for the given prefix. This value will be
1586
 * returned by xmlParserNsLookupSax as long as the namespace with the
1587
 * given prefix is in scope.
1588
 *
1589
 * Returns the data on success, NULL if no namespace was found.
1590
 */
1591
int
1592
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1593
15.0k
                     void *saxData) {
1594
15.0k
    xmlHashedString hprefix;
1595
15.0k
    int nsIndex;
1596
1597
15.0k
    if (prefix == ctxt->str_xml)
1598
0
        return(-1);
1599
1600
15.0k
    hprefix.name = prefix;
1601
15.0k
    if (prefix != NULL)
1602
2.53k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1603
12.4k
    else
1604
12.4k
        hprefix.hashValue = 0;
1605
15.0k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1606
15.0k
    if (nsIndex == INT_MAX)
1607
0
        return(-1);
1608
1609
15.0k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1610
15.0k
    return(0);
1611
15.0k
}
1612
1613
/**
1614
 * xmlParserNsGrow:
1615
 * @ctxt: parser context
1616
 *
1617
 * Grows the namespace tables.
1618
 *
1619
 * Returns 0 on success, -1 if a memory allocation failed.
1620
 */
1621
static int
1622
4.40k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1623
4.40k
    const xmlChar **table;
1624
4.40k
    xmlParserNsExtra *extra;
1625
4.40k
    int newSize;
1626
1627
4.40k
    if (ctxt->nsMax > INT_MAX / 2)
1628
0
        goto error;
1629
4.40k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1630
1631
4.40k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1632
4.40k
    if (table == NULL)
1633
0
        goto error;
1634
4.40k
    ctxt->nsTab = table;
1635
1636
4.40k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1637
4.40k
    if (extra == NULL)
1638
0
        goto error;
1639
4.40k
    ctxt->nsdb->extra = extra;
1640
1641
4.40k
    ctxt->nsMax = newSize;
1642
4.40k
    return(0);
1643
1644
0
error:
1645
0
    xmlErrMemory(ctxt, NULL);
1646
0
    return(-1);
1647
4.40k
}
1648
1649
/**
1650
 * xmlParserNsPush:
1651
 * @ctxt: parser context
1652
 * @prefix: prefix with hash value
1653
 * @uri: uri with hash value
1654
 * @saxData: extra data for SAX handler
1655
 * @defAttr: whether the namespace comes from a default attribute
1656
 *
1657
 * Push a new namespace on the table.
1658
 *
1659
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1660
 * -1 if a memory allocation failed.
1661
 */
1662
static int
1663
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1664
39.8k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1665
39.8k
    xmlParserNsBucket *bucket = NULL;
1666
39.8k
    xmlParserNsExtra *extra;
1667
39.8k
    const xmlChar **ns;
1668
39.8k
    unsigned hashValue, nsIndex, oldIndex;
1669
1670
39.8k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1671
195
        return(0);
1672
1673
39.6k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1674
0
        xmlErrMemory(ctxt, NULL);
1675
0
        return(-1);
1676
0
    }
1677
1678
    /*
1679
     * Default namespace and 'xml' namespace
1680
     */
1681
39.6k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1682
17.3k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1683
1684
17.3k
        if (oldIndex != INT_MAX) {
1685
1.47k
            if (defAttr != 0)
1686
717
                return(0);
1687
1688
762
            extra = &ctxt->nsdb->extra[oldIndex];
1689
1690
762
            if (extra->elementId == ctxt->nsdb->elementId) {
1691
400
                xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1692
400
                return(0);
1693
400
            }
1694
1695
362
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1696
362
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1697
67
                return(0);
1698
362
        }
1699
1700
16.1k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1701
16.1k
        goto populate_entry;
1702
17.3k
    }
1703
1704
    /*
1705
     * Hash table lookup
1706
     */
1707
22.3k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1708
22.3k
    if (oldIndex != INT_MAX) {
1709
2.76k
        extra = &ctxt->nsdb->extra[oldIndex];
1710
1711
2.76k
        if (defAttr != 0)
1712
1.50k
            return(0);
1713
1714
        /*
1715
         * Check for duplicate definitions on the same element.
1716
         */
1717
1.26k
        if (extra->elementId == ctxt->nsdb->elementId) {
1718
241
            xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1719
241
            return(0);
1720
241
        }
1721
1722
1.02k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1723
1.02k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1724
66
            return(0);
1725
1726
956
        bucket->index = ctxt->nsNr;
1727
956
        goto populate_entry;
1728
1.02k
    }
1729
1730
    /*
1731
     * Insert new bucket
1732
     */
1733
1734
19.5k
    hashValue = prefix->hashValue;
1735
1736
    /*
1737
     * Grow hash table, 50% fill factor
1738
     */
1739
19.5k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1740
2.83k
        xmlParserNsBucket *newHash;
1741
2.83k
        unsigned newSize, i, index;
1742
1743
2.83k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1744
0
            xmlErrMemory(ctxt, NULL);
1745
0
            return(-1);
1746
0
        }
1747
2.83k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1748
2.83k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1749
2.83k
        if (newHash == NULL) {
1750
0
            xmlErrMemory(ctxt, NULL);
1751
0
            return(-1);
1752
0
        }
1753
2.83k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1754
1755
50.3k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1756
47.5k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1757
47.5k
            unsigned newIndex;
1758
1759
47.5k
            if (hv == 0)
1760
23.7k
                continue;
1761
23.7k
            newIndex = hv & (newSize - 1);
1762
1763
1.79M
            while (newHash[newIndex].hashValue != 0) {
1764
1.76M
                newIndex++;
1765
1.76M
                if (newIndex == newSize)
1766
2.48k
                    newIndex = 0;
1767
1.76M
            }
1768
1769
23.7k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1770
23.7k
        }
1771
1772
2.83k
        xmlFree(ctxt->nsdb->hash);
1773
2.83k
        ctxt->nsdb->hash = newHash;
1774
2.83k
        ctxt->nsdb->hashSize = newSize;
1775
1776
        /*
1777
         * Relookup
1778
         */
1779
2.83k
        index = hashValue & (newSize - 1);
1780
1781
13.5k
        while (newHash[index].hashValue != 0) {
1782
10.7k
            index++;
1783
10.7k
            if (index == newSize)
1784
176
                index = 0;
1785
10.7k
        }
1786
1787
2.83k
        bucket = &newHash[index];
1788
2.83k
    }
1789
1790
19.5k
    bucket->hashValue = hashValue;
1791
19.5k
    bucket->index = ctxt->nsNr;
1792
19.5k
    ctxt->nsdb->hashElems++;
1793
19.5k
    oldIndex = INT_MAX;
1794
1795
36.6k
populate_entry:
1796
36.6k
    nsIndex = ctxt->nsNr;
1797
1798
36.6k
    ns = &ctxt->nsTab[nsIndex * 2];
1799
36.6k
    ns[0] = prefix ? prefix->name : NULL;
1800
36.6k
    ns[1] = uri->name;
1801
1802
36.6k
    extra = &ctxt->nsdb->extra[nsIndex];
1803
36.6k
    extra->saxData = saxData;
1804
36.6k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1805
36.6k
    extra->uriHashValue = uri->hashValue;
1806
36.6k
    extra->elementId = ctxt->nsdb->elementId;
1807
36.6k
    extra->oldIndex = oldIndex;
1808
1809
36.6k
    ctxt->nsNr++;
1810
1811
36.6k
    return(1);
1812
19.5k
}
1813
1814
/**
1815
 * xmlParserNsPop:
1816
 * @ctxt: an XML parser context
1817
 * @nr:  the number to pop
1818
 *
1819
 * Pops the top @nr namespaces and restores the hash table.
1820
 *
1821
 * Returns the number of namespaces popped.
1822
 */
1823
static int
1824
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1825
22.0k
{
1826
22.0k
    int i;
1827
1828
    /* assert(nr <= ctxt->nsNr); */
1829
1830
53.6k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1831
31.5k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1832
31.5k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1833
1834
31.5k
        if (prefix == NULL) {
1835
15.7k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1836
15.7k
        } else {
1837
15.7k
            xmlHashedString hprefix;
1838
15.7k
            xmlParserNsBucket *bucket = NULL;
1839
1840
15.7k
            hprefix.name = prefix;
1841
15.7k
            hprefix.hashValue = extra->prefixHashValue;
1842
15.7k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1843
            /* assert(bucket && bucket->hashValue); */
1844
15.7k
            bucket->index = extra->oldIndex;
1845
15.7k
        }
1846
31.5k
    }
1847
1848
22.0k
    ctxt->nsNr -= nr;
1849
22.0k
    return(nr);
1850
22.0k
}
1851
1852
static int
1853
3.23k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1854
3.23k
    const xmlChar **atts;
1855
3.23k
    unsigned *attallocs;
1856
3.23k
    int maxatts;
1857
1858
3.23k
    if (nr + 5 > ctxt->maxatts) {
1859
3.23k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1860
3.23k
  atts = (const xmlChar **) xmlMalloc(
1861
3.23k
             maxatts * sizeof(const xmlChar *));
1862
3.23k
  if (atts == NULL) goto mem_error;
1863
3.23k
  attallocs = xmlRealloc(ctxt->attallocs,
1864
3.23k
                               (maxatts / 5) * sizeof(attallocs[0]));
1865
3.23k
  if (attallocs == NULL) {
1866
0
            xmlFree(atts);
1867
0
            goto mem_error;
1868
0
        }
1869
3.23k
        if (ctxt->maxatts > 0)
1870
171
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1871
3.23k
        xmlFree(ctxt->atts);
1872
3.23k
  ctxt->atts = atts;
1873
3.23k
  ctxt->attallocs = attallocs;
1874
3.23k
  ctxt->maxatts = maxatts;
1875
3.23k
    }
1876
3.23k
    return(ctxt->maxatts);
1877
0
mem_error:
1878
0
    xmlErrMemory(ctxt, NULL);
1879
0
    return(-1);
1880
3.23k
}
1881
1882
/**
1883
 * inputPush:
1884
 * @ctxt:  an XML parser context
1885
 * @value:  the parser input
1886
 *
1887
 * Pushes a new parser input on top of the input stack
1888
 *
1889
 * Returns -1 in case of error, the index in the stack otherwise
1890
 */
1891
int
1892
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1893
75.2k
{
1894
75.2k
    if ((ctxt == NULL) || (value == NULL))
1895
0
        return(-1);
1896
75.2k
    if (ctxt->inputNr >= ctxt->inputMax) {
1897
0
        size_t newSize = ctxt->inputMax * 2;
1898
0
        xmlParserInputPtr *tmp;
1899
1900
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1901
0
                                               newSize * sizeof(*tmp));
1902
0
        if (tmp == NULL) {
1903
0
            xmlErrMemory(ctxt, NULL);
1904
0
            return (-1);
1905
0
        }
1906
0
        ctxt->inputTab = tmp;
1907
0
        ctxt->inputMax = newSize;
1908
0
    }
1909
75.2k
    ctxt->inputTab[ctxt->inputNr] = value;
1910
75.2k
    ctxt->input = value;
1911
75.2k
    return (ctxt->inputNr++);
1912
75.2k
}
1913
/**
1914
 * inputPop:
1915
 * @ctxt: an XML parser context
1916
 *
1917
 * Pops the top parser input from the input stack
1918
 *
1919
 * Returns the input just removed
1920
 */
1921
xmlParserInputPtr
1922
inputPop(xmlParserCtxtPtr ctxt)
1923
132k
{
1924
132k
    xmlParserInputPtr ret;
1925
1926
132k
    if (ctxt == NULL)
1927
0
        return(NULL);
1928
132k
    if (ctxt->inputNr <= 0)
1929
56.9k
        return (NULL);
1930
75.2k
    ctxt->inputNr--;
1931
75.2k
    if (ctxt->inputNr > 0)
1932
46.7k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1933
28.4k
    else
1934
28.4k
        ctxt->input = NULL;
1935
75.2k
    ret = ctxt->inputTab[ctxt->inputNr];
1936
75.2k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1937
75.2k
    return (ret);
1938
132k
}
1939
/**
1940
 * nodePush:
1941
 * @ctxt:  an XML parser context
1942
 * @value:  the element node
1943
 *
1944
 * DEPRECATED: Internal function, do not use.
1945
 *
1946
 * Pushes a new element node on top of the node stack
1947
 *
1948
 * Returns -1 in case of error, the index in the stack otherwise
1949
 */
1950
int
1951
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1952
103k
{
1953
103k
    if (ctxt == NULL) return(0);
1954
103k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
1955
324
        xmlNodePtr *tmp;
1956
1957
324
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1958
324
                                      ctxt->nodeMax * 2 *
1959
324
                                      sizeof(ctxt->nodeTab[0]));
1960
324
        if (tmp == NULL) {
1961
0
            xmlErrMemory(ctxt, NULL);
1962
0
            return (-1);
1963
0
        }
1964
324
        ctxt->nodeTab = tmp;
1965
324
  ctxt->nodeMax *= 2;
1966
324
    }
1967
103k
    if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1968
103k
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
1969
3
  xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1970
3
     "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1971
3
        xmlParserMaxDepth);
1972
3
  xmlHaltParser(ctxt);
1973
3
  return(-1);
1974
3
    }
1975
103k
    ctxt->nodeTab[ctxt->nodeNr] = value;
1976
103k
    ctxt->node = value;
1977
103k
    return (ctxt->nodeNr++);
1978
103k
}
1979
1980
/**
1981
 * nodePop:
1982
 * @ctxt: an XML parser context
1983
 *
1984
 * DEPRECATED: Internal function, do not use.
1985
 *
1986
 * Pops the top element node from the node stack
1987
 *
1988
 * Returns the node just removed
1989
 */
1990
xmlNodePtr
1991
nodePop(xmlParserCtxtPtr ctxt)
1992
112k
{
1993
112k
    xmlNodePtr ret;
1994
1995
112k
    if (ctxt == NULL) return(NULL);
1996
112k
    if (ctxt->nodeNr <= 0)
1997
26.2k
        return (NULL);
1998
85.7k
    ctxt->nodeNr--;
1999
85.7k
    if (ctxt->nodeNr > 0)
2000
80.6k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2001
5.16k
    else
2002
5.16k
        ctxt->node = NULL;
2003
85.7k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2004
85.7k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2005
85.7k
    return (ret);
2006
112k
}
2007
2008
/**
2009
 * nameNsPush:
2010
 * @ctxt:  an XML parser context
2011
 * @value:  the element name
2012
 * @prefix:  the element prefix
2013
 * @URI:  the element namespace name
2014
 * @line:  the current line number for error messages
2015
 * @nsNr:  the number of namespaces pushed on the namespace table
2016
 *
2017
 * Pushes a new element name/prefix/URL on top of the name stack
2018
 *
2019
 * Returns -1 in case of error, the index in the stack otherwise
2020
 */
2021
static int
2022
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2023
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2024
141k
{
2025
141k
    xmlStartTag *tag;
2026
2027
141k
    if (ctxt->nameNr >= ctxt->nameMax) {
2028
446
        const xmlChar * *tmp;
2029
446
        xmlStartTag *tmp2;
2030
446
        ctxt->nameMax *= 2;
2031
446
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2032
446
                                    ctxt->nameMax *
2033
446
                                    sizeof(ctxt->nameTab[0]));
2034
446
        if (tmp == NULL) {
2035
0
      ctxt->nameMax /= 2;
2036
0
      goto mem_error;
2037
0
        }
2038
446
  ctxt->nameTab = tmp;
2039
446
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2040
446
                                    ctxt->nameMax *
2041
446
                                    sizeof(ctxt->pushTab[0]));
2042
446
        if (tmp2 == NULL) {
2043
0
      ctxt->nameMax /= 2;
2044
0
      goto mem_error;
2045
0
        }
2046
446
  ctxt->pushTab = tmp2;
2047
141k
    } else if (ctxt->pushTab == NULL) {
2048
14.8k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2049
14.8k
                                            sizeof(ctxt->pushTab[0]));
2050
14.8k
        if (ctxt->pushTab == NULL)
2051
0
            goto mem_error;
2052
14.8k
    }
2053
141k
    ctxt->nameTab[ctxt->nameNr] = value;
2054
141k
    ctxt->name = value;
2055
141k
    tag = &ctxt->pushTab[ctxt->nameNr];
2056
141k
    tag->prefix = prefix;
2057
141k
    tag->URI = URI;
2058
141k
    tag->line = line;
2059
141k
    tag->nsNr = nsNr;
2060
141k
    return (ctxt->nameNr++);
2061
0
mem_error:
2062
0
    xmlErrMemory(ctxt, NULL);
2063
0
    return (-1);
2064
141k
}
2065
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the top entry of the name stack. The entry below it, if any,
 * becomes ctxt->name; otherwise ctxt->name is set to NULL.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2091
2092
/**
2093
 * namePush:
2094
 * @ctxt:  an XML parser context
2095
 * @value:  the element name
2096
 *
2097
 * DEPRECATED: Internal function, do not use.
2098
 *
2099
 * Pushes a new element name on top of the name stack
2100
 *
2101
 * Returns -1 in case of error, the index in the stack otherwise
2102
 */
2103
int
2104
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2105
0
{
2106
0
    if (ctxt == NULL) return (-1);
2107
2108
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2109
0
        const xmlChar * *tmp;
2110
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2111
0
                                    ctxt->nameMax * 2 *
2112
0
                                    sizeof(ctxt->nameTab[0]));
2113
0
        if (tmp == NULL) {
2114
0
      goto mem_error;
2115
0
        }
2116
0
  ctxt->nameTab = tmp;
2117
0
        ctxt->nameMax *= 2;
2118
0
    }
2119
0
    ctxt->nameTab[ctxt->nameNr] = value;
2120
0
    ctxt->name = value;
2121
0
    return (ctxt->nameNr++);
2122
0
mem_error:
2123
0
    xmlErrMemory(ctxt, NULL);
2124
0
    return (-1);
2125
0
}
2126
2127
/**
2128
 * namePop:
2129
 * @ctxt: an XML parser context
2130
 *
2131
 * DEPRECATED: Internal function, do not use.
2132
 *
2133
 * Pops the top element name from the name stack
2134
 *
2135
 * Returns the name just removed
2136
 */
2137
const xmlChar *
2138
namePop(xmlParserCtxtPtr ctxt)
2139
123k
{
2140
123k
    const xmlChar *ret;
2141
2142
123k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2143
0
        return (NULL);
2144
123k
    ctxt->nameNr--;
2145
123k
    if (ctxt->nameNr > 0)
2146
112k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2147
11.1k
    else
2148
11.1k
        ctxt->name = NULL;
2149
123k
    ret = ctxt->nameTab[ctxt->nameNr];
2150
123k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2151
123k
    return (ret);
2152
123k
}
2153
2154
166k
/*
 * spacePush:
 * Stores @val on top of the space stack and points ctxt->space at the
 * new top entry. The stack array is doubled when it is full.
 *
 * Returns -1 in case of memory error, the index in the stack otherwise.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int top;

    if (ctxt->spaceMax <= ctxt->spaceNr) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the doubling: the old, smaller table is still in use. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    top = ctxt->spaceNr;
    ctxt->spaceTab[top] = val;
    ctxt->space = &ctxt->spaceTab[top];
    ctxt->spaceNr = top + 1;
    return(top);
}
2172
2173
147k
/*
 * spacePop:
 * Removes the top entry of the space stack and points ctxt->space at the
 * entry below it, or at slot 0 when the stack becomes empty. The vacated
 * slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2185
2186
/*
2187
 * Macros for accessing the content. Those should be used only by the parser,
2188
 * and not exported.
2189
 *
2190
 * Dirty macros, i.e. one often need to make assumption on the context to
2191
 * use them
2192
 *
2193
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2194
 *           To be used with extreme caution since operations consuming
2195
 *           characters may move the input buffer to a different location !
2196
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2197
 *           This should be used internally by the parser
2198
 *           only to compare to ASCII values otherwise it would break when
2199
 *           running with UTF-8 encoding.
2200
 *   RAW     same as CUR but in the input buffer, bypass any token
2201
 *           extraction that may have been done
2202
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2203
 *           to compare on ASCII based substring.
2204
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2205
 *           strings without newlines within the parser.
2206
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2207
 *           defined char within the parser.
2208
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2209
 *
2210
 *   NEXT    Skip to the next character, this does the proper decoding
2211
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2212
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2213
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2214
 *           to the number of xmlChars used for the encoding [0-5].
2215
 *   CUR_SCHAR  same but operate on a string instead of the context
2216
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2217
 *            the index
2218
 *   GROW, SHRINK  handling of input buffers
2219
 */
2220
2221
2.43M
/*
 * Input accessors. All of them expect a variable named ctxt to be in
 * scope at the point of use.
 */
/* RAW and CUR have the same expansion: the byte at the input cursor. */
#define RAW (*ctxt->input->cur)
2222
1.89M
#define CUR (*ctxt->input->cur)
2223
1.59M
/* NXT(val): the byte val positions after the cursor; no bounds check. */
#define NXT(val) ctxt->input->cur[(val)]
2224
472k
/* CUR_PTR: the cursor pointer itself. */
#define CUR_PTR ctxt->input->cur
2225
447k
/* BASE_PTR: the base pointer of the current input. */
#define BASE_PTR ctxt->input->base
2226
2227
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char, left to right, and the comparison
 * stops at the first mismatch, so no byte after a mismatch is read.
 * The s argument is parenthesized before the cast so that compound
 * expressions (pointer arithmetic, conditionals) are cast as a whole.
 * s is evaluated once per compared byte and must have no side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2244
2245
329k
/*
 * SKIP(val): advance the cursor and the column counter by val, then call
 * xmlParserGrow() if the byte now under the cursor is 0. The line counter
 * is not touched. val is evaluated twice.
 */
#define SKIP(val) do {             \
2246
329k
    ctxt->input->cur += (val),ctxt->input->col+=(val);      \
2247
329k
    if (*ctxt->input->cur == 0)           \
2248
329k
        xmlParserGrow(ctxt);           \
2249
329k
  } while (0)
2250
2251
0
/*
 * SKIPL(val): advance the cursor val bytes one at a time, bumping the
 * line counter and resetting the column to 1 on each '\n', then call
 * xmlParserGrow() if the byte now under the cursor is 0.
 * Declares a local named skipl; val is used unparenthesized in the loop
 * condition.
 */
#define SKIPL(val) do {             \
2252
0
    int skipl;                \
2253
0
    for(skipl=0; skipl<val; skipl++) {         \
2254
0
  if (*(ctxt->input->cur) == '\n') {       \
2255
0
  ctxt->input->line++; ctxt->input->col = 1;      \
2256
0
  } else ctxt->input->col++;         \
2257
0
  ctxt->input->cur++;           \
2258
0
    }                 \
2259
0
    if (*ctxt->input->cur == 0)           \
2260
0
        xmlParserGrow(ctxt);           \
2261
0
  } while (0)
2262
2263
/*
 * SHRINK: call xmlParserShrink() when more than 2 * INPUT_CHUNK bytes lie
 * behind the cursor and fewer than 2 * INPUT_CHUNK bytes lie ahead of it.
 * Don't shrink push parser buffer: the call is skipped when
 * ctxt->progressive is set and there is at most one input on the stack.
 * NOTE: expands to a bare `if` statement that already ends in ';', so it
 * is not safe as the body of an unbraced if/else.
 */
2264
#define SHRINK \
2265
400k
    if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \
2266
400k
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2267
400k
  (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2268
400k
  xmlParserShrink(ctxt);
2269
2270
2.90M
/*
 * GROW: call xmlParserGrow() when fewer than INPUT_CHUNK bytes remain
 * between the cursor and the end of the input. Same bare-`if` caveat as
 * SHRINK.
 */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
2271
2.90M
  xmlParserGrow(ctxt);
2272
2273
1.03M
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars(ctxt). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2274
2275
818k
/* NEXT: shorthand for xmlNextChar(ctxt). */
#define NEXT xmlNextChar(ctxt)
2276
2277
211k
/*
 * NEXT1: advance column and cursor by one byte, then call xmlParserGrow()
 * if the byte now under the cursor is 0. The line counter is not touched.
 * NOTE: expands to a brace block, not a do/while(0) statement.
 */
#define NEXT1 {               \
2278
211k
  ctxt->input->col++;           \
2279
211k
  ctxt->input->cur++;           \
2280
211k
  if (*ctxt->input->cur == 0)         \
2281
211k
      xmlParserGrow(ctxt);           \
2282
211k
    }
2283
2284
1.28M
/*
 * NEXTL(l): update line/column from the byte under the cursor ('\n' starts
 * a new line, anything else counts as one column), then advance the cursor
 * by l bytes. Does not call xmlParserGrow().
 */
#define NEXTL(l) do {             \
2285
1.28M
    if (*(ctxt->input->cur) == '\n') {         \
2286
2.93k
  ctxt->input->line++; ctxt->input->col = 1;      \
2287
1.28M
    } else ctxt->input->col++;           \
2288
1.28M
    ctxt->input->cur += l;        \
2289
1.28M
  } while (0)
2290
2291
1.68M
/* CUR_CHAR(l): xmlCurrentChar() on the context; l receives the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2292
36.3M
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on string s; l receives the length. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2293
2294
/*
 * COPY_BUF(b, i, v): append character v to buffer b at index i. Values
 * below 0x80 are stored as a single byte; anything else goes through
 * xmlCopyCharMultiByte() and i is advanced by its return value.
 * NOTE: expands to a bare if/else without a trailing ';' and evaluates
 * its arguments more than once.
 */
#define COPY_BUF(b, i, v)           \
2295
36.3M
    if (v < 0x80) b[i++] = v;           \
2296
36.3M
    else i += xmlCopyCharMultiByte(&b[i],v)
2297
2298
/**
2299
 * xmlSkipBlankChars:
2300
 * @ctxt:  the XML parser context
2301
 *
2302
 * DEPRECATED: Internal function, do not use.
2303
 *
2304
 * skip all blanks character found at that point in the input streams.
2305
 * It pops up finished entities in the process if allowable at that point.
2306
 *
2307
 * Returns the number of space chars skipped
2308
 */
2309
2310
int
2311
1.03M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2312
1.03M
    int res = 0;
2313
2314
    /*
2315
     * It's Okay to use CUR/NEXT here since all the blanks are on
2316
     * the ASCII range.
2317
     */
2318
1.03M
    if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2319
1.03M
        (ctxt->instate == XML_PARSER_START)) {
2320
485k
  const xmlChar *cur;
2321
  /*
2322
   * if we are in the document content, go really fast
2323
   */
2324
485k
  cur = ctxt->input->cur;
2325
485k
  while (IS_BLANK_CH(*cur)) {
2326
76.9k
      if (*cur == '\n') {
2327
471
    ctxt->input->line++; ctxt->input->col = 1;
2328
76.4k
      } else {
2329
76.4k
    ctxt->input->col++;
2330
76.4k
      }
2331
76.9k
      cur++;
2332
76.9k
      if (res < INT_MAX)
2333
76.9k
    res++;
2334
76.9k
      if (*cur == 0) {
2335
91
    ctxt->input->cur = cur;
2336
91
    xmlParserGrow(ctxt);
2337
91
    cur = ctxt->input->cur;
2338
91
      }
2339
76.9k
  }
2340
485k
  ctxt->input->cur = cur;
2341
554k
    } else {
2342
554k
        int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2343
2344
805k
  while (ctxt->instate != XML_PARSER_EOF) {
2345
805k
            if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2346
160k
    NEXT;
2347
645k
      } else if (CUR == '%') {
2348
                /*
2349
                 * Need to handle support of entities branching here
2350
                 */
2351
55.6k
          if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2352
11.6k
                    break;
2353
44.0k
          xmlParsePEReference(ctxt);
2354
589k
            } else if (CUR == 0) {
2355
46.8k
                unsigned long consumed;
2356
46.8k
                xmlEntityPtr ent;
2357
2358
46.8k
                if (ctxt->inputNr <= 1)
2359
122
                    break;
2360
2361
46.7k
                consumed = ctxt->input->consumed;
2362
46.7k
                xmlSaturatedAddSizeT(&consumed,
2363
46.7k
                                     ctxt->input->cur - ctxt->input->base);
2364
2365
                /*
2366
                 * Add to sizeentities when parsing an external entity
2367
                 * for the first time.
2368
                 */
2369
46.7k
                ent = ctxt->input->entity;
2370
46.7k
                if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2371
46.7k
                    ((ent->flags & XML_ENT_PARSED) == 0)) {
2372
0
                    ent->flags |= XML_ENT_PARSED;
2373
2374
0
                    xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2375
0
                }
2376
2377
46.7k
                xmlParserEntityCheck(ctxt, consumed);
2378
2379
46.7k
                xmlPopInput(ctxt);
2380
542k
            } else {
2381
542k
                break;
2382
542k
            }
2383
2384
            /*
2385
             * Also increase the counter when entering or exiting a PERef.
2386
             * The spec says: "When a parameter-entity reference is recognized
2387
             * in the DTD and included, its replacement text MUST be enlarged
2388
             * by the attachment of one leading and one following space (#x20)
2389
             * character."
2390
             */
2391
251k
      if (res < INT_MAX)
2392
251k
    res++;
2393
251k
        }
2394
554k
    }
2395
1.03M
    return(res);
2396
1.03M
}
2397
2398
/************************************************************************
2399
 *                  *
2400
 *    Commodity functions to handle entities      *
2401
 *                  *
2402
 ************************************************************************/
2403
2404
/**
2405
 * xmlPopInput:
2406
 * @ctxt:  an XML parser context
2407
 *
2408
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2409
 *          pop it and return the next char.
2410
 *
2411
 * Returns the current xmlChar in the parser context
2412
 */
2413
xmlChar
2414
46.7k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2415
46.7k
    xmlParserInputPtr input;
2416
2417
46.7k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2418
46.7k
    if (xmlParserDebugEntities)
2419
0
  xmlGenericError(xmlGenericErrorContext,
2420
0
    "Popping input %d\n", ctxt->inputNr);
2421
46.7k
    if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2422
46.7k
        (ctxt->instate != XML_PARSER_EOF))
2423
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2424
0
                    "Unfinished entity outside the DTD");
2425
46.7k
    input = inputPop(ctxt);
2426
46.7k
    if (input->entity != NULL)
2427
46.7k
        input->entity->flags &= ~XML_ENT_EXPANDING;
2428
46.7k
    xmlFreeInputStream(input);
2429
46.7k
    if (*ctxt->input->cur == 0)
2430
410
        xmlParserGrow(ctxt);
2431
46.7k
    return(CUR);
2432
46.7k
}
2433
2434
/**
2435
 * xmlPushInput:
2436
 * @ctxt:  an XML parser context
2437
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2438
 *
2439
 * xmlPushInput: switch to a new input stream which is stacked on top
2440
 *               of the previous one(s).
2441
 * Returns -1 in case of error or the index in the input stack
2442
 */
2443
int
2444
46.7k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2445
46.7k
    int ret;
2446
46.7k
    if (input == NULL) return(-1);
2447
2448
46.7k
    if (xmlParserDebugEntities) {
2449
0
  if ((ctxt->input != NULL) && (ctxt->input->filename))
2450
0
      xmlGenericError(xmlGenericErrorContext,
2451
0
        "%s(%d): ", ctxt->input->filename,
2452
0
        ctxt->input->line);
2453
0
  xmlGenericError(xmlGenericErrorContext,
2454
0
    "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2455
0
    }
2456
46.7k
    if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2457
46.7k
        (ctxt->inputNr > 100)) {
2458
0
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2459
0
        while (ctxt->inputNr > 1)
2460
0
            xmlFreeInputStream(inputPop(ctxt));
2461
0
  return(-1);
2462
0
    }
2463
46.7k
    ret = inputPush(ctxt, input);
2464
46.7k
    if (ctxt->instate == XML_PARSER_EOF)
2465
0
        return(-1);
2466
46.7k
    GROW;
2467
46.7k
    return(ret);
2468
46.7k
}
2469
2470
/**
2471
 * xmlParseCharRef:
2472
 * @ctxt:  an XML parser context
2473
 *
2474
 * DEPRECATED: Internal function, don't use.
2475
 *
2476
 * Parse a numeric character reference. Always consumes '&'.
2477
 *
2478
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2479
 *                  '&#x' [0-9a-fA-F]+ ';'
2480
 *
2481
 * [ WFC: Legal Character ]
2482
 * Characters referred to using character references must match the
2483
 * production for Char.
2484
 *
2485
 * Returns the value parsed (as an int), 0 in case of error
2486
 */
2487
int
2488
13.4k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2489
13.4k
    int val = 0;
2490
13.4k
    int count = 0;
2491
2492
    /*
2493
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2494
     */
2495
13.4k
    if ((RAW == '&') && (NXT(1) == '#') &&
2496
13.4k
        (NXT(2) == 'x')) {
2497
6.94k
  SKIP(3);
2498
6.94k
  GROW;
2499
30.5k
  while (RAW != ';') { /* loop blocked by count */
2500
25.0k
      if (count++ > 20) {
2501
473
    count = 0;
2502
473
    GROW;
2503
473
                if (ctxt->instate == XML_PARSER_EOF)
2504
0
                    return(0);
2505
473
      }
2506
25.0k
      if ((RAW >= '0') && (RAW <= '9'))
2507
7.99k
          val = val * 16 + (CUR - '0');
2508
17.0k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2509
5.80k
          val = val * 16 + (CUR - 'a') + 10;
2510
11.2k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2511
9.81k
          val = val * 16 + (CUR - 'A') + 10;
2512
1.45k
      else {
2513
1.45k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2514
1.45k
    val = 0;
2515
1.45k
    break;
2516
1.45k
      }
2517
23.6k
      if (val > 0x110000)
2518
5.39k
          val = 0x110000;
2519
2520
23.6k
      NEXT;
2521
23.6k
      count++;
2522
23.6k
  }
2523
6.94k
  if (RAW == ';') {
2524
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2525
5.48k
      ctxt->input->col++;
2526
5.48k
      ctxt->input->cur++;
2527
5.48k
  }
2528
6.94k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2529
6.55k
  SKIP(2);
2530
6.55k
  GROW;
2531
23.4k
  while (RAW != ';') { /* loop blocked by count */
2532
18.8k
      if (count++ > 20) {
2533
473
    count = 0;
2534
473
    GROW;
2535
473
                if (ctxt->instate == XML_PARSER_EOF)
2536
0
                    return(0);
2537
473
      }
2538
18.8k
      if ((RAW >= '0') && (RAW <= '9'))
2539
16.9k
          val = val * 10 + (CUR - '0');
2540
1.93k
      else {
2541
1.93k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2542
1.93k
    val = 0;
2543
1.93k
    break;
2544
1.93k
      }
2545
16.9k
      if (val > 0x110000)
2546
1.85k
          val = 0x110000;
2547
2548
16.9k
      NEXT;
2549
16.9k
      count++;
2550
16.9k
  }
2551
6.55k
  if (RAW == ';') {
2552
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2553
4.61k
      ctxt->input->col++;
2554
4.61k
      ctxt->input->cur++;
2555
4.61k
  }
2556
6.55k
    } else {
2557
0
        if (RAW == '&')
2558
0
            SKIP(1);
2559
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2560
0
    }
2561
2562
    /*
2563
     * [ WFC: Legal Character ]
2564
     * Characters referred to using character references must match the
2565
     * production for Char.
2566
     */
2567
13.4k
    if (val >= 0x110000) {
2568
300
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2569
300
                "xmlParseCharRef: character reference out of bounds\n",
2570
300
          val);
2571
13.1k
    } else if (IS_CHAR(val)) {
2572
9.05k
        return(val);
2573
9.05k
    } else {
2574
4.13k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2575
4.13k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2576
4.13k
                    val);
2577
4.13k
    }
2578
4.43k
    return(0);
2579
13.4k
}
2580
2581
/**
2582
 * xmlParseStringCharRef:
2583
 * @ctxt:  an XML parser context
2584
 * @str:  a pointer to an index in the string
2585
 *
2586
 * parse Reference declarations, variant parsing from a string rather
2587
 * than an an input flow.
2588
 *
2589
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2590
 *                  '&#x' [0-9a-fA-F]+ ';'
2591
 *
2592
 * [ WFC: Legal Character ]
2593
 * Characters referred to using character references must match the
2594
 * production for Char.
2595
 *
2596
 * Returns the value parsed (as an int), 0 in case of error, str will be
2597
 *         updated to the current value of the index
2598
 */
2599
static int
2600
6.37k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2601
6.37k
    const xmlChar *ptr;
2602
6.37k
    xmlChar cur;
2603
6.37k
    int val = 0;
2604
2605
6.37k
    if ((str == NULL) || (*str == NULL)) return(0);
2606
6.37k
    ptr = *str;
2607
6.37k
    cur = *ptr;
2608
6.37k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2609
2.52k
  ptr += 3;
2610
2.52k
  cur = *ptr;
2611
10.2k
  while (cur != ';') { /* Non input consuming loop */
2612
8.18k
      if ((cur >= '0') && (cur <= '9'))
2613
2.47k
          val = val * 16 + (cur - '0');
2614
5.70k
      else if ((cur >= 'a') && (cur <= 'f'))
2615
2.02k
          val = val * 16 + (cur - 'a') + 10;
2616
3.68k
      else if ((cur >= 'A') && (cur <= 'F'))
2617
3.22k
          val = val * 16 + (cur - 'A') + 10;
2618
462
      else {
2619
462
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2620
462
    val = 0;
2621
462
    break;
2622
462
      }
2623
7.71k
      if (val > 0x110000)
2624
584
          val = 0x110000;
2625
2626
7.71k
      ptr++;
2627
7.71k
      cur = *ptr;
2628
7.71k
  }
2629
2.52k
  if (cur == ';')
2630
2.06k
      ptr++;
2631
3.85k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2632
3.85k
  ptr += 2;
2633
3.85k
  cur = *ptr;
2634
10.8k
  while (cur != ';') { /* Non input consuming loops */
2635
8.06k
      if ((cur >= '0') && (cur <= '9'))
2636
7.00k
          val = val * 10 + (cur - '0');
2637
1.06k
      else {
2638
1.06k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2639
1.06k
    val = 0;
2640
1.06k
    break;
2641
1.06k
      }
2642
7.00k
      if (val > 0x110000)
2643
565
          val = 0x110000;
2644
2645
7.00k
      ptr++;
2646
7.00k
      cur = *ptr;
2647
7.00k
  }
2648
3.85k
  if (cur == ';')
2649
2.79k
      ptr++;
2650
3.85k
    } else {
2651
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2652
0
  return(0);
2653
0
    }
2654
6.37k
    *str = ptr;
2655
2656
    /*
2657
     * [ WFC: Legal Character ]
2658
     * Characters referred to using character references must match the
2659
     * production for Char.
2660
     */
2661
6.37k
    if (val >= 0x110000) {
2662
275
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2663
275
                "xmlParseStringCharRef: character reference out of bounds\n",
2664
275
                val);
2665
6.10k
    } else if (IS_CHAR(val)) {
2666
3.95k
        return(val);
2667
3.95k
    } else {
2668
2.15k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2669
2.15k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2670
2.15k
        val);
2671
2.15k
    }
2672
2.42k
    return(0);
2673
6.37k
}
2674
2675
/**
2676
 * xmlParserHandlePEReference:
2677
 * @ctxt:  the parser context
2678
 *
2679
 * DEPRECATED: Internal function, do not use.
2680
 *
2681
 * [69] PEReference ::= '%' Name ';'
2682
 *
2683
 * [ WFC: No Recursion ]
2684
 * A parsed entity must not contain a recursive
2685
 * reference to itself, either directly or indirectly.
2686
 *
2687
 * [ WFC: Entity Declared ]
2688
 * In a document without any DTD, a document with only an internal DTD
2689
 * subset which contains no parameter entity references, or a document
2690
 * with "standalone='yes'", ...  ... The declaration of a parameter
2691
 * entity must precede any reference to it...
2692
 *
2693
 * [ VC: Entity Declared ]
2694
 * In a document with an external subset or external parameter entities
2695
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2696
 * must precede any reference to it...
2697
 *
2698
 * [ WFC: In DTD ]
2699
 * Parameter-entity references may only appear in the DTD.
2700
 * NOTE: misleading but this is handled.
2701
 *
2702
 * A PEReference may have been detected in the current input stream
2703
 * the handling is done accordingly to
2704
 *      http://www.w3.org/TR/REC-xml#entproc
2705
 * i.e.
2706
 *   - Included in literal in entity values
2707
 *   - Included as Parameter Entity reference within DTDs
2708
 */
2709
void
2710
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2711
0
    switch(ctxt->instate) {
2712
0
  case XML_PARSER_CDATA_SECTION:
2713
0
      return;
2714
0
        case XML_PARSER_COMMENT:
2715
0
      return;
2716
0
  case XML_PARSER_START_TAG:
2717
0
      return;
2718
0
  case XML_PARSER_END_TAG:
2719
0
      return;
2720
0
        case XML_PARSER_EOF:
2721
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2722
0
      return;
2723
0
        case XML_PARSER_PROLOG:
2724
0
  case XML_PARSER_START:
2725
0
  case XML_PARSER_XML_DECL:
2726
0
  case XML_PARSER_MISC:
2727
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2728
0
      return;
2729
0
  case XML_PARSER_ENTITY_DECL:
2730
0
        case XML_PARSER_CONTENT:
2731
0
        case XML_PARSER_ATTRIBUTE_VALUE:
2732
0
        case XML_PARSER_PI:
2733
0
  case XML_PARSER_SYSTEM_LITERAL:
2734
0
  case XML_PARSER_PUBLIC_LITERAL:
2735
      /* we just ignore it there */
2736
0
      return;
2737
0
        case XML_PARSER_EPILOG:
2738
0
      xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2739
0
      return;
2740
0
  case XML_PARSER_ENTITY_VALUE:
2741
      /*
2742
       * NOTE: in the case of entity values, we don't do the
2743
       *       substitution here since we need the literal
2744
       *       entity value to be able to save the internal
2745
       *       subset of the document.
2746
       *       This will be handled by xmlStringDecodeEntities
2747
       */
2748
0
      return;
2749
0
        case XML_PARSER_DTD:
2750
      /*
2751
       * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2752
       * In the internal DTD subset, parameter-entity references
2753
       * can occur only where markup declarations can occur, not
2754
       * within markup declarations.
2755
       * In that case this is handled in xmlParseMarkupDecl
2756
       */
2757
0
      if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2758
0
    return;
2759
0
      if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2760
0
    return;
2761
0
            break;
2762
0
        case XML_PARSER_IGNORE:
2763
0
            return;
2764
0
    }
2765
2766
0
    xmlParsePEReference(ctxt);
2767
0
}
2768
2769
/*
 * growBuffer(buffer, n): grow the heap buffer named by the first argument
 * to buffer##_size * 2 + n bytes with xmlRealloc().
 *
 * Requirements on the expansion site:
 *   - a variable buffer##_size (expected to be a size_t) holds the
 *     current size and is updated on success;
 *   - a label mem_error: exists; it is jumped to when the new size wraps
 *     around or when the reallocation fails. In both cases buffer and
 *     buffer##_size are left unchanged.
 *
 * The macro expands to a brace block that declares locals named tmp and
 * new_size.
 */
2774
154k
#define growBuffer(buffer, n) {           \
2775
154k
    xmlChar *tmp;             \
2776
154k
    size_t new_size = buffer##_size * 2 + n;                            \
2777
154k
    if (new_size < buffer##_size) goto mem_error;                       \
2778
154k
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
2779
154k
    if (tmp == NULL) goto mem_error;         \
2780
154k
    buffer = tmp;             \
2781
154k
    buffer##_size = new_size;                                           \
2782
154k
}
2783
2784
/**
2785
 * xmlStringDecodeEntitiesInt:
2786
 * @ctxt:  the parser context
2787
 * @str:  the input string
2788
 * @len: the string length
2789
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2790
 * @end:  an end marker xmlChar, 0 if none
2791
 * @end2:  an end marker xmlChar, 0 if none
2792
 * @end3:  an end marker xmlChar, 0 if none
2793
 * @check:  whether to perform entity checks
 *
 * Copies @str into a newly allocated buffer. Character references are
 * always replaced; general entity references are expanded when @what has
 * XML_SUBSTITUTE_REF and parameter entity references when it has
 * XML_SUBSTITUTE_PEREF. Entity content is decoded by a recursive call.
 * Decoding stops after @len bytes, at a 0 character, at one of the end
 * markers, or when the parser state becomes XML_PARSER_EOF.
 *
 * Returns the zero-terminated result, which the caller must free, or NULL
 * if @str is NULL, the nesting depth limit is exceeded, a reference cannot
 * be decoded, or memory allocation fails.
2794
 */
2795
static xmlChar *
2796
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2797
               int what, xmlChar end, xmlChar  end2, xmlChar end3,
2798
444k
                           int check) {
2799
444k
    xmlChar *buffer = NULL;
2800
444k
    size_t buffer_size = 0;
2801
444k
    size_t nbchars = 0;
2802
2803
444k
    xmlChar *current = NULL;
2804
444k
    xmlChar *rep = NULL;
2805
444k
    const xmlChar *last;
2806
444k
    xmlEntityPtr ent;
2807
444k
    int c,l;
2808
2809
444k
    if (str == NULL)
2810
0
        return(NULL);
2811
444k
    last = str + len;
2812
    /* Nesting limit: 40 levels, or 100 when XML_PARSE_HUGE is set. */
2813
444k
    if (((ctxt->depth > 40) &&
2814
444k
         ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2815
444k
  (ctxt->depth > 100)) {
2816
0
  xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2817
0
                       "Maximum entity nesting depth exceeded");
2818
0
  return(NULL);
2819
0
    }
2820
2821
    /*
2822
     * allocate a translation buffer.
2823
     */
2824
444k
    buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2825
444k
    buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2826
444k
    if (buffer == NULL) goto mem_error;
2827
2828
    /*
2829
     * OK loop until we reach one of the ending char or a size limit.
2830
     * we are operating on already parsed values.
2831
     */
2832
444k
    if (str < last)
2833
442k
  c = CUR_SCHAR(str, l);
2834
2.35k
    else
2835
2.35k
        c = 0;
2836
35.6M
    while ((c != 0) && (c != end) && /* non input consuming loop */
2837
35.6M
           (c != end2) && (c != end3) &&
2838
35.6M
           (ctxt->instate != XML_PARSER_EOF)) {
2839
  /* This test repeats the (c != 0) part of the loop condition. */
2840
35.1M
  if (c == 0) break;
2841
35.1M
        if ((c == '&') && (str[1] == '#')) {
2842
6.37k
      int val = xmlParseStringCharRef(ctxt, &str);
2843
6.37k
      if (val == 0)
2844
2.42k
                goto int_error;
2845
3.95k
      COPY_BUF(buffer, nbchars, val);
2846
3.95k
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2847
396
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2848
396
      }
2849
35.1M
  } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2850
513k
      if (xmlParserDebugEntities)
2851
0
    xmlGenericError(xmlGenericErrorContext,
2852
0
      "String decoding Entity Reference: %.30s\n",
2853
0
      str);
2854
513k
      ent = xmlParseStringEntityRef(ctxt, &str);
2855
513k
      if ((ent != NULL) &&
2856
513k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2857
1.94k
    if (ent->content != NULL) {
2858
1.94k
        COPY_BUF(buffer, nbchars, ent->content[0]);
2859
1.94k
        if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2860
392
      growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2861
392
        }
2862
1.94k
    } else {
2863
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2864
0
          "predefined entity has no content\n");
2865
0
                    goto int_error;
2866
0
    }
2867
511k
      } else if ((ent != NULL) && (ent->content != NULL)) {
2868
386k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2869
0
                    goto int_error;
2870
    /*
     * XML_ENT_EXPANDING is set around the recursive call below, so
     * finding it already set means this entity references itself.
     */
2871
386k
                if (ent->flags & XML_ENT_EXPANDING) {
2872
149
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2873
149
                    xmlHaltParser(ctxt);
2874
149
                    ent->content[0] = 0;
2875
149
                    goto int_error;
2876
149
                }
2877
2878
386k
                ent->flags |= XML_ENT_EXPANDING;
2879
386k
    ctxt->depth++;
2880
386k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2881
386k
                        ent->length, what, 0, 0, 0, check);
2882
386k
    ctxt->depth--;
2883
386k
                ent->flags &= ~XML_ENT_EXPANDING;
2884
2885
386k
    if (rep == NULL) {
2886
133
                    ent->content[0] = 0;
2887
133
                    goto int_error;
2888
133
                }
2889
    /* Append the decoded replacement text, growing as needed. */
2890
386k
                current = rep;
2891
113M
                while (*current != 0) { /* non input consuming loop */
2892
112M
                    buffer[nbchars++] = *current++;
2893
112M
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2894
90.0k
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2895
90.0k
                    }
2896
112M
                }
2897
386k
                xmlFree(rep);
2898
386k
                rep = NULL;
2899
386k
      } else if (ent != NULL) {
    /* Entity without content: copy the reference through as "&name;". */
2900
8.12k
    int i = xmlStrlen(ent->name);
2901
8.12k
    const xmlChar *cur = ent->name;
2902
2903
8.12k
    buffer[nbchars++] = '&';
2904
8.12k
    if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2905
470
        growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2906
470
    }
2907
16.4k
    for (;i > 0;i--)
2908
8.32k
        buffer[nbchars++] = *cur++;
2909
8.12k
    buffer[nbchars++] = ';';
2910
8.12k
      }
2911
34.6M
  } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2912
2.98k
      if (xmlParserDebugEntities)
2913
0
    xmlGenericError(xmlGenericErrorContext,
2914
0
      "String decoding PE Reference: %.30s\n", str);
2915
2.98k
      ent = xmlParseStringPEReference(ctxt, &str);
2916
2.98k
      if (ent != NULL) {
2917
1.45k
                if (ent->content == NULL) {
2918
        /*
2919
         * Note: external parsed entities will not be loaded,
2920
         * it is not required for a non-validating parser to
2921
         * complete external PEReferences coming from the
2922
         * internal subset
2923
         */
2924
0
        if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2925
0
      ((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2926
0
      (ctxt->validate != 0)) {
2927
0
      xmlLoadEntityContent(ctxt, ent);
2928
0
        } else {
2929
0
      xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2930
0
      "not validating will not read content for PE entity %s\n",
2931
0
                          ent->name, NULL);
2932
0
        }
2933
0
    }
2934
2935
1.45k
          if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2936
0
                    goto int_error;
2937
    /* Same loop detection as for general entities above. */
2938
1.45k
                if (ent->flags & XML_ENT_EXPANDING) {
2939
1
              xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2940
1
                    xmlHaltParser(ctxt);
2941
1
                    if (ent->content != NULL)
2942
1
                        ent->content[0] = 0;
2943
1
                    goto int_error;
2944
1
                }
2945
2946
1.45k
                ent->flags |= XML_ENT_EXPANDING;
2947
1.45k
    ctxt->depth++;
    /*
     * If the content is still NULL here, the recursive call returns
     * NULL through its (str == NULL) check and we take the error path.
     */
2948
1.45k
    rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2949
1.45k
                        ent->length, what, 0, 0, 0, check);
2950
1.45k
    ctxt->depth--;
2951
1.45k
                ent->flags &= ~XML_ENT_EXPANDING;
2952
2953
1.45k
    if (rep == NULL) {
2954
1
                    if (ent->content != NULL)
2955
1
                        ent->content[0] = 0;
2956
1
                    goto int_error;
2957
1
                }
2958
1.45k
                current = rep;
2959
48.6k
                while (*current != 0) { /* non input consuming loop */
2960
47.1k
                    buffer[nbchars++] = *current++;
2961
47.1k
                    if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2962
398
                        growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2963
398
                    }
2964
47.1k
                }
2965
1.45k
                xmlFree(rep);
2966
1.45k
                rep = NULL;
2967
1.45k
      }
2968
34.6M
  } else {
      /* Ordinary character: copy it and advance by its byte length. */
2969
34.6M
      COPY_BUF(buffer, nbchars, c);
2970
34.6M
      str += l;
2971
34.6M
      if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2972
211k
          growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2973
211k
      }
2974
34.6M
  }
2975
35.1M
  if (str < last)
2976
34.7M
      c = CUR_SCHAR(str, l);
2977
439k
  else
2978
439k
      c = 0;
2979
35.1M
    }
2980
441k
    buffer[nbchars] = 0;
2981
441k
    return(buffer);
2982
/* Allocation failure: report it, then fall through to the shared cleanup. */
2983
0
mem_error:
2984
0
    xmlErrMemory(ctxt, NULL);
2985
2.70k
int_error:
2986
2.70k
    if (rep != NULL)
2987
0
        xmlFree(rep);
2988
2.70k
    if (buffer != NULL)
2989
2.70k
        xmlFree(buffer);
2990
2.70k
    return(NULL);
2991
0
}
2992
2993
/**
2994
 * xmlStringLenDecodeEntities:
2995
 * @ctxt:  the parser context
2996
 * @str:  the input string
2997
 * @len: the string length
2998
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2999
 * @end:  an end marker xmlChar, 0 if none
3000
 * @end2:  an end marker xmlChar, 0 if none
3001
 * @end3:  an end marker xmlChar, 0 if none
3002
 *
3003
 * DEPRECATED: Internal function, don't use.
3004
 *
3005
 * Takes an entity string's content and performs the adequate substitutions.
 * Thin wrapper around xmlStringDecodeEntitiesInt() with entity checks
 * disabled (check = 0).
3006
 *
3007
 * [67] Reference ::= EntityRef | CharRef
3008
 *
3009
 * [69] PEReference ::= '%' Name ';'
3010
 *
3011
 * Returns A newly allocated string with the substitution done. The caller
3012
 *      must deallocate it ! Returns NULL if @ctxt or @str is NULL, if @len
 *      is negative, or if decoding fails.
3013
 */
3014
xmlChar *
3015
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3016
                           int what, xmlChar end, xmlChar  end2,
3017
0
                           xmlChar end3) {
3018
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
3019
0
        return(NULL);
3020
0
    return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
3021
0
                                      end, end2, end3, 0));
3022
0
}
3023
3024
/**
3025
 * xmlStringDecodeEntities:
3026
 * @ctxt:  the parser context
3027
 * @str:  the input string
3028
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
3029
 * @end:  an end marker xmlChar, 0 if none
3030
 * @end2:  an end marker xmlChar, 0 if none
3031
 * @end3:  an end marker xmlChar, 0 if none
3032
 *
3033
 * DEPRECATED: Internal function, don't use.
3034
 *
3035
 * Takes an entity string's content and performs the adequate substitutions.
 * The length of @str is computed with xmlStrlen(); the work is done by
 * xmlStringDecodeEntitiesInt() with entity checks disabled (check = 0).
3036
 *
3037
 * [67] Reference ::= EntityRef | CharRef
3038
 *
3039
 * [69] PEReference ::= '%' Name ';'
3040
 *
3041
 * Returns A newly allocated string with the substitution done. The caller
3042
 *      must deallocate it ! Returns NULL if @ctxt or @str is NULL, or if
 *      decoding fails.
3043
 */
3044
xmlChar *
3045
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
3046
31.4k
            xmlChar end, xmlChar  end2, xmlChar end3) {
3047
31.4k
    if ((ctxt == NULL) || (str == NULL)) return(NULL);
3048
31.4k
    return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
3049
31.4k
                                      end, end2, end3, 0));
3050
31.4k
}
3051
3052
/************************************************************************
3053
 *                  *
3054
 *    Commodity functions, cleanup needed ?     *
3055
 *                  *
3056
 ************************************************************************/
3057
3058
/**
3059
 * areBlanks:
3060
 * @ctxt:  an XML parser context
3061
 * @str:  a xmlChar *
3062
 * @len:  the size of @str
3063
 * @blank_chars: we know the chars are blanks
3064
 *
3065
 * Is this a sequence of blank chars that one can ignore ?
 *
 * The checks run in this order: identical SAX callbacks, the current
 * xml:space value, the characters themselves (skipped when @blank_chars
 * is non-zero), the DTD's view of the current element via
 * xmlIsMixedElement(), and finally a heuristic based on the next input
 * character and the children of the current node.
3066
 *
3067
 * Returns 1 if ignorable 0 otherwise.
3068
 */
3069
3070
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3071
7.77k
                     int blank_chars) {
3072
7.77k
    int i, ret;
3073
7.77k
    xmlNodePtr lastChild;
3074
3075
    /*
3076
     * Don't spend time trying to differentiate them, the same callback is
3077
     * used !
3078
     */
3079
7.77k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
3080
1.30k
  return(0);
3081
3082
    /*
3083
     * Check for xml:space value.
     * A missing stack entry, or a value of 1 or -2, means not ignorable.
3084
     */
3085
6.46k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
3086
6.46k
        (*(ctxt->space) == -2))
3087
1.89k
  return(0);
3088
3089
    /*
3090
     * Check that the string is made of blanks
3091
     */
3092
4.56k
    if (blank_chars == 0) {
3093
8.26k
  for (i = 0;i < len;i++)
3094
6.63k
      if (!(IS_BLANK_CH(str[i]))) return(0);
3095
2.57k
    }
3096
3097
    /*
3098
     * Look if the element is mixed content in the DTD if available
     * A result of 0 means ignorable, 1 means significant; any other
     * result falls through to the heuristic below.
3099
     */
3100
3.62k
    if (ctxt->node == NULL) return(0);
3101
3.62k
    if (ctxt->myDoc != NULL) {
3102
3.62k
  ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3103
3.62k
        if (ret == 0) return(1);
3104
3.42k
        if (ret == 1) return(0);
3105
3.42k
    }
3106
3107
    /*
3108
     * Otherwise, heuristic :-\
     * The blanks must be followed by '<' or a carriage return, must not
     * be the only content before an end tag, and must not sit next to
     * existing text children.
3109
     */
3110
3.23k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
3111
2.71k
    if ((ctxt->node->children == NULL) &&
3112
2.71k
  (RAW == '<') && (NXT(1) == '/')) return(0);
3113
3114
2.31k
    lastChild = xmlGetLastChild(ctxt->node);
3115
2.31k
    if (lastChild == NULL) {
3116
1.67k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3117
1.67k
            (ctxt->node->content != NULL)) return(0);
3118
1.67k
    } else if (xmlNodeIsText(lastChild))
3119
220
        return(0);
3120
420
    else if ((ctxt->node->children != NULL) &&
3121
420
             (xmlNodeIsText(ctxt->node->children)))
3122
81
        return(0);
3123
2.01k
    return(1);
3124
2.31k
}
3125
3126
/************************************************************************
3127
 *                  *
3128
 *    Extra stuff for namespace support     *
3129
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
3130
 *                  *
3131
 ************************************************************************/
3132
3133
/**
3134
 * xmlSplitQName:
3135
 * @ctxt:  an XML parser context
3136
 * @name:  the UTF8 encoded qualified name to split
3137
 * @prefix:  a xmlChar **
3138
 *
3139
 * parse an UTF8 encoded XML qualified name string
3140
 *
3141
 * [NS 5] QName ::= (Prefix ':')? LocalPart
3142
 *
3143
 * [NS 6] Prefix ::= NCName
3144
 *
3145
 * [NS 7] LocalPart ::= NCName
 *
 * A name that starts with ':' or ends with its first ':' is returned
 * whole, as a copy, with *@prefix set to NULL. Both the returned string
 * and *@prefix are newly allocated.
3146
 *
3147
 * Returns the local part, and prefix is updated
3148
 *   to get the Prefix if any. Returns NULL if @prefix or @name is NULL
 *   or if a buffer allocation fails.
3149
 */
3150
3151
xmlChar *
3152
114k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3153
114k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3154
114k
    xmlChar *buffer = NULL;
3155
114k
    int len = 0;
3156
114k
    int max = XML_MAX_NAMELEN;
3157
114k
    xmlChar *ret = NULL;
3158
114k
    const xmlChar *cur = name;
3159
114k
    int c;
3160
3161
114k
    if (prefix == NULL) return(NULL);
3162
114k
    *prefix = NULL;
3163
3164
114k
    if (cur == NULL) return(NULL);
3165
3166
#ifndef XML_XML_NAMESPACE
3167
    /* xml: prefix is not really a namespace */
3168
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
3169
        (cur[2] == 'l') && (cur[3] == ':'))
3170
  return(xmlStrdup(name));
3171
#endif
3172
3173
    /* nasty but well-formed */
3174
114k
    if (cur[0] == ':')
3175
11.8k
  return(xmlStrdup(name));
3176
    /* Collect the part before the first ':' into the stack buffer. */
3177
102k
    c = *cur++;
3178
433k
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3179
330k
  buf[len++] = c;
3180
330k
  c = *cur++;
3181
330k
    }
3182
102k
    if (len >= max) {
3183
  /*
3184
   * Okay someone managed to make a huge name, so he's ready to pay
3185
   * for the processing speed.
   * Continue in a heap buffer that is doubled when fewer than 10
   * bytes remain.
3186
   */
3187
696
  max = len * 2;
3188
3189
696
  buffer = (xmlChar *) xmlMallocAtomic(max);
3190
696
  if (buffer == NULL) {
3191
0
      xmlErrMemory(ctxt, NULL);
3192
0
      return(NULL);
3193
0
  }
3194
696
  memcpy(buffer, buf, len);
3195
59.3k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3196
58.6k
      if (len + 10 > max) {
3197
309
          xmlChar *tmp;
3198
3199
309
    max *= 2;
3200
309
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3201
309
    if (tmp == NULL) {
3202
0
        xmlFree(buffer);
3203
0
        xmlErrMemory(ctxt, NULL);
3204
0
        return(NULL);
3205
0
    }
3206
309
    buffer = tmp;
3207
309
      }
3208
58.6k
      buffer[len++] = c;
3209
58.6k
      c = *cur++;
3210
58.6k
  }
3211
696
  buffer[len] = 0;
3212
696
    }
3213
    /* A ':' with nothing after it: return the whole name, no prefix. */
3214
102k
    if ((c == ':') && (*cur == 0)) {
3215
695
        if (buffer != NULL)
3216
195
      xmlFree(buffer);
3217
695
  *prefix = NULL;
3218
695
  return(xmlStrdup(name));
3219
695
    }
3220
3221
102k
    if (buffer == NULL)
3222
101k
  ret = xmlStrndup(buf, len);
3223
501
    else {
3224
501
  ret = buffer;
3225
501
  buffer = NULL;
3226
501
  max = XML_MAX_NAMELEN;
3227
501
    }
3228
3229
3230
102k
    if (c == ':') {
  /* What was collected so far is the prefix; now read the local part. */
3231
15.0k
  c = *cur;
3232
15.0k
        *prefix = ret;
  /* Not reachable: the (*cur == 0) case already returned above. */
3233
15.0k
  if (c == 0) {
3234
0
      return(xmlStrndup(BAD_CAST "", 0));
3235
0
  }
3236
15.0k
  len = 0;
3237
3238
  /*
3239
   * Check that the first character is proper to start
3240
   * a new name
   * A bad first character is reported, but splitting continues.
3241
   */
3242
15.0k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3243
15.0k
        ((c >= 0x41) && (c <= 0x5A)) ||
3244
15.0k
        (c == '_') || (c == ':'))) {
3245
4.56k
      int l;
3246
4.56k
      int first = CUR_SCHAR(cur, l);
3247
3248
4.56k
      if (!IS_LETTER(first) && (first != '_')) {
3249
2.85k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3250
2.85k
          "Name %s is not XML Namespace compliant\n",
3251
2.85k
          name);
3252
2.85k
      }
3253
4.56k
  }
3254
15.0k
  cur++;
3255
3256
124k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3257
108k
      buf[len++] = c;
3258
108k
      c = *cur++;
3259
108k
  }
3260
15.0k
  if (len >= max) {
3261
      /*
3262
       * Okay someone managed to make a huge name, so he's ready to pay
3263
       * for the processing speed.
3264
       */
3265
687
      max = len * 2;
3266
3267
687
      buffer = (xmlChar *) xmlMallocAtomic(max);
      /*
       * NOTE(review): on the failure returns below *prefix already
       * holds the allocated prefix; the caller presumably frees it.
       */
3268
687
      if (buffer == NULL) {
3269
0
          xmlErrMemory(ctxt, NULL);
3270
0
    return(NULL);
3271
0
      }
3272
687
      memcpy(buffer, buf, len);
3273
35.7k
      while (c != 0) { /* tested bigname2.xml */
3274
35.0k
    if (len + 10 > max) {
3275
286
        xmlChar *tmp;
3276
3277
286
        max *= 2;
3278
286
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3279
286
        if (tmp == NULL) {
3280
0
      xmlErrMemory(ctxt, NULL);
3281
0
      xmlFree(buffer);
3282
0
      return(NULL);
3283
0
        }
3284
286
        buffer = tmp;
3285
286
    }
3286
35.0k
    buffer[len++] = c;
3287
35.0k
    c = *cur++;
3288
35.0k
      }
3289
687
      buffer[len] = 0;
3290
687
  }
3291
3292
15.0k
  if (buffer == NULL)
3293
14.4k
      ret = xmlStrndup(buf, len);
3294
687
  else {
3295
687
      ret = buffer;
3296
687
  }
3297
15.0k
    }
3298
3299
102k
    return(ret);
3300
102k
}
3301
3302
/************************************************************************
3303
 *                  *
3304
 *      The parser itself       *
3305
 *  Relates to http://www.w3.org/TR/REC-xml       *
3306
 *                  *
3307
 ************************************************************************/
3308
3309
/************************************************************************
3310
 *                  *
3311
 *  Routines to parse Name, NCName and NmToken      *
3312
 *                  *
3313
 ************************************************************************/
3314
3315
/*
3316
 * The two following functions are related to the change of accepted
3317
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3318
 * They correspond to the modified production [4] and the new production [4a]
3319
 * changes in that revision. Also note that the macros used for the
3320
 * productions Letter, Digit, CombiningChar and Extender are not needed
3321
 * anymore.
3322
 * We still keep compatibility to pre-revision5 parsing semantic if the
3323
 * new XML_PARSE_OLD10 option is given to the parser.
3324
 */
3325
/*
 * xmlIsNameStartChar:
 * @ctxt:  an XML parser context, only its options are read
 * @c:  the code point to test
 *
 * Tests whether @c may start a Name. Without XML_PARSE_OLD10 the explicit
 * ranges below are used; with it the test is IS_LETTER, '_' or ':'.
 *
 * Returns 1 if @c is accepted, 0 otherwise.
 */
static int
3326
626k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3327
626k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3328
        /*
3329
   * Use the new checks of production [4] [4a] and [5] of the
3330
   * Update 5 of XML-1.0
3331
   */
3332
617k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3333
617k
      (((c >= 'a') && (c <= 'z')) ||
3334
616k
       ((c >= 'A') && (c <= 'Z')) ||
3335
616k
       (c == '_') || (c == ':') ||
3336
616k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3337
616k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3338
616k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3339
616k
       ((c >= 0x370) && (c <= 0x37D)) ||
3340
616k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3341
616k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3342
616k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3343
616k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3344
616k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3345
616k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3346
616k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3347
616k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3348
564k
      return(1);
3349
617k
    } else {
3350
9.58k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3351
7.22k
      return(1);
3352
9.58k
    }
3353
55.6k
    return(0);
3354
626k
}
3355
3356
/*
 * xmlIsNameChar:
 * @ctxt:  an XML parser context, only its options are read
 * @c:  the code point to test
 *
 * Tests whether @c may appear inside a Name. Without XML_PARSE_OLD10 the
 * explicit ranges below are used; the entries marked "!start" are the
 * ones that xmlIsNameStartChar does not accept. With XML_PARSE_OLD10 the
 * IS_LETTER / IS_DIGIT / IS_COMBINING / IS_EXTENDER macros are used.
 *
 * Returns 1 if @c is accepted, 0 otherwise.
 */
static int
3357
904k
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3358
904k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3359
        /*
3360
   * Use the new checks of production [4] [4a] and [5] of the
3361
   * Update 5 of XML-1.0
3362
   */
3363
888k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3364
888k
      (((c >= 'a') && (c <= 'z')) ||
3365
876k
       ((c >= 'A') && (c <= 'Z')) ||
3366
876k
       ((c >= '0') && (c <= '9')) || /* !start */
3367
876k
       (c == '_') || (c == ':') ||
3368
876k
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3369
876k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3370
876k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3371
876k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3372
876k
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3373
876k
       ((c >= 0x370) && (c <= 0x37D)) ||
3374
876k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3375
876k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3376
876k
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3377
876k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3378
876k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3379
876k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3380
876k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3381
876k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3382
876k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3383
315k
       return(1);
3384
888k
    } else {
3385
15.6k
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3386
15.6k
            (c == '.') || (c == '-') ||
3387
15.6k
      (c == '_') || (c == ':') ||
3388
15.6k
      (IS_COMBINING(c)) ||
3389
15.6k
      (IS_EXTENDER(c)))
3390
7.71k
      return(1);
3391
15.6k
    }
3392
581k
    return(0);
3393
904k
}
3394
3395
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
3396
                                          int *len, int *alloc, int normalize);
3397
3398
/*
 * xmlParseNameComplex:
 * @ctxt:  an XML parser context
 *
 * Slow path of xmlParseName: reads the name one decoded character at a
 * time with CUR_CHAR/NEXTL, counting consumed bytes in len. Depending on
 * XML_PARSE_OLD10 it uses either the explicit ranges below or the
 * IS_LETTER / IS_DIGIT / IS_COMBINING / IS_EXTENDER macros.
 *
 * Returns the name as looked up in ctxt->dict, or NULL if the current
 * character cannot start a name, the parser reached XML_PARSER_EOF, the
 * name exceeds the length limit, or the input buffer changed unexpectedly.
 */
static const xmlChar *
3399
111k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3400
111k
    int len = 0, l;
3401
111k
    int c;
3402
111k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3403
0
                    XML_MAX_TEXT_LENGTH :
3404
111k
                    XML_MAX_NAME_LENGTH;
3405
3406
    /*
3407
     * Handler for more complex cases
3408
     */
3409
111k
    c = CUR_CHAR(l);
3410
111k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3411
        /*
3412
   * Use the new checks of production [4] [4a] and [5] of the
3413
   * Update 5 of XML-1.0
3414
   */
3415
102k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3416
102k
      (!(((c >= 'a') && (c <= 'z')) ||
3417
86.5k
         ((c >= 'A') && (c <= 'Z')) ||
3418
86.5k
         (c == '_') || (c == ':') ||
3419
86.5k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3420
86.5k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3421
86.5k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3422
86.5k
         ((c >= 0x370) && (c <= 0x37D)) ||
3423
86.5k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3424
86.5k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3425
86.5k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3426
86.5k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3427
86.5k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3428
86.5k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3429
86.5k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3430
93.6k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3431
93.6k
      return(NULL);
3432
93.6k
  }
3433
8.58k
  len += l;
3434
8.58k
  NEXTL(l);
3435
8.58k
  c = CUR_CHAR(l);
3436
116k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3437
116k
         (((c >= 'a') && (c <= 'z')) ||
3438
114k
          ((c >= 'A') && (c <= 'Z')) ||
3439
114k
          ((c >= '0') && (c <= '9')) || /* !start */
3440
114k
          (c == '_') || (c == ':') ||
3441
114k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3442
114k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3443
114k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3444
114k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3445
114k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3446
114k
          ((c >= 0x370) && (c <= 0x37D)) ||
3447
114k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3448
114k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3449
114k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3450
114k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3451
114k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3452
114k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3453
114k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3454
114k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3455
114k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3456
114k
    )) {
      /* Stop counting at INT_MAX instead of overflowing len. */
3457
108k
            if (len <= INT_MAX - l)
3458
108k
          len += l;
3459
108k
      NEXTL(l);
3460
108k
      c = CUR_CHAR(l);
3461
108k
  }
3462
8.93k
    } else {
3463
8.93k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3464
8.93k
      (!IS_LETTER(c) && (c != '_') &&
3465
7.84k
       (c != ':'))) {
3466
5.64k
      return(NULL);
3467
5.64k
  }
3468
3.28k
  len += l;
3469
3.28k
  NEXTL(l);
3470
3.28k
  c = CUR_CHAR(l);
3471
3472
12.1k
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3473
12.1k
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3474
11.1k
    (c == '.') || (c == '-') ||
3475
11.1k
    (c == '_') || (c == ':') ||
3476
11.1k
    (IS_COMBINING(c)) ||
3477
11.1k
    (IS_EXTENDER(c)))) {
3478
8.84k
            if (len <= INT_MAX - l)
3479
8.84k
          len += l;
3480
8.84k
      NEXTL(l);
3481
8.84k
      c = CUR_CHAR(l);
3482
8.84k
  }
3483
3.28k
    }
3484
11.8k
    if (ctxt->instate == XML_PARSER_EOF)
3485
0
        return(NULL);
3486
11.8k
    if (len > maxLength) {
3487
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3488
0
        return(NULL);
3489
0
    }
3490
11.8k
    if (ctxt->input->cur - ctxt->input->base < len) {
3491
        /*
3492
         * There were a couple of bugs where PERefs led to a change
3493
         * of the buffer. Check the buffer size to avoid passing an invalid
3494
         * pointer to xmlDictLookup.
3495
         */
3496
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3497
0
                    "unexpected change of input buffer");
3498
0
        return (NULL);
3499
0
    }
    /*
     * The name is the len bytes just before the cursor. When the cursor is
     * on the '\n' of a "\r\n" pair, the start is taken one byte earlier.
     * NOTE(review): presumably compensates for line-ending handling in
     * NEXTL/CUR_CHAR - confirm.
     */
3500
11.8k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3501
0
        return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3502
11.8k
    return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3503
11.8k
}
3504
3505
/**
3506
 * xmlParseName:
3507
 * @ctxt:  an XML parser context
3508
 *
3509
 * DEPRECATED: Internal function, don't use.
3510
 *
3511
 * parse an XML name.
 *
 * Pure-ASCII names followed by another non-zero ASCII byte are scanned
 * directly in the input buffer; everything else is handed to
 * xmlParseNameComplex().
3512
 *
3513
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3514
 *                  CombiningChar | Extender
3515
 *
3516
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3517
 *
3518
 * [6] Names ::= Name (#x20 Name)*
3519
 *
3520
 * Returns the Name parsed or NULL
3521
 */
3522
3523
const xmlChar *
3524
418k
xmlParseName(xmlParserCtxtPtr ctxt) {
3525
418k
    const xmlChar *in;
3526
418k
    const xmlChar *ret;
3527
418k
    size_t count = 0;
3528
418k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3529
0
                       XML_MAX_TEXT_LENGTH :
3530
418k
                       XML_MAX_NAME_LENGTH;
3531
3532
418k
    GROW;
3533
418k
    if (ctxt->instate == XML_PARSER_EOF)
3534
2
        return(NULL);
3535
3536
    /*
3537
     * Accelerator for simple ASCII names
3538
     */
3539
418k
    in = ctxt->input->cur;
3540
418k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3541
418k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3542
418k
  (*in == '_') || (*in == ':')) {
3543
312k
  in++;
3544
590k
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3545
590k
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3546
590k
         ((*in >= 0x30) && (*in <= 0x39)) ||
3547
590k
         (*in == '_') || (*in == '-') ||
3548
590k
         (*in == ':') || (*in == '.'))
3549
278k
      in++;
  /*
   * Only accept the fast result when the name stopped on a non-zero
   * ASCII byte; a 0 or a byte >= 0x80 goes to the complex parser.
   */
3550
312k
  if ((*in > 0) && (*in < 0x80)) {
3551
307k
      count = in - ctxt->input->cur;
3552
307k
            if (count > maxLength) {
3553
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3554
0
                return(NULL);
3555
0
            }
3556
307k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3557
307k
      ctxt->input->cur = in;
3558
307k
      ctxt->input->col += count;
3559
307k
      if (ret == NULL)
3560
0
          xmlErrMemory(ctxt, NULL);
3561
307k
      return(ret);
3562
307k
  }
3563
312k
    }
3564
    /* accelerator for special cases */
3565
111k
    return(xmlParseNameComplex(ctxt));
3566
418k
}
3567
3568
/*
 * xmlParseNCNameComplex:
 * @ctxt:  an XML parser context
 *
 * Slow path of xmlParseNCName: reads the name one decoded character at a
 * time, using xmlIsNameStartChar/xmlIsNameChar and rejecting ':'.
 *
 * Returns the hashed dictionary entry for the name. The result has a NULL
 * name and a hashValue of 0 if the current character cannot start an
 * NCName, the parser reached XML_PARSER_EOF, or the name is too long.
 */
static xmlHashedString
3569
99.4k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3570
99.4k
    xmlHashedString ret;
3571
99.4k
    int len = 0, l;
3572
99.4k
    int c;
3573
99.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3574
0
                    XML_MAX_TEXT_LENGTH :
3575
99.4k
                    XML_MAX_NAME_LENGTH;
3576
99.4k
    size_t startPosition = 0;
3577
3578
99.4k
    ret.name = NULL;
3579
99.4k
    ret.hashValue = 0;
3580
3581
    /*
3582
     * Handler for more complex cases
     * The start is remembered as an offset from BASE_PTR rather than as a
     * pointer. NOTE(review): presumably because the input buffer can be
     * reallocated while advancing - confirm.
3583
     */
3584
99.4k
    startPosition = CUR_PTR - BASE_PTR;
3585
99.4k
    c = CUR_CHAR(l);
3586
99.4k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3587
99.4k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3588
93.6k
  return(ret);
3589
93.6k
    }
3590
3591
98.1k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3592
98.1k
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
  /* Stop counting at INT_MAX instead of overflowing len. */
3593
92.3k
        if (len <= INT_MAX - l)
3594
92.3k
      len += l;
3595
92.3k
  NEXTL(l);
3596
92.3k
  c = CUR_CHAR(l);
3597
92.3k
    }
3598
5.80k
    if (ctxt->instate == XML_PARSER_EOF)
3599
0
        return(ret);
3600
5.80k
    if (len > maxLength) {
3601
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3602
0
        return(ret);
3603
0
    }
3604
5.80k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3605
5.80k
    return(ret);
3606
5.80k
}
3607
3608
/**
3609
 * xmlParseNCName:
3610
 * @ctxt:  an XML parser context
3611
 *
3612
 *
3613
 * parse an XML name.
 *
 * Pure-ASCII names followed by another non-zero ASCII byte are scanned
 * directly in the input buffer; names that reach the end of the buffer or
 * contain other bytes are handed to xmlParseNCNameComplex().
3614
 *
3615
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3616
 *                      CombiningChar | Extender
3617
 *
3618
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3619
 *
3620
 * Returns the hashed Name parsed; its name field is NULL on failure
3621
 */
3622
3623
static xmlHashedString
3624
195k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3625
195k
    const xmlChar *in, *e;
3626
195k
    xmlHashedString ret;
3627
195k
    size_t count = 0;
3628
195k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3629
0
                       XML_MAX_TEXT_LENGTH :
3630
195k
                       XML_MAX_NAME_LENGTH;
3631
    /*
     * NOTE(review): only ret.name is initialised here, so ret.hashValue is
     * indeterminate on the NAME_TOO_LONG return below;
     * xmlParseNCNameComplex sets both fields.
     */
3632
195k
    ret.name = NULL;
3633
3634
    /*
3635
     * Accelerator for simple ASCII names
     * NOTE(review): *in is read before the (in < e) bound is tested;
     * presumably relies on a zero terminator after the buffer - confirm.
3636
     */
3637
195k
    in = ctxt->input->cur;
3638
195k
    e = ctxt->input->end;
3639
195k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3640
195k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3641
195k
   (*in == '_')) && (in < e)) {
3642
97.3k
  in++;
3643
178k
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3644
178k
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3645
178k
          ((*in >= 0x30) && (*in <= 0x39)) ||
3646
178k
          (*in == '_') || (*in == '-') ||
3647
178k
          (*in == '.')) && (in < e))
3648
81.5k
      in++;
  /* The name ran up to the end of the buffer: use the complex parser. */
3649
97.3k
  if (in >= e)
3650
22
      goto complex;
3651
97.3k
  if ((*in > 0) && (*in < 0x80)) {
3652
96.2k
      count = in - ctxt->input->cur;
3653
96.2k
            if (count > maxLength) {
3654
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3655
0
                return(ret);
3656
0
            }
3657
96.2k
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3658
96.2k
      ctxt->input->cur = in;
3659
96.2k
      ctxt->input->col += count;
3660
96.2k
      if (ret.name == NULL) {
3661
0
          xmlErrMemory(ctxt, NULL);
3662
0
      }
3663
96.2k
      return(ret);
3664
96.2k
  }
3665
97.3k
    }
3666
99.4k
complex:
3667
99.4k
    return(xmlParseNCNameComplex(ctxt));
3668
195k
}
3669
3670
/**
3671
 * xmlParseNameAndCompare:
3672
 * @ctxt:  an XML parser context
3673
 *
3674
 * parse an XML name and compares for match
3675
 * (specialized for endtag parsing)
3676
 *
3677
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3678
 * and the name for mismatch
3679
 */
3680
3681
static const xmlChar *
3682
14.5k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3683
14.5k
    register const xmlChar *cmp = other;
3684
14.5k
    register const xmlChar *in;
3685
14.5k
    const xmlChar *ret;
3686
3687
14.5k
    GROW;
3688
14.5k
    if (ctxt->instate == XML_PARSER_EOF)
3689
0
        return(NULL);
3690
3691
14.5k
    in = ctxt->input->cur;
3692
52.2k
    while (*in != 0 && *in == *cmp) {
3693
37.7k
  ++in;
3694
37.7k
  ++cmp;
3695
37.7k
    }
3696
14.5k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3697
  /* success */
3698
2.15k
  ctxt->input->col += in - ctxt->input->cur;
3699
2.15k
  ctxt->input->cur = in;
3700
2.15k
  return (const xmlChar*) 1;
3701
2.15k
    }
3702
    /* failure (or end of input buffer), check with full function */
3703
12.3k
    ret = xmlParseName (ctxt);
3704
    /* strings coming from the dictionary direct compare possible */
3705
12.3k
    if (ret == other) {
3706
618
  return (const xmlChar*) 1;
3707
618
    }
3708
11.7k
    return ret;
3709
12.3k
}
3710
3711
/**
3712
 * xmlParseStringName:
3713
 * @ctxt:  an XML parser context
3714
 * @str:  a pointer to the string pointer (IN/OUT)
3715
 *
3716
 * parse an XML name.
3717
 *
3718
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3719
 *                  CombiningChar | Extender
3720
 *
3721
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3722
 *
3723
 * [6] Names ::= Name (#x20 Name)*
3724
 *
3725
 * Returns the Name parsed or NULL. The @str pointer
3726
 * is updated to the current location in the string.
3727
 */
3728
3729
static xmlChar *
3730
535k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3731
535k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3732
535k
    const xmlChar *cur = *str;
3733
535k
    int len = 0, l;
3734
535k
    int c;
3735
535k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3736
0
                    XML_MAX_TEXT_LENGTH :
3737
535k
                    XML_MAX_NAME_LENGTH;
3738
3739
535k
    c = CUR_SCHAR(cur, l);
3740
535k
    if (!xmlIsNameStartChar(ctxt, c)) {
3741
2.37k
  return(NULL);
3742
2.37k
    }
3743
3744
532k
    COPY_BUF(buf, len, c);
3745
532k
    cur += l;
3746
532k
    c = CUR_SCHAR(cur, l);
3747
608k
    while (xmlIsNameChar(ctxt, c)) {
3748
76.2k
  COPY_BUF(buf, len, c);
3749
76.2k
  cur += l;
3750
76.2k
  c = CUR_SCHAR(cur, l);
3751
76.2k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3752
      /*
3753
       * Okay someone managed to make a huge name, so he's ready to pay
3754
       * for the processing speed.
3755
       */
3756
423
      xmlChar *buffer;
3757
423
      int max = len * 2;
3758
3759
423
      buffer = (xmlChar *) xmlMallocAtomic(max);
3760
423
      if (buffer == NULL) {
3761
0
          xmlErrMemory(ctxt, NULL);
3762
0
    return(NULL);
3763
0
      }
3764
423
      memcpy(buffer, buf, len);
3765
8.77k
      while (xmlIsNameChar(ctxt, c)) {
3766
8.35k
    if (len + 10 > max) {
3767
205
        xmlChar *tmp;
3768
3769
205
        max *= 2;
3770
205
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3771
205
        if (tmp == NULL) {
3772
0
      xmlErrMemory(ctxt, NULL);
3773
0
      xmlFree(buffer);
3774
0
      return(NULL);
3775
0
        }
3776
205
        buffer = tmp;
3777
205
    }
3778
8.35k
    COPY_BUF(buffer, len, c);
3779
8.35k
    cur += l;
3780
8.35k
    c = CUR_SCHAR(cur, l);
3781
8.35k
                if (len > maxLength) {
3782
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3783
0
                    xmlFree(buffer);
3784
0
                    return(NULL);
3785
0
                }
3786
8.35k
      }
3787
423
      buffer[len] = 0;
3788
423
      *str = cur;
3789
423
      return(buffer);
3790
423
  }
3791
76.2k
    }
3792
532k
    if (len > maxLength) {
3793
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3794
0
        return(NULL);
3795
0
    }
3796
532k
    *str = cur;
3797
532k
    return(xmlStrndup(buf, len));
3798
532k
}
3799
3800
/**
3801
 * xmlParseNmtoken:
3802
 * @ctxt:  an XML parser context
3803
 *
3804
 * DEPRECATED: Internal function, don't use.
3805
 *
3806
 * parse an XML Nmtoken.
3807
 *
3808
 * [7] Nmtoken ::= (NameChar)+
3809
 *
3810
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3811
 *
3812
 * Returns the Nmtoken parsed or NULL
3813
 */
3814
3815
xmlChar *
3816
44.6k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3817
44.6k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3818
44.6k
    int len = 0, l;
3819
44.6k
    int c;
3820
44.6k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3821
0
                    XML_MAX_TEXT_LENGTH :
3822
44.6k
                    XML_MAX_NAME_LENGTH;
3823
3824
44.6k
    c = CUR_CHAR(l);
3825
3826
122k
    while (xmlIsNameChar(ctxt, c)) {
3827
78.2k
  COPY_BUF(buf, len, c);
3828
78.2k
  NEXTL(l);
3829
78.2k
  c = CUR_CHAR(l);
3830
78.2k
  if (len >= XML_MAX_NAMELEN) {
3831
      /*
3832
       * Okay someone managed to make a huge token, so he's ready to pay
3833
       * for the processing speed.
3834
       */
3835
763
      xmlChar *buffer;
3836
763
      int max = len * 2;
3837
3838
763
      buffer = (xmlChar *) xmlMallocAtomic(max);
3839
763
      if (buffer == NULL) {
3840
0
          xmlErrMemory(ctxt, NULL);
3841
0
    return(NULL);
3842
0
      }
3843
763
      memcpy(buffer, buf, len);
3844
68.0k
      while (xmlIsNameChar(ctxt, c)) {
3845
67.2k
    if (len + 10 > max) {
3846
803
        xmlChar *tmp;
3847
3848
803
        max *= 2;
3849
803
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3850
803
        if (tmp == NULL) {
3851
0
      xmlErrMemory(ctxt, NULL);
3852
0
      xmlFree(buffer);
3853
0
      return(NULL);
3854
0
        }
3855
803
        buffer = tmp;
3856
803
    }
3857
67.2k
    COPY_BUF(buffer, len, c);
3858
67.2k
                if (len > maxLength) {
3859
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3860
0
                    xmlFree(buffer);
3861
0
                    return(NULL);
3862
0
                }
3863
67.2k
    NEXTL(l);
3864
67.2k
    c = CUR_CHAR(l);
3865
67.2k
      }
3866
763
      buffer[len] = 0;
3867
763
            if (ctxt->instate == XML_PARSER_EOF) {
3868
0
                xmlFree(buffer);
3869
0
                return(NULL);
3870
0
            }
3871
763
      return(buffer);
3872
763
  }
3873
78.2k
    }
3874
43.8k
    if (ctxt->instate == XML_PARSER_EOF)
3875
0
        return(NULL);
3876
43.8k
    if (len == 0)
3877
1.95k
        return(NULL);
3878
41.8k
    if (len > maxLength) {
3879
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3880
0
        return(NULL);
3881
0
    }
3882
41.8k
    return(xmlStrndup(buf, len));
3883
41.8k
}
3884
3885
/**
3886
 * xmlParseEntityValue:
3887
 * @ctxt:  an XML parser context
3888
 * @orig:  if non-NULL store a copy of the original entity value
3889
 *
3890
 * DEPRECATED: Internal function, don't use.
3891
 *
3892
 * parse a value for ENTITY declarations
3893
 *
3894
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3895
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3896
 *
3897
 * Returns the EntityValue parsed with reference substituted or NULL
3898
 */
3899
3900
xmlChar *
3901
12.2k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3902
12.2k
    xmlChar *buf = NULL;
3903
12.2k
    int len = 0;
3904
12.2k
    int size = XML_PARSER_BUFFER_SIZE;
3905
12.2k
    int c, l;
3906
12.2k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3907
0
                    XML_MAX_HUGE_LENGTH :
3908
12.2k
                    XML_MAX_TEXT_LENGTH;
3909
12.2k
    xmlChar stop;
3910
12.2k
    xmlChar *ret = NULL;
3911
12.2k
    const xmlChar *cur = NULL;
3912
12.2k
    xmlParserInputPtr input;
3913
3914
12.2k
    if (RAW == '"') stop = '"';
3915
6.65k
    else if (RAW == '\'') stop = '\'';
3916
0
    else {
3917
0
  xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3918
0
  return(NULL);
3919
0
    }
3920
12.2k
    buf = (xmlChar *) xmlMallocAtomic(size);
3921
12.2k
    if (buf == NULL) {
3922
0
  xmlErrMemory(ctxt, NULL);
3923
0
  return(NULL);
3924
0
    }
3925
3926
    /*
3927
     * The content of the entity definition is copied in a buffer.
3928
     */
3929
3930
12.2k
    ctxt->instate = XML_PARSER_ENTITY_VALUE;
3931
12.2k
    input = ctxt->input;
3932
12.2k
    GROW;
3933
12.2k
    if (ctxt->instate == XML_PARSER_EOF)
3934
0
        goto error;
3935
12.2k
    NEXT;
3936
12.2k
    c = CUR_CHAR(l);
3937
    /*
3938
     * NOTE: 4.4.5 Included in Literal
3939
     * When a parameter entity reference appears in a literal entity
3940
     * value, ... a single or double quote character in the replacement
3941
     * text is always treated as a normal data character and will not
3942
     * terminate the literal.
3943
     * In practice it means we stop the loop only when back at parsing
3944
     * the initial entity and the quote is found
3945
     */
3946
310k
    while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3947
309k
      (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3948
297k
  if (len + 5 >= size) {
3949
1.74k
      xmlChar *tmp;
3950
3951
1.74k
      size *= 2;
3952
1.74k
      tmp = (xmlChar *) xmlRealloc(buf, size);
3953
1.74k
      if (tmp == NULL) {
3954
0
    xmlErrMemory(ctxt, NULL);
3955
0
                goto error;
3956
0
      }
3957
1.74k
      buf = tmp;
3958
1.74k
  }
3959
297k
  COPY_BUF(buf, len, c);
3960
297k
  NEXTL(l);
3961
3962
297k
  GROW;
3963
297k
  c = CUR_CHAR(l);
3964
297k
  if (c == 0) {
3965
780
      GROW;
3966
780
      c = CUR_CHAR(l);
3967
780
  }
3968
3969
297k
        if (len > maxLength) {
3970
0
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3971
0
                           "entity value too long\n");
3972
0
            goto error;
3973
0
        }
3974
297k
    }
3975
12.2k
    buf[len] = 0;
3976
12.2k
    if (ctxt->instate == XML_PARSER_EOF)
3977
0
        goto error;
3978
12.2k
    if (c != stop) {
3979
783
        xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3980
783
        goto error;
3981
783
    }
3982
11.4k
    NEXT;
3983
3984
    /*
3985
     * Raise problem w.r.t. '&' and '%' being used in non-entities
3986
     * reference constructs. Note Charref will be handled in
3987
     * xmlStringDecodeEntities()
3988
     */
3989
11.4k
    cur = buf;
3990
380k
    while (*cur != 0) { /* non input consuming */
3991
370k
  if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3992
18.5k
      xmlChar *name;
3993
18.5k
      xmlChar tmp = *cur;
3994
18.5k
            int nameOk = 0;
3995
3996
18.5k
      cur++;
3997
18.5k
      name = xmlParseStringName(ctxt, &cur);
3998
18.5k
            if (name != NULL) {
3999
18.1k
                nameOk = 1;
4000
18.1k
                xmlFree(name);
4001
18.1k
            }
4002
18.5k
            if ((nameOk == 0) || (*cur != ';')) {
4003
705
    xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
4004
705
      "EntityValue: '%c' forbidden except for entities references\n",
4005
705
                            tmp);
4006
705
                goto error;
4007
705
      }
4008
17.8k
      if ((tmp == '%') && (ctxt->inSubset == 1) &&
4009
17.8k
    (ctxt->inputNr == 1)) {
4010
82
    xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
4011
82
                goto error;
4012
82
      }
4013
17.7k
      if (*cur == 0)
4014
0
          break;
4015
17.7k
  }
4016
369k
  cur++;
4017
369k
    }
4018
4019
    /*
4020
     * Then PEReference entities are substituted.
4021
     *
4022
     * NOTE: 4.4.7 Bypassed
4023
     * When a general entity reference appears in the EntityValue in
4024
     * an entity declaration, it is bypassed and left as is.
4025
     * so XML_SUBSTITUTE_REF is not set here.
4026
     */
4027
10.6k
    ++ctxt->depth;
4028
10.6k
    ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
4029
10.6k
                                     0, 0, 0, /* check */ 1);
4030
10.6k
    --ctxt->depth;
4031
4032
10.6k
    if (orig != NULL) {
4033
10.6k
        *orig = buf;
4034
10.6k
        buf = NULL;
4035
10.6k
    }
4036
4037
12.2k
error:
4038
12.2k
    if (buf != NULL)
4039
1.57k
        xmlFree(buf);
4040
12.2k
    return(ret);
4041
10.6k
}
4042
4043
/**
4044
 * xmlParseAttValueComplex:
4045
 * @ctxt:  an XML parser context
4046
 * @len:   the resulting attribute len
4047
 * @normalize:  whether to apply the inner normalization
4048
 *
4049
 * parse a value for an attribute, this is the fallback function
4050
 * of xmlParseAttValue() when the attribute parsing requires handling
4051
 * of non-ASCII characters, or normalization compaction.
4052
 *
4053
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4054
 */
4055
static xmlChar *
4056
38.7k
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
4057
38.7k
    xmlChar limit = 0;
4058
38.7k
    xmlChar *buf = NULL;
4059
38.7k
    xmlChar *rep = NULL;
4060
38.7k
    size_t len = 0;
4061
38.7k
    size_t buf_size = 0;
4062
38.7k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4063
0
                       XML_MAX_HUGE_LENGTH :
4064
38.7k
                       XML_MAX_TEXT_LENGTH;
4065
38.7k
    int c, l, in_space = 0;
4066
38.7k
    xmlChar *current = NULL;
4067
38.7k
    xmlEntityPtr ent;
4068
4069
38.7k
    if (NXT(0) == '"') {
4070
29.0k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4071
29.0k
  limit = '"';
4072
29.0k
        NEXT;
4073
29.0k
    } else if (NXT(0) == '\'') {
4074
9.76k
  limit = '\'';
4075
9.76k
  ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4076
9.76k
        NEXT;
4077
9.76k
    } else {
4078
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4079
0
  return(NULL);
4080
0
    }
4081
4082
    /*
4083
     * allocate a translation buffer.
4084
     */
4085
38.7k
    buf_size = XML_PARSER_BUFFER_SIZE;
4086
38.7k
    buf = (xmlChar *) xmlMallocAtomic(buf_size);
4087
38.7k
    if (buf == NULL) goto mem_error;
4088
4089
    /*
4090
     * OK loop until we reach one of the ending char or a size limit.
4091
     */
4092
38.7k
    c = CUR_CHAR(l);
4093
419k
    while (((NXT(0) != limit) && /* checked */
4094
419k
            (IS_CHAR(c)) && (c != '<')) &&
4095
419k
            (ctxt->instate != XML_PARSER_EOF)) {
4096
381k
  if (c == '&') {
4097
54.2k
      in_space = 0;
4098
54.2k
      if (NXT(1) == '#') {
4099
8.76k
    int val = xmlParseCharRef(ctxt);
4100
4101
8.76k
    if (val == '&') {
4102
901
        if (ctxt->replaceEntities) {
4103
0
      if (len + 10 > buf_size) {
4104
0
          growBuffer(buf, 10);
4105
0
      }
4106
0
      buf[len++] = '&';
4107
901
        } else {
4108
      /*
4109
       * The reparsing will be done in xmlStringGetNodeList()
4110
       * called by the attribute() function in SAX.c
4111
       */
4112
901
      if (len + 10 > buf_size) {
4113
390
          growBuffer(buf, 10);
4114
390
      }
4115
901
      buf[len++] = '&';
4116
901
      buf[len++] = '#';
4117
901
      buf[len++] = '3';
4118
901
      buf[len++] = '8';
4119
901
      buf[len++] = ';';
4120
901
        }
4121
7.86k
    } else if (val != 0) {
4122
5.43k
        if (len + 10 > buf_size) {
4123
394
      growBuffer(buf, 10);
4124
394
        }
4125
5.43k
        len += xmlCopyChar(0, &buf[len], val);
4126
5.43k
    }
4127
45.5k
      } else {
4128
45.5k
    ent = xmlParseEntityRef(ctxt);
4129
45.5k
    if ((ent != NULL) &&
4130
45.5k
        (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4131
2.04k
        if (len + 10 > buf_size) {
4132
416
      growBuffer(buf, 10);
4133
416
        }
4134
2.04k
        if ((ctxt->replaceEntities == 0) &&
4135
2.04k
            (ent->content[0] == '&')) {
4136
866
      buf[len++] = '&';
4137
866
      buf[len++] = '#';
4138
866
      buf[len++] = '3';
4139
866
      buf[len++] = '8';
4140
866
      buf[len++] = ';';
4141
1.17k
        } else {
4142
1.17k
      buf[len++] = ent->content[0];
4143
1.17k
        }
4144
43.4k
    } else if ((ent != NULL) &&
4145
43.4k
               (ctxt->replaceEntities != 0)) {
4146
0
        if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4147
0
                        if (xmlParserEntityCheck(ctxt, ent->length))
4148
0
                            goto error;
4149
4150
0
      ++ctxt->depth;
4151
0
      rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4152
0
                                ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4153
0
                                /* check */ 1);
4154
0
      --ctxt->depth;
4155
0
      if (rep != NULL) {
4156
0
          current = rep;
4157
0
          while (*current != 0) { /* non input consuming */
4158
0
                                if ((*current == 0xD) || (*current == 0xA) ||
4159
0
                                    (*current == 0x9)) {
4160
0
                                    buf[len++] = 0x20;
4161
0
                                    current++;
4162
0
                                } else
4163
0
                                    buf[len++] = *current++;
4164
0
        if (len + 10 > buf_size) {
4165
0
            growBuffer(buf, 10);
4166
0
        }
4167
0
          }
4168
0
          xmlFree(rep);
4169
0
          rep = NULL;
4170
0
      }
4171
0
        } else {
4172
0
      if (len + 10 > buf_size) {
4173
0
          growBuffer(buf, 10);
4174
0
      }
4175
0
      if (ent->content != NULL)
4176
0
          buf[len++] = ent->content[0];
4177
0
        }
4178
43.4k
    } else if (ent != NULL) {
4179
30.0k
        int i = xmlStrlen(ent->name);
4180
30.0k
        const xmlChar *cur = ent->name;
4181
4182
        /*
4183
                     * We also check for recursion and amplification
4184
                     * when entities are not substituted. They're
4185
                     * often expanded later.
4186
         */
4187
30.0k
        if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4188
30.0k
      (ent->content != NULL)) {
4189
27.2k
                        if ((ent->flags & XML_ENT_CHECKED) == 0) {
4190
14.9k
                            unsigned long oldCopy = ctxt->sizeentcopy;
4191
4192
14.9k
                            ctxt->sizeentcopy = ent->length;
4193
4194
14.9k
                            ++ctxt->depth;
4195
14.9k
                            rep = xmlStringDecodeEntitiesInt(ctxt,
4196
14.9k
                                    ent->content, ent->length,
4197
14.9k
                                    XML_SUBSTITUTE_REF, 0, 0, 0,
4198
14.9k
                                    /* check */ 1);
4199
14.9k
                            --ctxt->depth;
4200
4201
                            /*
4202
                             * If we're parsing DTD content, the entity
4203
                             * might reference other entities which
4204
                             * weren't defined yet, so the check isn't
4205
                             * reliable.
4206
                             */
4207
14.9k
                            if (ctxt->inSubset == 0) {
4208
804
                                ent->flags |= XML_ENT_CHECKED;
4209
804
                                ent->expandedSize = ctxt->sizeentcopy;
4210
804
                            }
4211
4212
14.9k
                            if (rep != NULL) {
4213
14.7k
                                xmlFree(rep);
4214
14.7k
                                rep = NULL;
4215
14.7k
                            } else {
4216
173
                                ent->content[0] = 0;
4217
173
                            }
4218
4219
14.9k
                            if (xmlParserEntityCheck(ctxt, oldCopy))
4220
2
                                goto error;
4221
14.9k
                        } else {
4222
12.2k
                            if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4223
28
                                goto error;
4224
12.2k
                        }
4225
27.2k
        }
4226
4227
        /*
4228
         * Just output the reference
4229
         */
4230
30.0k
        buf[len++] = '&';
4231
30.5k
        while (len + i + 10 > buf_size) {
4232
976
      growBuffer(buf, i + 10);
4233
976
        }
4234
60.3k
        for (;i > 0;i--)
4235
30.2k
      buf[len++] = *cur++;
4236
30.0k
        buf[len++] = ';';
4237
30.0k
    }
4238
45.5k
      }
4239
326k
  } else {
4240
326k
      if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4241
44.8k
          if ((len != 0) || (!normalize)) {
4242
43.3k
        if ((!normalize) || (!in_space)) {
4243
42.3k
      COPY_BUF(buf, len, 0x20);
4244
42.5k
      while (len + 10 > buf_size) {
4245
434
          growBuffer(buf, 10);
4246
434
      }
4247
42.3k
        }
4248
43.3k
        in_space = 1;
4249
43.3k
    }
4250
281k
      } else {
4251
281k
          in_space = 0;
4252
281k
    COPY_BUF(buf, len, c);
4253
281k
    if (len + 10 > buf_size) {
4254
2.27k
        growBuffer(buf, 10);
4255
2.27k
    }
4256
281k
      }
4257
326k
      NEXTL(l);
4258
326k
  }
4259
381k
  GROW;
4260
381k
  c = CUR_CHAR(l);
4261
381k
        if (len > maxLength) {
4262
0
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4263
0
                           "AttValue length too long\n");
4264
0
            goto mem_error;
4265
0
        }
4266
381k
    }
4267
38.7k
    if (ctxt->instate == XML_PARSER_EOF)
4268
149
        goto error;
4269
4270
38.6k
    if ((in_space) && (normalize)) {
4271
875
        while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4272
199
    }
4273
38.6k
    buf[len] = 0;
4274
38.6k
    if (RAW == '<') {
4275
25.6k
  xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4276
25.6k
    } else if (RAW != limit) {
4277
8.02k
  if ((c != 0) && (!IS_CHAR(c))) {
4278
292
      xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4279
292
         "invalid character in attribute value\n");
4280
7.73k
  } else {
4281
7.73k
      xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4282
7.73k
         "AttValue: ' expected\n");
4283
7.73k
        }
4284
8.02k
    } else
4285
4.96k
  NEXT;
4286
4287
38.6k
    if (attlen != NULL) *attlen = len;
4288
38.6k
    return(buf);
4289
4290
0
mem_error:
4291
0
    xmlErrMemory(ctxt, NULL);
4292
179
error:
4293
179
    if (buf != NULL)
4294
179
        xmlFree(buf);
4295
179
    if (rep != NULL)
4296
0
        xmlFree(rep);
4297
179
    return(NULL);
4298
0
}
4299
4300
/**
4301
 * xmlParseAttValue:
4302
 * @ctxt:  an XML parser context
4303
 *
4304
 * DEPRECATED: Internal function, don't use.
4305
 *
4306
 * parse a value for an attribute
4307
 * Note: the parser won't do substitution of entities here, this
4308
 * will be handled later in xmlStringGetNodeList
4309
 *
4310
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4311
 *                   "'" ([^<&'] | Reference)* "'"
4312
 *
4313
 * 3.3.3 Attribute-Value Normalization:
4314
 * Before the value of an attribute is passed to the application or
4315
 * checked for validity, the XML processor must normalize it as follows:
4316
 * - a character reference is processed by appending the referenced
4317
 *   character to the attribute value
4318
 * - an entity reference is processed by recursively processing the
4319
 *   replacement text of the entity
4320
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4321
 *   appending #x20 to the normalized value, except that only a single
4322
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4323
 *   parsed entity or the literal entity value of an internal parsed entity
4324
 * - other characters are processed by appending them to the normalized value
4325
 * If the declared value is not CDATA, then the XML processor must further
4326
 * process the normalized attribute value by discarding any leading and
4327
 * trailing space (#x20) characters, and by replacing sequences of space
4328
 * (#x20) characters by a single space (#x20) character.
4329
 * All attributes for which no declaration has been read should be treated
4330
 * by a non-validating parser as if declared CDATA.
4331
 *
4332
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4333
 */
4334
4335
4336
xmlChar *
4337
45.0k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4338
45.0k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4339
45.0k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4340
45.0k
}
4341
4342
/**
4343
 * xmlParseSystemLiteral:
4344
 * @ctxt:  an XML parser context
4345
 *
4346
 * DEPRECATED: Internal function, don't use.
4347
 *
4348
 * parse an XML Literal
4349
 *
4350
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4351
 *
4352
 * Returns the SystemLiteral parsed or NULL
4353
 */
4354
4355
xmlChar *
4356
7.05k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4357
7.05k
    xmlChar *buf = NULL;
4358
7.05k
    int len = 0;
4359
7.05k
    int size = XML_PARSER_BUFFER_SIZE;
4360
7.05k
    int cur, l;
4361
7.05k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4362
0
                    XML_MAX_TEXT_LENGTH :
4363
7.05k
                    XML_MAX_NAME_LENGTH;
4364
7.05k
    xmlChar stop;
4365
7.05k
    int state = ctxt->instate;
4366
4367
7.05k
    if (RAW == '"') {
4368
3.78k
        NEXT;
4369
3.78k
  stop = '"';
4370
3.78k
    } else if (RAW == '\'') {
4371
1.07k
        NEXT;
4372
1.07k
  stop = '\'';
4373
2.19k
    } else {
4374
2.19k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4375
2.19k
  return(NULL);
4376
2.19k
    }
4377
4378
4.85k
    buf = (xmlChar *) xmlMallocAtomic(size);
4379
4.85k
    if (buf == NULL) {
4380
0
        xmlErrMemory(ctxt, NULL);
4381
0
  return(NULL);
4382
0
    }
4383
4.85k
    ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4384
4.85k
    cur = CUR_CHAR(l);
4385
81.7k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4386
76.9k
  if (len + 5 >= size) {
4387
424
      xmlChar *tmp;
4388
4389
424
      size *= 2;
4390
424
      tmp = (xmlChar *) xmlRealloc(buf, size);
4391
424
      if (tmp == NULL) {
4392
0
          xmlFree(buf);
4393
0
    xmlErrMemory(ctxt, NULL);
4394
0
    ctxt->instate = (xmlParserInputState) state;
4395
0
    return(NULL);
4396
0
      }
4397
424
      buf = tmp;
4398
424
  }
4399
76.9k
  COPY_BUF(buf, len, cur);
4400
76.9k
        if (len > maxLength) {
4401
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4402
0
            xmlFree(buf);
4403
0
            ctxt->instate = (xmlParserInputState) state;
4404
0
            return(NULL);
4405
0
        }
4406
76.9k
  NEXTL(l);
4407
76.9k
  cur = CUR_CHAR(l);
4408
76.9k
    }
4409
4.85k
    buf[len] = 0;
4410
4.85k
    if (ctxt->instate == XML_PARSER_EOF) {
4411
0
        xmlFree(buf);
4412
0
        return(NULL);
4413
0
    }
4414
4.85k
    ctxt->instate = (xmlParserInputState) state;
4415
4.85k
    if (!IS_CHAR(cur)) {
4416
1.34k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4417
3.50k
    } else {
4418
3.50k
  NEXT;
4419
3.50k
    }
4420
4.85k
    return(buf);
4421
4.85k
}
4422
4423
/**
4424
 * xmlParsePubidLiteral:
4425
 * @ctxt:  an XML parser context
4426
 *
4427
 * DEPRECATED: Internal function, don't use.
4428
 *
4429
 * parse an XML public literal
4430
 *
4431
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4432
 *
4433
 * Returns the PubidLiteral parsed or NULL.
4434
 */
4435
4436
xmlChar *
4437
4.30k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4438
4.30k
    xmlChar *buf = NULL;
4439
4.30k
    int len = 0;
4440
4.30k
    int size = XML_PARSER_BUFFER_SIZE;
4441
4.30k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4442
0
                    XML_MAX_TEXT_LENGTH :
4443
4.30k
                    XML_MAX_NAME_LENGTH;
4444
4.30k
    xmlChar cur;
4445
4.30k
    xmlChar stop;
4446
4.30k
    xmlParserInputState oldstate = ctxt->instate;
4447
4448
4.30k
    if (RAW == '"') {
4449
2.41k
        NEXT;
4450
2.41k
  stop = '"';
4451
2.41k
    } else if (RAW == '\'') {
4452
1.19k
        NEXT;
4453
1.19k
  stop = '\'';
4454
1.19k
    } else {
4455
696
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4456
696
  return(NULL);
4457
696
    }
4458
3.60k
    buf = (xmlChar *) xmlMallocAtomic(size);
4459
3.60k
    if (buf == NULL) {
4460
0
  xmlErrMemory(ctxt, NULL);
4461
0
  return(NULL);
4462
0
    }
4463
3.60k
    ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4464
3.60k
    cur = CUR;
4465
45.9k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4466
42.3k
  if (len + 1 >= size) {
4467
194
      xmlChar *tmp;
4468
4469
194
      size *= 2;
4470
194
      tmp = (xmlChar *) xmlRealloc(buf, size);
4471
194
      if (tmp == NULL) {
4472
0
    xmlErrMemory(ctxt, NULL);
4473
0
    xmlFree(buf);
4474
0
    return(NULL);
4475
0
      }
4476
194
      buf = tmp;
4477
194
  }
4478
42.3k
  buf[len++] = cur;
4479
42.3k
        if (len > maxLength) {
4480
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4481
0
            xmlFree(buf);
4482
0
            return(NULL);
4483
0
        }
4484
42.3k
  NEXT;
4485
42.3k
  cur = CUR;
4486
42.3k
    }
4487
3.60k
    buf[len] = 0;
4488
3.60k
    if (ctxt->instate == XML_PARSER_EOF) {
4489
0
        xmlFree(buf);
4490
0
        return(NULL);
4491
0
    }
4492
3.60k
    if (cur != stop) {
4493
3.05k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4494
3.05k
    } else {
4495
556
  NEXTL(1);
4496
556
    }
4497
3.60k
    ctxt->instate = oldstate;
4498
3.60k
    return(buf);
4499
3.60k
}
4500
4501
/* Forward declaration; the definition is not part of this section. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4502
4503
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by byte value: an entry equals its own index for
 * the bytes that are kept, and is 0 for every other byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: control characters, only 0x9 is kept
     * (CR/LF are separated) */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x3F: '&' (0x26) and '<' (0x3C) are zeroed */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x5F: ']' (0x5D) is zeroed */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x7F: all kept */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to implicit zero-initialization */
};
4540
4541
/**
4542
 * xmlParseCharDataInternal:
4543
 * @ctxt:  an XML parser context
4544
 * @partial:  buffer may contain partial UTF-8 sequences
4545
 *
4546
 * Parse character data. Always makes progress if the first char isn't
4547
 * '<' or '&'.
4548
 *
4549
 * The right angle bracket (>) may be represented using the string "&gt;",
4550
 * and must, for compatibility, be escaped using "&gt;" or a character
4551
 * reference when it appears in the string "]]>" in content, when that
4552
 * string is not marking the end of a CDATA section.
4553
 *
4554
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4555
 */
4556
static void
4557
50.1k
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4558
50.1k
    const xmlChar *in;
4559
50.1k
    int nbchar = 0;
4560
50.1k
    int line = ctxt->input->line;
4561
50.1k
    int col = ctxt->input->col;
4562
50.1k
    int ccol;
4563
4564
50.1k
    GROW;
4565
    /*
4566
     * Accelerated common case where input don't need to be
4567
     * modified before passing it to the handler.
4568
     */
4569
50.1k
    in = ctxt->input->cur;
4570
50.5k
    do {
4571
51.0k
get_more_space:
4572
60.7k
        while (*in == 0x20) { in++; ctxt->input->col++; }
4573
51.0k
        if (*in == 0xA) {
4574
1.16k
            do {
4575
1.16k
                ctxt->input->line++; ctxt->input->col = 1;
4576
1.16k
                in++;
4577
1.16k
            } while (*in == 0xA);
4578
506
            goto get_more_space;
4579
506
        }
4580
50.5k
        if (*in == '<') {
4581
4.35k
            nbchar = in - ctxt->input->cur;
4582
4.35k
            if (nbchar > 0) {
4583
4.35k
                const xmlChar *tmp = ctxt->input->cur;
4584
4.35k
                ctxt->input->cur = in;
4585
4586
4.35k
                if ((ctxt->sax != NULL) &&
4587
4.35k
                    (ctxt->disableSAX == 0) &&
4588
4.35k
                    (ctxt->sax->ignorableWhitespace !=
4589
3.40k
                     ctxt->sax->characters)) {
4590
2.42k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4591
1.09k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4592
1.09k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4593
1.09k
                                                   tmp, nbchar);
4594
1.33k
                    } else {
4595
1.33k
                        if (ctxt->sax->characters != NULL)
4596
1.33k
                            ctxt->sax->characters(ctxt->userData,
4597
1.33k
                                                  tmp, nbchar);
4598
1.33k
                        if (*ctxt->space == -1)
4599
712
                            *ctxt->space = -2;
4600
1.33k
                    }
4601
2.42k
                } else if ((ctxt->sax != NULL) &&
4602
1.93k
                           (ctxt->disableSAX == 0) &&
4603
1.93k
                           (ctxt->sax->characters != NULL)) {
4604
981
                    ctxt->sax->characters(ctxt->userData,
4605
981
                                          tmp, nbchar);
4606
981
                }
4607
4.35k
            }
4608
4.35k
            return;
4609
4.35k
        }
4610
4611
49.4k
get_more:
4612
49.4k
        ccol = ctxt->input->col;
4613
128k
        while (test_char_data[*in]) {
4614
78.8k
            in++;
4615
78.8k
            ccol++;
4616
78.8k
        }
4617
49.4k
        ctxt->input->col = ccol;
4618
49.4k
        if (*in == 0xA) {
4619
563
            do {
4620
563
                ctxt->input->line++; ctxt->input->col = 1;
4621
563
                in++;
4622
563
            } while (*in == 0xA);
4623
266
            goto get_more;
4624
266
        }
4625
49.1k
        if (*in == ']') {
4626
3.26k
            if ((in[1] == ']') && (in[2] == '>')) {
4627
280
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4628
280
                if (ctxt->instate != XML_PARSER_EOF)
4629
280
                    ctxt->input->cur = in + 1;
4630
280
                return;
4631
280
            }
4632
2.98k
            in++;
4633
2.98k
            ctxt->input->col++;
4634
2.98k
            goto get_more;
4635
3.26k
        }
4636
45.9k
        nbchar = in - ctxt->input->cur;
4637
45.9k
        if (nbchar > 0) {
4638
35.8k
            if ((ctxt->sax != NULL) &&
4639
35.8k
                (ctxt->disableSAX == 0) &&
4640
35.8k
                (ctxt->sax->ignorableWhitespace !=
4641
21.7k
                 ctxt->sax->characters) &&
4642
35.8k
                (IS_BLANK_CH(*ctxt->input->cur))) {
4643
2.43k
                const xmlChar *tmp = ctxt->input->cur;
4644
2.43k
                ctxt->input->cur = in;
4645
4646
2.43k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4647
643
                    if (ctxt->sax->ignorableWhitespace != NULL)
4648
643
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4649
643
                                                       tmp, nbchar);
4650
1.79k
                } else {
4651
1.79k
                    if (ctxt->sax->characters != NULL)
4652
1.79k
                        ctxt->sax->characters(ctxt->userData,
4653
1.79k
                                              tmp, nbchar);
4654
1.79k
                    if (*ctxt->space == -1)
4655
737
                        *ctxt->space = -2;
4656
1.79k
                }
4657
2.43k
                line = ctxt->input->line;
4658
2.43k
                col = ctxt->input->col;
4659
33.4k
            } else if ((ctxt->sax != NULL) &&
4660
33.4k
                       (ctxt->disableSAX == 0)) {
4661
19.2k
                if (ctxt->sax->characters != NULL)
4662
19.2k
                    ctxt->sax->characters(ctxt->userData,
4663
19.2k
                                          ctxt->input->cur, nbchar);
4664
19.2k
                line = ctxt->input->line;
4665
19.2k
                col = ctxt->input->col;
4666
19.2k
            }
4667
35.8k
            if (ctxt->instate == XML_PARSER_EOF)
4668
0
                return;
4669
35.8k
        }
4670
45.9k
        ctxt->input->cur = in;
4671
45.9k
        if (*in == 0xD) {
4672
1.51k
            in++;
4673
1.51k
            if (*in == 0xA) {
4674
392
                ctxt->input->cur = in;
4675
392
                in++;
4676
392
                ctxt->input->line++; ctxt->input->col = 1;
4677
392
                continue; /* while */
4678
392
            }
4679
1.12k
            in--;
4680
1.12k
        }
4681
45.5k
        if (*in == '<') {
4682
27.6k
            return;
4683
27.6k
        }
4684
17.8k
        if (*in == '&') {
4685
5.93k
            return;
4686
5.93k
        }
4687
11.9k
        SHRINK;
4688
11.9k
        GROW;
4689
11.9k
        if (ctxt->instate == XML_PARSER_EOF)
4690
0
            return;
4691
11.9k
        in = ctxt->input->cur;
4692
12.3k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4693
12.3k
             (*in == 0x09) || (*in == 0x0a));
4694
11.9k
    ctxt->input->line = line;
4695
11.9k
    ctxt->input->col = col;
4696
11.9k
    xmlParseCharDataComplex(ctxt, partial);
4697
11.9k
}
4698
4699
/**
4700
 * xmlParseCharDataComplex:
4701
 * @ctxt:  an XML parser context
4702
 * @cdata:  int indicating whether we are within a CDATA section
4703
 *
4704
 * Always makes progress if the first char isn't '<' or '&'.
4705
 *
4706
 * parse a CharData section.this is the fallback function
4707
 * of xmlParseCharData() when the parsing requires handling
4708
 * of non-ASCII characters.
4709
 */
4710
static void
4711
11.9k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4712
11.9k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4713
11.9k
    int nbchar = 0;
4714
11.9k
    int cur, l;
4715
4716
11.9k
    cur = CUR_CHAR(l);
4717
76.8k
    while ((cur != '<') && /* checked */
4718
76.8k
           (cur != '&') &&
4719
76.8k
     (IS_CHAR(cur))) {
4720
64.9k
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4721
322
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4722
322
  }
4723
64.9k
  COPY_BUF(buf, nbchar, cur);
4724
  /* move current position before possible calling of ctxt->sax->characters */
4725
64.9k
  NEXTL(l);
4726
64.9k
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4727
400
      buf[nbchar] = 0;
4728
4729
      /*
4730
       * OK the segment is to be consumed as chars.
4731
       */
4732
400
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4733
227
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4734
10
        if (ctxt->sax->ignorableWhitespace != NULL)
4735
10
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4736
10
                                     buf, nbchar);
4737
217
    } else {
4738
217
        if (ctxt->sax->characters != NULL)
4739
217
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4740
217
        if ((ctxt->sax->characters !=
4741
217
             ctxt->sax->ignorableWhitespace) &&
4742
217
      (*ctxt->space == -1))
4743
34
      *ctxt->space = -2;
4744
217
    }
4745
227
      }
4746
400
      nbchar = 0;
4747
            /* something really bad happened in the SAX callback */
4748
400
            if (ctxt->instate != XML_PARSER_CONTENT)
4749
0
                return;
4750
400
            SHRINK;
4751
400
  }
4752
64.9k
  cur = CUR_CHAR(l);
4753
64.9k
    }
4754
11.9k
    if (ctxt->instate == XML_PARSER_EOF)
4755
0
        return;
4756
11.9k
    if (nbchar != 0) {
4757
5.13k
        buf[nbchar] = 0;
4758
  /*
4759
   * OK the segment is to be consumed as chars.
4760
   */
4761
5.13k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4762
2.68k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4763
461
    if (ctxt->sax->ignorableWhitespace != NULL)
4764
461
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4765
2.22k
      } else {
4766
2.22k
    if (ctxt->sax->characters != NULL)
4767
2.22k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4768
2.22k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4769
2.22k
        (*ctxt->space == -1))
4770
345
        *ctxt->space = -2;
4771
2.22k
      }
4772
2.68k
  }
4773
5.13k
    }
4774
    /*
4775
     * cur == 0 can mean
4776
     *
4777
     * - XML_PARSER_EOF or memory error. This is checked above.
4778
     * - An actual 0 character.
4779
     * - End of buffer.
4780
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4781
     */
4782
11.9k
    if (ctxt->input->cur < ctxt->input->end) {
4783
10.9k
        if ((cur == 0) && (CUR != 0)) {
4784
9
            if (partial == 0) {
4785
9
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4786
9
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4787
9
                NEXTL(1);
4788
9
            }
4789
10.9k
        } else if ((cur != '<') && (cur != '&')) {
4790
            /* Generate the error and skip the offending character */
4791
6.02k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4792
6.02k
                              "PCDATA invalid Char value %d\n", cur);
4793
6.02k
            NEXTL(l);
4794
6.02k
        }
4795
10.9k
    }
4796
11.9k
}
4797
4798
/**
4799
 * xmlParseCharData:
4800
 * @ctxt:  an XML parser context
4801
 * @cdata:  unused
4802
 *
4803
 * DEPRECATED: Internal function, don't use.
4804
 */
4805
void
4806
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4807
0
    xmlParseCharDataInternal(ctxt, 0);
4808
0
}
4809
4810
/**
4811
 * xmlParseExternalID:
4812
 * @ctxt:  an XML parser context
4813
 * @publicID:  a xmlChar** receiving PubidLiteral
4814
 * @strict: indicate whether we should restrict parsing to only
4815
 *          production [75], see NOTE below
4816
 *
4817
 * DEPRECATED: Internal function, don't use.
4818
 *
4819
 * Parse an External ID or a Public ID
4820
 *
4821
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
4822
 *       'PUBLIC' S PubidLiteral S SystemLiteral
4823
 *
4824
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4825
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
4826
 *
4827
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4828
 *
4829
 * Returns the function returns SystemLiteral and in the second
4830
 *                case publicID receives PubidLiteral, is strict is off
4831
 *                it is possible to return NULL and have publicID set.
4832
 */
4833
4834
xmlChar *
4835
23.0k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4836
23.0k
    xmlChar *URI = NULL;
4837
4838
23.0k
    *publicID = NULL;
4839
23.0k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4840
4.11k
        SKIP(6);
4841
4.11k
  if (SKIP_BLANKS == 0) {
4842
3.16k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4843
3.16k
                     "Space required after 'SYSTEM'\n");
4844
3.16k
  }
4845
4.11k
  URI = xmlParseSystemLiteral(ctxt);
4846
4.11k
  if (URI == NULL) {
4847
481
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4848
481
        }
4849
18.9k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4850
4.30k
        SKIP(6);
4851
4.30k
  if (SKIP_BLANKS == 0) {
4852
3.57k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4853
3.57k
        "Space required after 'PUBLIC'\n");
4854
3.57k
  }
4855
4.30k
  *publicID = xmlParsePubidLiteral(ctxt);
4856
4.30k
  if (*publicID == NULL) {
4857
696
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4858
696
  }
4859
4.30k
  if (strict) {
4860
      /*
4861
       * We don't handle [83] so "S SystemLiteral" is required.
4862
       */
4863
2.00k
      if (SKIP_BLANKS == 0) {
4864
1.79k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4865
1.79k
      "Space required after the Public Identifier\n");
4866
1.79k
      }
4867
2.29k
  } else {
4868
      /*
4869
       * We handle [83] so we return immediately, if
4870
       * "S SystemLiteral" is not detected. We skip blanks if no
4871
             * system literal was found, but this is harmless since we must
4872
             * be at the end of a NotationDecl.
4873
       */
4874
2.29k
      if (SKIP_BLANKS == 0) return(NULL);
4875
1.19k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
4876
1.19k
  }
4877
2.93k
  URI = xmlParseSystemLiteral(ctxt);
4878
2.93k
  if (URI == NULL) {
4879
1.71k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4880
1.71k
        }
4881
2.93k
    }
4882
21.6k
    return(URI);
4883
23.0k
}
4884
4885
/**
4886
 * xmlParseCommentComplex:
4887
 * @ctxt:  an XML parser context
4888
 * @buf:  the already parsed part of the buffer
4889
 * @len:  number of bytes in the buffer
4890
 * @size:  allocated size of the buffer
4891
 *
4892
 * Skip an XML (SGML) comment <!-- .... -->
4893
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
4894
 *  must not occur within comments. "
4895
 * This is the slow routine in case the accelerator for ascii didn't work
4896
 *
4897
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4898
 */
4899
static void
4900
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4901
10.2k
                       size_t len, size_t size) {
4902
10.2k
    int q, ql;
4903
10.2k
    int r, rl;
4904
10.2k
    int cur, l;
4905
10.2k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4906
0
                       XML_MAX_HUGE_LENGTH :
4907
10.2k
                       XML_MAX_TEXT_LENGTH;
4908
10.2k
    int inputid;
4909
4910
10.2k
    inputid = ctxt->input->id;
4911
4912
10.2k
    if (buf == NULL) {
4913
4.67k
        len = 0;
4914
4.67k
  size = XML_PARSER_BUFFER_SIZE;
4915
4.67k
  buf = (xmlChar *) xmlMallocAtomic(size);
4916
4.67k
  if (buf == NULL) {
4917
0
      xmlErrMemory(ctxt, NULL);
4918
0
      return;
4919
0
  }
4920
4.67k
    }
4921
10.2k
    q = CUR_CHAR(ql);
4922
10.2k
    if (q == 0)
4923
6.04k
        goto not_terminated;
4924
4.24k
    if (!IS_CHAR(q)) {
4925
431
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4926
431
                          "xmlParseComment: invalid xmlChar value %d\n",
4927
431
                    q);
4928
431
  xmlFree (buf);
4929
431
  return;
4930
431
    }
4931
3.81k
    NEXTL(ql);
4932
3.81k
    r = CUR_CHAR(rl);
4933
3.81k
    if (r == 0)
4934
213
        goto not_terminated;
4935
3.60k
    if (!IS_CHAR(r)) {
4936
171
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4937
171
                          "xmlParseComment: invalid xmlChar value %d\n",
4938
171
                    r);
4939
171
  xmlFree (buf);
4940
171
  return;
4941
171
    }
4942
3.43k
    NEXTL(rl);
4943
3.43k
    cur = CUR_CHAR(l);
4944
3.43k
    if (cur == 0)
4945
210
        goto not_terminated;
4946
34.8k
    while (IS_CHAR(cur) && /* checked */
4947
34.8k
           ((cur != '>') ||
4948
33.5k
      (r != '-') || (q != '-'))) {
4949
31.5k
  if ((r == '-') && (q == '-')) {
4950
450
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4951
450
  }
4952
31.5k
  if (len + 5 >= size) {
4953
455
      xmlChar *new_buf;
4954
455
            size_t new_size;
4955
4956
455
      new_size = size * 2;
4957
455
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4958
455
      if (new_buf == NULL) {
4959
0
    xmlFree (buf);
4960
0
    xmlErrMemory(ctxt, NULL);
4961
0
    return;
4962
0
      }
4963
455
      buf = new_buf;
4964
455
            size = new_size;
4965
455
  }
4966
31.5k
  COPY_BUF(buf, len, q);
4967
31.5k
        if (len > maxLength) {
4968
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4969
0
                         "Comment too big found", NULL);
4970
0
            xmlFree (buf);
4971
0
            return;
4972
0
        }
4973
4974
31.5k
  q = r;
4975
31.5k
  ql = rl;
4976
31.5k
  r = cur;
4977
31.5k
  rl = l;
4978
4979
31.5k
  NEXTL(l);
4980
31.5k
  cur = CUR_CHAR(l);
4981
4982
31.5k
    }
4983
3.22k
    buf[len] = 0;
4984
3.22k
    if (ctxt->instate == XML_PARSER_EOF) {
4985
0
        xmlFree(buf);
4986
0
        return;
4987
0
    }
4988
3.22k
    if (cur == 0) {
4989
1.11k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4990
1.11k
                       "Comment not terminated \n<!--%.50s\n", buf);
4991
2.10k
    } else if (!IS_CHAR(cur)) {
4992
129
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4993
129
                          "xmlParseComment: invalid xmlChar value %d\n",
4994
129
                    cur);
4995
1.97k
    } else {
4996
1.97k
  if (inputid != ctxt->input->id) {
4997
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4998
0
               "Comment doesn't start and stop in the same"
4999
0
                           " entity\n");
5000
0
  }
5001
1.97k
        NEXT;
5002
1.97k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5003
1.97k
      (!ctxt->disableSAX))
5004
1.78k
      ctxt->sax->comment(ctxt->userData, buf);
5005
1.97k
    }
5006
3.22k
    xmlFree(buf);
5007
3.22k
    return;
5008
6.46k
not_terminated:
5009
6.46k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5010
6.46k
       "Comment not terminated\n", NULL);
5011
6.46k
    xmlFree(buf);
5012
6.46k
    return;
5013
3.22k
}
5014
5015
/**
5016
 * xmlParseComment:
5017
 * @ctxt:  an XML parser context
5018
 *
5019
 * DEPRECATED: Internal function, don't use.
5020
 *
5021
 * Parse an XML (SGML) comment. Always consumes '<!'.
5022
 *
5023
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5024
 *  must not occur within comments. "
5025
 *
5026
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5027
 */
5028
void
5029
14.2k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5030
14.2k
    xmlChar *buf = NULL;
5031
14.2k
    size_t size = XML_PARSER_BUFFER_SIZE;
5032
14.2k
    size_t len = 0;
5033
14.2k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5034
0
                       XML_MAX_HUGE_LENGTH :
5035
14.2k
                       XML_MAX_TEXT_LENGTH;
5036
14.2k
    xmlParserInputState state;
5037
14.2k
    const xmlChar *in;
5038
14.2k
    size_t nbchar = 0;
5039
14.2k
    int ccol;
5040
14.2k
    int inputid;
5041
5042
    /*
5043
     * Check that there is a comment right here.
5044
     */
5045
14.2k
    if ((RAW != '<') || (NXT(1) != '!'))
5046
0
        return;
5047
14.2k
    SKIP(2);
5048
14.2k
    if ((RAW != '-') || (NXT(1) != '-'))
5049
5
        return;
5050
14.2k
    state = ctxt->instate;
5051
14.2k
    ctxt->instate = XML_PARSER_COMMENT;
5052
14.2k
    inputid = ctxt->input->id;
5053
14.2k
    SKIP(2);
5054
14.2k
    GROW;
5055
5056
    /*
5057
     * Accelerated common case where input don't need to be
5058
     * modified before passing it to the handler.
5059
     */
5060
14.2k
    in = ctxt->input->cur;
5061
14.2k
    do {
5062
14.2k
  if (*in == 0xA) {
5063
806
      do {
5064
806
    ctxt->input->line++; ctxt->input->col = 1;
5065
806
    in++;
5066
806
      } while (*in == 0xA);
5067
218
  }
5068
30.6k
get_more:
5069
30.6k
        ccol = ctxt->input->col;
5070
58.9k
  while (((*in > '-') && (*in <= 0x7F)) ||
5071
58.9k
         ((*in >= 0x20) && (*in < '-')) ||
5072
58.9k
         (*in == 0x09)) {
5073
28.2k
        in++;
5074
28.2k
        ccol++;
5075
28.2k
  }
5076
30.6k
  ctxt->input->col = ccol;
5077
30.6k
  if (*in == 0xA) {
5078
667
      do {
5079
667
    ctxt->input->line++; ctxt->input->col = 1;
5080
667
    in++;
5081
667
      } while (*in == 0xA);
5082
471
      goto get_more;
5083
471
  }
5084
30.2k
  nbchar = in - ctxt->input->cur;
5085
  /*
5086
   * save current set of data
5087
   */
5088
30.2k
  if (nbchar > 0) {
5089
18.6k
            if (buf == NULL) {
5090
7.12k
                if ((*in == '-') && (in[1] == '-'))
5091
1.77k
                    size = nbchar + 1;
5092
5.35k
                else
5093
5.35k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5094
7.12k
                buf = (xmlChar *) xmlMallocAtomic(size);
5095
7.12k
                if (buf == NULL) {
5096
0
                    xmlErrMemory(ctxt, NULL);
5097
0
                    ctxt->instate = state;
5098
0
                    return;
5099
0
                }
5100
7.12k
                len = 0;
5101
11.5k
            } else if (len + nbchar + 1 >= size) {
5102
931
                xmlChar *new_buf;
5103
931
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5104
931
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5105
931
                if (new_buf == NULL) {
5106
0
                    xmlFree (buf);
5107
0
                    xmlErrMemory(ctxt, NULL);
5108
0
                    ctxt->instate = state;
5109
0
                    return;
5110
0
                }
5111
931
                buf = new_buf;
5112
931
            }
5113
18.6k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5114
18.6k
            len += nbchar;
5115
18.6k
            buf[len] = 0;
5116
18.6k
  }
5117
30.2k
        if (len > maxLength) {
5118
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5119
0
                         "Comment too big found", NULL);
5120
0
            xmlFree (buf);
5121
0
            return;
5122
0
        }
5123
30.2k
  ctxt->input->cur = in;
5124
30.2k
  if (*in == 0xA) {
5125
0
      in++;
5126
0
      ctxt->input->line++; ctxt->input->col = 1;
5127
0
  }
5128
30.2k
  if (*in == 0xD) {
5129
973
      in++;
5130
973
      if (*in == 0xA) {
5131
194
    ctxt->input->cur = in;
5132
194
    in++;
5133
194
    ctxt->input->line++; ctxt->input->col = 1;
5134
194
    goto get_more;
5135
194
      }
5136
779
      in--;
5137
779
  }
5138
30.0k
  SHRINK;
5139
30.0k
  GROW;
5140
30.0k
        if (ctxt->instate == XML_PARSER_EOF) {
5141
0
            xmlFree(buf);
5142
0
            return;
5143
0
        }
5144
30.0k
  in = ctxt->input->cur;
5145
30.0k
  if (*in == '-') {
5146
19.7k
      if (in[1] == '-') {
5147
15.0k
          if (in[2] == '>') {
5148
3.96k
        if (ctxt->input->id != inputid) {
5149
0
      xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5150
0
                     "comment doesn't start and stop in the"
5151
0
                                       " same entity\n");
5152
0
        }
5153
3.96k
        SKIP(3);
5154
3.96k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5155
3.96k
            (!ctxt->disableSAX)) {
5156
3.25k
      if (buf != NULL)
5157
1.24k
          ctxt->sax->comment(ctxt->userData, buf);
5158
2.00k
      else
5159
2.00k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5160
3.25k
        }
5161
3.96k
        if (buf != NULL)
5162
1.51k
            xmlFree(buf);
5163
3.96k
        if (ctxt->instate != XML_PARSER_EOF)
5164
3.96k
      ctxt->instate = state;
5165
3.96k
        return;
5166
3.96k
    }
5167
11.1k
    if (buf != NULL) {
5168
7.12k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5169
7.12k
                          "Double hyphen within comment: "
5170
7.12k
                                      "<!--%.50s\n",
5171
7.12k
              buf);
5172
7.12k
    } else
5173
4.00k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5174
4.00k
                          "Double hyphen within comment\n", NULL);
5175
11.1k
                if (ctxt->instate == XML_PARSER_EOF) {
5176
0
                    xmlFree(buf);
5177
0
                    return;
5178
0
                }
5179
11.1k
    in++;
5180
11.1k
    ctxt->input->col++;
5181
11.1k
      }
5182
15.7k
      in++;
5183
15.7k
      ctxt->input->col++;
5184
15.7k
      goto get_more;
5185
19.7k
  }
5186
30.0k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5187
10.2k
    xmlParseCommentComplex(ctxt, buf, len, size);
5188
10.2k
    ctxt->instate = state;
5189
10.2k
    return;
5190
14.2k
}
5191
5192
5193
/**
5194
 * xmlParsePITarget:
5195
 * @ctxt:  an XML parser context
5196
 *
5197
 * DEPRECATED: Internal function, don't use.
5198
 *
5199
 * parse the name of a PI
5200
 *
5201
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5202
 *
5203
 * Returns the PITarget name or NULL
5204
 */
5205
5206
const xmlChar *
5207
32.1k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5208
32.1k
    const xmlChar *name;
5209
5210
32.1k
    name = xmlParseName(ctxt);
5211
32.1k
    if ((name != NULL) &&
5212
32.1k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5213
32.1k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5214
32.1k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5215
1.62k
  int i;
5216
1.62k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5217
1.62k
      (name[2] == 'l') && (name[3] == 0)) {
5218
208
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5219
208
     "XML declaration allowed only at the start of the document\n");
5220
208
      return(name);
5221
1.41k
  } else if (name[3] == 0) {
5222
514
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5223
514
      return(name);
5224
514
  }
5225
2.50k
  for (i = 0;;i++) {
5226
2.50k
      if (xmlW3CPIs[i] == NULL) break;
5227
1.79k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5228
194
          return(name);
5229
1.79k
  }
5230
707
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5231
707
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5232
707
          NULL, NULL);
5233
707
    }
5234
31.2k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5235
1.01k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5236
1.01k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5237
1.01k
    }
5238
31.2k
    return(name);
5239
32.1k
}
5240
5241
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must consist of the word "catalog", an equal sign and a
 * quoted URL, with optional blanks around each of them. Anything else
 * raises an XML_WAR_CATALOG_PI warning, except for a missing equal sign
 * which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0) {
        goto error;
    }
    p += 7;

    for (; IS_BLANK_CH(*p); p++)
        ;
    /* NOTE(review): no warning is issued on this path - confirm intent */
    if (*p != '=') {
        return;
    }
    p++;

    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"')) {
        goto error;
    }
    p++;

    /* the URL extends up to the matching quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0) {
        goto error;
    }
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0) {
        goto error;
    }

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL) {
        xmlFree(URL);
    }
}
#endif
5302
5303
/**
5304
 * xmlParsePI:
5305
 * @ctxt:  an XML parser context
5306
 *
5307
 * DEPRECATED: Internal function, don't use.
5308
 *
5309
 * parse an XML Processing Instruction.
5310
 *
5311
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5312
 *
5313
 * The processing is transferred to SAX once parsed.
5314
 */
5315
5316
void
5317
32.1k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5318
32.1k
    xmlChar *buf = NULL;
5319
32.1k
    size_t len = 0;
5320
32.1k
    size_t size = XML_PARSER_BUFFER_SIZE;
5321
32.1k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5322
0
                       XML_MAX_HUGE_LENGTH :
5323
32.1k
                       XML_MAX_TEXT_LENGTH;
5324
32.1k
    int cur, l;
5325
32.1k
    const xmlChar *target;
5326
32.1k
    xmlParserInputState state;
5327
5328
32.1k
    if ((RAW == '<') && (NXT(1) == '?')) {
5329
32.1k
  int inputid = ctxt->input->id;
5330
32.1k
  state = ctxt->instate;
5331
32.1k
        ctxt->instate = XML_PARSER_PI;
5332
  /*
5333
   * this is a Processing Instruction.
5334
   */
5335
32.1k
  SKIP(2);
5336
5337
  /*
5338
   * Parse the target name and check for special support like
5339
   * namespace.
5340
   */
5341
32.1k
        target = xmlParsePITarget(ctxt);
5342
32.1k
  if (target != NULL) {
5343
24.4k
      if ((RAW == '?') && (NXT(1) == '>')) {
5344
6.38k
    if (inputid != ctxt->input->id) {
5345
0
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5346
0
                             "PI declaration doesn't start and stop in"
5347
0
                                   " the same entity\n");
5348
0
    }
5349
6.38k
    SKIP(2);
5350
5351
    /*
5352
     * SAX: PI detected.
5353
     */
5354
6.38k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5355
6.38k
        (ctxt->sax->processingInstruction != NULL))
5356
5.35k
        ctxt->sax->processingInstruction(ctxt->userData,
5357
5.35k
                                         target, NULL);
5358
6.38k
    if (ctxt->instate != XML_PARSER_EOF)
5359
6.38k
        ctxt->instate = state;
5360
6.38k
    return;
5361
6.38k
      }
5362
18.0k
      buf = (xmlChar *) xmlMallocAtomic(size);
5363
18.0k
      if (buf == NULL) {
5364
0
    xmlErrMemory(ctxt, NULL);
5365
0
    ctxt->instate = state;
5366
0
    return;
5367
0
      }
5368
18.0k
      if (SKIP_BLANKS == 0) {
5369
15.9k
    xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5370
15.9k
        "ParsePI: PI %s space expected\n", target);
5371
15.9k
      }
5372
18.0k
      cur = CUR_CHAR(l);
5373
104k
      while (IS_CHAR(cur) && /* checked */
5374
104k
       ((cur != '?') || (NXT(1) != '>'))) {
5375
86.8k
    if (len + 5 >= size) {
5376
695
        xmlChar *tmp;
5377
695
                    size_t new_size = size * 2;
5378
695
        tmp = (xmlChar *) xmlRealloc(buf, new_size);
5379
695
        if (tmp == NULL) {
5380
0
      xmlErrMemory(ctxt, NULL);
5381
0
      xmlFree(buf);
5382
0
      ctxt->instate = state;
5383
0
      return;
5384
0
        }
5385
695
        buf = tmp;
5386
695
                    size = new_size;
5387
695
    }
5388
86.8k
    COPY_BUF(buf, len, cur);
5389
86.8k
                if (len > maxLength) {
5390
0
                    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5391
0
                                      "PI %s too big found", target);
5392
0
                    xmlFree(buf);
5393
0
                    ctxt->instate = state;
5394
0
                    return;
5395
0
                }
5396
86.8k
    NEXTL(l);
5397
86.8k
    cur = CUR_CHAR(l);
5398
86.8k
      }
5399
18.0k
      buf[len] = 0;
5400
18.0k
            if (ctxt->instate == XML_PARSER_EOF) {
5401
1
                xmlFree(buf);
5402
1
                return;
5403
1
            }
5404
18.0k
      if (cur != '?') {
5405
13.7k
    xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5406
13.7k
          "ParsePI: PI %s never end ...\n", target);
5407
13.7k
      } else {
5408
4.27k
    if (inputid != ctxt->input->id) {
5409
69
        xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5410
69
                             "PI declaration doesn't start and stop in"
5411
69
                                   " the same entity\n");
5412
69
    }
5413
4.27k
    SKIP(2);
5414
5415
4.27k
#ifdef LIBXML_CATAL