Coverage Report

Created: 2024-02-25 06:19

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
2.00M
#define NS_INDEX_EMPTY  INT_MAX
78
199k
#define NS_INDEX_XML    (INT_MAX - 1)
79
787k
#define URI_HASH_EMPTY  0xD943A04E
80
81.9k
#define URI_HASH_XML    0xF0451F02
81
82
/*
 * Record kept per start tag: a prefix string, a URI string, a line
 * number and an "nsNr" counter.
 * NOTE(review): the code that fills and reads this struct is outside this
 * excerpt; presumably one entry is stored per open element so the matching
 * end tag can be checked -- confirm against the users of this type.
 */
struct _xmlStartTag {
83
    const xmlChar *prefix;
84
    const xmlChar *URI;
85
    int line;
86
    int nsNr;
87
};
88
89
/*
 * Extra per-namespace bookkeeping referenced by _xmlParserNsData.extra.
 * NOTE(review): field semantics are not visible in this excerpt; the names
 * suggest cached prefix/URI hash values, the id of the declaring element
 * and a previous table index -- confirm against the namespace code.
 */
typedef struct {
90
    void *saxData;
91
    unsigned prefixHashValue;
92
    unsigned uriHashValue;
93
    unsigned elementId;
94
    int oldIndex;
95
} xmlParserNsExtra;
96
97
/*
 * One slot of the hash table referenced by _xmlParserNsData.hash: a hash
 * value paired with an integer index.
 * NOTE(review): what "index" points into is not shown here -- confirm.
 */
typedef struct {
98
    unsigned hashValue;
99
    int index;
100
} xmlParserNsBucket;
101
102
/*
 * Namespace bookkeeping: an array of xmlParserNsExtra records, a hash
 * table of xmlParserNsBucket slots with its size and element count, plus
 * an element id counter and two namespace indices.
 * NOTE(review): the functions maintaining these fields are outside this
 * excerpt; the description above only restates the declared members.
 */
struct _xmlParserNsData {
103
    xmlParserNsExtra *extra;
104
105
    unsigned hashSize;
106
    unsigned hashElems;
107
    xmlParserNsBucket *hash;
108
109
    unsigned elementId;
110
    int defaultNsIndex;
111
    int minNsIndex;
112
};
113
114
/*
 * Hash bucket holding a single integer index.
 * NOTE(review): presumably used by the attribute duplicate check; the
 * users are not part of this excerpt -- confirm.
 */
struct _xmlAttrHashBucket {
115
    int index;
116
};
117
118
static int
119
xmlParseElementStart(xmlParserCtxtPtr ctxt);
120
121
static void
122
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123
124
static xmlEntityPtr
125
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126
127
static const xmlChar *
128
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129
130
/************************************************************************
131
 *                  *
132
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
133
 *                  *
134
 ************************************************************************/
135
136
#define XML_PARSER_BIG_ENTITY 1000
137
#define XML_PARSER_LOT_ENTITY 5000
138
139
/*
140
 * Constants for protection against abusive entity expansion
141
 * ("billion laughs").
142
 */
143
144
/*
145
 * A certain amount of entity expansion which is always allowed.
146
 */
147
717k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
148
149
/*
150
 * Fixed cost for each entity reference. This crudely models processing time
151
 * as well to protect, for example, against exponential expansion of empty
152
 * or very short entities.
153
 */
154
722k
#define XML_ENT_FIXED_COST 20
155
156
/**
157
 * xmlParserMaxDepth:
158
 *
159
 * arbitrary depth limit for the XML documents that we allow to
160
 * process. This is not a limitation of the parser but a safety
161
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
162
 * parser option.
163
 */
164
const unsigned int xmlParserMaxDepth = 256;
165
166
167
168
100M
#define XML_PARSER_BIG_BUFFER_SIZE 300
169
253k
#define XML_PARSER_BUFFER_SIZE 100
170
124k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171
172
/**
173
 * XML_PARSER_CHUNK_SIZE
174
 *
175
 * When calling GROW that's the minimal amount of data
176
 * the parser expects to have received. It is not a hard
177
 * limit but an optimization when reading strings like Names
178
 * It is not strictly needed as long as the input's available characters
179
 * are followed by 0, which should be provided by the I/O level
180
 */
181
#define XML_PARSER_CHUNK_SIZE 100
182
183
/**
184
 * xmlParserVersion:
185
 *
186
 * Constant string describing the internal version of the library
187
 */
188
const char *const
189
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190
191
/*
192
 * List of XML prefixed PI allowed by W3C specs
193
 */
194
195
static const char* const xmlW3CPIs[] = {
196
    "xml-stylesheet",
197
    "xml-model",
198
    NULL
199
};
200
201
202
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204
                                              const xmlChar **str);
205
206
static void
207
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208
209
static int
210
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211
212
/************************************************************************
213
 *                  *
214
 *    Some factorized error routines        *
215
 *                  *
216
 ************************************************************************/
217
218
static void
219
443
xmlErrMemory(xmlParserCtxtPtr ctxt) {
220
443
    xmlCtxtErrMemory(ctxt);
221
443
}
222
223
/**
224
 * xmlErrAttributeDup:
225
 * @ctxt:  an XML parser context
226
 * @prefix:  the attribute prefix
227
 * @localname:  the attribute localname
228
 *
229
 * Handle a redefinition of attribute error
230
 */
231
static void
232
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233
                   const xmlChar * localname)
234
37.5k
{
235
37.5k
    if (prefix == NULL)
236
19.5k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
19.5k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
238
19.5k
                   "Attribute %s redefined\n", localname);
239
17.9k
    else
240
17.9k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
17.9k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
242
17.9k
                   "Attribute %s:%s redefined\n", prefix, localname);
243
37.5k
}
244
245
/**
246
 * xmlFatalErrMsg:
247
 * @ctxt:  an XML parser context
248
 * @error:  the error number
249
 * @msg:  the error message
250
 *
251
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
252
 */
253
static void LIBXML_ATTR_FORMAT(3,0)
254
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255
               const char *msg)
256
33.4M
{
257
33.4M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258
33.4M
               NULL, NULL, NULL, 0, "%s", msg);
259
33.4M
}
260
261
/**
262
 * xmlWarningMsg:
263
 * @ctxt:  an XML parser context
264
 * @error:  the error number
265
 * @msg:  the error message
266
 * @str1:  extra data
267
 * @str2:  extra data
268
 *
269
 * Handle a warning.
270
 */
271
void LIBXML_ATTR_FORMAT(3,0)
272
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273
              const char *msg, const xmlChar *str1, const xmlChar *str2)
274
45.7k
{
275
45.7k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276
45.7k
               str1, str2, NULL, 0, msg, str1, str2);
277
45.7k
}
278
279
/**
280
 * xmlValidityError:
281
 * @ctxt:  an XML parser context
282
 * @error:  the error number
283
 * @msg:  the error message
284
 * @str1:  extra data
285
 *
286
 * Handle a validity error.
287
 */
288
static void LIBXML_ATTR_FORMAT(3,0)
289
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290
              const char *msg, const xmlChar *str1, const xmlChar *str2)
291
565
{
292
565
    ctxt->valid = 0;
293
294
565
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295
565
               str1, str2, NULL, 0, msg, str1, str2);
296
565
}
297
298
/**
299
 * xmlFatalErrMsgInt:
300
 * @ctxt:  an XML parser context
301
 * @error:  the error number
302
 * @msg:  the error message
303
 * @val:  an integer value
304
 *
305
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
306
 */
307
static void LIBXML_ATTR_FORMAT(3,0)
308
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309
                  const char *msg, int val)
310
7.79M
{
311
7.79M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312
7.79M
               NULL, NULL, NULL, val, msg, val);
313
7.79M
}
314
315
/**
316
 * xmlFatalErrMsgStrIntStr:
317
 * @ctxt:  an XML parser context
318
 * @error:  the error number
319
 * @msg:  the error message
320
 * @str1:  an string info
321
 * @val:  an integer value
322
 * @str2:  an string info
323
 *
324
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
325
 */
326
static void LIBXML_ATTR_FORMAT(3,0)
327
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328
                  const char *msg, const xmlChar *str1, int val,
329
      const xmlChar *str2)
330
2.39M
{
331
2.39M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332
2.39M
               str1, str2, NULL, val, msg, str1, val, str2);
333
2.39M
}
334
335
/**
336
 * xmlFatalErrMsgStr:
337
 * @ctxt:  an XML parser context
338
 * @error:  the error number
339
 * @msg:  the error message
340
 * @val:  a string value
341
 *
342
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
343
 */
344
static void LIBXML_ATTR_FORMAT(3,0)
345
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346
                  const char *msg, const xmlChar * val)
347
1.17M
{
348
1.17M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349
1.17M
               val, NULL, NULL, 0, msg, val);
350
1.17M
}
351
352
/**
353
 * xmlErrMsgStr:
354
 * @ctxt:  an XML parser context
355
 * @error:  the error number
356
 * @msg:  the error message
357
 * @val:  a string value
358
 *
359
 * Handle a non fatal parser error
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
7.02k
{
365
7.02k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
7.02k
               val, NULL, NULL, 0, msg, val);
367
7.02k
}
368
369
/**
370
 * xmlNsErr:
371
 * @ctxt:  an XML parser context
372
 * @error:  the error number
373
 * @msg:  the message
374
 * @info1:  extra information string
375
 * @info2:  extra information string
376
 *
377
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
1.15M
{
385
1.15M
    ctxt->nsWellFormed = 0;
386
387
1.15M
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
1.15M
               info1, info2, info3, 0, msg, info1, info2, info3);
389
1.15M
}
390
391
/**
392
 * xmlNsWarn
393
 * @ctxt:  an XML parser context
394
 * @error:  the error number
395
 * @msg:  the message
396
 * @info1:  extra information string
397
 * @info2:  extra information string
398
 *
399
 * Handle a namespace warning error
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
14.8k
{
407
14.8k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
14.8k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
14.8k
}
410
411
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst without wrapping around: if the sum does not fit in
 * an unsigned long, *@dst is clamped to ULONG_MAX.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
418
419
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst without wrapping around: if the sum does not fit in
 * an unsigned long, *@dst is clamped to ULONG_MAX.
 *
 * @val is taken as size_t so that the overflow check sees the full value
 * on platforms where size_t is wider than unsigned long (e.g. 64-bit
 * Windows). With an unsigned long parameter the argument would be
 * truncated before the check and large sizes would be under-counted.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is carried out in the wider of the two types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val; /* fits: val <= ULONG_MAX - *dst */
}
426
427
/**
428
 * xmlParserEntityCheck:
429
 * @ctxt:  parser context
430
 * @extra:  sum of unexpanded entity sizes
431
 *
432
 * Check for non-linear entity expansion behaviour.
433
 *
434
 * In some cases like xmlExpandEntityInAttValue, this function is called
435
 * for each, possibly nested entity and its unexpanded content length.
436
 *
437
 * In other cases like xmlParseReference, it's only called for each
438
 * top-level entity with its unexpanded content length plus the sum of
439
 * the unexpanded content lengths (plus fixed cost) of all nested
440
 * entities.
441
 *
442
 * Summing the unexpanded lengths also adds the length of the reference.
443
 * This is by design. Taking the length of the entity name into account
444
 * discourages attacks that try to waste CPU time with abusively long
445
 * entity names. See test/recurse/lol6.xml for example. Each call also
446
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447
 * short entities.
448
 *
449
 * Returns 1 on error, 0 on success.
450
 */
451
static int
452
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453
717k
{
454
717k
    unsigned long consumed;
455
717k
    unsigned long *expandedSize;
456
717k
    xmlParserInputPtr input = ctxt->input;
457
717k
    xmlEntityPtr entity = input->entity;
458
459
717k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
460
11
        return(0);
461
462
    /*
463
     * Compute total consumed bytes so far, including input streams of
464
     * external entities.
465
     */
466
717k
    consumed = input->consumed;
467
717k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468
717k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469
470
717k
    if (entity)
471
153
        expandedSize = &entity->expandedSize;
472
717k
    else
473
717k
        expandedSize = &ctxt->sizeentcopy;
474
475
    /*
476
     * Add extra cost and some fixed cost.
477
     */
478
717k
    xmlSaturatedAdd(expandedSize, extra);
479
717k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480
481
    /*
482
     * It's important to always use saturation arithmetic when tracking
483
     * entity sizes to make the size checks reliable. If "sizeentcopy"
484
     * overflows, we have to abort.
485
     */
486
717k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487
717k
        ((*expandedSize >= ULONG_MAX) ||
488
309k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
489
234
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490
234
                       "Maximum entity amplification factor exceeded, see "
491
234
                       "xmlCtxtSetMaxAmplification.\n");
492
234
        xmlHaltParser(ctxt);
493
234
        return(1);
494
234
    }
495
496
717k
    return(0);
497
717k
}
498
499
/************************************************************************
500
 *                  *
501
 *    Library wide options          *
502
 *                  *
503
 ************************************************************************/
504
505
/**
506
  * xmlHasFeature:
507
  * @feature: the feature to be examined
508
  *
509
  * Examines if the library has been compiled with a given feature.
510
  *
511
  * Returns a non-zero value if the feature exist, otherwise zero.
512
  * Returns zero (0) if the feature does not exist or an unknown
513
  * unknown feature is requested, non-zero otherwise.
514
  */
515
int
516
xmlHasFeature(xmlFeature feature)
517
0
{
518
0
    switch (feature) {
519
0
  case XML_WITH_THREAD:
520
0
#ifdef LIBXML_THREAD_ENABLED
521
0
      return(1);
522
#else
523
      return(0);
524
#endif
525
0
        case XML_WITH_TREE:
526
0
#ifdef LIBXML_TREE_ENABLED
527
0
            return(1);
528
#else
529
            return(0);
530
#endif
531
0
        case XML_WITH_OUTPUT:
532
0
#ifdef LIBXML_OUTPUT_ENABLED
533
0
            return(1);
534
#else
535
            return(0);
536
#endif
537
0
        case XML_WITH_PUSH:
538
#ifdef LIBXML_PUSH_ENABLED
539
            return(1);
540
#else
541
0
            return(0);
542
0
#endif
543
0
        case XML_WITH_READER:
544
#ifdef LIBXML_READER_ENABLED
545
            return(1);
546
#else
547
0
            return(0);
548
0
#endif
549
0
        case XML_WITH_PATTERN:
550
0
#ifdef LIBXML_PATTERN_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_WRITER:
556
#ifdef LIBXML_WRITER_ENABLED
557
            return(1);
558
#else
559
0
            return(0);
560
0
#endif
561
0
        case XML_WITH_SAX1:
562
#ifdef LIBXML_SAX1_ENABLED
563
            return(1);
564
#else
565
0
            return(0);
566
0
#endif
567
0
        case XML_WITH_FTP:
568
#ifdef LIBXML_FTP_ENABLED
569
            return(1);
570
#else
571
0
            return(0);
572
0
#endif
573
0
        case XML_WITH_HTTP:
574
0
#ifdef LIBXML_HTTP_ENABLED
575
0
            return(1);
576
#else
577
            return(0);
578
#endif
579
0
        case XML_WITH_VALID:
580
#ifdef LIBXML_VALID_ENABLED
581
            return(1);
582
#else
583
0
            return(0);
584
0
#endif
585
0
        case XML_WITH_HTML:
586
0
#ifdef LIBXML_HTML_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_LEGACY:
592
#ifdef LIBXML_LEGACY_ENABLED
593
            return(1);
594
#else
595
0
            return(0);
596
0
#endif
597
0
        case XML_WITH_C14N:
598
#ifdef LIBXML_C14N_ENABLED
599
            return(1);
600
#else
601
0
            return(0);
602
0
#endif
603
0
        case XML_WITH_CATALOG:
604
0
#ifdef LIBXML_CATALOG_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_XPATH:
610
0
#ifdef LIBXML_XPATH_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_XPTR:
616
0
#ifdef LIBXML_XPTR_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_XINCLUDE:
622
0
#ifdef LIBXML_XINCLUDE_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_ICONV:
628
0
#ifdef LIBXML_ICONV_ENABLED
629
0
            return(1);
630
#else
631
            return(0);
632
#endif
633
0
        case XML_WITH_ISO8859X:
634
0
#ifdef LIBXML_ISO8859X_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_UNICODE:
640
#ifdef LIBXML_UNICODE_ENABLED
641
            return(1);
642
#else
643
0
            return(0);
644
0
#endif
645
0
        case XML_WITH_REGEXP:
646
#ifdef LIBXML_REGEXP_ENABLED
647
            return(1);
648
#else
649
0
            return(0);
650
0
#endif
651
0
        case XML_WITH_AUTOMATA:
652
#ifdef LIBXML_AUTOMATA_ENABLED
653
            return(1);
654
#else
655
0
            return(0);
656
0
#endif
657
0
        case XML_WITH_EXPR:
658
#ifdef LIBXML_EXPR_ENABLED
659
            return(1);
660
#else
661
0
            return(0);
662
0
#endif
663
0
        case XML_WITH_SCHEMAS:
664
#ifdef LIBXML_SCHEMAS_ENABLED
665
            return(1);
666
#else
667
0
            return(0);
668
0
#endif
669
0
        case XML_WITH_SCHEMATRON:
670
#ifdef LIBXML_SCHEMATRON_ENABLED
671
            return(1);
672
#else
673
0
            return(0);
674
0
#endif
675
0
        case XML_WITH_MODULES:
676
0
#ifdef LIBXML_MODULES_ENABLED
677
0
            return(1);
678
#else
679
            return(0);
680
#endif
681
0
        case XML_WITH_DEBUG:
682
0
#ifdef LIBXML_DEBUG_ENABLED
683
0
            return(1);
684
#else
685
            return(0);
686
#endif
687
0
        case XML_WITH_DEBUG_MEM:
688
#ifdef DEBUG_MEMORY_LOCATION
689
            return(1);
690
#else
691
0
            return(0);
692
0
#endif
693
0
        case XML_WITH_ZLIB:
694
#ifdef LIBXML_ZLIB_ENABLED
695
            return(1);
696
#else
697
0
            return(0);
698
0
#endif
699
0
        case XML_WITH_LZMA:
700
#ifdef LIBXML_LZMA_ENABLED
701
            return(1);
702
#else
703
0
            return(0);
704
0
#endif
705
0
        case XML_WITH_ICU:
706
#ifdef LIBXML_ICU_ENABLED
707
            return(1);
708
#else
709
0
            return(0);
710
0
#endif
711
0
        default:
712
0
      break;
713
0
     }
714
0
     return(0);
715
0
}
716
717
/************************************************************************
718
 *                  *
719
 *      Simple string buffer        *
720
 *                  *
721
 ************************************************************************/
722
723
/*
 * Growable byte buffer with a hard size limit and a sticky error code.
 * Append helpers record failures in "code" instead of returning them;
 * xmlSBufFinish and xmlSBufCleanup report the recorded error.
 */
typedef struct {
724
    xmlChar *mem; /* heap storage; NULL until the first grow */
725
    unsigned size; /* number of bytes currently stored */
726
    unsigned cap; /* size < cap */
727
    unsigned max; /* size <= max */
728
    xmlParserErrors code; /* XML_ERR_OK, or the error set by a failed append/grow */
729
} xmlSBuf;
730
731
static void
732
1.26M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
733
1.26M
    buf->mem = NULL;
734
1.26M
    buf->size = 0;
735
1.26M
    buf->cap = 0;
736
1.26M
    buf->max = max;
737
1.26M
    buf->code = XML_ERR_OK;
738
1.26M
}
739
740
static int
741
217k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742
217k
    xmlChar *mem;
743
217k
    unsigned cap;
744
745
217k
    if (len >= UINT_MAX / 2 - buf->size) {
746
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
747
0
        return(-1);
748
0
    }
749
750
217k
    cap = (buf->size + len) * 2;
751
217k
    if (cap < 240)
752
152k
        cap = 240;
753
754
217k
    mem = xmlRealloc(buf->mem, cap);
755
217k
    if (mem == NULL) {
756
22.0k
        buf->code = XML_ERR_NO_MEMORY;
757
22.0k
        return(-1);
758
22.0k
    }
759
760
195k
    buf->mem = mem;
761
195k
    buf->cap = cap;
762
763
195k
    return(0);
764
217k
}
765
766
static void
767
89.6M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
768
89.6M
    if (buf->max - buf->size < len) {
769
62.0k
        buf->code = XML_ERR_RESOURCE_LIMIT;
770
62.0k
        return;
771
62.0k
    }
772
773
89.6M
    if (buf->cap - buf->size <= len) {
774
214k
        if (xmlSBufGrow(buf, len) < 0)
775
21.5k
            return;
776
214k
    }
777
778
89.6M
    if (len > 0)
779
89.6M
        memcpy(buf->mem + buf->size, str, len);
780
89.6M
    buf->size += len;
781
89.6M
}
782
783
static void
784
87.9M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
785
87.9M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
786
87.9M
}
787
788
static void
789
302k
xmlSBufAddChar(xmlSBuf *buf, int c) {
790
302k
    xmlChar *end;
791
792
302k
    if (buf->max - buf->size < 4) {
793
132
        buf->code = XML_ERR_RESOURCE_LIMIT;
794
132
        return;
795
132
    }
796
797
302k
    if (buf->cap - buf->size <= 4) {
798
2.46k
        if (xmlSBufGrow(buf, 4) < 0)
799
447
            return;
800
2.46k
    }
801
802
302k
    end = buf->mem + buf->size;
803
804
302k
    if (c < 0x80) {
805
46.2k
        *end = (xmlChar) c;
806
46.2k
        buf->size += 1;
807
255k
    } else {
808
255k
        buf->size += xmlCopyCharMultiByte(end, c);
809
255k
    }
810
302k
}
811
812
static void
813
61.7M
xmlSBufAddReplChar(xmlSBuf *buf) {
814
61.7M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
815
61.7M
}
816
817
static void
818
67
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
819
67
    if (buf->code == XML_ERR_NO_MEMORY)
820
38
        xmlCtxtErrMemory(ctxt);
821
29
    else
822
29
        xmlFatalErr(ctxt, buf->code, errMsg);
823
67
}
824
825
static xmlChar *
826
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
827
164k
              const char *errMsg) {
828
164k
    if (buf->mem == NULL) {
829
19.9k
        buf->mem = xmlMalloc(1);
830
19.9k
        if (buf->mem == NULL) {
831
8
            buf->code = XML_ERR_NO_MEMORY;
832
19.9k
        } else {
833
19.9k
            buf->mem[0] = 0;
834
19.9k
        }
835
144k
    } else {
836
144k
        buf->mem[buf->size] = 0;
837
144k
    }
838
839
164k
    if (buf->code == XML_ERR_OK) {
840
164k
        if (sizeOut != NULL)
841
90.1k
            *sizeOut = buf->size;
842
164k
        return(buf->mem);
843
164k
    }
844
845
25
    xmlSBufReportError(buf, ctxt, errMsg);
846
847
25
    xmlFree(buf->mem);
848
849
25
    if (sizeOut != NULL)
850
16
        *sizeOut = 0;
851
25
    return(NULL);
852
164k
}
853
854
static void
855
1.05M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
856
1.05M
    if (buf->code != XML_ERR_OK)
857
42
        xmlSBufReportError(buf, ctxt, errMsg);
858
859
1.05M
    xmlFree(buf->mem);
860
1.05M
}
861
862
static int
863
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
864
219M
                    const char *errMsg) {
865
219M
    int c = str[0];
866
219M
    int c1 = str[1];
867
868
219M
    if ((c1 & 0xC0) != 0x80)
869
22.9M
        goto encoding_error;
870
871
196M
    if (c < 0xE0) {
872
        /* 2-byte sequence */
873
11.8M
        if (c < 0xC2)
874
8.49M
            goto encoding_error;
875
876
3.38M
        return(2);
877
184M
    } else {
878
184M
        int c2 = str[2];
879
880
184M
        if ((c2 & 0xC0) != 0x80)
881
35.3k
            goto encoding_error;
882
883
184M
        if (c < 0xF0) {
884
            /* 3-byte sequence */
885
184M
            if (c == 0xE0) {
886
                /* overlong */
887
179M
                if (c1 < 0xA0)
888
453
                    goto encoding_error;
889
179M
            } else if (c == 0xED) {
890
                /* surrogate */
891
807
                if (c1 >= 0xA0)
892
90
                    goto encoding_error;
893
4.82M
            } else if (c == 0xEF) {
894
                /* U+FFFE and U+FFFF are invalid Chars */
895
371k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
896
4.80k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
897
371k
            }
898
899
184M
            return(3);
900
184M
        } else {
901
            /* 4-byte sequence */
902
33.0k
            if ((str[3] & 0xC0) != 0x80)
903
4.17k
                goto encoding_error;
904
28.8k
            if (c == 0xF0) {
905
                /* overlong */
906
5.36k
                if (c1 < 0x90)
907
1.18k
                    goto encoding_error;
908
23.4k
            } else if (c >= 0xF4) {
909
                /* greater than 0x10FFFF */
910
6.11k
                if ((c > 0xF4) || (c1 >= 0x90))
911
5.83k
                    goto encoding_error;
912
6.11k
            }
913
914
21.8k
            return(4);
915
28.8k
        }
916
184M
    }
917
918
31.4M
encoding_error:
919
    /* Only report the first error */
920
31.4M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
921
3.36k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
922
3.36k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
923
3.36k
    }
924
925
31.4M
    return(0);
926
196M
}
927
928
/************************************************************************
929
 *                  *
930
 *    SAX2 defaulted attributes handling      *
931
 *                  *
932
 ************************************************************************/
933
934
/**
935
 * xmlCtxtInitializeLate:
936
 * @ctxt:  an XML parser context
937
 *
938
 * Final initialization of the parser context before starting to parse.
939
 *
940
 * This accounts for users modifying struct members of parser context
941
 * directly.
942
 */
943
static void
944
91.6k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
945
91.6k
    xmlSAXHandlerPtr sax;
946
947
    /* Avoid unused variable warning if features are disabled. */
948
91.6k
    (void) sax;
949
950
    /*
951
     * Changing the SAX struct directly is still widespread practice
952
     * in internal and external code.
953
     */
954
91.6k
    if (ctxt == NULL) return;
955
91.6k
    sax = ctxt->sax;
956
#ifdef LIBXML_SAX1_ENABLED
957
    /*
958
     * Only enable SAX2 if there SAX2 element handlers, except when there
959
     * are no element handlers at all.
960
     */
961
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
962
        (sax) &&
963
        (sax->initialized == XML_SAX2_MAGIC) &&
964
        ((sax->startElementNs != NULL) ||
965
         (sax->endElementNs != NULL) ||
966
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
967
        ctxt->sax2 = 1;
968
#else
969
91.6k
    ctxt->sax2 = 1;
970
91.6k
#endif /* LIBXML_SAX1_ENABLED */
971
972
    /*
973
     * Some users replace the dictionary directly in the context struct.
974
     * We really need an API function to do that cleanly.
975
     */
976
91.6k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
977
91.6k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
978
91.6k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
979
91.6k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
980
91.6k
    (ctxt->str_xml_ns == NULL)) {
981
0
        xmlErrMemory(ctxt);
982
0
    }
983
91.6k
}
984
985
/*
 * One defaulted attribute, stored as an element of xmlDefAttrs.attrs.
 * NOTE(review): the code filling these fields lies beyond this excerpt;
 * the per-field notes below are inferred from the names -- confirm.
 */
typedef struct {
986
    xmlHashedString prefix; /* presumably the attribute prefix */
987
    xmlHashedString name; /* presumably the attribute local name */
988
    xmlHashedString value; /* presumably the default value */
989
    const xmlChar *valueEnd; /* presumably the end of the value string */
990
    int external; /* presumably set for externally declared defaults */
991
    int expandedSize;
992
} xmlDefAttr;
993
994
typedef struct _xmlDefAttrs xmlDefAttrs;
995
typedef xmlDefAttrs *xmlDefAttrsPtr;
996
struct _xmlDefAttrs {
997
    int nbAttrs;  /* number of defaulted attributes on that element */
998
    int maxAttrs;       /* the size of the array */
999
#if __STDC_VERSION__ >= 199901L
1000
    /* Using a C99 flexible array member avoids UBSan errors. */
1001
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1002
#else
1003
    xmlDefAttr attrs[1];
1004
#endif
1005
};
1006
1007
/**
1008
 * xmlAttrNormalizeSpace:
1009
 * @src: the source string
1010
 * @dst: the target string
1011
 *
1012
 * Normalize the space in non CDATA attribute values:
1013
 * If the attribute type is not CDATA, then the XML processor MUST further
1014
 * process the normalized attribute value by discarding any leading and
1015
 * trailing space (#x20) characters, and by replacing sequences of space
1016
 * (#x20) characters by a single space (#x20) character.
1017
 * Note that the size of dst need to be at least src, and if one doesn't need
1018
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1019
 * passing src as dst is just fine.
1020
 *
1021
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1022
 *         is needed.
1023
 */
1024
static xmlChar *
1025
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1026
24.0k
{
1027
24.0k
    if ((src == NULL) || (dst == NULL))
1028
0
        return(NULL);
1029
1030
30.3k
    while (*src == 0x20) src++;
1031
1.76M
    while (*src != 0) {
1032
1.73M
  if (*src == 0x20) {
1033
1.22M
      while (*src == 0x20) src++;
1034
17.7k
      if (*src != 0)
1035
12.9k
    *dst++ = 0x20;
1036
1.71M
  } else {
1037
1.71M
      *dst++ = *src++;
1038
1.71M
  }
1039
1.73M
    }
1040
24.0k
    *dst = 0;
1041
24.0k
    if (dst == src)
1042
18.2k
       return(NULL);
1043
5.80k
    return(dst);
1044
24.0k
}
1045
1046
/**
1047
 * xmlAddDefAttrs:
1048
 * @ctxt:  an XML parser context
1049
 * @fullname:  the element fullname
1050
 * @fullattr:  the attribute fullname
1051
 * @value:  the attribute value
1052
 *
1053
 * Add a defaulted attribute for an element
1054
 */
1055
static void
1056
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1057
               const xmlChar *fullname,
1058
               const xmlChar *fullattr,
1059
24.7k
               const xmlChar *value) {
1060
24.7k
    xmlDefAttrsPtr defaults;
1061
24.7k
    xmlDefAttr *attr;
1062
24.7k
    int len, expandedSize;
1063
24.7k
    xmlHashedString name;
1064
24.7k
    xmlHashedString prefix;
1065
24.7k
    xmlHashedString hvalue;
1066
24.7k
    const xmlChar *localname;
1067
1068
    /*
1069
     * Allows to detect attribute redefinitions
1070
     */
1071
24.7k
    if (ctxt->attsSpecial != NULL) {
1072
20.8k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1073
8.64k
      return;
1074
20.8k
    }
1075
1076
16.1k
    if (ctxt->attsDefault == NULL) {
1077
3.98k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1078
3.98k
  if (ctxt->attsDefault == NULL)
1079
3
      goto mem_error;
1080
3.98k
    }
1081
1082
    /*
1083
     * split the element name into prefix:localname , the string found
1084
     * are within the DTD and then not associated to namespace names.
1085
     */
1086
16.1k
    localname = xmlSplitQName3(fullname, &len);
1087
16.1k
    if (localname == NULL) {
1088
11.7k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1089
11.7k
  prefix.name = NULL;
1090
11.7k
    } else {
1091
4.34k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1092
4.34k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1093
4.34k
        if (prefix.name == NULL)
1094
0
            goto mem_error;
1095
4.34k
    }
1096
16.1k
    if (name.name == NULL)
1097
0
        goto mem_error;
1098
1099
    /*
1100
     * make sure there is some storage
1101
     */
1102
16.1k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1103
16.1k
    if ((defaults == NULL) ||
1104
16.1k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1105
6.03k
        xmlDefAttrsPtr temp;
1106
6.03k
        int newSize;
1107
1108
6.03k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1109
6.03k
        temp = xmlRealloc(defaults,
1110
6.03k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
6.03k
  if (temp == NULL)
1112
1
      goto mem_error;
1113
6.03k
        if (defaults == NULL)
1114
4.56k
            temp->nbAttrs = 0;
1115
6.03k
  temp->maxAttrs = newSize;
1116
6.03k
        defaults = temp;
1117
6.03k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
6.03k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
6.03k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
16.1k
    localname = xmlSplitQName3(fullattr, &len);
1129
16.1k
    if (localname == NULL) {
1130
9.05k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
9.05k
  prefix.name = NULL;
1132
9.05k
    } else {
1133
7.08k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
7.08k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
7.08k
        if (prefix.name == NULL)
1136
1
            goto mem_error;
1137
7.08k
    }
1138
16.1k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
16.1k
    len = strlen((const char *) value);
1143
16.1k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
16.1k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
16.1k
    expandedSize = strlen((const char *) name.name);
1148
16.1k
    if (prefix.name != NULL)
1149
7.08k
        expandedSize += strlen((const char *) prefix.name);
1150
16.1k
    expandedSize += len;
1151
1152
16.1k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
16.1k
    attr->name = name;
1154
16.1k
    attr->prefix = prefix;
1155
16.1k
    attr->value = hvalue;
1156
16.1k
    attr->valueEnd = hvalue.name + len;
1157
16.1k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
16.1k
    attr->expandedSize = expandedSize;
1159
1160
16.1k
    return;
1161
1162
5
mem_error:
1163
5
    xmlErrMemory(ctxt);
1164
5
    return;
1165
16.1k
}
1166
1167
/**
1168
 * xmlAddSpecialAttr:
1169
 * @ctxt:  an XML parser context
1170
 * @fullname:  the element fullname
1171
 * @fullattr:  the attribute fullname
1172
 * @type:  the attribute type
1173
 *
1174
 * Register this attribute type
1175
 */
1176
static void
1177
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1178
      const xmlChar *fullname,
1179
      const xmlChar *fullattr,
1180
      int type)
1181
25.8k
{
1182
25.8k
    if (ctxt->attsSpecial == NULL) {
1183
4.32k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1184
4.32k
  if (ctxt->attsSpecial == NULL)
1185
5
      goto mem_error;
1186
4.32k
    }
1187
1188
25.7k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1189
25.7k
                    (void *) (ptrdiff_t) type) < 0)
1190
0
        goto mem_error;
1191
25.7k
    return;
1192
1193
25.7k
mem_error:
1194
5
    xmlErrMemory(ctxt);
1195
5
    return;
1196
25.7k
}
1197
1198
/**
1199
 * xmlCleanSpecialAttrCallback:
1200
 *
1201
 * Removes CDATA attributes from the special attribute table
1202
 */
1203
static void
1204
xmlCleanSpecialAttrCallback(void *payload, void *data,
1205
                            const xmlChar *fullname, const xmlChar *fullattr,
1206
16.6k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1207
16.6k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1208
1209
16.6k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1210
1.24k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1211
1.24k
    }
1212
16.6k
}
1213
1214
/**
1215
 * xmlCleanSpecialAttr:
1216
 * @ctxt:  an XML parser context
1217
 *
1218
 * Trim the list of attributes defined to remove all those of type
1219
 * CDATA as they are not special. This call should be done when finishing
1220
 * to parse the DTD and before starting to parse the document root.
1221
 */
1222
static void
1223
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1224
21.2k
{
1225
21.2k
    if (ctxt->attsSpecial == NULL)
1226
16.9k
        return;
1227
1228
4.31k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1229
1230
4.31k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1231
295
        xmlHashFree(ctxt->attsSpecial, NULL);
1232
295
        ctxt->attsSpecial = NULL;
1233
295
    }
1234
4.31k
    return;
1235
21.2k
}
1236
1237
/**
1238
 * xmlCheckLanguageID:
1239
 * @lang:  pointer to the string value
1240
 *
1241
 * DEPRECATED: Internal function, do not use.
1242
 *
1243
 * Checks that the value conforms to the LanguageID production:
1244
 *
1245
 * NOTE: this is somewhat deprecated, those productions were removed from
1246
 *       the XML Second edition.
1247
 *
1248
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1249
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1250
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1251
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1252
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1253
 * [38] Subcode ::= ([a-z] | [A-Z])+
1254
 *
1255
 * The current REC reference the successors of RFC 1766, currently 5646
1256
 *
1257
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1258
 * langtag       = language
1259
 *                 ["-" script]
1260
 *                 ["-" region]
1261
 *                 *("-" variant)
1262
 *                 *("-" extension)
1263
 *                 ["-" privateuse]
1264
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1265
 *                 ["-" extlang]       ; sometimes followed by
1266
 *                                     ; extended language subtags
1267
 *               / 4ALPHA              ; or reserved for future use
1268
 *               / 5*8ALPHA            ; or registered language subtag
1269
 *
1270
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1271
 *                 *2("-" 3ALPHA)      ; permanently reserved
1272
 *
1273
 * script        = 4ALPHA              ; ISO 15924 code
1274
 *
1275
 * region        = 2ALPHA              ; ISO 3166-1 code
1276
 *               / 3DIGIT              ; UN M.49 code
1277
 *
1278
 * variant       = 5*8alphanum         ; registered variants
1279
 *               / (DIGIT 3alphanum)
1280
 *
1281
 * extension     = singleton 1*("-" (2*8alphanum))
1282
 *
1283
 *                                     ; Single alphanumerics
1284
 *                                     ; "x" reserved for private use
1285
 * singleton     = DIGIT               ; 0 - 9
1286
 *               / %x41-57             ; A - W
1287
 *               / %x59-5A             ; Y - Z
1288
 *               / %x61-77             ; a - w
1289
 *               / %x79-7A             ; y - z
1290
 *
1291
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1292
 * The parser below doesn't try to cope with extension or privateuse
1293
 * that could be added but that's not interoperable anyway
1294
 *
1295
 * Returns 1 if correct 0 otherwise
1296
 **/
1297
int
1298
xmlCheckLanguageID(const xmlChar * lang)
1299
0
{
1300
0
    const xmlChar *cur = lang, *nxt;
1301
1302
0
    if (cur == NULL)
1303
0
        return (0);
1304
0
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1305
0
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1306
0
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1307
0
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1308
        /*
1309
         * Still allow IANA code and user code which were coming
1310
         * from the previous version of the XML-1.0 specification
1311
         * it's deprecated but we should not fail
1312
         */
1313
0
        cur += 2;
1314
0
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315
0
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1316
0
            cur++;
1317
0
        return(cur[0] == 0);
1318
0
    }
1319
0
    nxt = cur;
1320
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1321
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1322
0
           nxt++;
1323
0
    if (nxt - cur >= 4) {
1324
        /*
1325
         * Reserved
1326
         */
1327
0
        if ((nxt - cur > 8) || (nxt[0] != 0))
1328
0
            return(0);
1329
0
        return(1);
1330
0
    }
1331
0
    if (nxt - cur < 2)
1332
0
        return(0);
1333
    /* we got an ISO 639 code */
1334
0
    if (nxt[0] == 0)
1335
0
        return(1);
1336
0
    if (nxt[0] != '-')
1337
0
        return(0);
1338
1339
0
    nxt++;
1340
0
    cur = nxt;
1341
    /* now we can have extlang or script or region or variant */
1342
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1343
0
        goto region_m49;
1344
1345
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1346
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1347
0
           nxt++;
1348
0
    if (nxt - cur == 4)
1349
0
        goto script;
1350
0
    if (nxt - cur == 2)
1351
0
        goto region;
1352
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1353
0
        goto variant;
1354
0
    if (nxt - cur != 3)
1355
0
        return(0);
1356
    /* we parsed an extlang */
1357
0
    if (nxt[0] == 0)
1358
0
        return(1);
1359
0
    if (nxt[0] != '-')
1360
0
        return(0);
1361
1362
0
    nxt++;
1363
0
    cur = nxt;
1364
    /* now we can have script or region or variant */
1365
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1366
0
        goto region_m49;
1367
1368
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1369
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1370
0
           nxt++;
1371
0
    if (nxt - cur == 2)
1372
0
        goto region;
1373
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1374
0
        goto variant;
1375
0
    if (nxt - cur != 4)
1376
0
        return(0);
1377
    /* we parsed a script */
1378
0
script:
1379
0
    if (nxt[0] == 0)
1380
0
        return(1);
1381
0
    if (nxt[0] != '-')
1382
0
        return(0);
1383
1384
0
    nxt++;
1385
0
    cur = nxt;
1386
    /* now we can have region or variant */
1387
0
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388
0
        goto region_m49;
1389
1390
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392
0
           nxt++;
1393
1394
0
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1395
0
        goto variant;
1396
0
    if (nxt - cur != 2)
1397
0
        return(0);
1398
    /* we parsed a region */
1399
0
region:
1400
0
    if (nxt[0] == 0)
1401
0
        return(1);
1402
0
    if (nxt[0] != '-')
1403
0
        return(0);
1404
1405
0
    nxt++;
1406
0
    cur = nxt;
1407
    /* now we can just have a variant */
1408
0
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1409
0
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1410
0
           nxt++;
1411
1412
0
    if ((nxt - cur < 5) || (nxt - cur > 8))
1413
0
        return(0);
1414
1415
    /* we parsed a variant */
1416
0
variant:
1417
0
    if (nxt[0] == 0)
1418
0
        return(1);
1419
0
    if (nxt[0] != '-')
1420
0
        return(0);
1421
    /* extensions and private use subtags not checked */
1422
0
    return (1);
1423
1424
0
region_m49:
1425
0
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1426
0
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1427
0
        nxt += 3;
1428
0
        goto region;
1429
0
    }
1430
0
    return(0);
1431
0
}
1432
1433
/************************************************************************
1434
 *                  *
1435
 *    Parser stacks related functions and macros    *
1436
 *                  *
1437
 ************************************************************************/
1438
1439
static xmlChar *
1440
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1441
1442
/**
1443
 * xmlParserNsCreate:
1444
 *
1445
 * Create a new namespace database.
1446
 *
1447
 * Returns the new obejct.
1448
 */
1449
xmlParserNsData *
1450
94.0k
xmlParserNsCreate(void) {
1451
94.0k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1452
1453
94.0k
    if (nsdb == NULL)
1454
9
        return(NULL);
1455
94.0k
    memset(nsdb, 0, sizeof(*nsdb));
1456
94.0k
    nsdb->defaultNsIndex = INT_MAX;
1457
1458
94.0k
    return(nsdb);
1459
94.0k
}
1460
1461
/**
1462
 * xmlParserNsFree:
1463
 * @nsdb: namespace database
1464
 *
1465
 * Free a namespace database.
1466
 */
1467
void
1468
94.0k
xmlParserNsFree(xmlParserNsData *nsdb) {
1469
94.0k
    if (nsdb == NULL)
1470
0
        return;
1471
1472
94.0k
    xmlFree(nsdb->extra);
1473
94.0k
    xmlFree(nsdb->hash);
1474
94.0k
    xmlFree(nsdb);
1475
94.0k
}
1476
1477
/**
1478
 * xmlParserNsReset:
1479
 * @nsdb: namespace database
1480
 *
1481
 * Reset a namespace database.
1482
 */
1483
static void
1484
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1485
0
    if (nsdb == NULL)
1486
0
        return;
1487
1488
0
    nsdb->hashElems = 0;
1489
0
    nsdb->elementId = 0;
1490
0
    nsdb->defaultNsIndex = INT_MAX;
1491
1492
0
    if (nsdb->hash)
1493
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1494
0
}
1495
1496
/**
1497
 * xmlParserStartElement:
1498
 * @nsdb: namespace database
1499
 *
1500
 * Signal that a new element has started.
1501
 *
1502
 * Returns 0 on success, -1 if the element counter overflowed.
1503
 */
1504
static int
1505
4.55M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1506
4.55M
    if (nsdb->elementId == UINT_MAX)
1507
0
        return(-1);
1508
4.55M
    nsdb->elementId++;
1509
1510
4.55M
    return(0);
1511
4.55M
}
1512
1513
/**
1514
 * xmlParserNsLookup:
1515
 * @ctxt: parser context
1516
 * @prefix: namespace prefix
1517
 * @bucketPtr: optional bucket (return value)
1518
 *
1519
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1520
 * be set to the matching bucket, or the first empty bucket if no match
1521
 * was found.
1522
 *
1523
 * Returns the namespace index on success, INT_MAX if no namespace was
1524
 * found.
1525
 */
1526
static int
1527
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1528
5.16M
                  xmlParserNsBucket **bucketPtr) {
1529
5.16M
    xmlParserNsBucket *bucket;
1530
5.16M
    unsigned index, hashValue;
1531
1532
5.16M
    if (prefix->name == NULL)
1533
3.20M
        return(ctxt->nsdb->defaultNsIndex);
1534
1535
1.96M
    if (ctxt->nsdb->hashSize == 0)
1536
177k
        return(INT_MAX);
1537
1538
1.78M
    hashValue = prefix->hashValue;
1539
1.78M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1540
1.78M
    bucket = &ctxt->nsdb->hash[index];
1541
1542
17.1M
    while (bucket->hashValue) {
1543
16.3M
        if ((bucket->hashValue == hashValue) &&
1544
16.3M
            (bucket->index != INT_MAX)) {
1545
986k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
986k
                if (bucketPtr != NULL)
1547
140k
                    *bucketPtr = bucket;
1548
986k
                return(bucket->index);
1549
986k
            }
1550
986k
        }
1551
1552
15.3M
        index++;
1553
15.3M
        bucket++;
1554
15.3M
        if (index == ctxt->nsdb->hashSize) {
1555
31.0k
            index = 0;
1556
31.0k
            bucket = ctxt->nsdb->hash;
1557
31.0k
        }
1558
15.3M
    }
1559
1560
796k
    if (bucketPtr != NULL)
1561
34.6k
        *bucketPtr = bucket;
1562
796k
    return(INT_MAX);
1563
1.78M
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
3.81M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
3.81M
    const xmlChar *ret;
1577
3.81M
    int nsIndex;
1578
1579
3.81M
    if (prefix->name == ctxt->str_xml)
1580
584
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
3.81M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
3.81M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
3.07M
        return(NULL);
1589
1590
744k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
744k
    if (ret[0] == 0)
1592
13.4k
        ret = NULL;
1593
744k
    return(ret);
1594
3.81M
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
727k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
727k
    xmlHashedString hprefix;
1609
727k
    int nsIndex;
1610
1611
727k
    if (prefix == ctxt->str_xml)
1612
94.8k
        return(NULL);
1613
1614
632k
    hprefix.name = prefix;
1615
632k
    if (prefix != NULL)
1616
339k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
292k
    else
1618
292k
        hprefix.hashValue = 0;
1619
632k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
632k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
632k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
632k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
110k
                     void *saxData) {
1641
110k
    xmlHashedString hprefix;
1642
110k
    int nsIndex;
1643
1644
110k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
110k
    hprefix.name = prefix;
1648
110k
    if (prefix != NULL)
1649
92.7k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
17.2k
    else
1651
17.2k
        hprefix.hashValue = 0;
1652
110k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
110k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
110k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
110k
    return(0);
1658
110k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
51.5k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
51.5k
    const xmlChar **table;
1671
51.5k
    xmlParserNsExtra *extra;
1672
51.5k
    int newSize;
1673
1674
51.5k
    if (ctxt->nsMax > INT_MAX / 2)
1675
0
        goto error;
1676
51.5k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1677
1678
51.5k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1679
51.5k
    if (table == NULL)
1680
11
        goto error;
1681
51.5k
    ctxt->nsTab = table;
1682
1683
51.5k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1684
51.5k
    if (extra == NULL)
1685
6
        goto error;
1686
51.5k
    ctxt->nsdb->extra = extra;
1687
1688
51.5k
    ctxt->nsMax = newSize;
1689
51.5k
    return(0);
1690
1691
17
error:
1692
17
    xmlErrMemory(ctxt);
1693
17
    return(-1);
1694
51.5k
}
1695
1696
/**
1697
 * xmlParserNsPush:
1698
 * @ctxt: parser context
1699
 * @prefix: prefix with hash value
1700
 * @uri: uri with hash value
1701
 * @saxData: extra data for SAX handler
1702
 * @defAttr: whether the namespace comes from a default attribute
1703
 *
1704
 * Push a new namespace on the table.
1705
 *
1706
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1707
 * -1 if a memory allocation failed.
1708
 */
1709
static int
1710
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1711
147k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1712
147k
    xmlParserNsBucket *bucket = NULL;
1713
147k
    xmlParserNsExtra *extra;
1714
147k
    const xmlChar **ns;
1715
147k
    unsigned hashValue, nsIndex, oldIndex;
1716
1717
147k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1718
70
        return(0);
1719
1720
147k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1721
17
        xmlErrMemory(ctxt);
1722
17
        return(-1);
1723
17
    }
1724
1725
    /*
1726
     * Default namespace and 'xml' namespace
1727
     */
1728
147k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1729
26.3k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1730
1731
26.3k
        if (oldIndex != INT_MAX) {
1732
15.2k
            extra = &ctxt->nsdb->extra[oldIndex];
1733
1734
15.2k
            if (extra->elementId == ctxt->nsdb->elementId) {
1735
1.23k
                if (defAttr == 0)
1736
415
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1737
1.23k
                return(0);
1738
1.23k
            }
1739
1740
14.0k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741
14.0k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1742
0
                return(0);
1743
14.0k
        }
1744
1745
25.1k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1746
25.1k
        goto populate_entry;
1747
26.3k
    }
1748
1749
    /*
1750
     * Hash table lookup
1751
     */
1752
121k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1753
121k
    if (oldIndex != INT_MAX) {
1754
36.3k
        extra = &ctxt->nsdb->extra[oldIndex];
1755
1756
        /*
1757
         * Check for duplicate definitions on the same element.
1758
         */
1759
36.3k
        if (extra->elementId == ctxt->nsdb->elementId) {
1760
9.04k
            if (defAttr == 0)
1761
8.97k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1762
9.04k
            return(0);
1763
9.04k
        }
1764
1765
27.2k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1766
27.2k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1767
0
            return(0);
1768
1769
27.2k
        bucket->index = ctxt->nsNr;
1770
27.2k
        goto populate_entry;
1771
27.2k
    }
1772
1773
    /*
1774
     * Insert new bucket
1775
     */
1776
1777
84.8k
    hashValue = prefix->hashValue;
1778
1779
    /*
1780
     * Grow hash table, 50% fill factor
1781
     */
1782
84.8k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1783
50.9k
        xmlParserNsBucket *newHash;
1784
50.9k
        unsigned newSize, i, index;
1785
1786
50.9k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1787
0
            xmlErrMemory(ctxt);
1788
0
            return(-1);
1789
0
        }
1790
50.9k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1791
50.9k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1792
50.9k
        if (newHash == NULL) {
1793
10
            xmlErrMemory(ctxt);
1794
10
            return(-1);
1795
10
        }
1796
50.9k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1797
1798
118k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1799
67.5k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1800
67.5k
            unsigned newIndex;
1801
1802
67.5k
            if (hv == 0)
1803
33.7k
                continue;
1804
33.7k
            newIndex = hv & (newSize - 1);
1805
1806
2.57M
            while (newHash[newIndex].hashValue != 0) {
1807
2.53M
                newIndex++;
1808
2.53M
                if (newIndex == newSize)
1809
3.30k
                    newIndex = 0;
1810
2.53M
            }
1811
1812
33.7k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1813
33.7k
        }
1814
1815
50.9k
        xmlFree(ctxt->nsdb->hash);
1816
50.9k
        ctxt->nsdb->hash = newHash;
1817
50.9k
        ctxt->nsdb->hashSize = newSize;
1818
1819
        /*
1820
         * Relookup
1821
         */
1822
50.9k
        index = hashValue & (newSize - 1);
1823
1824
67.5k
        while (newHash[index].hashValue != 0) {
1825
16.5k
            index++;
1826
16.5k
            if (index == newSize)
1827
174
                index = 0;
1828
16.5k
        }
1829
1830
50.9k
        bucket = &newHash[index];
1831
50.9k
    }
1832
1833
84.8k
    bucket->hashValue = hashValue;
1834
84.8k
    bucket->index = ctxt->nsNr;
1835
84.8k
    ctxt->nsdb->hashElems++;
1836
84.8k
    oldIndex = INT_MAX;
1837
1838
137k
populate_entry:
1839
137k
    nsIndex = ctxt->nsNr;
1840
1841
137k
    ns = &ctxt->nsTab[nsIndex * 2];
1842
137k
    ns[0] = prefix ? prefix->name : NULL;
1843
137k
    ns[1] = uri->name;
1844
1845
137k
    extra = &ctxt->nsdb->extra[nsIndex];
1846
137k
    extra->saxData = saxData;
1847
137k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1848
137k
    extra->uriHashValue = uri->hashValue;
1849
137k
    extra->elementId = ctxt->nsdb->elementId;
1850
137k
    extra->oldIndex = oldIndex;
1851
1852
137k
    ctxt->nsNr++;
1853
1854
137k
    return(1);
1855
84.8k
}
1856
1857
/**
1858
 * xmlParserNsPop:
1859
 * @ctxt: an XML parser context
1860
 * @nr:  the number to pop
1861
 *
1862
 * Pops the top @nr namespaces and restores the hash table.
1863
 *
1864
 * Returns the number of namespaces popped.
1865
 */
1866
static int
1867
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1868
95.5k
{
1869
95.5k
    int i;
1870
1871
    /* assert(nr <= ctxt->nsNr); */
1872
1873
222k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1874
127k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1875
127k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1876
1877
127k
        if (prefix == NULL) {
1878
23.0k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1879
104k
        } else {
1880
104k
            xmlHashedString hprefix;
1881
104k
            xmlParserNsBucket *bucket = NULL;
1882
1883
104k
            hprefix.name = prefix;
1884
104k
            hprefix.hashValue = extra->prefixHashValue;
1885
104k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1886
            /* assert(bucket && bucket->hashValue); */
1887
104k
            bucket->index = extra->oldIndex;
1888
104k
        }
1889
127k
    }
1890
1891
95.5k
    ctxt->nsNr -= nr;
1892
95.5k
    return(nr);
1893
95.5k
}
1894
1895
static int
1896
53.3k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1897
53.3k
    const xmlChar **atts;
1898
53.3k
    unsigned *attallocs;
1899
53.3k
    int maxatts;
1900
1901
53.3k
    if (nr + 5 > ctxt->maxatts) {
1902
53.3k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1903
53.3k
  atts = (const xmlChar **) xmlMalloc(
1904
53.3k
             maxatts * sizeof(const xmlChar *));
1905
53.3k
  if (atts == NULL) goto mem_error;
1906
53.2k
  attallocs = xmlRealloc(ctxt->attallocs,
1907
53.2k
                               (maxatts / 5) * sizeof(attallocs[0]));
1908
53.2k
  if (attallocs == NULL) {
1909
12
            xmlFree(atts);
1910
12
            goto mem_error;
1911
12
        }
1912
53.2k
        if (ctxt->maxatts > 0)
1913
152
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1914
53.2k
        xmlFree(ctxt->atts);
1915
53.2k
  ctxt->atts = atts;
1916
53.2k
  ctxt->attallocs = attallocs;
1917
53.2k
  ctxt->maxatts = maxatts;
1918
53.2k
    }
1919
53.2k
    return(ctxt->maxatts);
1920
24
mem_error:
1921
24
    xmlErrMemory(ctxt);
1922
24
    return(-1);
1923
53.3k
}
1924
1925
/**
1926
 * inputPush:
1927
 * @ctxt:  an XML parser context
1928
 * @value:  the parser input
1929
 *
1930
 * Pushes a new parser input on top of the input stack
1931
 *
1932
 * Returns -1 in case of error, the index in the stack otherwise
1933
 */
1934
int
1935
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1936
98.4k
{
1937
98.4k
    if ((ctxt == NULL) || (value == NULL))
1938
0
        return(-1);
1939
98.4k
    if (ctxt->inputNr >= ctxt->inputMax) {
1940
0
        size_t newSize = ctxt->inputMax * 2;
1941
0
        xmlParserInputPtr *tmp;
1942
1943
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1944
0
                                               newSize * sizeof(*tmp));
1945
0
        if (tmp == NULL) {
1946
0
            xmlErrMemory(ctxt);
1947
0
            return (-1);
1948
0
        }
1949
0
        ctxt->inputTab = tmp;
1950
0
        ctxt->inputMax = newSize;
1951
0
    }
1952
98.4k
    ctxt->inputTab[ctxt->inputNr] = value;
1953
98.4k
    ctxt->input = value;
1954
98.4k
    return (ctxt->inputNr++);
1955
98.4k
}
1956
/**
1957
 * inputPop:
1958
 * @ctxt: an XML parser context
1959
 *
1960
 * Pops the top parser input from the input stack
1961
 *
1962
 * Returns the input just removed
1963
 */
1964
xmlParserInputPtr
1965
inputPop(xmlParserCtxtPtr ctxt)
1966
284k
{
1967
284k
    xmlParserInputPtr ret;
1968
1969
284k
    if (ctxt == NULL)
1970
0
        return(NULL);
1971
284k
    if (ctxt->inputNr <= 0)
1972
188k
        return (NULL);
1973
95.8k
    ctxt->inputNr--;
1974
95.8k
    if (ctxt->inputNr > 0)
1975
6.73k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1976
89.0k
    else
1977
89.0k
        ctxt->input = NULL;
1978
95.8k
    ret = ctxt->inputTab[ctxt->inputNr];
1979
95.8k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1980
95.8k
    return (ret);
1981
284k
}
1982
/**
1983
 * nodePush:
1984
 * @ctxt:  an XML parser context
1985
 * @value:  the element node
1986
 *
1987
 * DEPRECATED: Internal function, do not use.
1988
 *
1989
 * Pushes a new element node on top of the node stack
1990
 *
1991
 * Returns -1 in case of error, the index in the stack otherwise
1992
 */
1993
int
1994
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1995
3.66M
{
1996
3.66M
    int maxDepth;
1997
1998
3.66M
    if (ctxt == NULL)
1999
0
        return(0);
2000
2001
3.66M
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2002
3.66M
    if (ctxt->nodeNr > maxDepth) {
2003
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2005
0
                ctxt->nodeNr);
2006
0
        xmlHaltParser(ctxt);
2007
0
        return(-1);
2008
0
    }
2009
3.66M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2010
7.02k
        xmlNodePtr *tmp;
2011
2012
7.02k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2013
7.02k
                                      ctxt->nodeMax * 2 *
2014
7.02k
                                      sizeof(ctxt->nodeTab[0]));
2015
7.02k
        if (tmp == NULL) {
2016
0
            xmlErrMemory(ctxt);
2017
0
            return (-1);
2018
0
        }
2019
7.02k
        ctxt->nodeTab = tmp;
2020
7.02k
  ctxt->nodeMax *= 2;
2021
7.02k
    }
2022
3.66M
    ctxt->nodeTab[ctxt->nodeNr] = value;
2023
3.66M
    ctxt->node = value;
2024
3.66M
    return (ctxt->nodeNr++);
2025
3.66M
}
2026
2027
/**
2028
 * nodePop:
2029
 * @ctxt: an XML parser context
2030
 *
2031
 * DEPRECATED: Internal function, do not use.
2032
 *
2033
 * Pops the top element node from the node stack
2034
 *
2035
 * Returns the node just removed
2036
 */
2037
xmlNodePtr
2038
nodePop(xmlParserCtxtPtr ctxt)
2039
3.68M
{
2040
3.68M
    xmlNodePtr ret;
2041
2042
3.68M
    if (ctxt == NULL) return(NULL);
2043
3.68M
    if (ctxt->nodeNr <= 0)
2044
31.9k
        return (NULL);
2045
3.65M
    ctxt->nodeNr--;
2046
3.65M
    if (ctxt->nodeNr > 0)
2047
3.60M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2048
49.2k
    else
2049
49.2k
        ctxt->node = NULL;
2050
3.65M
    ret = ctxt->nodeTab[ctxt->nodeNr];
2051
3.65M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2052
3.65M
    return (ret);
2053
3.68M
}
2054
2055
/**
2056
 * nameNsPush:
2057
 * @ctxt:  an XML parser context
2058
 * @value:  the element name
2059
 * @prefix:  the element prefix
2060
 * @URI:  the element namespace name
2061
 * @line:  the current line number for error messages
2062
 * @nsNr:  the number of namespaces pushed on the namespace table
2063
 *
2064
 * Pushes a new element name/prefix/URL on top of the name stack
2065
 *
2066
 * Returns -1 in case of error, the index in the stack otherwise
2067
 */
2068
static int
2069
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2070
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2071
3.81M
{
2072
3.81M
    xmlStartTag *tag;
2073
2074
3.81M
    if (ctxt->nameNr >= ctxt->nameMax) {
2075
9.12k
        const xmlChar * *tmp;
2076
9.12k
        xmlStartTag *tmp2;
2077
9.12k
        ctxt->nameMax *= 2;
2078
9.12k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2079
9.12k
                                    ctxt->nameMax *
2080
9.12k
                                    sizeof(ctxt->nameTab[0]));
2081
9.12k
        if (tmp == NULL) {
2082
0
      ctxt->nameMax /= 2;
2083
0
      goto mem_error;
2084
0
        }
2085
9.12k
  ctxt->nameTab = tmp;
2086
9.12k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2087
9.12k
                                    ctxt->nameMax *
2088
9.12k
                                    sizeof(ctxt->pushTab[0]));
2089
9.12k
        if (tmp2 == NULL) {
2090
1
      ctxt->nameMax /= 2;
2091
1
      goto mem_error;
2092
1
        }
2093
9.12k
  ctxt->pushTab = tmp2;
2094
3.80M
    } else if (ctxt->pushTab == NULL) {
2095
64.9k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2096
64.9k
                                            sizeof(ctxt->pushTab[0]));
2097
64.9k
        if (ctxt->pushTab == NULL)
2098
201
            goto mem_error;
2099
64.9k
    }
2100
3.81M
    ctxt->nameTab[ctxt->nameNr] = value;
2101
3.81M
    ctxt->name = value;
2102
3.81M
    tag = &ctxt->pushTab[ctxt->nameNr];
2103
3.81M
    tag->prefix = prefix;
2104
3.81M
    tag->URI = URI;
2105
3.81M
    tag->line = line;
2106
3.81M
    tag->nsNr = nsNr;
2107
3.81M
    return (ctxt->nameNr++);
2108
202
mem_error:
2109
202
    xmlErrMemory(ctxt);
2110
202
    return (-1);
2111
3.81M
}
2112
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the top entry of the name stack. The entry below it, if any,
 * becomes the current name; otherwise ctxt->name is set to NULL. The
 * vacated nameTab slot is cleared; pushTab is not touched.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2138
2139
/**
2140
 * namePush:
2141
 * @ctxt:  an XML parser context
2142
 * @value:  the element name
2143
 *
2144
 * DEPRECATED: Internal function, do not use.
2145
 *
2146
 * Pushes a new element name on top of the name stack
2147
 *
2148
 * Returns -1 in case of error, the index in the stack otherwise
2149
 */
2150
int
2151
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2152
0
{
2153
0
    if (ctxt == NULL) return (-1);
2154
2155
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2156
0
        const xmlChar * *tmp;
2157
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2158
0
                                    ctxt->nameMax * 2 *
2159
0
                                    sizeof(ctxt->nameTab[0]));
2160
0
        if (tmp == NULL) {
2161
0
      goto mem_error;
2162
0
        }
2163
0
  ctxt->nameTab = tmp;
2164
0
        ctxt->nameMax *= 2;
2165
0
    }
2166
0
    ctxt->nameTab[ctxt->nameNr] = value;
2167
0
    ctxt->name = value;
2168
0
    return (ctxt->nameNr++);
2169
0
mem_error:
2170
0
    xmlErrMemory(ctxt);
2171
0
    return (-1);
2172
0
}
2173
2174
/**
2175
 * namePop:
2176
 * @ctxt: an XML parser context
2177
 *
2178
 * DEPRECATED: Internal function, do not use.
2179
 *
2180
 * Pops the top element name from the name stack
2181
 *
2182
 * Returns the name just removed
2183
 */
2184
const xmlChar *
2185
namePop(xmlParserCtxtPtr ctxt)
2186
3.80M
{
2187
3.80M
    const xmlChar *ret;
2188
2189
3.80M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2190
92
        return (NULL);
2191
3.80M
    ctxt->nameNr--;
2192
3.80M
    if (ctxt->nameNr > 0)
2193
3.75M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2194
51.4k
    else
2195
51.4k
        ctxt->name = NULL;
2196
3.80M
    ret = ctxt->nameTab[ctxt->nameNr];
2197
3.80M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2198
3.80M
    return (ret);
2199
3.80M
}
2200
2201
4.55M
/*
 * spacePush:
 * Pushes val on top of the ctxt->spaceTab stack and points ctxt->space
 * at the new top entry. The array is doubled when the stack is full.
 *
 * Returns the index of the new entry, or -1 if growing the array failed.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                           ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            /* Undo the doubling, the array still has its old size. */
            ctxt->spaceMax /=2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
2219
2220
4.54M
/*
 * spacePop:
 * Removes the top entry of the ctxt->spaceTab stack. ctxt->space is set
 * to the entry below it, or to the first array slot when the stack
 * becomes empty. The vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2232
2233
/*
2234
 * Macros for accessing the content. Those should be used only by the parser,
2235
 * and not exported.
2236
 *
2237
 * Dirty macros, i.e. one often need to make assumption on the context to
2238
 * use them
2239
 *
2240
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2241
 *           To be used with extreme caution since operations consuming
2242
 *           characters may move the input buffer to a different location !
2243
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2244
 *           This should be used internally by the parser
2245
 *           only to compare to ASCII values otherwise it would break when
2246
 *           running with UTF-8 encoding.
2247
 *   RAW     same as CUR but in the input buffer, bypass any token
2248
 *           extraction that may have been done
2249
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
2250
 *           to compare on ASCII based substring.
2251
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2252
 *           strings without newlines within the parser.
2253
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2254
 *           defined char within the parser.
2255
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2256
 *
2257
 *   NEXT    Skip to the next character, this does the proper decoding
2258
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2259
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2260
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2261
 *           to the number of xmlChars used for the encoding [0-5].
2262
 *   CUR_SCHAR  same but operate on a string instead of the context
2263
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2264
 *            the index
2265
 *   GROW, SHRINK  handling of input buffers
2266
 */
2267
2268
31.3M
/* Byte at the current position of the active input (ctxt->input->cur). */
#define RAW (*ctxt->input->cur)
2269
479M
/* Expands to exactly the same expression as RAW. */
#define CUR (*ctxt->input->cur)
2270
16.0M
/* Byte at offset val from the current position; no bounds check is made. */
#define NXT(val) ctxt->input->cur[(val)]
2271
1.06G
/* The current position pointer itself (an lvalue). */
#define CUR_PTR ctxt->input->cur
2272
15.3M
/* The base pointer of the active input (ctxt->input->base). */
#define BASE_PTR ctxt->input->base
2273
2274
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1 ... cn in order. Comparison stops at the first
 * mismatch. s is evaluated several times, so it must not have side
 * effects. Every argument is parenthesized so that expression arguments
 * (for example a conditional) are cast and compared as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
2291
2292
2.20M
/*
 * SKIP(val): advance the current position and the column counter by val
 * bytes, then call xmlParserGrow if the new current byte is 0. Line
 * numbers are not updated, and val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SKIPL(val): advance byte by byte val times, starting a new line
 * (line++, col = 1) for every '\n' that is passed, then call
 * xmlParserGrow if the new current byte is 0. val is re-evaluated on
 * every loop iteration and is parenthesized so that an expression
 * argument is compared as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)
2309
2310
/*
 * SHRINK: outside of progressive parsing, call xmlParserShrink once more
 * than 2 * INPUT_CHUNK bytes lie behind the current position and fewer
 * than 2 * INPUT_CHUNK bytes remain ahead of it.
 * NOTE(review): expands to a bare `if` statement, so it is only safe
 * where a following `else` cannot attach to it.
 */
#define SHRINK \
2311
25.9M
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
2312
25.9M
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2313
25.9M
  (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
2314
25.9M
  xmlParserShrink(ctxt);
2315
2316
/*
 * GROW: outside of progressive parsing, call xmlParserGrow when fewer
 * than INPUT_CHUNK bytes remain ahead of the current position.
 * NOTE(review): also a bare `if` statement, same caveat as SHRINK.
 */
#define GROW \
2317
60.4M
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
2318
60.4M
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
2319
3.29M
  xmlParserGrow(ctxt);
2320
2321
8.49M
/* Shorthand for xmlSkipBlankChars on the current context. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
2322
2323
1.39M
/* Shorthand for xmlSkipBlankCharsPE on the current context. */
#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)
2324
2325
12.1M
/* Shorthand for xmlNextChar on the current context. */
#define NEXT xmlNextChar(ctxt)
2326
2327
6.28M
/*
 * NEXT1: advance the column counter and the current position by one
 * byte, then call xmlParserGrow if the new current byte is 0. The line
 * counter is not touched. Expands to a brace block, not do/while(0).
 */
#define NEXT1 {               \
2328
6.28M
  ctxt->input->col++;           \
2329
6.28M
  ctxt->input->cur++;           \
2330
6.28M
  if (*ctxt->input->cur == 0)         \
2331
6.28M
      xmlParserGrow(ctxt);           \
2332
6.28M
    }
2333
2334
676M
/*
 * NEXTL(l): update line/col for one character (a '\n' starts a new line
 * with col = 1, anything else adds one column), then advance the current
 * position by l bytes. No grow check is made here.
 */
#define NEXTL(l) do {             \
2335
676M
    if (*(ctxt->input->cur) == '\n') {         \
2336
29.1M
  ctxt->input->line++; ctxt->input->col = 1;      \
2337
647M
    } else ctxt->input->col++;           \
2338
676M
    ctxt->input->cur += l;        \
2339
676M
  } while (0)
2340
2341
229M
/* Calls xmlCurrentChar, passing the address of l; l must be an lvalue. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
2342
660k
/* Calls xmlStringCurrentChar on s, passing the address of l. */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
2343
2344
/*
 * COPY_BUF(b, i, v): append character v to buffer b at index i. Values
 * below 0x80 are stored as a single byte and i is incremented; other
 * values are written by xmlCopyCharMultiByte and i is advanced by its
 * return value. v is evaluated more than once and is parenthesized so
 * that an expression argument is compared as a whole. The expansion is
 * an if/else statement without a trailing semicolon.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[i],(v))
2347
2348
/**
2349
 * xmlSkipBlankChars:
2350
 * @ctxt:  the XML parser context
2351
 *
2352
 * DEPRECATED: Internal function, do not use.
2353
 *
2354
 * Skip whitespace in the input stream.
2355
 *
2356
 * Returns the number of space chars skipped
2357
 */
2358
int
2359
9.83M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2360
9.83M
    const xmlChar *cur;
2361
9.83M
    int res = 0;
2362
2363
    /*
2364
     * It's Okay to use CUR/NEXT here since all the blanks are on
2365
     * the ASCII range.
2366
     */
2367
9.83M
    cur = ctxt->input->cur;
2368
9.83M
    while (IS_BLANK_CH(*cur)) {
2369
6.09M
        if (*cur == '\n') {
2370
2.10M
            ctxt->input->line++; ctxt->input->col = 1;
2371
3.98M
        } else {
2372
3.98M
            ctxt->input->col++;
2373
3.98M
        }
2374
6.09M
        cur++;
2375
6.09M
        if (res < INT_MAX)
2376
6.09M
            res++;
2377
6.09M
        if (*cur == 0) {
2378
47.9k
            ctxt->input->cur = cur;
2379
47.9k
            xmlParserGrow(ctxt);
2380
47.9k
            cur = ctxt->input->cur;
2381
47.9k
        }
2382
6.09M
    }
2383
9.83M
    ctxt->input->cur = cur;
2384
2385
9.83M
    return(res);
2386
9.83M
}
2387
2388
static void
2389
4.14k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2390
4.14k
    unsigned long consumed;
2391
4.14k
    xmlEntityPtr ent;
2392
2393
4.14k
    ent = ctxt->input->entity;
2394
2395
4.14k
    ent->flags &= ~XML_ENT_EXPANDING;
2396
2397
4.14k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2398
464
        int result;
2399
2400
        /*
2401
         * Read the rest of the stream in case of errors. We want
2402
         * to account for the whole entity size.
2403
         */
2404
4.73k
        do {
2405
4.73k
            ctxt->input->cur = ctxt->input->end;
2406
4.73k
            xmlParserShrink(ctxt);
2407
4.73k
            result = xmlParserGrow(ctxt);
2408
4.73k
        } while (result > 0);
2409
2410
464
        consumed = ctxt->input->consumed;
2411
464
        xmlSaturatedAddSizeT(&consumed,
2412
464
                             ctxt->input->end - ctxt->input->base);
2413
2414
464
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2415
2416
        /*
2417
         * Add to sizeentities when parsing an external entity
2418
         * for the first time.
2419
         */
2420
464
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2421
142
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2422
142
        }
2423
2424
464
        ent->flags |= XML_ENT_CHECKED;
2425
464
    }
2426
2427
4.14k
    xmlPopInput(ctxt);
2428
2429
4.14k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2430
4.14k
}
2431
2432
/**
2433
 * xmlSkipBlankCharsPE:
2434
 * @ctxt:  the XML parser context
2435
 *
2436
 * Skip whitespace in the input stream, also handling parameter
2437
 * entities.
2438
 *
2439
 * Returns the number of space chars skipped
2440
 */
2441
static int
2442
1.39M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2443
1.39M
    int res = 0;
2444
1.39M
    int inParam;
2445
1.39M
    int expandParam;
2446
2447
1.39M
    inParam = PARSER_IN_PE(ctxt);
2448
1.39M
    expandParam = PARSER_EXTERNAL(ctxt);
2449
2450
1.39M
    if (!inParam && !expandParam)
2451
1.34M
        return(xmlSkipBlankChars(ctxt));
2452
2453
71.6k
    while (PARSER_STOPPED(ctxt) == 0) {
2454
69.7k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2455
20.7k
            NEXT;
2456
48.9k
        } else if (CUR == '%') {
2457
5.67k
            if ((expandParam == 0) ||
2458
5.67k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2459
4.25k
                break;
2460
2461
            /*
2462
             * Expand parameter entity. We continue to consume
2463
             * whitespace at the start of the entity and possible
2464
             * even consume the whole entity and pop it. We might
2465
             * even pop multiple PEs in this loop.
2466
             */
2467
1.42k
            xmlParsePEReference(ctxt);
2468
2469
1.42k
            inParam = PARSER_IN_PE(ctxt);
2470
1.42k
            expandParam = PARSER_EXTERNAL(ctxt);
2471
43.3k
        } else if (CUR == 0) {
2472
4.60k
            if (inParam == 0)
2473
621
                break;
2474
2475
3.98k
            xmlPopPE(ctxt);
2476
2477
3.98k
            inParam = PARSER_IN_PE(ctxt);
2478
3.98k
            expandParam = PARSER_EXTERNAL(ctxt);
2479
38.6k
        } else {
2480
38.6k
            break;
2481
38.6k
        }
2482
2483
        /*
2484
         * Also increase the counter when entering or exiting a PERef.
2485
         * The spec says: "When a parameter-entity reference is recognized
2486
         * in the DTD and included, its replacement text MUST be enlarged
2487
         * by the attachment of one leading and one following space (#x20)
2488
         * character."
2489
         */
2490
26.1k
        if (res < INT_MAX)
2491
26.1k
            res++;
2492
26.1k
    }
2493
2494
45.4k
    return(res);
2495
1.39M
}
2496
2497
/************************************************************************
2498
 *                  *
2499
 *    Commodity functions to handle entities      *
2500
 *                  *
2501
 ************************************************************************/
2502
2503
/**
2504
 * xmlPopInput:
2505
 * @ctxt:  an XML parser context
2506
 *
2507
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2508
 *          pop it and return the next char.
2509
 *
2510
 * Returns the current xmlChar in the parser context
2511
 */
2512
xmlChar
2513
4.14k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2514
4.14k
    xmlParserInputPtr input;
2515
2516
4.14k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2517
4.14k
    input = inputPop(ctxt);
2518
4.14k
    xmlFreeInputStream(input);
2519
4.14k
    if (*ctxt->input->cur == 0)
2520
63
        xmlParserGrow(ctxt);
2521
4.14k
    return(CUR);
2522
4.14k
}
2523
2524
/**
2525
 * xmlPushInput:
2526
 * @ctxt:  an XML parser context
2527
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2528
 *
2529
 * Push an input stream onto the stack.
2530
 *
2531
 * This makes the parser use an input returned from advanced functions
2532
 * like xmlNewInputURL or xmlNewInputMemory.
2533
 *
2534
 * Returns -1 in case of error or the index in the input stack
2535
 */
2536
int
2537
9.44k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2538
9.44k
    int maxDepth;
2539
9.44k
    int ret;
2540
2541
9.44k
    if ((ctxt == NULL) || (input == NULL))
2542
127
        return(-1);
2543
2544
9.32k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2545
9.32k
    if (ctxt->inputNr > maxDepth) {
2546
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2547
0
                       "Maximum entity nesting depth exceeded");
2548
0
        xmlHaltParser(ctxt);
2549
0
  return(-1);
2550
0
    }
2551
9.32k
    ret = inputPush(ctxt, input);
2552
9.32k
    GROW;
2553
9.32k
    return(ret);
2554
9.32k
}
2555
2556
/**
2557
 * xmlParseCharRef:
2558
 * @ctxt:  an XML parser context
2559
 *
2560
 * DEPRECATED: Internal function, don't use.
2561
 *
2562
 * Parse a numeric character reference. Always consumes '&'.
2563
 *
2564
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2565
 *                  '&#x' [0-9a-fA-F]+ ';'
2566
 *
2567
 * [ WFC: Legal Character ]
2568
 * Characters referred to using character references must match the
2569
 * production for Char.
2570
 *
2571
 * Returns the value parsed (as an int), 0 in case of error
2572
 */
2573
int
2574
65.7k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2575
65.7k
    int val = 0;
2576
65.7k
    int count = 0;
2577
2578
    /*
2579
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2580
     */
2581
65.7k
    if ((RAW == '&') && (NXT(1) == '#') &&
2582
65.7k
        (NXT(2) == 'x')) {
2583
28.7k
  SKIP(3);
2584
28.7k
  GROW;
2585
99.9k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2586
77.5k
      if (count++ > 20) {
2587
1.04k
    count = 0;
2588
1.04k
    GROW;
2589
1.04k
      }
2590
77.5k
      if ((RAW >= '0') && (RAW <= '9'))
2591
13.9k
          val = val * 16 + (CUR - '0');
2592
63.6k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2593
32.5k
          val = val * 16 + (CUR - 'a') + 10;
2594
31.0k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2595
24.6k
          val = val * 16 + (CUR - 'A') + 10;
2596
6.41k
      else {
2597
6.41k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2598
6.41k
    val = 0;
2599
6.41k
    break;
2600
6.41k
      }
2601
71.1k
      if (val > 0x110000)
2602
20.7k
          val = 0x110000;
2603
2604
71.1k
      NEXT;
2605
71.1k
      count++;
2606
71.1k
  }
2607
28.7k
  if (RAW == ';') {
2608
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2609
22.3k
      ctxt->input->col++;
2610
22.3k
      ctxt->input->cur++;
2611
22.3k
  }
2612
36.9k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2613
36.9k
  SKIP(2);
2614
36.9k
  GROW;
2615
128k
  while (RAW != ';') { /* loop blocked by count */
2616
99.6k
      if (count++ > 20) {
2617
1.39k
    count = 0;
2618
1.39k
    GROW;
2619
1.39k
      }
2620
99.6k
      if ((RAW >= '0') && (RAW <= '9'))
2621
91.4k
          val = val * 10 + (CUR - '0');
2622
8.24k
      else {
2623
8.24k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2624
8.24k
    val = 0;
2625
8.24k
    break;
2626
8.24k
      }
2627
91.4k
      if (val > 0x110000)
2628
12.6k
          val = 0x110000;
2629
2630
91.4k
      NEXT;
2631
91.4k
      count++;
2632
91.4k
  }
2633
36.9k
  if (RAW == ';') {
2634
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2635
28.7k
      ctxt->input->col++;
2636
28.7k
      ctxt->input->cur++;
2637
28.7k
  }
2638
36.9k
    } else {
2639
0
        if (RAW == '&')
2640
0
            SKIP(1);
2641
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2642
0
    }
2643
2644
    /*
2645
     * [ WFC: Legal Character ]
2646
     * Characters referred to using character references must match the
2647
     * production for Char.
2648
     */
2649
65.7k
    if (val >= 0x110000) {
2650
377
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2651
377
                "xmlParseCharRef: character reference out of bounds\n",
2652
377
          val);
2653
65.3k
    } else if (IS_CHAR(val)) {
2654
43.1k
        return(val);
2655
43.1k
    } else {
2656
22.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2657
22.2k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2658
22.2k
                    val);
2659
22.2k
    }
2660
22.6k
    return(0);
2661
65.7k
}
2662
2663
/**
2664
 * xmlParseStringCharRef:
2665
 * @ctxt:  an XML parser context
2666
 * @str:  a pointer to an index in the string
2667
 *
2668
 * parse Reference declarations, variant parsing from a string rather
2669
 * than an an input flow.
2670
 *
2671
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2672
 *                  '&#x' [0-9a-fA-F]+ ';'
2673
 *
2674
 * [ WFC: Legal Character ]
2675
 * Characters referred to using character references must match the
2676
 * production for Char.
2677
 *
2678
 * Returns the value parsed (as an int), 0 in case of error, str will be
2679
 *         updated to the current value of the index
2680
 */
2681
static int
2682
308k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2683
308k
    const xmlChar *ptr;
2684
308k
    xmlChar cur;
2685
308k
    int val = 0;
2686
2687
308k
    if ((str == NULL) || (*str == NULL)) return(0);
2688
308k
    ptr = *str;
2689
308k
    cur = *ptr;
2690
308k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2691
283k
  ptr += 3;
2692
283k
  cur = *ptr;
2693
1.44M
  while (cur != ';') { /* Non input consuming loop */
2694
1.16M
      if ((cur >= '0') && (cur <= '9'))
2695
729k
          val = val * 16 + (cur - '0');
2696
434k
      else if ((cur >= 'a') && (cur <= 'f'))
2697
173k
          val = val * 16 + (cur - 'a') + 10;
2698
261k
      else if ((cur >= 'A') && (cur <= 'F'))
2699
256k
          val = val * 16 + (cur - 'A') + 10;
2700
5.15k
      else {
2701
5.15k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2702
5.15k
    val = 0;
2703
5.15k
    break;
2704
5.15k
      }
2705
1.15M
      if (val > 0x110000)
2706
112k
          val = 0x110000;
2707
2708
1.15M
      ptr++;
2709
1.15M
      cur = *ptr;
2710
1.15M
  }
2711
283k
  if (cur == ';')
2712
278k
      ptr++;
2713
283k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2714
25.1k
  ptr += 2;
2715
25.1k
  cur = *ptr;
2716
160k
  while (cur != ';') { /* Non input consuming loops */
2717
139k
      if ((cur >= '0') && (cur <= '9'))
2718
134k
          val = val * 10 + (cur - '0');
2719
4.95k
      else {
2720
4.95k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2721
4.95k
    val = 0;
2722
4.95k
    break;
2723
4.95k
      }
2724
134k
      if (val > 0x110000)
2725
56.5k
          val = 0x110000;
2726
2727
134k
      ptr++;
2728
134k
      cur = *ptr;
2729
134k
  }
2730
25.1k
  if (cur == ';')
2731
20.2k
      ptr++;
2732
25.1k
    } else {
2733
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2734
0
  return(0);
2735
0
    }
2736
308k
    *str = ptr;
2737
2738
    /*
2739
     * [ WFC: Legal Character ]
2740
     * Characters referred to using character references must match the
2741
     * production for Char.
2742
     */
2743
308k
    if (val >= 0x110000) {
2744
852
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2745
852
                "xmlParseStringCharRef: character reference out of bounds\n",
2746
852
                val);
2747
307k
    } else if (IS_CHAR(val)) {
2748
288k
        return(val);
2749
288k
    } else {
2750
18.7k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2751
18.7k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2752
18.7k
        val);
2753
18.7k
    }
2754
19.5k
    return(0);
2755
308k
}
2756
2757
/**
2758
 * xmlParserHandlePEReference:
2759
 * @ctxt:  the parser context
2760
 *
2761
 * DEPRECATED: Internal function, do not use.
2762
 *
2763
 * [69] PEReference ::= '%' Name ';'
2764
 *
2765
 * [ WFC: No Recursion ]
2766
 * A parsed entity must not contain a recursive
2767
 * reference to itself, either directly or indirectly.
2768
 *
2769
 * [ WFC: Entity Declared ]
2770
 * In a document without any DTD, a document with only an internal DTD
2771
 * subset which contains no parameter entity references, or a document
2772
 * with "standalone='yes'", ...  ... The declaration of a parameter
2773
 * entity must precede any reference to it...
2774
 *
2775
 * [ VC: Entity Declared ]
2776
 * In a document with an external subset or external parameter entities
2777
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2778
 * must precede any reference to it...
2779
 *
2780
 * [ WFC: In DTD ]
2781
 * Parameter-entity references may only appear in the DTD.
2782
 * NOTE: misleading but this is handled.
2783
 *
2784
 * A PEReference may have been detected in the current input stream
2785
 * the handling is done accordingly to
2786
 *      http://www.w3.org/TR/REC-xml#entproc
2787
 * i.e.
2788
 *   - Included in literal in entity values
2789
 *   - Included as Parameter Entity reference within DTDs
2790
 */
2791
void
2792
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2793
0
    xmlParsePEReference(ctxt);
2794
0
}
2795
2796
/**
2797
 * xmlStringLenDecodeEntities:
2798
 * @ctxt:  the parser context
2799
 * @str:  the input string
2800
 * @len: the string length
2801
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @end:  an end marker xmlChar, 0 if none
2803
 * @end2:  an end marker xmlChar, 0 if none
2804
 * @end3:  an end marker xmlChar, 0 if none
2805
 *
2806
 * DEPRECATED: Internal function, don't use.
2807
 *
2808
 * Returns A newly allocated string with the substitution done. The caller
2809
 *      must deallocate it !
2810
 */
2811
xmlChar *
2812
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813
                           int what ATTRIBUTE_UNUSED,
2814
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2815
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2816
0
        return(NULL);
2817
2818
0
    if ((str[len] != 0) ||
2819
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2820
0
        return(NULL);
2821
2822
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2823
0
}
2824
2825
/**
2826
 * xmlStringDecodeEntities:
2827
 * @ctxt:  the parser context
2828
 * @str:  the input string
2829
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2830
 * @end:  an end marker xmlChar, 0 if none
2831
 * @end2:  an end marker xmlChar, 0 if none
2832
 * @end3:  an end marker xmlChar, 0 if none
2833
 *
2834
 * DEPRECATED: Internal function, don't use.
2835
 *
2836
 * Returns A newly allocated string with the substitution done. The caller
2837
 *      must deallocate it !
2838
 */
2839
xmlChar *
2840
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2841
                        int what ATTRIBUTE_UNUSED,
2842
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2843
0
    if ((ctxt == NULL) || (str == NULL))
2844
0
        return(NULL);
2845
2846
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2847
0
        return(NULL);
2848
2849
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2850
0
}
2851
2852
/************************************************************************
2853
 *                  *
2854
 *    Commodity functions, cleanup needed ?     *
2855
 *                  *
2856
 ************************************************************************/
2857
2858
/**
2859
 * areBlanks:
2860
 * @ctxt:  an XML parser context
2861
 * @str:  a xmlChar *
2862
 * @len:  the size of @str
2863
 * @blank_chars: we know the chars are blanks
2864
 *
2865
 * Is this a sequence of blank chars that one can ignore ?
2866
 *
2867
 * Returns 1 if ignorable 0 otherwise.
2868
 */
2869
2870
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871
1.59M
                     int blank_chars) {
2872
1.59M
    int i;
2873
1.59M
    xmlNodePtr lastChild;
2874
2875
    /*
2876
     * Don't spend time trying to differentiate them, the same callback is
2877
     * used !
2878
     */
2879
1.59M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880
1.59M
  return(0);
2881
2882
    /*
2883
     * Check for xml:space value.
2884
     */
2885
0
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886
0
        (*(ctxt->space) == -2))
2887
0
  return(0);
2888
2889
    /*
2890
     * Check that the string is made of blanks
2891
     */
2892
0
    if (blank_chars == 0) {
2893
0
  for (i = 0;i < len;i++)
2894
0
      if (!(IS_BLANK_CH(str[i]))) return(0);
2895
0
    }
2896
2897
    /*
2898
     * Look if the element is mixed content in the DTD if available
2899
     */
2900
0
    if (ctxt->node == NULL) return(0);
2901
0
    if (ctxt->myDoc != NULL) {
2902
0
        xmlElementPtr elemDecl = NULL;
2903
0
        xmlDocPtr doc = ctxt->myDoc;
2904
0
        const xmlChar *prefix = NULL;
2905
2906
0
        if (ctxt->node->ns)
2907
0
            prefix = ctxt->node->ns->prefix;
2908
0
        if (doc->intSubset != NULL)
2909
0
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2910
0
                                      prefix);
2911
0
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2912
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2913
0
                                      prefix);
2914
0
        if (elemDecl != NULL) {
2915
0
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2916
0
                return(1);
2917
0
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2918
0
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2919
0
                return(0);
2920
0
        }
2921
0
    }
2922
2923
    /*
2924
     * Otherwise, heuristic :-\
2925
     */
2926
0
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2927
0
    if ((ctxt->node->children == NULL) &&
2928
0
  (RAW == '<') && (NXT(1) == '/')) return(0);
2929
2930
0
    lastChild = xmlGetLastChild(ctxt->node);
2931
0
    if (lastChild == NULL) {
2932
0
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2933
0
            (ctxt->node->content != NULL)) return(0);
2934
0
    } else if (xmlNodeIsText(lastChild))
2935
0
        return(0);
2936
0
    else if ((ctxt->node->children != NULL) &&
2937
0
             (xmlNodeIsText(ctxt->node->children)))
2938
0
        return(0);
2939
0
    return(1);
2940
0
}
2941
2942
/************************************************************************
2943
 *                  *
2944
 *    Extra stuff for namespace support     *
2945
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2946
 *                  *
2947
 ************************************************************************/
2948
2949
/**
2950
 * xmlSplitQName:
2951
 * @ctxt:  an XML parser context
2952
 * @name:  the UTF8 encoded XML qualified name string to split
2953
 * @prefixOut:  a xmlChar **
2954
 *
2955
 * parse an UTF8 encoded XML qualified name string
2956
 *
2957
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2958
 *
2959
 * [NS 6] Prefix ::= NCName
2960
 *
2961
 * [NS 7] LocalPart ::= NCName
2962
 *
2963
 * Returns the local part, and prefix is updated
2964
 *   to get the Prefix if any.
2965
 */
2966
2967
xmlChar *
2968
21.1k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2969
21.1k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2970
21.1k
    xmlChar *buffer = NULL;
2971
21.1k
    int len = 0;
2972
21.1k
    int max = XML_MAX_NAMELEN;
2973
21.1k
    xmlChar *ret = NULL;
2974
21.1k
    xmlChar *prefix;
2975
21.1k
    const xmlChar *cur = name;
2976
21.1k
    int c;
2977
2978
21.1k
    if (prefixOut == NULL) return(NULL);
2979
21.1k
    *prefixOut = NULL;
2980
2981
21.1k
    if (cur == NULL) return(NULL);
2982
2983
#ifndef XML_XML_NAMESPACE
2984
    /* xml: prefix is not really a namespace */
2985
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986
        (cur[2] == 'l') && (cur[3] == ':'))
2987
  return(xmlStrdup(name));
2988
#endif
2989
2990
    /* nasty but well-formed */
2991
21.1k
    if (cur[0] == ':')
2992
807
  return(xmlStrdup(name));
2993
2994
20.3k
    c = *cur++;
2995
1.08M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996
1.06M
  buf[len++] = c;
2997
1.06M
  c = *cur++;
2998
1.06M
    }
2999
20.3k
    if (len >= max) {
3000
  /*
3001
   * Okay someone managed to make a huge name, so he's ready to pay
3002
   * for the processing speed.
3003
   */
3004
8.00k
  max = len * 2;
3005
3006
8.00k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3007
8.00k
  if (buffer == NULL) {
3008
0
      xmlErrMemory(ctxt);
3009
0
      return(NULL);
3010
0
  }
3011
8.00k
  memcpy(buffer, buf, len);
3012
991k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013
983k
      if (len + 10 > max) {
3014
5.03k
          xmlChar *tmp;
3015
3016
5.03k
    max *= 2;
3017
5.03k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3018
5.03k
    if (tmp == NULL) {
3019
0
        xmlFree(buffer);
3020
0
        xmlErrMemory(ctxt);
3021
0
        return(NULL);
3022
0
    }
3023
5.03k
    buffer = tmp;
3024
5.03k
      }
3025
983k
      buffer[len++] = c;
3026
983k
      c = *cur++;
3027
983k
  }
3028
8.00k
  buffer[len] = 0;
3029
8.00k
    }
3030
3031
20.3k
    if ((c == ':') && (*cur == 0)) {
3032
782
        if (buffer != NULL)
3033
508
      xmlFree(buffer);
3034
782
  return(xmlStrdup(name));
3035
782
    }
3036
3037
19.5k
    if (buffer == NULL) {
3038
12.0k
  ret = xmlStrndup(buf, len);
3039
12.0k
        if (ret == NULL) {
3040
1
      xmlErrMemory(ctxt);
3041
1
      return(NULL);
3042
1
        }
3043
12.0k
    } else {
3044
7.49k
  ret = buffer;
3045
7.49k
  buffer = NULL;
3046
7.49k
  max = XML_MAX_NAMELEN;
3047
7.49k
    }
3048
3049
3050
19.5k
    if (c == ':') {
3051
10.2k
  c = *cur;
3052
10.2k
        prefix = ret;
3053
10.2k
  if (c == 0) {
3054
0
      ret = xmlStrndup(BAD_CAST "", 0);
3055
0
            if (ret == NULL) {
3056
0
                xmlFree(prefix);
3057
0
                return(NULL);
3058
0
            }
3059
0
            *prefixOut = prefix;
3060
0
            return(ret);
3061
0
  }
3062
10.2k
  len = 0;
3063
3064
  /*
3065
   * Check that the first character is proper to start
3066
   * a new name
3067
   */
3068
10.2k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3069
10.2k
        ((c >= 0x41) && (c <= 0x5A)) ||
3070
10.2k
        (c == '_') || (c == ':'))) {
3071
3.20k
      int l;
3072
3.20k
      int first = CUR_SCHAR(cur, l);
3073
3074
3.20k
      if (!IS_LETTER(first) && (first != '_')) {
3075
2.28k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3076
2.28k
          "Name %s is not XML Namespace compliant\n",
3077
2.28k
          name);
3078
2.28k
      }
3079
3.20k
  }
3080
10.2k
  cur++;
3081
3082
642k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3083
632k
      buf[len++] = c;
3084
632k
      c = *cur++;
3085
632k
  }
3086
10.2k
  if (len >= max) {
3087
      /*
3088
       * Okay someone managed to make a huge name, so he's ready to pay
3089
       * for the processing speed.
3090
       */
3091
5.56k
      max = len * 2;
3092
3093
5.56k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3094
5.56k
      if (buffer == NULL) {
3095
1
          xmlErrMemory(ctxt);
3096
1
                xmlFree(prefix);
3097
1
    return(NULL);
3098
1
      }
3099
5.56k
      memcpy(buffer, buf, len);
3100
1.34M
      while (c != 0) { /* tested bigname2.xml */
3101
1.33M
    if (len + 10 > max) {
3102
5.44k
        xmlChar *tmp;
3103
3104
5.44k
        max *= 2;
3105
5.44k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3106
5.44k
        if (tmp == NULL) {
3107
0
      xmlErrMemory(ctxt);
3108
0
                        xmlFree(prefix);
3109
0
      xmlFree(buffer);
3110
0
      return(NULL);
3111
0
        }
3112
5.44k
        buffer = tmp;
3113
5.44k
    }
3114
1.33M
    buffer[len++] = c;
3115
1.33M
    c = *cur++;
3116
1.33M
      }
3117
5.56k
      buffer[len] = 0;
3118
5.56k
  }
3119
3120
10.2k
  if (buffer == NULL) {
3121
4.65k
      ret = xmlStrndup(buf, len);
3122
4.65k
            if (ret == NULL) {
3123
1
                xmlFree(prefix);
3124
1
                return(NULL);
3125
1
            }
3126
5.56k
  } else {
3127
5.56k
      ret = buffer;
3128
5.56k
  }
3129
3130
10.2k
        *prefixOut = prefix;
3131
10.2k
    }
3132
3133
19.5k
    return(ret);
3134
19.5k
}
3135
3136
/************************************************************************
3137
 *                  *
3138
 *      The parser itself       *
3139
 *  Relates to http://www.w3.org/TR/REC-xml       *
3140
 *                  *
3141
 ************************************************************************/
3142
3143
/************************************************************************
3144
 *                  *
3145
 *  Routines to parse Name, NCName and NmToken      *
3146
 *                  *
3147
 ************************************************************************/
3148
3149
/*
3150
 * The two following functions are related to the change of accepted
3151
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3152
 * They correspond to the modified production [4] and the new production [4a]
3153
 * changes in that revision. Also note that the macros used for the
3154
 * productions Letter, Digit, CombiningChar and Extender are not needed
3155
 * anymore.
3156
 * We still keep compatibility to pre-revision5 parsing semantic if the
3157
 * new XML_PARSE_OLD10 option is given to the parser.
3158
 */
3159
static int
3160
3.24M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3161
3.24M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162
        /*
3163
   * Use the new checks of production [4] [4a] and [5] of the
3164
   * Update 5 of XML-1.0
3165
   */
3166
3.24M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3167
3.24M
      (((c >= 'a') && (c <= 'z')) ||
3168
3.24M
       ((c >= 'A') && (c <= 'Z')) ||
3169
3.24M
       (c == '_') || (c == ':') ||
3170
3.24M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3171
3.24M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3172
3.24M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3173
3.24M
       ((c >= 0x370) && (c <= 0x37D)) ||
3174
3.24M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175
3.24M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3176
3.24M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3177
3.24M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178
3.24M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179
3.24M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180
3.24M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181
3.24M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3182
402k
      return(1);
3183
3.24M
    } else {
3184
0
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3185
0
      return(1);
3186
0
    }
3187
2.84M
    return(0);
3188
3.24M
}
3189
3190
static int
3191
23.4M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3192
23.4M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3193
        /*
3194
   * Use the new checks of production [4] [4a] and [5] of the
3195
   * Update 5 of XML-1.0
3196
   */
3197
23.4M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3198
23.4M
      (((c >= 'a') && (c <= 'z')) ||
3199
23.4M
       ((c >= 'A') && (c <= 'Z')) ||
3200
23.4M
       ((c >= '0') && (c <= '9')) || /* !start */
3201
23.4M
       (c == '_') || (c == ':') ||
3202
23.4M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3203
23.4M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3204
23.4M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3205
23.4M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3206
23.4M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3207
23.4M
       ((c >= 0x370) && (c <= 0x37D)) ||
3208
23.4M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3209
23.4M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3210
23.4M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3211
23.4M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3212
23.4M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3213
23.4M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3214
23.4M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3215
23.4M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3216
23.4M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3217
23.0M
       return(1);
3218
23.4M
    } else {
3219
0
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3220
0
            (c == '.') || (c == '-') ||
3221
0
      (c == '_') || (c == ':') ||
3222
0
      (IS_COMBINING(c)) ||
3223
0
      (IS_EXTENDER(c)))
3224
0
      return(1);
3225
0
    }
3226
433k
    return(0);
3227
23.4M
}
3228
3229
static const xmlChar *
3230
470k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3231
470k
    const xmlChar *ret;
3232
470k
    int len = 0, l;
3233
470k
    int c;
3234
470k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3235
0
                    XML_MAX_TEXT_LENGTH :
3236
470k
                    XML_MAX_NAME_LENGTH;
3237
3238
    /*
3239
     * Handler for more complex cases
3240
     */
3241
470k
    c = CUR_CHAR(l);
3242
470k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243
        /*
3244
   * Use the new checks of production [4] [4a] and [5] of the
3245
   * Update 5 of XML-1.0
3246
   */
3247
470k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248
470k
      (!(((c >= 'a') && (c <= 'z')) ||
3249
413k
         ((c >= 'A') && (c <= 'Z')) ||
3250
413k
         (c == '_') || (c == ':') ||
3251
413k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3252
413k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3253
413k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3254
413k
         ((c >= 0x370) && (c <= 0x37D)) ||
3255
413k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256
413k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3257
413k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3258
413k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259
413k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260
413k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261
413k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262
413k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263
251k
      return(NULL);
3264
251k
  }
3265
219k
  len += l;
3266
219k
  NEXTL(l);
3267
219k
  c = CUR_CHAR(l);
3268
10.0M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269
10.0M
         (((c >= 'a') && (c <= 'z')) ||
3270
10.0M
          ((c >= 'A') && (c <= 'Z')) ||
3271
10.0M
          ((c >= '0') && (c <= '9')) || /* !start */
3272
10.0M
          (c == '_') || (c == ':') ||
3273
10.0M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274
10.0M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3275
10.0M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3276
10.0M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3277
10.0M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278
10.0M
          ((c >= 0x370) && (c <= 0x37D)) ||
3279
10.0M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280
10.0M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3281
10.0M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282
10.0M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3283
10.0M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284
10.0M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285
10.0M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286
10.0M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287
10.0M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3288
10.0M
    )) {
3289
9.82M
            if (len <= INT_MAX - l)
3290
9.82M
          len += l;
3291
9.82M
      NEXTL(l);
3292
9.82M
      c = CUR_CHAR(l);
3293
9.82M
  }
3294
219k
    } else {
3295
0
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296
0
      (!IS_LETTER(c) && (c != '_') &&
3297
0
       (c != ':'))) {
3298
0
      return(NULL);
3299
0
  }
3300
0
  len += l;
3301
0
  NEXTL(l);
3302
0
  c = CUR_CHAR(l);
3303
3304
0
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305
0
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306
0
    (c == '.') || (c == '-') ||
3307
0
    (c == '_') || (c == ':') ||
3308
0
    (IS_COMBINING(c)) ||
3309
0
    (IS_EXTENDER(c)))) {
3310
0
            if (len <= INT_MAX - l)
3311
0
          len += l;
3312
0
      NEXTL(l);
3313
0
      c = CUR_CHAR(l);
3314
0
  }
3315
0
    }
3316
219k
    if (len > maxLength) {
3317
36
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318
36
        return(NULL);
3319
36
    }
3320
219k
    if (ctxt->input->cur - ctxt->input->base < len) {
3321
        /*
3322
         * There were a couple of bugs where PERefs led to a change
3323
         * of the buffer. Check the buffer size to avoid passing an invalid
3324
         * pointer to xmlDictLookup.
3325
         */
3326
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3327
0
                    "unexpected change of input buffer");
3328
0
        return (NULL);
3329
0
    }
3330
219k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3331
319
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3332
219k
    else
3333
219k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3334
219k
    if (ret == NULL)
3335
0
        xmlErrMemory(ctxt);
3336
219k
    return(ret);
3337
219k
}
3338
3339
/**
3340
 * xmlParseName:
3341
 * @ctxt:  an XML parser context
3342
 *
3343
 * DEPRECATED: Internal function, don't use.
3344
 *
3345
 * parse an XML name.
3346
 *
3347
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3348
 *                  CombiningChar | Extender
3349
 *
3350
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351
 *
3352
 * [6] Names ::= Name (#x20 Name)*
3353
 *
3354
 * Returns the Name parsed or NULL
3355
 */
3356
3357
const xmlChar *
3358
1.47M
xmlParseName(xmlParserCtxtPtr ctxt) {
3359
1.47M
    const xmlChar *in;
3360
1.47M
    const xmlChar *ret;
3361
1.47M
    size_t count = 0;
3362
1.47M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3363
0
                       XML_MAX_TEXT_LENGTH :
3364
1.47M
                       XML_MAX_NAME_LENGTH;
3365
3366
1.47M
    GROW;
3367
3368
    /*
3369
     * Accelerator for simple ASCII names
3370
     */
3371
1.47M
    in = ctxt->input->cur;
3372
1.47M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
1.47M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3374
1.47M
  (*in == '_') || (*in == ':')) {
3375
1.13M
  in++;
3376
3.26M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377
3.26M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3378
3.26M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3379
3.26M
         (*in == '_') || (*in == '-') ||
3380
3.26M
         (*in == ':') || (*in == '.'))
3381
2.13M
      in++;
3382
1.13M
  if ((*in > 0) && (*in < 0x80)) {
3383
1.00M
      count = in - ctxt->input->cur;
3384
1.00M
            if (count > maxLength) {
3385
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386
0
                return(NULL);
3387
0
            }
3388
1.00M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389
1.00M
      ctxt->input->cur = in;
3390
1.00M
      ctxt->input->col += count;
3391
1.00M
      if (ret == NULL)
3392
3
          xmlErrMemory(ctxt);
3393
1.00M
      return(ret);
3394
1.00M
  }
3395
1.13M
    }
3396
    /* slow path: handler for more complex cases */
3397
470k
    return(xmlParseNameComplex(ctxt));
3398
1.47M
}
3399
3400
static xmlHashedString
3401
3.18M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402
3.18M
    xmlHashedString ret;
3403
3.18M
    int len = 0, l;
3404
3.18M
    int c;
3405
3.18M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406
0
                    XML_MAX_TEXT_LENGTH :
3407
3.18M
                    XML_MAX_NAME_LENGTH;
3408
3.18M
    size_t startPosition = 0;
3409
3410
3.18M
    ret.name = NULL;
3411
3.18M
    ret.hashValue = 0;
3412
3413
    /*
3414
     * Handler for more complex cases
3415
     */
3416
3.18M
    startPosition = CUR_PTR - BASE_PTR;
3417
3.18M
    c = CUR_CHAR(l);
3418
3.18M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
3.18M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
2.89M
  return(ret);
3421
2.89M
    }
3422
3423
21.1M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
21.1M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
20.8M
        if (len <= INT_MAX - l)
3426
20.8M
      len += l;
3427
20.8M
  NEXTL(l);
3428
20.8M
  c = CUR_CHAR(l);
3429
20.8M
    }
3430
287k
    if (len > maxLength) {
3431
238
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432
238
        return(ret);
3433
238
    }
3434
287k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3435
287k
    if (ret.name == NULL)
3436
0
        xmlErrMemory(ctxt);
3437
287k
    return(ret);
3438
287k
}
3439
3440
/**
3441
 * xmlParseNCName:
3442
 * @ctxt:  an XML parser context
3443
 * @len:  length of the string parsed
3444
 *
3445
 * parse an XML name.
3446
 *
3447
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3448
 *                      CombiningChar | Extender
3449
 *
3450
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3451
 *
3452
 * Returns the Name parsed or NULL
3453
 */
3454
3455
static xmlHashedString
3456
9.22M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3457
9.22M
    const xmlChar *in, *e;
3458
9.22M
    xmlHashedString ret;
3459
9.22M
    size_t count = 0;
3460
9.22M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3461
0
                       XML_MAX_TEXT_LENGTH :
3462
9.22M
                       XML_MAX_NAME_LENGTH;
3463
3464
9.22M
    ret.name = NULL;
3465
3466
    /*
3467
     * Accelerator for simple ASCII names
3468
     */
3469
9.22M
    in = ctxt->input->cur;
3470
9.22M
    e = ctxt->input->end;
3471
9.22M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472
9.22M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3473
9.22M
   (*in == '_')) && (in < e)) {
3474
6.18M
  in++;
3475
22.5M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476
22.5M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3477
22.5M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3478
22.5M
          (*in == '_') || (*in == '-') ||
3479
22.5M
          (*in == '.')) && (in < e))
3480
16.3M
      in++;
3481
6.18M
  if (in >= e)
3482
1.84k
      goto complex;
3483
6.17M
  if ((*in > 0) && (*in < 0x80)) {
3484
6.04M
      count = in - ctxt->input->cur;
3485
6.04M
            if (count > maxLength) {
3486
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3487
0
                return(ret);
3488
0
            }
3489
6.04M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3490
6.04M
      ctxt->input->cur = in;
3491
6.04M
      ctxt->input->col += count;
3492
6.04M
      if (ret.name == NULL) {
3493
0
          xmlErrMemory(ctxt);
3494
0
      }
3495
6.04M
      return(ret);
3496
6.04M
  }
3497
6.17M
    }
3498
3.18M
complex:
3499
3.18M
    return(xmlParseNCNameComplex(ctxt));
3500
9.22M
}
3501
3502
/**
3503
 * xmlParseNameAndCompare:
3504
 * @ctxt:  an XML parser context
3505
 *
3506
 * parse an XML name and compares for match
3507
 * (specialized for endtag parsing)
3508
 *
3509
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3510
 * and the name for mismatch
3511
 */
3512
3513
static const xmlChar *
3514
315k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3515
315k
    register const xmlChar *cmp = other;
3516
315k
    register const xmlChar *in;
3517
315k
    const xmlChar *ret;
3518
3519
315k
    GROW;
3520
3521
315k
    in = ctxt->input->cur;
3522
1.47M
    while (*in != 0 && *in == *cmp) {
3523
1.16M
  ++in;
3524
1.16M
  ++cmp;
3525
1.16M
    }
3526
315k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3527
  /* success */
3528
227k
  ctxt->input->col += in - ctxt->input->cur;
3529
227k
  ctxt->input->cur = in;
3530
227k
  return (const xmlChar*) 1;
3531
227k
    }
3532
    /* failure (or end of input buffer), check with full function */
3533
87.7k
    ret = xmlParseName (ctxt);
3534
    /* strings coming from the dictionary direct compare possible */
3535
87.7k
    if (ret == other) {
3536
899
  return (const xmlChar*) 1;
3537
899
    }
3538
86.8k
    return ret;
3539
87.7k
}
3540
3541
/**
3542
 * xmlParseStringName:
3543
 * @ctxt:  an XML parser context
3544
 * @str:  a pointer to the string pointer (IN/OUT)
3545
 *
3546
 * parse an XML name.
3547
 *
3548
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3549
 *                  CombiningChar | Extender
3550
 *
3551
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3552
 *
3553
 * [6] Names ::= Name (#x20 Name)*
3554
 *
3555
 * Returns the Name parsed or NULL. The @str pointer
3556
 * is updated to the current location in the string.
3557
 */
3558
3559
static xmlChar *
3560
92.3k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3561
92.3k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3562
92.3k
    xmlChar *ret;
3563
92.3k
    const xmlChar *cur = *str;
3564
92.3k
    int len = 0, l;
3565
92.3k
    int c;
3566
92.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3567
0
                    XML_MAX_TEXT_LENGTH :
3568
92.3k
                    XML_MAX_NAME_LENGTH;
3569
3570
92.3k
    c = CUR_SCHAR(cur, l);
3571
92.3k
    if (!xmlIsNameStartChar(ctxt, c)) {
3572
3.83k
  return(NULL);
3573
3.83k
    }
3574
3575
88.4k
    COPY_BUF(buf, len, c);
3576
88.4k
    cur += l;
3577
88.4k
    c = CUR_SCHAR(cur, l);
3578
270k
    while (xmlIsNameChar(ctxt, c)) {
3579
182k
  COPY_BUF(buf, len, c);
3580
182k
  cur += l;
3581
182k
  c = CUR_SCHAR(cur, l);
3582
182k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583
      /*
3584
       * Okay someone managed to make a huge name, so he's ready to pay
3585
       * for the processing speed.
3586
       */
3587
1.10k
      xmlChar *buffer;
3588
1.10k
      int max = len * 2;
3589
3590
1.10k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3591
1.10k
      if (buffer == NULL) {
3592
0
          xmlErrMemory(ctxt);
3593
0
    return(NULL);
3594
0
      }
3595
1.10k
      memcpy(buffer, buf, len);
3596
294k
      while (xmlIsNameChar(ctxt, c)) {
3597
293k
    if (len + 10 > max) {
3598
751
        xmlChar *tmp;
3599
3600
751
        max *= 2;
3601
751
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3602
751
        if (tmp == NULL) {
3603
0
      xmlErrMemory(ctxt);
3604
0
      xmlFree(buffer);
3605
0
      return(NULL);
3606
0
        }
3607
751
        buffer = tmp;
3608
751
    }
3609
293k
    COPY_BUF(buffer, len, c);
3610
293k
    cur += l;
3611
293k
    c = CUR_SCHAR(cur, l);
3612
293k
                if (len > maxLength) {
3613
12
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614
12
                    xmlFree(buffer);
3615
12
                    return(NULL);
3616
12
                }
3617
293k
      }
3618
1.08k
      buffer[len] = 0;
3619
1.08k
      *str = cur;
3620
1.08k
      return(buffer);
3621
1.10k
  }
3622
182k
    }
3623
87.3k
    if (len > maxLength) {
3624
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3625
0
        return(NULL);
3626
0
    }
3627
87.3k
    *str = cur;
3628
87.3k
    ret = xmlStrndup(buf, len);
3629
87.3k
    if (ret == NULL)
3630
1
        xmlErrMemory(ctxt);
3631
87.3k
    return(ret);
3632
87.3k
}
3633
3634
/**
3635
 * xmlParseNmtoken:
3636
 * @ctxt:  an XML parser context
3637
 *
3638
 * DEPRECATED: Internal function, don't use.
3639
 *
3640
 * parse an XML Nmtoken.
3641
 *
3642
 * [7] Nmtoken ::= (NameChar)+
3643
 *
3644
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3645
 *
3646
 * Returns the Nmtoken parsed or NULL
3647
 */
3648
3649
xmlChar *
3650
126k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3651
126k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3652
126k
    xmlChar *ret;
3653
126k
    int len = 0, l;
3654
126k
    int c;
3655
126k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3656
0
                    XML_MAX_TEXT_LENGTH :
3657
126k
                    XML_MAX_NAME_LENGTH;
3658
3659
126k
    c = CUR_CHAR(l);
3660
3661
448k
    while (xmlIsNameChar(ctxt, c)) {
3662
322k
  COPY_BUF(buf, len, c);
3663
322k
  NEXTL(l);
3664
322k
  c = CUR_CHAR(l);
3665
322k
  if (len >= XML_MAX_NAMELEN) {
3666
      /*
3667
       * Okay someone managed to make a huge token, so he's ready to pay
3668
       * for the processing speed.
3669
       */
3670
874
      xmlChar *buffer;
3671
874
      int max = len * 2;
3672
3673
874
      buffer = (xmlChar *) xmlMallocAtomic(max);
3674
874
      if (buffer == NULL) {
3675
0
          xmlErrMemory(ctxt);
3676
0
    return(NULL);
3677
0
      }
3678
874
      memcpy(buffer, buf, len);
3679
1.34M
      while (xmlIsNameChar(ctxt, c)) {
3680
1.34M
    if (len + 10 > max) {
3681
1.45k
        xmlChar *tmp;
3682
3683
1.45k
        max *= 2;
3684
1.45k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3685
1.45k
        if (tmp == NULL) {
3686
0
      xmlErrMemory(ctxt);
3687
0
      xmlFree(buffer);
3688
0
      return(NULL);
3689
0
        }
3690
1.45k
        buffer = tmp;
3691
1.45k
    }
3692
1.34M
    COPY_BUF(buffer, len, c);
3693
1.34M
                if (len > maxLength) {
3694
61
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695
61
                    xmlFree(buffer);
3696
61
                    return(NULL);
3697
61
                }
3698
1.34M
    NEXTL(l);
3699
1.34M
    c = CUR_CHAR(l);
3700
1.34M
      }
3701
813
      buffer[len] = 0;
3702
813
      return(buffer);
3703
874
  }
3704
322k
    }
3705
125k
    if (len == 0)
3706
71.2k
        return(NULL);
3707
54.3k
    if (len > maxLength) {
3708
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709
0
        return(NULL);
3710
0
    }
3711
54.3k
    ret = xmlStrndup(buf, len);
3712
54.3k
    if (ret == NULL)
3713
0
        xmlErrMemory(ctxt);
3714
54.3k
    return(ret);
3715
54.3k
}
3716
3717
/**
3718
 * xmlExpandPEsInEntityValue:
3719
 * @ctxt:  parser context
3720
 * @buf:  string buffer
3721
 * @str:  entity value
3722
 * @length:  size of entity value
3723
 * @depth:  nesting depth
3724
 *
3725
 * Validate an entity value and expand parameter entities.
3726
 */
3727
static void
3728
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3729
49.0k
                          const xmlChar *str, int length, int depth) {
3730
49.0k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3731
49.0k
    const xmlChar *end, *chunk;
3732
49.0k
    int c, l;
3733
3734
49.0k
    if (str == NULL)
3735
0
        return;
3736
3737
49.0k
    depth += 1;
3738
49.0k
    if (depth > maxDepth) {
3739
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3740
0
                       "Maximum entity nesting depth exceeded");
3741
0
  return;
3742
0
    }
3743
3744
49.0k
    end = str + length;
3745
49.0k
    chunk = str;
3746
3747
73.6M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3748
73.5M
        c = *str;
3749
3750
73.5M
        if (c >= 0x80) {
3751
65.9M
            l = xmlUTF8MultibyteLen(ctxt, str,
3752
65.9M
                    "invalid character in entity value\n");
3753
65.9M
            if (l == 0) {
3754
5.25M
                if (chunk < str)
3755
91.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3756
5.25M
                xmlSBufAddReplChar(buf);
3757
5.25M
                str += 1;
3758
5.25M
                chunk = str;
3759
60.7M
            } else {
3760
60.7M
                str += l;
3761
60.7M
            }
3762
65.9M
        } else if (c == '&') {
3763
82.1k
            if (str[1] == '#') {
3764
59.4k
                if (chunk < str)
3765
56.1k
                    xmlSBufAddString(buf, chunk, str - chunk);
3766
3767
59.4k
                c = xmlParseStringCharRef(ctxt, &str);
3768
59.4k
                if (c == 0)
3769
19.5k
                    return;
3770
3771
39.9k
                xmlSBufAddChar(buf, c);
3772
3773
39.9k
                chunk = str;
3774
39.9k
            } else {
3775
22.6k
                xmlChar *name;
3776
3777
                /*
3778
                 * General entity references are checked for
3779
                 * syntactic validity.
3780
                 */
3781
22.6k
                str++;
3782
22.6k
                name = xmlParseStringName(ctxt, &str);
3783
3784
22.6k
                if ((name == NULL) || (*str++ != ';')) {
3785
5.94k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3786
5.94k
                            "EntityValue: '&' forbidden except for entities "
3787
5.94k
                            "references\n");
3788
5.94k
                    xmlFree(name);
3789
5.94k
                    return;
3790
5.94k
                }
3791
3792
16.7k
                xmlFree(name);
3793
16.7k
            }
3794
7.52M
        } else if (c == '%') {
3795
6.15k
            xmlEntityPtr ent;
3796
3797
6.15k
            if (chunk < str)
3798
5.97k
                xmlSBufAddString(buf, chunk, str - chunk);
3799
3800
6.15k
            ent = xmlParseStringPEReference(ctxt, &str);
3801
6.15k
            if (ent == NULL)
3802
5.96k
                return;
3803
3804
189
            if (!PARSER_EXTERNAL(ctxt)) {
3805
114
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3806
114
                return;
3807
114
            }
3808
3809
75
            if (ent->content == NULL) {
3810
                /*
3811
                 * Note: external parsed entities will not be loaded,
3812
                 * it is not required for a non-validating parser to
3813
                 * complete external PEReferences coming from the
3814
                 * internal subset
3815
                 */
3816
21
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3817
21
                    ((ctxt->replaceEntities) ||
3818
21
                     (ctxt->validate))) {
3819
21
                    xmlLoadEntityContent(ctxt, ent);
3820
21
                } else {
3821
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3822
0
                                  "not validating will not read content for "
3823
0
                                  "PE entity %s\n", ent->name, NULL);
3824
0
                }
3825
21
            }
3826
3827
            /*
3828
             * TODO: Skip if ent->content is still NULL.
3829
             */
3830
3831
75
            if (xmlParserEntityCheck(ctxt, ent->length))
3832
0
                return;
3833
3834
75
            if (ent->flags & XML_ENT_EXPANDING) {
3835
21
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3836
21
                xmlHaltParser(ctxt);
3837
21
                return;
3838
21
            }
3839
3840
54
            ent->flags |= XML_ENT_EXPANDING;
3841
54
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3842
54
                                      depth);
3843
54
            ent->flags &= ~XML_ENT_EXPANDING;
3844
3845
54
            chunk = str;
3846
7.51M
        } else {
3847
            /* Normal ASCII char */
3848
7.51M
            if (!IS_BYTE_CHAR(c)) {
3849
682k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3850
682k
                        "invalid character in entity value\n");
3851
682k
                if (chunk < str)
3852
19.3k
                    xmlSBufAddString(buf, chunk, str - chunk);
3853
682k
                xmlSBufAddReplChar(buf);
3854
682k
                str += 1;
3855
682k
                chunk = str;
3856
6.83M
            } else {
3857
6.83M
                str += 1;
3858
6.83M
            }
3859
7.51M
        }
3860
73.5M
    }
3861
3862
17.4k
    if (chunk < str)
3863
11.7k
        xmlSBufAddString(buf, chunk, str - chunk);
3864
3865
17.4k
    return;
3866
49.0k
}
3867
3868
/**
3869
 * xmlParseEntityValue:
3870
 * @ctxt:  an XML parser context
3871
 * @orig:  if non-NULL store a copy of the original entity value
3872
 *
3873
 * DEPRECATED: Internal function, don't use.
3874
 *
3875
 * parse a value for ENTITY declarations
3876
 *
3877
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3878
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3879
 *
3880
 * Returns the EntityValue parsed with reference substituted or NULL
3881
 */
3882
xmlChar *
3883
49.5k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3884
49.5k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3885
0
                         XML_MAX_HUGE_LENGTH :
3886
49.5k
                         XML_MAX_TEXT_LENGTH;
3887
49.5k
    xmlSBuf buf;
3888
49.5k
    const xmlChar *start;
3889
49.5k
    int quote, length;
3890
3891
49.5k
    xmlSBufInit(&buf, maxLength);
3892
3893
49.5k
    GROW;
3894
3895
49.5k
    quote = CUR;
3896
49.5k
    if ((quote != '"') && (quote != '\'')) {
3897
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898
0
  return(NULL);
3899
0
    }
3900
49.5k
    CUR_PTR++;
3901
3902
49.5k
    length = 0;
3903
3904
    /*
3905
     * Copy raw content of the entity into a buffer
3906
     */
3907
225M
    while (1) {
3908
225M
        int c;
3909
3910
225M
        if (PARSER_STOPPED(ctxt))
3911
2
            goto error;
3912
3913
225M
        if (CUR_PTR >= ctxt->input->end) {
3914
240
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3915
240
            goto error;
3916
240
        }
3917
3918
225M
        c = CUR;
3919
3920
225M
        if (c == 0) {
3921
380
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3922
380
                    "invalid character in entity value\n");
3923
380
            goto error;
3924
380
        }
3925
225M
        if (c == quote)
3926
48.9k
            break;
3927
225M
        NEXTL(1);
3928
225M
        length += 1;
3929
3930
        /*
3931
         * TODO: Check growth threshold
3932
         */
3933
225M
        if (ctxt->input->end - CUR_PTR < 10)
3934
12.2k
            GROW;
3935
225M
    }
3936
3937
48.9k
    start = CUR_PTR - length;
3938
3939
48.9k
    if (orig != NULL) {
3940
48.9k
        *orig = xmlStrndup(start, length);
3941
48.9k
        if (*orig == NULL)
3942
2
            xmlErrMemory(ctxt);
3943
48.9k
    }
3944
3945
48.9k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3946
3947
48.9k
    NEXTL(1);
3948
3949
48.9k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3950
3951
622
error:
3952
622
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3953
622
    return(NULL);
3954
49.5k
}
3955
3956
/**
3957
 * xmlCheckEntityInAttValue:
3958
 * @ctxt:  parser context
3959
 * @pent:  entity
3960
 * @depth:  nesting depth
3961
 *
3962
 * Check an entity reference in an attribute value for validity
3963
 * without expanding it.
3964
 */
3965
static void
3966
426
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3967
426
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3968
426
    const xmlChar *str;
3969
426
    unsigned long expandedSize = pent->length;
3970
426
    int c, flags;
3971
3972
426
    depth += 1;
3973
426
    if (depth > maxDepth) {
3974
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3975
0
                       "Maximum entity nesting depth exceeded");
3976
0
  return;
3977
0
    }
3978
3979
426
    if (pent->flags & XML_ENT_EXPANDING) {
3980
11
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3981
11
        xmlHaltParser(ctxt);
3982
11
        return;
3983
11
    }
3984
3985
    /*
3986
     * If we're parsing a default attribute value in DTD content,
3987
     * the entity might reference other entities which weren't
3988
     * defined yet, so the check isn't reliable.
3989
     */
3990
415
    if (ctxt->inSubset == 0)
3991
405
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3992
10
    else
3993
10
        flags = XML_ENT_VALIDATED;
3994
3995
415
    str = pent->content;
3996
415
    if (str == NULL)
3997
0
        goto done;
3998
3999
    /*
4000
     * Note that entity values are already validated. We only check
4001
     * for illegal less-than signs and compute the expanded size
4002
     * of the entity. No special handling for multi-byte characters
4003
     * is needed.
4004
     */
4005
10.7M
    while (!PARSER_STOPPED(ctxt)) {
4006
10.7M
        c = *str;
4007
4008
10.7M
  if (c != '&') {
4009
10.7M
            if (c == 0)
4010
379
                break;
4011
4012
10.7M
            if (c == '<')
4013
32.6k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4014
32.6k
                        "'<' in entity '%s' is not allowed in attributes "
4015
32.6k
                        "values\n", pent->name);
4016
4017
10.7M
            str += 1;
4018
10.7M
        } else if (str[1] == '#') {
4019
208
            int val;
4020
4021
208
      val = xmlParseStringCharRef(ctxt, &str);
4022
208
      if (val == 0) {
4023
0
                pent->content[0] = 0;
4024
0
                break;
4025
0
            }
4026
8.23k
  } else {
4027
8.23k
            xmlChar *name;
4028
8.23k
            xmlEntityPtr ent;
4029
4030
8.23k
      name = xmlParseStringEntityRef(ctxt, &str);
4031
8.23k
      if (name == NULL) {
4032
25
                pent->content[0] = 0;
4033
25
                break;
4034
25
            }
4035
4036
8.20k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4037
8.20k
            xmlFree(name);
4038
4039
8.20k
            if ((ent != NULL) &&
4040
8.20k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4041
4.35k
                if ((ent->flags & flags) != flags) {
4042
64
                    pent->flags |= XML_ENT_EXPANDING;
4043
64
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4044
64
                    pent->flags &= ~XML_ENT_EXPANDING;
4045
64
                }
4046
4047
4.35k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4048
4.35k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4049
4.35k
            }
4050
8.20k
        }
4051
10.7M
    }
4052
4053
415
done:
4054
415
    if (ctxt->inSubset == 0)
4055
405
        pent->expandedSize = expandedSize;
4056
4057
415
    pent->flags |= flags;
4058
415
}
4059
4060
/**
4061
 * xmlExpandEntityInAttValue:
4062
 * @ctxt:  parser context
4063
 * @buf:  string buffer
4064
 * @str:  entity or attribute value
4065
 * @pent:  entity for entity value, NULL for attribute values
4066
 * @normalize:  whether to collapse whitespace
4067
 * @inSpace:  whitespace state
4068
 * @depth:  nesting depth
4069
 * @check:  whether to check for amplification
4070
 *
4071
 * Expand general entity references in an entity or attribute value.
4072
 * Perform attribute value normalization.
4073
 */
4074
static void
4075
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4076
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4077
266k
                          int *inSpace, int depth, int check) {
4078
266k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4079
266k
    int c, chunkSize;
4080
4081
266k
    if (str == NULL)
4082
0
        return;
4083
4084
266k
    depth += 1;
4085
266k
    if (depth > maxDepth) {
4086
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4087
0
                       "Maximum entity nesting depth exceeded");
4088
0
  return;
4089
0
    }
4090
4091
266k
    if (pent != NULL) {
4092
266k
        if (pent->flags & XML_ENT_EXPANDING) {
4093
161
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4094
161
            xmlHaltParser(ctxt);
4095
161
            return;
4096
161
        }
4097
4098
266k
        if (check) {
4099
266k
            if (xmlParserEntityCheck(ctxt, pent->length))
4100
52
                return;
4101
266k
        }
4102
266k
    }
4103
4104
266k
    chunkSize = 0;
4105
4106
    /*
4107
     * Note that entity values are already validated. No special
4108
     * handling for multi-byte characters is needed.
4109
     */
4110
310M
    while (!PARSER_STOPPED(ctxt)) {
4111
310M
        c = *str;
4112
4113
310M
  if (c != '&') {
4114
309M
            if (c == 0)
4115
257k
                break;
4116
4117
            /*
4118
             * If this function is called without an entity, it is used to
4119
             * expand entities in an attribute content where less-than was
4120
             * already unscaped and is allowed.
4121
             */
4122
309M
            if ((pent != NULL) && (c == '<')) {
4123
8.16k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4124
8.16k
                        "'<' in entity '%s' is not allowed in attributes "
4125
8.16k
                        "values\n", pent->name);
4126
8.16k
                break;
4127
8.16k
            }
4128
4129
309M
            if (c <= 0x20) {
4130
7.69M
                if ((normalize) && (*inSpace)) {
4131
                    /* Skip char */
4132
0
                    if (chunkSize > 0) {
4133
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4134
0
                        chunkSize = 0;
4135
0
                    }
4136
7.69M
                } else if (c < 0x20) {
4137
7.45M
                    if (chunkSize > 0) {
4138
493k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4139
493k
                        chunkSize = 0;
4140
493k
                    }
4141
4142
7.45M
                    xmlSBufAddCString(buf, " ", 1);
4143
7.45M
                } else {
4144
244k
                    chunkSize += 1;
4145
244k
                }
4146
4147
7.69M
                *inSpace = 1;
4148
301M
            } else {
4149
301M
                chunkSize += 1;
4150
301M
                *inSpace = 0;
4151
301M
            }
4152
4153
309M
            str += 1;
4154
309M
        } else if (str[1] == '#') {
4155
248k
            int val;
4156
4157
248k
            if (chunkSize > 0) {
4158
14.3k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4159
14.3k
                chunkSize = 0;
4160
14.3k
            }
4161
4162
248k
      val = xmlParseStringCharRef(ctxt, &str);
4163
248k
      if (val == 0) {
4164
60
                if (pent != NULL)
4165
60
                    pent->content[0] = 0;
4166
60
                break;
4167
60
            }
4168
4169
248k
            if (val == ' ') {
4170
3.60k
                if ((!normalize) || (!*inSpace))
4171
3.60k
                    xmlSBufAddCString(buf, " ", 1);
4172
3.60k
                *inSpace = 1;
4173
245k
            } else {
4174
245k
                xmlSBufAddChar(buf, val);
4175
245k
                *inSpace = 0;
4176
245k
            }
4177
248k
  } else {
4178
55.2k
            xmlChar *name;
4179
55.2k
            xmlEntityPtr ent;
4180
4181
55.2k
            if (chunkSize > 0) {
4182
43.5k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4183
43.5k
                chunkSize = 0;
4184
43.5k
            }
4185
4186
55.2k
      name = xmlParseStringEntityRef(ctxt, &str);
4187
55.2k
            if (name == NULL) {
4188
196
                if (pent != NULL)
4189
196
                    pent->content[0] = 0;
4190
196
                break;
4191
196
            }
4192
4193
55.0k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4194
55.0k
            xmlFree(name);
4195
4196
55.0k
      if ((ent != NULL) &&
4197
55.0k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4198
17.9k
    if (ent->content == NULL) {
4199
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4200
0
          "predefined entity has no content\n");
4201
0
                    break;
4202
0
                }
4203
4204
17.9k
                xmlSBufAddString(buf, ent->content, ent->length);
4205
4206
17.9k
                *inSpace = 0;
4207
37.1k
      } else if ((ent != NULL) && (ent->content != NULL)) {
4208
23.1k
                if (pent != NULL)
4209
23.1k
                    pent->flags |= XML_ENT_EXPANDING;
4210
23.1k
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4211
23.1k
                                          normalize, inSpace, depth, check);
4212
23.1k
                if (pent != NULL)
4213
23.1k
                    pent->flags &= ~XML_ENT_EXPANDING;
4214
23.1k
      }
4215
55.0k
        }
4216
310M
    }
4217
4218
266k
    if (chunkSize > 0)
4219
39.1k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4220
4221
266k
    return;
4222
266k
}
4223
4224
/**
4225
 * xmlExpandEntitiesInAttValue:
4226
 * @ctxt:  parser context
4227
 * @str:  entity or attribute value
4228
 * @normalize:  whether to collapse whitespace
4229
 *
4230
 * Expand general entity references in an entity or attribute value.
4231
 * Perform attribute value normalization.
4232
 *
4233
 * Returns the expanded attribtue value.
4234
 */
4235
xmlChar *
4236
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4237
0
                            int normalize) {
4238
0
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4239
0
                         XML_MAX_HUGE_LENGTH :
4240
0
                         XML_MAX_TEXT_LENGTH;
4241
0
    xmlSBuf buf;
4242
0
    int inSpace = 1;
4243
4244
0
    xmlSBufInit(&buf, maxLength);
4245
4246
0
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4247
0
                              ctxt->inputNr, /* check */ 0);
4248
4249
0
    if ((normalize) && (inSpace) && (buf.size > 0))
4250
0
        buf.size--;
4251
4252
0
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4253
0
}
4254
4255
/**
4256
 * xmlParseAttValueInternal:
4257
 * @ctxt:  an XML parser context
4258
 * @len:  attribute len result
4259
 * @alloc:  whether the attribute was reallocated as a new string
4260
 * @normalize:  if 1 then further non-CDATA normalization must be done
4261
 *
4262
 * parse a value for an attribute.
4263
 * NOTE: if no normalization is needed, the routine will return pointers
4264
 *       directly from the data buffer.
4265
 *
4266
 * 3.3.3 Attribute-Value Normalization:
4267
 * Before the value of an attribute is passed to the application or
4268
 * checked for validity, the XML processor must normalize it as follows:
4269
 * - a character reference is processed by appending the referenced
4270
 *   character to the attribute value
4271
 * - an entity reference is processed by recursively processing the
4272
 *   replacement text of the entity
4273
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274
 *   appending #x20 to the normalized value, except that only a single
4275
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4276
 *   parsed entity or the literal entity value of an internal parsed entity
4277
 * - other characters are processed by appending them to the normalized value
4278
 * If the declared value is not CDATA, then the XML processor must further
4279
 * process the normalized attribute value by discarding any leading and
4280
 * trailing space (#x20) characters, and by replacing sequences of space
4281
 * (#x20) characters by a single space (#x20) character.
4282
 * All attributes for which no declaration has been read should be treated
4283
 * by a non-validating parser as if declared CDATA.
4284
 *
4285
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4286
 *     caller if it was copied, this can be detected by val[*len] == 0.
4287
 */
4288
static xmlChar *
4289
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4290
1.21M
                         int normalize) {
4291
1.21M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4292
0
                         XML_MAX_HUGE_LENGTH :
4293
1.21M
                         XML_MAX_TEXT_LENGTH;
4294
1.21M
    xmlSBuf buf;
4295
1.21M
    xmlChar *ret;
4296
1.21M
    int c, l, quote, flags, chunkSize;
4297
1.21M
    int inSpace = 1;
4298
4299
1.21M
    xmlSBufInit(&buf, maxLength);
4300
4301
1.21M
    GROW;
4302
4303
1.21M
    quote = CUR;
4304
1.21M
    if ((quote != '"') && (quote != '\'')) {
4305
51.4k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306
51.4k
  return(NULL);
4307
51.4k
    }
4308
1.16M
    NEXTL(1);
4309
4310
1.16M
    if (ctxt->inSubset == 0)
4311
1.14M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312
25.4k
    else
4313
25.4k
        flags = XML_ENT_VALIDATED;
4314
4315
1.16M
    inSpace = 1;
4316
1.16M
    chunkSize = 0;
4317
4318
225M
    while (1) {
4319
225M
        if (PARSER_STOPPED(ctxt))
4320
250
            goto error;
4321
4322
225M
        if (CUR_PTR >= ctxt->input->end) {
4323
1.77k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324
1.77k
                           "AttValue: ' expected\n");
4325
1.77k
            goto error;
4326
1.77k
        }
4327
4328
        /*
4329
         * TODO: Check growth threshold
4330
         */
4331
225M
        if (ctxt->input->end - CUR_PTR < 10)
4332
56.6k
            GROW;
4333
4334
225M
        c = CUR;
4335
4336
225M
        if (c >= 0x80) {
4337
153M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338
153M
                    "invalid character in attribute value\n");
4339
153M
            if (l == 0) {
4340
26.2M
                if (chunkSize > 0) {
4341
362k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342
362k
                    chunkSize = 0;
4343
362k
                }
4344
26.2M
                xmlSBufAddReplChar(&buf);
4345
26.2M
                NEXTL(1);
4346
127M
            } else {
4347
127M
                chunkSize += l;
4348
127M
                NEXTL(l);
4349
127M
            }
4350
4351
153M
            inSpace = 0;
4352
153M
        } else if (c != '&') {
4353
71.4M
            if (c > 0x20) {
4354
20.2M
                if (c == quote)
4355
1.16M
                    break;
4356
4357
19.1M
                if (c == '<')
4358
462k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359
4360
19.1M
                chunkSize += 1;
4361
19.1M
                inSpace = 0;
4362
51.1M
            } else if (!IS_BYTE_CHAR(c)) {
4363
29.5M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364
29.5M
                        "invalid character in attribute value\n");
4365
29.5M
                if (chunkSize > 0) {
4366
40.8k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367
40.8k
                    chunkSize = 0;
4368
40.8k
                }
4369
29.5M
                xmlSBufAddReplChar(&buf);
4370
29.5M
                inSpace = 0;
4371
29.5M
            } else {
4372
                /* Whitespace */
4373
21.5M
                if ((normalize) && (inSpace)) {
4374
                    /* Skip char */
4375
1.70M
                    if (chunkSize > 0) {
4376
136k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377
136k
                        chunkSize = 0;
4378
136k
                    }
4379
19.8M
                } else if (c < 0x20) {
4380
                    /* Convert to space */
4381
18.6M
                    if (chunkSize > 0) {
4382
101k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383
101k
                        chunkSize = 0;
4384
101k
                    }
4385
4386
18.6M
                    xmlSBufAddCString(&buf, " ", 1);
4387
18.6M
                } else {
4388
1.21M
                    chunkSize += 1;
4389
1.21M
                }
4390
4391
21.5M
                inSpace = 1;
4392
4393
21.5M
                if ((c == 0xD) && (NXT(1) == 0xA))
4394
2.06k
                    CUR_PTR++;
4395
21.5M
            }
4396
4397
70.3M
            NEXTL(1);
4398
70.3M
        } else if (NXT(1) == '#') {
4399
27.5k
            int val;
4400
4401
27.5k
            if (chunkSize > 0) {
4402
12.9k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403
12.9k
                chunkSize = 0;
4404
12.9k
            }
4405
4406
27.5k
            val = xmlParseCharRef(ctxt);
4407
27.5k
            if (val == 0)
4408
2.76k
                goto error;
4409
4410
24.7k
            if ((val == '&') && (!ctxt->replaceEntities)) {
4411
                /*
4412
                 * The reparsing will be done in xmlStringGetNodeList()
4413
                 * called by the attribute() function in SAX.c
4414
                 */
4415
2.99k
                xmlSBufAddCString(&buf, "&#38;", 5);
4416
2.99k
                inSpace = 0;
4417
21.7k
            } else if (val == ' ') {
4418
4.25k
                if ((!normalize) || (!inSpace))
4419
4.19k
                    xmlSBufAddCString(&buf, " ", 1);
4420
4.25k
                inSpace = 1;
4421
17.5k
            } else {
4422
17.5k
                xmlSBufAddChar(&buf, val);
4423
17.5k
                inSpace = 0;
4424
17.5k
            }
4425
620k
        } else {
4426
620k
            const xmlChar *name;
4427
620k
            xmlEntityPtr ent;
4428
4429
620k
            if (chunkSize > 0) {
4430
178k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431
178k
                chunkSize = 0;
4432
178k
            }
4433
4434
620k
            name = xmlParseEntityRefInternal(ctxt);
4435
620k
            if (name == NULL) {
4436
                /*
4437
                 * Probably a literal '&' which wasn't escaped.
4438
                 * TODO: Handle gracefully in recovery mode.
4439
                 */
4440
247k
                continue;
4441
247k
            }
4442
4443
373k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444
373k
            if (ent == NULL)
4445
77.7k
                continue;
4446
4447
295k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448
21.5k
                if ((ent->content[0] == '&') && (!ctxt->replaceEntities))
4449
1.12k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4450
20.3k
                else
4451
20.3k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4452
21.5k
                inSpace = 0;
4453
273k
            } else if (ctxt->replaceEntities) {
4454
243k
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455
243k
                                          normalize, &inSpace, ctxt->inputNr,
4456
243k
                                          /* check */ 1);
4457
243k
            } else {
4458
30.8k
                if ((ent->flags & flags) != flags)
4459
362
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460
4461
30.8k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462
42
                    ent->content[0] = 0;
4463
42
                    goto error;
4464
42
                }
4465
4466
                /*
4467
                 * Just output the reference
4468
                 */
4469
30.7k
                xmlSBufAddCString(&buf, "&", 1);
4470
30.7k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471
30.7k
                xmlSBufAddCString(&buf, ";", 1);
4472
4473
30.7k
                inSpace = 0;
4474
30.7k
            }
4475
295k
  }
4476
225M
    }
4477
4478
1.16M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4479
1.04M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4480
4481
1.04M
        if (attlen != NULL)
4482
1.04M
            *attlen = chunkSize;
4483
1.04M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4484
318
            *attlen -= 1;
4485
1.04M
        *alloc = 0;
4486
4487
        /* Report potential error */
4488
1.04M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489
1.04M
    } else {
4490
115k
        if (chunkSize > 0)
4491
74.6k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492
4493
115k
        if ((normalize) && (inSpace) && (buf.size > 0))
4494
678
            buf.size--;
4495
4496
115k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497
4498
115k
        if (ret != NULL) {
4499
115k
            if (attlen != NULL)
4500
90.1k
                *attlen = buf.size;
4501
115k
            if (alloc != NULL)
4502
90.1k
                *alloc = 1;
4503
115k
        }
4504
115k
    }
4505
4506
1.16M
    NEXTL(1);
4507
4508
1.16M
    return(ret);
4509
4510
4.83k
error:
4511
4.83k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512
4.83k
    return(NULL);
4513
1.16M
}
4514
4515
/**
4516
 * xmlParseAttValue:
4517
 * @ctxt:  an XML parser context
4518
 *
4519
 * DEPRECATED: Internal function, don't use.
4520
 *
4521
 * parse a value for an attribute
4522
 * Note: the parser won't do substitution of entities here, this
4523
 * will be handled later in xmlStringGetNodeList
4524
 *
4525
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526
 *                   "'" ([^<&'] | Reference)* "'"
4527
 *
4528
 * 3.3.3 Attribute-Value Normalization:
4529
 * Before the value of an attribute is passed to the application or
4530
 * checked for validity, the XML processor must normalize it as follows:
4531
 * - a character reference is processed by appending the referenced
4532
 *   character to the attribute value
4533
 * - an entity reference is processed by recursively processing the
4534
 *   replacement text of the entity
4535
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536
 *   appending #x20 to the normalized value, except that only a single
4537
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4538
 *   parsed entity or the literal entity value of an internal parsed entity
4539
 * - other characters are processed by appending them to the normalized value
4540
 * If the declared value is not CDATA, then the XML processor must further
4541
 * process the normalized attribute value by discarding any leading and
4542
 * trailing space (#x20) characters, and by replacing sequences of space
4543
 * (#x20) characters by a single space (#x20) character.
4544
 * All attributes for which no declaration has been read should be treated
4545
 * by a non-validating parser as if declared CDATA.
4546
 *
4547
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548
 */
4549
4550
4551
xmlChar *
4552
26.8k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4553
26.8k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4554
26.8k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4555
26.8k
}
4556
4557
/**
4558
 * xmlParseSystemLiteral:
4559
 * @ctxt:  an XML parser context
4560
 *
4561
 * DEPRECATED: Internal function, don't use.
4562
 *
4563
 * parse an XML Literal
4564
 *
4565
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566
 *
4567
 * Returns the SystemLiteral parsed or NULL
4568
 */
4569
4570
xmlChar *
4571
10.9k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4572
10.9k
    xmlChar *buf = NULL;
4573
10.9k
    int len = 0;
4574
10.9k
    int size = XML_PARSER_BUFFER_SIZE;
4575
10.9k
    int cur, l;
4576
10.9k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577
0
                    XML_MAX_TEXT_LENGTH :
4578
10.9k
                    XML_MAX_NAME_LENGTH;
4579
10.9k
    xmlChar stop;
4580
4581
10.9k
    if (RAW == '"') {
4582
7.79k
        NEXT;
4583
7.79k
  stop = '"';
4584
7.79k
    } else if (RAW == '\'') {
4585
1.70k
        NEXT;
4586
1.70k
  stop = '\'';
4587
1.70k
    } else {
4588
1.45k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589
1.45k
  return(NULL);
4590
1.45k
    }
4591
4592
9.49k
    buf = (xmlChar *) xmlMallocAtomic(size);
4593
9.49k
    if (buf == NULL) {
4594
2
        xmlErrMemory(ctxt);
4595
2
  return(NULL);
4596
2
    }
4597
9.49k
    cur = CUR_CHAR(l);
4598
1.64M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4599
1.63M
  if (len + 5 >= size) {
4600
5.35k
      xmlChar *tmp;
4601
4602
5.35k
      size *= 2;
4603
5.35k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4604
5.35k
      if (tmp == NULL) {
4605
1
          xmlFree(buf);
4606
1
    xmlErrMemory(ctxt);
4607
1
    return(NULL);
4608
1
      }
4609
5.35k
      buf = tmp;
4610
5.35k
  }
4611
1.63M
  COPY_BUF(buf, len, cur);
4612
1.63M
        if (len > maxLength) {
4613
8
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614
8
            xmlFree(buf);
4615
8
            return(NULL);
4616
8
        }
4617
1.63M
  NEXTL(l);
4618
1.63M
  cur = CUR_CHAR(l);
4619
1.63M
    }
4620
9.48k
    buf[len] = 0;
4621
9.48k
    if (!IS_CHAR(cur)) {
4622
263
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623
9.21k
    } else {
4624
9.21k
  NEXT;
4625
9.21k
    }
4626
9.48k
    return(buf);
4627
9.49k
}
4628
4629
/**
4630
 * xmlParsePubidLiteral:
4631
 * @ctxt:  an XML parser context
4632
 *
4633
 * DEPRECATED: Internal function, don't use.
4634
 *
4635
 * parse an XML public literal
4636
 *
4637
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638
 *
4639
 * Returns the PubidLiteral parsed or NULL.
4640
 */
4641
4642
xmlChar *
4643
6.57k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4644
6.57k
    xmlChar *buf = NULL;
4645
6.57k
    int len = 0;
4646
6.57k
    int size = XML_PARSER_BUFFER_SIZE;
4647
6.57k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648
0
                    XML_MAX_TEXT_LENGTH :
4649
6.57k
                    XML_MAX_NAME_LENGTH;
4650
6.57k
    xmlChar cur;
4651
6.57k
    xmlChar stop;
4652
4653
6.57k
    if (RAW == '"') {
4654
5.62k
        NEXT;
4655
5.62k
  stop = '"';
4656
5.62k
    } else if (RAW == '\'') {
4657
882
        NEXT;
4658
882
  stop = '\'';
4659
882
    } else {
4660
76
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661
76
  return(NULL);
4662
76
    }
4663
6.50k
    buf = (xmlChar *) xmlMallocAtomic(size);
4664
6.50k
    if (buf == NULL) {
4665
2
  xmlErrMemory(ctxt);
4666
2
  return(NULL);
4667
2
    }
4668
6.50k
    cur = CUR;
4669
670k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670
670k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4671
664k
  if (len + 1 >= size) {
4672
2.01k
      xmlChar *tmp;
4673
4674
2.01k
      size *= 2;
4675
2.01k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4676
2.01k
      if (tmp == NULL) {
4677
0
    xmlErrMemory(ctxt);
4678
0
    xmlFree(buf);
4679
0
    return(NULL);
4680
0
      }
4681
2.01k
      buf = tmp;
4682
2.01k
  }
4683
664k
  buf[len++] = cur;
4684
664k
        if (len > maxLength) {
4685
7
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686
7
            xmlFree(buf);
4687
7
            return(NULL);
4688
7
        }
4689
664k
  NEXT;
4690
664k
  cur = CUR;
4691
664k
    }
4692
6.49k
    buf[len] = 0;
4693
6.49k
    if (cur != stop) {
4694
395
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695
6.09k
    } else {
4696
6.09k
  NEXTL(1);
4697
6.09k
    }
4698
6.49k
    return(buf);
4699
6.50k
}
4700
4701
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702
4703
/*
 * Lookup table driving the inner loop of the fast character data scanner:
 * the scanner keeps advancing while the entry for the current byte is
 * non-zero. Such an entry holds the byte value itself. Entries are zero
 * for control characters other than TAB (LF and CR are dealt with
 * separately by the scanner), for '&', '<' and ']', and for every byte
 * above 0x7F.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: only TAB (0x09) passes */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x3F: '&' (0x26) and '<' (0x3C) stop the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x5F: ']' (0x5D) stops the scan */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x7F: everything passes */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ASCII bytes never pass */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
4740
4741
/**
4742
 * xmlParseCharDataInternal:
4743
 * @ctxt:  an XML parser context
4744
 * @partial:  buffer may contain partial UTF-8 sequences
4745
 *
4746
 * Parse character data. Always makes progress if the first char isn't
4747
 * '<' or '&'.
4748
 *
4749
 * The right angle bracket (>) may be represented using the string "&gt;",
4750
 * and must, for compatibility, be escaped using "&gt;" or a character
4751
 * reference when it appears in the string "]]>" in content, when that
4752
 * string is not marking the end of a CDATA section.
4753
 *
4754
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4755
 */
4756
static void
4757
10.0M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4758
10.0M
    const xmlChar *in;
4759
10.0M
    int nbchar = 0;
4760
10.0M
    int line = ctxt->input->line;
4761
10.0M
    int col = ctxt->input->col;
4762
10.0M
    int ccol;
4763
4764
10.0M
    GROW;
4765
    /*
4766
     * Accelerated common case where input don't need to be
4767
     * modified before passing it to the handler.
4768
     */
4769
10.0M
    in = ctxt->input->cur;
4770
10.3M
    do {
4771
10.6M
get_more_space:
4772
12.2M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4773
10.6M
        if (*in == 0xA) {
4774
5.77M
            do {
4775
5.77M
                ctxt->input->line++; ctxt->input->col = 1;
4776
5.77M
                in++;
4777
5.77M
            } while (*in == 0xA);
4778
355k
            goto get_more_space;
4779
355k
        }
4780
10.3M
        if (*in == '<') {
4781
203k
            nbchar = in - ctxt->input->cur;
4782
203k
            if (nbchar > 0) {
4783
203k
                const xmlChar *tmp = ctxt->input->cur;
4784
203k
                ctxt->input->cur = in;
4785
4786
203k
                if ((ctxt->sax != NULL) &&
4787
203k
                    (ctxt->disableSAX == 0) &&
4788
203k
                    (ctxt->sax->ignorableWhitespace !=
4789
187k
                     ctxt->sax->characters)) {
4790
0
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791
0
                        if (ctxt->sax->ignorableWhitespace != NULL)
4792
0
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4793
0
                                                   tmp, nbchar);
4794
0
                    } else {
4795
0
                        if (ctxt->sax->characters != NULL)
4796
0
                            ctxt->sax->characters(ctxt->userData,
4797
0
                                                  tmp, nbchar);
4798
0
                        if (*ctxt->space == -1)
4799
0
                            *ctxt->space = -2;
4800
0
                    }
4801
203k
                } else if ((ctxt->sax != NULL) &&
4802
203k
                           (ctxt->disableSAX == 0) &&
4803
203k
                           (ctxt->sax->characters != NULL)) {
4804
187k
                    ctxt->sax->characters(ctxt->userData,
4805
187k
                                          tmp, nbchar);
4806
187k
                }
4807
203k
            }
4808
203k
            return;
4809
203k
        }
4810
4811
11.3M
get_more:
4812
11.3M
        ccol = ctxt->input->col;
4813
50.8M
        while (test_char_data[*in]) {
4814
39.4M
            in++;
4815
39.4M
            ccol++;
4816
39.4M
        }
4817
11.3M
        ctxt->input->col = ccol;
4818
11.3M
        if (*in == 0xA) {
4819
4.57M
            do {
4820
4.57M
                ctxt->input->line++; ctxt->input->col = 1;
4821
4.57M
                in++;
4822
4.57M
            } while (*in == 0xA);
4823
212k
            goto get_more;
4824
212k
        }
4825
11.1M
        if (*in == ']') {
4826
1.04M
            if ((in[1] == ']') && (in[2] == '>')) {
4827
2.97k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828
2.97k
                ctxt->input->cur = in + 1;
4829
2.97k
                return;
4830
2.97k
            }
4831
1.04M
            in++;
4832
1.04M
            ctxt->input->col++;
4833
1.04M
            goto get_more;
4834
1.04M
        }
4835
10.0M
        nbchar = in - ctxt->input->cur;
4836
10.0M
        if (nbchar > 0) {
4837
2.62M
            if ((ctxt->sax != NULL) &&
4838
2.62M
                (ctxt->disableSAX == 0) &&
4839
2.62M
                (ctxt->sax->ignorableWhitespace !=
4840
2.46M
                 ctxt->sax->characters) &&
4841
2.62M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4842
0
                const xmlChar *tmp = ctxt->input->cur;
4843
0
                ctxt->input->cur = in;
4844
4845
0
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846
0
                    if (ctxt->sax->ignorableWhitespace != NULL)
4847
0
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4848
0
                                                       tmp, nbchar);
4849
0
                } else {
4850
0
                    if (ctxt->sax->characters != NULL)
4851
0
                        ctxt->sax->characters(ctxt->userData,
4852
0
                                              tmp, nbchar);
4853
0
                    if (*ctxt->space == -1)
4854
0
                        *ctxt->space = -2;
4855
0
                }
4856
0
                line = ctxt->input->line;
4857
0
                col = ctxt->input->col;
4858
2.62M
            } else if ((ctxt->sax != NULL) &&
4859
2.62M
                       (ctxt->disableSAX == 0)) {
4860
2.46M
                if (ctxt->sax->characters != NULL)
4861
2.46M
                    ctxt->sax->characters(ctxt->userData,
4862
2.46M
                                          ctxt->input->cur, nbchar);
4863
2.46M
                line = ctxt->input->line;
4864
2.46M
                col = ctxt->input->col;
4865
2.46M
            }
4866
2.62M
        }
4867
10.0M
        ctxt->input->cur = in;
4868
10.0M
        if (*in == 0xD) {
4869
515k
            in++;
4870
515k
            if (*in == 0xA) {
4871
254k
                ctxt->input->cur = in;
4872
254k
                in++;
4873
254k
                ctxt->input->line++; ctxt->input->col = 1;
4874
254k
                continue; /* while */
4875
254k
            }
4876
261k
            in--;
4877
261k
        }
4878
9.84M
        if (*in == '<') {
4879
1.11M
            return;
4880
1.11M
        }
4881
8.72M
        if (*in == '&') {
4882
302k
            return;
4883
302k
        }
4884
8.41M
        SHRINK;
4885
8.41M
        GROW;
4886
8.41M
        in = ctxt->input->cur;
4887
8.67M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888
8.67M
             (*in == 0x09) || (*in == 0x0a));
4889
8.41M
    ctxt->input->line = line;
4890
8.41M
    ctxt->input->col = col;
4891
8.41M
    xmlParseCharDataComplex(ctxt, partial);
4892
8.41M
}
4893
4894
/**
4895
 * xmlParseCharDataComplex:
4896
 * @ctxt:  an XML parser context
4897
 * @cdata:  int indicating whether we are within a CDATA section
4898
 *
4899
 * Always makes progress if the first char isn't '<' or '&'.
4900
 *
4901
 * parse a CharData section.this is the fallback function
4902
 * of xmlParseCharData() when the parsing requires handling
4903
 * of non-ASCII characters.
4904
 */
4905
static void
4906
8.41M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4907
8.41M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908
8.41M
    int nbchar = 0;
4909
8.41M
    int cur, l;
4910
4911
8.41M
    cur = CUR_CHAR(l);
4912
109M
    while ((cur != '<') && /* checked */
4913
109M
           (cur != '&') &&
4914
109M
     (IS_CHAR(cur))) {
4915
100M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916
4.31k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917
4.31k
  }
4918
100M
  COPY_BUF(buf, nbchar, cur);
4919
  /* move current position before possible calling of ctxt->sax->characters */
4920
100M
  NEXTL(l);
4921
100M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922
505k
      buf[nbchar] = 0;
4923
4924
      /*
4925
       * OK the segment is to be consumed as chars.
4926
       */
4927
505k
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928
329k
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4929
0
        if (ctxt->sax->ignorableWhitespace != NULL)
4930
0
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4931
0
                                     buf, nbchar);
4932
329k
    } else {
4933
329k
        if (ctxt->sax->characters != NULL)
4934
329k
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935
329k
        if ((ctxt->sax->characters !=
4936
329k
             ctxt->sax->ignorableWhitespace) &&
4937
329k
      (*ctxt->space == -1))
4938
0
      *ctxt->space = -2;
4939
329k
    }
4940
329k
      }
4941
505k
      nbchar = 0;
4942
505k
            SHRINK;
4943
505k
  }
4944
100M
  cur = CUR_CHAR(l);
4945
100M
    }
4946
8.41M
    if (nbchar != 0) {
4947
1.35M
        buf[nbchar] = 0;
4948
  /*
4949
   * OK the segment is to be consumed as chars.
4950
   */
4951
1.35M
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952
1.26M
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4953
0
    if (ctxt->sax->ignorableWhitespace != NULL)
4954
0
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955
1.26M
      } else {
4956
1.26M
    if (ctxt->sax->characters != NULL)
4957
1.26M
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958
1.26M
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959
1.26M
        (*ctxt->space == -1))
4960
0
        *ctxt->space = -2;
4961
1.26M
      }
4962
1.26M
  }
4963
1.35M
    }
4964
    /*
4965
     * cur == 0 can mean
4966
     *
4967
     * - End of buffer.
4968
     * - An actual 0 character.
4969
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970
     */
4971
8.41M
    if (ctxt->input->cur < ctxt->input->end) {
4972
8.41M
        if ((cur == 0) && (CUR != 0)) {
4973
1.91k
            if (partial == 0) {
4974
1.91k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975
1.91k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976
1.91k
                NEXTL(1);
4977
1.91k
            }
4978
8.40M
        } else if ((cur != '<') && (cur != '&')) {
4979
            /* Generate the error and skip the offending character */
4980
7.73M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981
7.73M
                              "PCDATA invalid Char value %d\n", cur);
4982
7.73M
            NEXTL(l);
4983
7.73M
        }
4984
8.41M
    }
4985
8.41M
}
4986
4987
/**
4988
 * xmlParseCharData:
4989
 * @ctxt:  an XML parser context
4990
 * @cdata:  unused
4991
 *
4992
 * DEPRECATED: Internal function, don't use.
4993
 */
4994
void
4995
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4996
0
    xmlParseCharDataInternal(ctxt, 0);
4997
0
}
4998
4999
/**
5000
 * xmlParseExternalID:
5001
 * @ctxt:  an XML parser context
5002
 * @publicID:  a xmlChar** receiving PubidLiteral
5003
 * @strict: indicate whether we should restrict parsing to only
5004
 *          production [75], see NOTE below
5005
 *
5006
 * DEPRECATED: Internal function, don't use.
5007
 *
5008
 * Parse an External ID or a Public ID
5009
 *
5010
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5012
 *
5013
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5015
 *
5016
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017
 *
5018
 * Returns the function returns SystemLiteral and in the second
5019
 *                case publicID receives PubidLiteral, is strict is off
5020
 *                it is possible to return NULL and have publicID set.
5021
 */
5022
5023
xmlChar *
5024
30.0k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5025
30.0k
    xmlChar *URI = NULL;
5026
5027
30.0k
    *publicID = NULL;
5028
30.0k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029
4.39k
        SKIP(6);
5030
4.39k
  if (SKIP_BLANKS == 0) {
5031
49
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032
49
                     "Space required after 'SYSTEM'\n");
5033
49
  }
5034
4.39k
  URI = xmlParseSystemLiteral(ctxt);
5035
4.39k
  if (URI == NULL) {
5036
48
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037
48
        }
5038
25.6k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039
6.57k
        SKIP(6);
5040
6.57k
  if (SKIP_BLANKS == 0) {
5041
325
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042
325
        "Space required after 'PUBLIC'\n");
5043
325
  }
5044
6.57k
  *publicID = xmlParsePubidLiteral(ctxt);
5045
6.57k
  if (*publicID == NULL) {
5046
85
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047
85
  }
5048
6.57k
  if (strict) {
5049
      /*
5050
       * We don't handle [83] so "S SystemLiteral" is required.
5051
       */
5052
6.52k
      if (SKIP_BLANKS == 0) {
5053
1.31k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054
1.31k
      "Space required after the Public Identifier\n");
5055
1.31k
      }
5056
6.52k
  } else {
5057
      /*
5058
       * We handle [83] so we return immediately, if
5059
       * "S SystemLiteral" is not detected. We skip blanks if no
5060
             * system literal was found, but this is harmless since we must
5061
             * be at the end of a NotationDecl.
5062
       */
5063
54
      if (SKIP_BLANKS == 0) return(NULL);
5064
31
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065
31
  }
5066
6.55k
  URI = xmlParseSystemLiteral(ctxt);
5067
6.55k
  if (URI == NULL) {
5068
1.41k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069
1.41k
        }
5070
6.55k
    }
5071
29.9k
    return(URI);
5072
30.0k
}
5073
5074
/**
5075
 * xmlParseCommentComplex:
5076
 * @ctxt:  an XML parser context
5077
 * @buf:  the already parsed part of the buffer
5078
 * @len:  number of bytes in the buffer
5079
 * @size:  allocated size of the buffer
5080
 *
5081
 * Skip an XML (SGML) comment <!-- .... -->
5082
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5083
 *  must not occur within comments. "
5084
 * This is the slow routine in case the accelerator for ascii didn't work
5085
 *
5086
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087
 */
5088
static void
5089
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090
49.2k
                       size_t len, size_t size) {
5091
49.2k
    int q, ql;
5092
49.2k
    int r, rl;
5093
49.2k
    int cur, l;
5094
49.2k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095
0
                       XML_MAX_HUGE_LENGTH :
5096
49.2k
                       XML_MAX_TEXT_LENGTH;
5097
5098
49.2k
    if (buf == NULL) {
5099
30.5k
        len = 0;
5100
30.5k
  size = XML_PARSER_BUFFER_SIZE;
5101
30.5k
  buf = (xmlChar *) xmlMallocAtomic(size);
5102
30.5k
  if (buf == NULL) {
5103
0
      xmlErrMemory(ctxt);
5104
0
      return;
5105
0
  }
5106
30.5k
    }
5107
49.2k
    q = CUR_CHAR(ql);
5108
49.2k
    if (q == 0)
5109
17.4k
        goto not_terminated;
5110
31.8k
    if (!IS_CHAR(q)) {
5111
9.61k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112
9.61k
                          "xmlParseComment: invalid xmlChar value %d\n",
5113
9.61k
                    q);
5114
9.61k
  xmlFree (buf);
5115
9.61k
  return;
5116
9.61k
    }
5117
22.1k
    NEXTL(ql);
5118
22.1k
    r = CUR_CHAR(rl);
5119
22.1k
    if (r == 0)
5120
2.56k
        goto not_terminated;
5121
19.6k
    if (!IS_CHAR(r)) {
5122
1.77k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123
1.77k
                          "xmlParseComment: invalid xmlChar value %d\n",
5124
1.77k
                    r);
5125
1.77k
  xmlFree (buf);
5126
1.77k
  return;
5127
1.77k
    }
5128
17.8k
    NEXTL(rl);
5129
17.8k
    cur = CUR_CHAR(l);
5130
17.8k
    if (cur == 0)
5131
3.45k
        goto not_terminated;
5132
33.2M
    while (IS_CHAR(cur) && /* checked */
5133
33.2M
           ((cur != '>') ||
5134
33.2M
      (r != '-') || (q != '-'))) {
5135
33.2M
  if ((r == '-') && (q == '-')) {
5136
455k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137
455k
  }
5138
33.2M
  if (len + 5 >= size) {
5139
7.46k
      xmlChar *new_buf;
5140
7.46k
            size_t new_size;
5141
5142
7.46k
      new_size = size * 2;
5143
7.46k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144
7.46k
      if (new_buf == NULL) {
5145
2
    xmlFree (buf);
5146
2
    xmlErrMemory(ctxt);
5147
2
    return;
5148
2
      }
5149
7.46k
      buf = new_buf;
5150
7.46k
            size = new_size;
5151
7.46k
  }
5152
33.2M
  COPY_BUF(buf, len, q);
5153
33.2M
        if (len > maxLength) {
5154
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155
0
                         "Comment too big found", NULL);
5156
0
            xmlFree (buf);
5157
0
            return;
5158
0
        }
5159
5160
33.2M
  q = r;
5161
33.2M
  ql = rl;
5162
33.2M
  r = cur;
5163
33.2M
  rl = l;
5164
5165
33.2M
  NEXTL(l);
5166
33.2M
  cur = CUR_CHAR(l);
5167
5168
33.2M
    }
5169
14.3k
    buf[len] = 0;
5170
14.3k
    if (cur == 0) {
5171
4.43k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172
4.43k
                       "Comment not terminated \n<!--%.50s\n", buf);
5173
9.96k
    } else if (!IS_CHAR(cur)) {
5174
3.59k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175
3.59k
                          "xmlParseComment: invalid xmlChar value %d\n",
5176
3.59k
                    cur);
5177
6.37k
    } else {
5178
6.37k
        NEXT;
5179
6.37k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180
6.37k
      (!ctxt->disableSAX))
5181
5.96k
      ctxt->sax->comment(ctxt->userData, buf);
5182
6.37k
    }
5183
14.3k
    xmlFree(buf);
5184
14.3k
    return;
5185
23.5k
not_terminated:
5186
23.5k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187
23.5k
       "Comment not terminated\n", NULL);
5188
23.5k
    xmlFree(buf);
5189
23.5k
    return;
5190
14.3k
}
5191
5192
/**
5193
 * xmlParseComment:
5194
 * @ctxt:  an XML parser context
5195
 *
5196
 * DEPRECATED: Internal function, don't use.
5197
 *
5198
 * Parse an XML (SGML) comment. Always consumes '<!'.
5199
 *
5200
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5201
 *  must not occur within comments. "
5202
 *
5203
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5204
 */
5205
void
5206
85.4k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5207
85.4k
    xmlChar *buf = NULL;
5208
85.4k
    size_t size = XML_PARSER_BUFFER_SIZE;
5209
85.4k
    size_t len = 0;
5210
85.4k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211
0
                       XML_MAX_HUGE_LENGTH :
5212
85.4k
                       XML_MAX_TEXT_LENGTH;
5213
85.4k
    const xmlChar *in;
5214
85.4k
    size_t nbchar = 0;
5215
85.4k
    int ccol;
5216
5217
    /*
5218
     * Check that there is a comment right here.
5219
     */
5220
85.4k
    if ((RAW != '<') || (NXT(1) != '!'))
5221
0
        return;
5222
85.4k
    SKIP(2);
5223
85.4k
    if ((RAW != '-') || (NXT(1) != '-'))
5224
5
        return;
5225
85.4k
    SKIP(2);
5226
85.4k
    GROW;
5227
5228
    /*
5229
     * Accelerated common case where input don't need to be
5230
     * modified before passing it to the handler.
5231
     */
5232
85.4k
    in = ctxt->input->cur;
5233
85.8k
    do {
5234
85.8k
  if (*in == 0xA) {
5235
12.7k
      do {
5236
12.7k
    ctxt->input->line++; ctxt->input->col = 1;
5237
12.7k
    in++;
5238
12.7k
      } while (*in == 0xA);
5239
7.05k
  }
5240
1.04M
get_more:
5241
1.04M
        ccol = ctxt->input->col;
5242
3.84M
  while (((*in > '-') && (*in <= 0x7F)) ||
5243
3.84M
         ((*in >= 0x20) && (*in < '-')) ||
5244
3.84M
         (*in == 0x09)) {
5245
2.80M
        in++;
5246
2.80M
        ccol++;
5247
2.80M
  }
5248
1.04M
  ctxt->input->col = ccol;
5249
1.04M
  if (*in == 0xA) {
5250
164k
      do {
5251
164k
    ctxt->input->line++; ctxt->input->col = 1;
5252
164k
    in++;
5253
164k
      } while (*in == 0xA);
5254
16.1k
      goto get_more;
5255
16.1k
  }
5256
1.02M
  nbchar = in - ctxt->input->cur;
5257
  /*
5258
   * save current set of data
5259
   */
5260
1.02M
  if (nbchar > 0) {
5261
986k
            if (buf == NULL) {
5262
53.4k
                if ((*in == '-') && (in[1] == '-'))
5263
23.5k
                    size = nbchar + 1;
5264
29.8k
                else
5265
29.8k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5266
53.4k
                buf = (xmlChar *) xmlMallocAtomic(size);
5267
53.4k
                if (buf == NULL) {
5268
3
                    xmlErrMemory(ctxt);
5269
3
                    return;
5270
3
                }
5271
53.4k
                len = 0;
5272
933k
            } else if (len + nbchar + 1 >= size) {
5273
3.94k
                xmlChar *new_buf;
5274
3.94k
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275
3.94k
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5276
3.94k
                if (new_buf == NULL) {
5277
0
                    xmlFree (buf);
5278
0
                    xmlErrMemory(ctxt);
5279
0
                    return;
5280
0
                }
5281
3.94k
                buf = new_buf;
5282
3.94k
            }
5283
986k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5284
986k
            len += nbchar;
5285
986k
            buf[len] = 0;
5286
986k
  }
5287
1.02M
        if (len > maxLength) {
5288
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289
0
                         "Comment too big found", NULL);
5290
0
            xmlFree (buf);
5291
0
            return;
5292
0
        }
5293
1.02M
  ctxt->input->cur = in;
5294
1.02M
  if (*in == 0xA) {
5295
0
      in++;
5296
0
      ctxt->input->line++; ctxt->input->col = 1;
5297
0
  }
5298
1.02M
  if (*in == 0xD) {
5299
3.55k
      in++;
5300
3.55k
      if (*in == 0xA) {
5301
1.99k
    ctxt->input->cur = in;
5302
1.99k
    in++;
5303
1.99k
    ctxt->input->line++; ctxt->input->col = 1;
5304
1.99k
    goto get_more;
5305
1.99k
      }
5306
1.56k
      in--;
5307
1.56k
  }
5308
1.02M
  SHRINK;
5309
1.02M
  GROW;
5310
1.02M
  in = ctxt->input->cur;
5311
1.02M
  if (*in == '-') {
5312
972k
      if (in[1] == '-') {
5313
921k
          if (in[2] == '>') {
5314
36.1k
        SKIP(3);
5315
36.1k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316
36.1k
            (!ctxt->disableSAX)) {
5317
31.8k
      if (buf != NULL)
5318
30.3k
          ctxt->sax->comment(ctxt->userData, buf);
5319
1.45k
      else
5320
1.45k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321
31.8k
        }
5322
36.1k
        if (buf != NULL)
5323
34.6k
            xmlFree(buf);
5324
36.1k
        return;
5325
36.1k
    }
5326
885k
    if (buf != NULL) {
5327
883k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328
883k
                          "Double hyphen within comment: "
5329
883k
                                      "<!--%.50s\n",
5330
883k
              buf);
5331
883k
    } else
5332
1.82k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333
1.82k
                          "Double hyphen within comment\n", NULL);
5334
885k
    in++;
5335
885k
    ctxt->input->col++;
5336
885k
      }
5337
936k
      in++;
5338
936k
      ctxt->input->col++;
5339
936k
      goto get_more;
5340
972k
  }
5341
1.02M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5342
49.2k
    xmlParseCommentComplex(ctxt, buf, len, size);
5343
49.2k
    return;
5344
85.4k
}
5345
5346
5347
/**
5348
 * xmlParsePITarget:
5349
 * @ctxt:  an XML parser context
5350
 *
5351
 * DEPRECATED: Internal function, don't use.
5352
 *
5353
 * parse the name of a PI
5354
 *
5355
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356
 *
5357
 * Returns the PITarget name or NULL
5358
 */
5359
5360
const xmlChar *
5361
55.3k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5362
55.3k
    const xmlChar *name;
5363
5364
55.3k
    name = xmlParseName(ctxt);
5365
55.3k
    if ((name != NULL) &&
5366
55.3k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5367
55.3k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5368
55.3k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5369
5.72k
  int i;
5370
5.72k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5371
5.72k
      (name[2] == 'l') && (name[3] == 0)) {
5372
988
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373
988
     "XML declaration allowed only at the start of the document\n");
5374
988
      return(name);
5375
4.73k
  } else if (name[3] == 0) {
5376
762
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377
762
      return(name);
5378
762
  }
5379
11.4k
  for (i = 0;;i++) {
5380
11.4k
      if (xmlW3CPIs[i] == NULL) break;
5381
7.91k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382
395
          return(name);
5383
7.91k
  }
5384
3.58k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385
3.58k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5386
3.58k
          NULL, NULL);
5387
3.58k
    }
5388
53.2k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389
3.10k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390
3.10k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391
3.10k
    }
5392
53.2k
    return(name);
5393
55.3k
}
5394
5395
#ifdef LIBXML_CATALOG_ENABLED
5396
/**
5397
 * xmlParseCatalogPI:
5398
 * @ctxt:  an XML parser context
5399
 * @catalog:  the PI value string
5400
 *
5401
 * parse an XML Catalog Processing Instruction.
5402
 *
5403
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5404
 *
5405
 * Occurs only if allowed by the user and if happening in the Misc
5406
 * part of the document before any doctype information
5407
 * This will add the given catalog to the parsing context in order
5408
 * to be used if there is a resolution need fur