Coverage Report

Created: 2024-02-25 06:14

/src/libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
6.96M
#define NS_INDEX_EMPTY  INT_MAX
78
3.58M
#define NS_INDEX_XML    (INT_MAX - 1)
79
2.02M
#define URI_HASH_EMPTY  0xD943A04E
80
124k
#define URI_HASH_XML    0xF0451F02
81
82
/*
 * Record describing one element start tag.
 * NOTE(review): the code that fills and reads these fields is outside
 * this part of the file; the field notes below are inferred from the
 * names only and should be confirmed against the users of the struct.
 */
struct _xmlStartTag {
83
    const xmlChar *prefix; /* presumably the element's namespace prefix - confirm */
84
    const xmlChar *URI; /* presumably the element's namespace URI - confirm */
85
    int line; /* presumably the input line of the start tag - confirm */
86
    int nsNr; /* presumably a count of namespace bindings - confirm */
87
};
88
89
/*
 * Additional per-namespace data; struct _xmlParserNsData holds a
 * pointer to these records in its "extra" member.
 * NOTE(review): field semantics are not visible in this part of the
 * file; the notes below are inferred from names only.
 */
typedef struct {
90
    void *saxData; /* opaque pointer, meaning defined by the code using it */
91
    unsigned prefixHashValue; /* presumably the hash of the prefix - confirm */
92
    unsigned uriHashValue; /* presumably the hash of the URI - confirm */
93
    unsigned elementId; /* presumably ties the entry to an element - confirm */
94
    int oldIndex; /* presumably the index of a shadowed entry - confirm */
95
} xmlParserNsExtra;
96
97
/*
 * Bucket type of the hash table referenced by the "hash" member of
 * struct _xmlParserNsData.
 * NOTE(review): how empty buckets are marked is not visible here.
 */
typedef struct {
98
    unsigned hashValue; /* hash value stored for the bucket */
99
    int index; /* presumably an index into the namespace arrays - confirm */
100
} xmlParserNsBucket;
101
102
/*
 * Namespace bookkeeping data of the parser.
 * NOTE(review): the code maintaining this struct is outside this part
 * of the file; the notes below are inferred from the declarations and
 * should be confirmed.
 */
struct _xmlParserNsData {
103
    xmlParserNsExtra *extra; /* pointer to xmlParserNsExtra records */
104
105
    unsigned hashSize; /* presumably the number of buckets in "hash" - confirm */
106
    unsigned hashElems; /* presumably the number of used buckets - confirm */
107
    xmlParserNsBucket *hash; /* pointer to the hash buckets */
108
109
    unsigned elementId; /* presumably a running element counter - confirm */
110
    int defaultNsIndex; /* presumably the index of the default namespace - confirm */
111
    int minNsIndex; /* NOTE(review): meaning not visible here */
112
};
113
114
/*
 * Bucket of an attribute hash table.
 * NOTE(review): the table itself is not visible in this part of the file.
 */
struct _xmlAttrHashBucket {
115
    int index; /* presumably an index into the attribute array - confirm */
116
};
117
118
static int
119
xmlParseElementStart(xmlParserCtxtPtr ctxt);
120
121
static void
122
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123
124
static xmlEntityPtr
125
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126
127
static const xmlChar *
128
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129
130
/************************************************************************
131
 *                  *
132
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
133
 *                  *
134
 ************************************************************************/
135
136
#define XML_PARSER_BIG_ENTITY 1000
137
#define XML_PARSER_LOT_ENTITY 5000
138
139
/*
140
 * Constants for protection against abusive entity expansion
141
 * ("billion laughs").
142
 */
143
144
/*
145
 * A certain amount of entity expansion which is always allowed.
146
 */
147
4.06M
#define XML_PARSER_ALLOWED_EXPANSION 1000000
148
149
/*
150
 * Fixed cost for each entity reference. This crudely models processing time
151
 * as well to protect, for example, against exponential expansion of empty
152
 * or very short entities.
153
 */
154
4.08M
#define XML_ENT_FIXED_COST 20
155
156
/**
157
 * xmlParserMaxDepth:
158
 *
159
 * arbitrary depth limit for the XML documents that we allow to
160
 * process. This is not a limitation of the parser but a safety
161
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
162
 * parser option.
163
 */
164
const unsigned int xmlParserMaxDepth = 256;
165
166
167
168
134M
#define XML_PARSER_BIG_BUFFER_SIZE 300
169
5.66M
#define XML_PARSER_BUFFER_SIZE 100
170
301k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171
172
/**
173
 * XML_PARSER_CHUNK_SIZE
174
 *
175
 * When calling GROW that's the minimal amount of data
176
 * the parser expected to have received. It is not a hard
177
 * limit but an optimization when reading strings like Names
178
 * It is not strictly needed as long as the available input characters
179
 * are followed by 0, which should be provided by the I/O level
180
 */
181
#define XML_PARSER_CHUNK_SIZE 100
182
183
/**
184
 * xmlParserVersion:
185
 *
186
 * Constant string describing the internal version of the library
187
 */
188
const char *const
189
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190
191
/*
192
 * List of XML prefixed PI allowed by W3C specs
193
 */
194
195
static const char* const xmlW3CPIs[] = {
196
    "xml-stylesheet",
197
    "xml-model",
198
    NULL
199
};
200
201
202
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204
                                              const xmlChar **str);
205
206
static void
207
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208
209
static int
210
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211
212
/************************************************************************
213
 *                  *
214
 *    Some factorized error routines        *
215
 *                  *
216
 ************************************************************************/
217
218
static void
219
6.07k
xmlErrMemory(xmlParserCtxtPtr ctxt) {
220
6.07k
    xmlCtxtErrMemory(ctxt);
221
6.07k
}
222
223
/**
224
 * xmlErrAttributeDup:
225
 * @ctxt:  an XML parser context
226
 * @prefix:  the attribute prefix
227
 * @localname:  the attribute localname
228
 *
229
 * Handle a redefinition of attribute error
230
 */
231
static void
232
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233
                   const xmlChar * localname)
234
66.4k
{
235
66.4k
    if (prefix == NULL)
236
41.2k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
41.2k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
238
41.2k
                   "Attribute %s redefined\n", localname);
239
25.2k
    else
240
25.2k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
25.2k
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
242
25.2k
                   "Attribute %s:%s redefined\n", prefix, localname);
243
66.4k
}
244
245
/**
246
 * xmlFatalErrMsg:
247
 * @ctxt:  an XML parser context
248
 * @error:  the error number
249
 * @msg:  the error message
250
 *
251
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
252
 */
253
static void LIBXML_ATTR_FORMAT(3,0)
254
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255
               const char *msg)
256
78.7M
{
257
78.7M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258
78.7M
               NULL, NULL, NULL, 0, "%s", msg);
259
78.7M
}
260
261
/**
262
 * xmlWarningMsg:
263
 * @ctxt:  an XML parser context
264
 * @error:  the error number
265
 * @msg:  the error message
266
 * @str1:  extra data
267
 * @str2:  extra data
268
 *
269
 * Handle a warning.
270
 */
271
void LIBXML_ATTR_FORMAT(3,0)
272
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273
              const char *msg, const xmlChar *str1, const xmlChar *str2)
274
311k
{
275
311k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276
311k
               str1, str2, NULL, 0, msg, str1, str2);
277
311k
}
278
279
/**
280
 * xmlValidityError:
281
 * @ctxt:  an XML parser context
282
 * @error:  the error number
283
 * @msg:  the error message
284
 * @str1:  extra data
285
 *
286
 * Handle a validity error.
287
 */
288
static void LIBXML_ATTR_FORMAT(3,0)
289
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290
              const char *msg, const xmlChar *str1, const xmlChar *str2)
291
176k
{
292
176k
    ctxt->valid = 0;
293
294
176k
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295
176k
               str1, str2, NULL, 0, msg, str1, str2);
296
176k
}
297
298
/**
299
 * xmlFatalErrMsgInt:
300
 * @ctxt:  an XML parser context
301
 * @error:  the error number
302
 * @msg:  the error message
303
 * @val:  an integer value
304
 *
305
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
306
 */
307
static void LIBXML_ATTR_FORMAT(3,0)
308
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309
                  const char *msg, int val)
310
7.21M
{
311
7.21M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312
7.21M
               NULL, NULL, NULL, val, msg, val);
313
7.21M
}
314
315
/**
316
 * xmlFatalErrMsgStrIntStr:
317
 * @ctxt:  an XML parser context
318
 * @error:  the error number
319
 * @msg:  the error message
320
 * @str1:  an string info
321
 * @val:  an integer value
322
 * @str2:  an string info
323
 *
324
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
325
 */
326
static void LIBXML_ATTR_FORMAT(3,0)
327
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328
                  const char *msg, const xmlChar *str1, int val,
329
      const xmlChar *str2)
330
1.09M
{
331
1.09M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332
1.09M
               str1, str2, NULL, val, msg, str1, val, str2);
333
1.09M
}
334
335
/**
336
 * xmlFatalErrMsgStr:
337
 * @ctxt:  an XML parser context
338
 * @error:  the error number
339
 * @msg:  the error message
340
 * @val:  a string value
341
 *
342
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
343
 */
344
static void LIBXML_ATTR_FORMAT(3,0)
345
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346
                  const char *msg, const xmlChar * val)
347
2.37M
{
348
2.37M
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349
2.37M
               val, NULL, NULL, 0, msg, val);
350
2.37M
}
351
352
/**
353
 * xmlErrMsgStr:
354
 * @ctxt:  an XML parser context
355
 * @error:  the error number
356
 * @msg:  the error message
357
 * @val:  a string value
358
 *
359
 * Handle a non fatal parser error
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
41.8k
{
365
41.8k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
41.8k
               val, NULL, NULL, 0, msg, val);
367
41.8k
}
368
369
/**
370
 * xmlNsErr:
371
 * @ctxt:  an XML parser context
372
 * @error:  the error number
373
 * @msg:  the message
374
 * @info1:  extra information string
375
 * @info2:  extra information string
376
 *
377
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
855k
{
385
855k
    ctxt->nsWellFormed = 0;
386
387
855k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
855k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
855k
}
390
391
/**
392
 * xmlNsWarn
393
 * @ctxt:  an XML parser context
394
 * @error:  the error number
395
 * @msg:  the message
396
 * @info1:  extra information string
397
 * @info2:  extra information string
398
 *
399
 * Handle a namespace warning error
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
106k
{
407
106k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
106k
               info1, info2, info3, 0, msg, info1, info2, info3);
409
106k
}
410
411
/**
 * xmlSaturatedAdd:
 * @dst:  the accumulator, updated in place
 * @val:  the amount to add
 *
 * Add @val to *@dst using saturation arithmetic: if the sum would
 * exceed ULONG_MAX, *@dst is set to ULONG_MAX instead of wrapping.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Largest amount that can still be added without overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
418
419
/**
 * xmlSaturatedAddSizeT:
 * @dst:  the accumulator, updated in place
 * @val:  the amount to add, as a size_t
 *
 * Add @val to *@dst using saturation arithmetic: if the sum would
 * exceed ULONG_MAX, *@dst is set to ULONG_MAX instead of wrapping.
 *
 * @val is taken as a size_t so that, on platforms where size_t is
 * wider than unsigned long, a large value is compared in full rather
 * than being truncated on entry.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        /* val fits into the remaining room, so the cast is lossless. */
        *dst += (unsigned long) val;
}
426
427
/**
428
 * xmlParserEntityCheck:
429
 * @ctxt:  parser context
430
 * @extra:  sum of unexpanded entity sizes
431
 *
432
 * Check for non-linear entity expansion behaviour.
433
 *
434
 * In some cases like xmlExpandEntityInAttValue, this function is called
435
 * for each, possibly nested entity and its unexpanded content length.
436
 *
437
 * In other cases like xmlParseReference, it's only called for each
438
 * top-level entity with its unexpanded content length plus the sum of
439
 * the unexpanded content lengths (plus fixed cost) of all nested
440
 * entities.
441
 *
442
 * Summing the unexpanded lengths also adds the length of the reference.
443
 * This is by design. Taking the length of the entity name into account
444
 * discourages attacks that try to waste CPU time with abusively long
445
 * entity names. See test/recurse/lol6.xml for example. Each call also
446
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447
 * short entities.
448
 *
449
 * Returns 1 on error, 0 on success.
450
 */
451
static int
452
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453
4.45M
{
454
4.45M
    unsigned long consumed;
455
4.45M
    unsigned long *expandedSize;
456
4.45M
    xmlParserInputPtr input = ctxt->input;
457
4.45M
    xmlEntityPtr entity = input->entity;
458
459
4.45M
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
460
388k
        return(0);
461
462
    /*
463
     * Compute total consumed bytes so far, including input streams of
464
     * external entities.
465
     */
466
4.06M
    consumed = input->consumed;
467
4.06M
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468
4.06M
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469
470
4.06M
    if (entity)
471
76.6k
        expandedSize = &entity->expandedSize;
472
3.99M
    else
473
3.99M
        expandedSize = &ctxt->sizeentcopy;
474
475
    /*
476
     * Add extra cost and some fixed cost.
477
     */
478
4.06M
    xmlSaturatedAdd(expandedSize, extra);
479
4.06M
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480
481
    /*
482
     * It's important to always use saturation arithmetic when tracking
483
     * entity sizes to make the size checks reliable. If "sizeentcopy"
484
     * overflows, we have to abort.
485
     */
486
4.06M
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487
4.06M
        ((*expandedSize >= ULONG_MAX) ||
488
90.7k
         (*expandedSize / ctxt->maxAmpl > consumed))) {
489
3.32k
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490
3.32k
                       "Maximum entity amplification factor exceeded, see "
491
3.32k
                       "xmlCtxtSetMaxAmplification.\n");
492
3.32k
        xmlHaltParser(ctxt);
493
3.32k
        return(1);
494
3.32k
    }
495
496
4.06M
    return(0);
497
4.06M
}
498
499
/************************************************************************
500
 *                  *
501
 *    Library wide options          *
502
 *                  *
503
 ************************************************************************/
504
505
/**
506
  * xmlHasFeature:
507
  * @feature: the feature to be examined
508
  *
509
  * Examines if the library has been compiled with a given feature.
510
  *
511
  * Returns a non-zero value if the feature exist, otherwise zero.
512
  * Returns zero (0) if the feature does not exist or an unknown
513
  * unknown feature is requested, non-zero otherwise.
514
  */
515
int
516
xmlHasFeature(xmlFeature feature)
517
0
{
518
0
    switch (feature) {
519
0
  case XML_WITH_THREAD:
520
0
#ifdef LIBXML_THREAD_ENABLED
521
0
      return(1);
522
#else
523
      return(0);
524
#endif
525
0
        case XML_WITH_TREE:
526
0
#ifdef LIBXML_TREE_ENABLED
527
0
            return(1);
528
#else
529
            return(0);
530
#endif
531
0
        case XML_WITH_OUTPUT:
532
0
#ifdef LIBXML_OUTPUT_ENABLED
533
0
            return(1);
534
#else
535
            return(0);
536
#endif
537
0
        case XML_WITH_PUSH:
538
0
#ifdef LIBXML_PUSH_ENABLED
539
0
            return(1);
540
#else
541
            return(0);
542
#endif
543
0
        case XML_WITH_READER:
544
0
#ifdef LIBXML_READER_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_PATTERN:
550
0
#ifdef LIBXML_PATTERN_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_WRITER:
556
0
#ifdef LIBXML_WRITER_ENABLED
557
0
            return(1);
558
#else
559
            return(0);
560
#endif
561
0
        case XML_WITH_SAX1:
562
0
#ifdef LIBXML_SAX1_ENABLED
563
0
            return(1);
564
#else
565
            return(0);
566
#endif
567
0
        case XML_WITH_FTP:
568
#ifdef LIBXML_FTP_ENABLED
569
            return(1);
570
#else
571
0
            return(0);
572
0
#endif
573
0
        case XML_WITH_HTTP:
574
#ifdef LIBXML_HTTP_ENABLED
575
            return(1);
576
#else
577
0
            return(0);
578
0
#endif
579
0
        case XML_WITH_VALID:
580
0
#ifdef LIBXML_VALID_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_HTML:
586
0
#ifdef LIBXML_HTML_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_LEGACY:
592
#ifdef LIBXML_LEGACY_ENABLED
593
            return(1);
594
#else
595
0
            return(0);
596
0
#endif
597
0
        case XML_WITH_C14N:
598
0
#ifdef LIBXML_C14N_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_CATALOG:
604
0
#ifdef LIBXML_CATALOG_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_XPATH:
610
0
#ifdef LIBXML_XPATH_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_XPTR:
616
0
#ifdef LIBXML_XPTR_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_XINCLUDE:
622
0
#ifdef LIBXML_XINCLUDE_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_ICONV:
628
0
#ifdef LIBXML_ICONV_ENABLED
629
0
            return(1);
630
#else
631
            return(0);
632
#endif
633
0
        case XML_WITH_ISO8859X:
634
0
#ifdef LIBXML_ISO8859X_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_UNICODE:
640
0
#ifdef LIBXML_UNICODE_ENABLED
641
0
            return(1);
642
#else
643
            return(0);
644
#endif
645
0
        case XML_WITH_REGEXP:
646
0
#ifdef LIBXML_REGEXP_ENABLED
647
0
            return(1);
648
#else
649
            return(0);
650
#endif
651
0
        case XML_WITH_AUTOMATA:
652
0
#ifdef LIBXML_AUTOMATA_ENABLED
653
0
            return(1);
654
#else
655
            return(0);
656
#endif
657
0
        case XML_WITH_EXPR:
658
#ifdef LIBXML_EXPR_ENABLED
659
            return(1);
660
#else
661
0
            return(0);
662
0
#endif
663
0
        case XML_WITH_SCHEMAS:
664
0
#ifdef LIBXML_SCHEMAS_ENABLED
665
0
            return(1);
666
#else
667
            return(0);
668
#endif
669
0
        case XML_WITH_SCHEMATRON:
670
0
#ifdef LIBXML_SCHEMATRON_ENABLED
671
0
            return(1);
672
#else
673
            return(0);
674
#endif
675
0
        case XML_WITH_MODULES:
676
0
#ifdef LIBXML_MODULES_ENABLED
677
0
            return(1);
678
#else
679
            return(0);
680
#endif
681
0
        case XML_WITH_DEBUG:
682
#ifdef LIBXML_DEBUG_ENABLED
683
            return(1);
684
#else
685
0
            return(0);
686
0
#endif
687
0
        case XML_WITH_DEBUG_MEM:
688
#ifdef DEBUG_MEMORY_LOCATION
689
            return(1);
690
#else
691
0
            return(0);
692
0
#endif
693
0
        case XML_WITH_ZLIB:
694
0
#ifdef LIBXML_ZLIB_ENABLED
695
0
            return(1);
696
#else
697
            return(0);
698
#endif
699
0
        case XML_WITH_LZMA:
700
0
#ifdef LIBXML_LZMA_ENABLED
701
0
            return(1);
702
#else
703
            return(0);
704
#endif
705
0
        case XML_WITH_ICU:
706
#ifdef LIBXML_ICU_ENABLED
707
            return(1);
708
#else
709
0
            return(0);
710
0
#endif
711
0
        default:
712
0
      break;
713
0
     }
714
0
     return(0);
715
0
}
716
717
/************************************************************************
718
 *                  *
719
 *      Simple string buffer        *
720
 *                  *
721
 ************************************************************************/
722
723
/*
 * Growable byte buffer with a hard size limit and a sticky error code.
 * The xmlSBuf* helpers below append to it and record failures in
 * "code" instead of returning them; xmlSBufFinish or xmlSBufCleanup
 * reports the recorded error.
 */
typedef struct {
724
    xmlChar *mem; /* allocated storage, NULL until the first growth */
725
    unsigned size; /* number of bytes stored so far */
726
    unsigned cap; /* size < cap */
727
    unsigned max; /* size <= max */
728
    xmlParserErrors code; /* XML_ERR_OK, or the error recorded by a failed operation */
729
} xmlSBuf;
730
731
static void
732
2.97M
xmlSBufInit(xmlSBuf *buf, unsigned max) {
733
2.97M
    buf->mem = NULL;
734
2.97M
    buf->size = 0;
735
2.97M
    buf->cap = 0;
736
2.97M
    buf->max = max;
737
2.97M
    buf->code = XML_ERR_OK;
738
2.97M
}
739
740
static int
741
1.15M
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742
1.15M
    xmlChar *mem;
743
1.15M
    unsigned cap;
744
745
1.15M
    if (len >= UINT_MAX / 2 - buf->size) {
746
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
747
0
        return(-1);
748
0
    }
749
750
1.15M
    cap = (buf->size + len) * 2;
751
1.15M
    if (cap < 240)
752
999k
        cap = 240;
753
754
1.15M
    mem = xmlRealloc(buf->mem, cap);
755
1.15M
    if (mem == NULL) {
756
875
        buf->code = XML_ERR_NO_MEMORY;
757
875
        return(-1);
758
875
    }
759
760
1.15M
    buf->mem = mem;
761
1.15M
    buf->cap = cap;
762
763
1.15M
    return(0);
764
1.15M
}
765
766
static void
767
242M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
768
242M
    if (buf->max - buf->size < len) {
769
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
770
0
        return;
771
0
    }
772
773
242M
    if (buf->cap - buf->size <= len) {
774
1.10M
        if (xmlSBufGrow(buf, len) < 0)
775
826
            return;
776
1.10M
    }
777
778
242M
    if (len > 0)
779
242M
        memcpy(buf->mem + buf->size, str, len);
780
242M
    buf->size += len;
781
242M
}
782
783
static void
784
235M
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
785
235M
    xmlSBufAddString(buf, (const xmlChar *) str, len);
786
235M
}
787
788
static void
789
389k
xmlSBufAddChar(xmlSBuf *buf, int c) {
790
389k
    xmlChar *end;
791
792
389k
    if (buf->max - buf->size < 4) {
793
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
794
0
        return;
795
0
    }
796
797
389k
    if (buf->cap - buf->size <= 4) {
798
57.7k
        if (xmlSBufGrow(buf, 4) < 0)
799
49
            return;
800
57.7k
    }
801
802
389k
    end = buf->mem + buf->size;
803
804
389k
    if (c < 0x80) {
805
339k
        *end = (xmlChar) c;
806
339k
        buf->size += 1;
807
339k
    } else {
808
49.5k
        buf->size += xmlCopyCharMultiByte(end, c);
809
49.5k
    }
810
389k
}
811
812
static void
813
198M
xmlSBufAddReplChar(xmlSBuf *buf) {
814
198M
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
815
198M
}
816
817
static void
818
937
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
819
937
    if (buf->code == XML_ERR_NO_MEMORY)
820
937
        xmlCtxtErrMemory(ctxt);
821
0
    else
822
0
        xmlFatalErr(ctxt, buf->code, errMsg);
823
937
}
824
825
static xmlChar *
826
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
827
1.07M
              const char *errMsg) {
828
1.07M
    if (buf->mem == NULL) {
829
76.3k
        buf->mem = xmlMalloc(1);
830
76.3k
        if (buf->mem == NULL) {
831
62
            buf->code = XML_ERR_NO_MEMORY;
832
76.3k
        } else {
833
76.3k
            buf->mem[0] = 0;
834
76.3k
        }
835
995k
    } else {
836
995k
        buf->mem[buf->size] = 0;
837
995k
    }
838
839
1.07M
    if (buf->code == XML_ERR_OK) {
840
1.07M
        if (sizeOut != NULL)
841
360k
            *sizeOut = buf->size;
842
1.07M
        return(buf->mem);
843
1.07M
    }
844
845
685
    xmlSBufReportError(buf, ctxt, errMsg);
846
847
685
    xmlFree(buf->mem);
848
849
685
    if (sizeOut != NULL)
850
160
        *sizeOut = 0;
851
685
    return(NULL);
852
1.07M
}
853
854
static void
855
1.83M
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
856
1.83M
    if (buf->code != XML_ERR_OK)
857
252
        xmlSBufReportError(buf, ctxt, errMsg);
858
859
1.83M
    xmlFree(buf->mem);
860
1.83M
}
861
862
static int
863
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
864
320M
                    const char *errMsg) {
865
320M
    int c = str[0];
866
320M
    int c1 = str[1];
867
868
320M
    if ((c1 & 0xC0) != 0x80)
869
69.5M
        goto encoding_error;
870
871
251M
    if (c < 0xE0) {
872
        /* 2-byte sequence */
873
94.5M
        if (c < 0xC2)
874
53.0M
            goto encoding_error;
875
876
41.4M
        return(2);
877
156M
    } else {
878
156M
        int c2 = str[2];
879
880
156M
        if ((c2 & 0xC0) != 0x80)
881
29.1k
            goto encoding_error;
882
883
156M
        if (c < 0xF0) {
884
            /* 3-byte sequence */
885
156M
            if (c == 0xE0) {
886
                /* overlong */
887
26.6M
                if (c1 < 0xA0)
888
2.16k
                    goto encoding_error;
889
129M
            } else if (c == 0xED) {
890
                /* surrogate */
891
1.15M
                if (c1 >= 0xA0)
892
1.45k
                    goto encoding_error;
893
128M
            } else if (c == 0xEF) {
894
                /* U+FFFE and U+FFFF are invalid Chars */
895
19.8M
                if ((c1 == 0xBF) && (c2 >= 0xBE))
896
3.08k
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
897
19.8M
            }
898
899
156M
            return(3);
900
156M
        } else {
901
            /* 4-byte sequence */
902
283k
            if ((str[3] & 0xC0) != 0x80)
903
8.41k
                goto encoding_error;
904
275k
            if (c == 0xF0) {
905
                /* overlong */
906
27.4k
                if (c1 < 0x90)
907
16.5k
                    goto encoding_error;
908
247k
            } else if (c >= 0xF4) {
909
                /* greater than 0x10FFFF */
910
41.2k
                if ((c > 0xF4) || (c1 >= 0x90))
911
39.6k
                    goto encoding_error;
912
41.2k
            }
913
914
218k
            return(4);
915
275k
        }
916
156M
    }
917
918
122M
encoding_error:
919
    /* Only report the first error */
920
122M
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
921
46.1k
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
922
46.1k
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
923
46.1k
    }
924
925
122M
    return(0);
926
251M
}
927
928
/************************************************************************
929
 *                  *
930
 *    SAX2 defaulted attributes handling      *
931
 *                  *
932
 ************************************************************************/
933
934
/**
935
 * xmlCtxtInitializeLate:
936
 * @ctxt:  an XML parser context
937
 *
938
 * Final initialization of the parser context before starting to parse.
939
 *
940
 * This accounts for users modifying struct members of parser context
941
 * directly.
942
 */
943
static void
944
225k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
945
225k
    xmlSAXHandlerPtr sax;
946
947
    /* Avoid unused variable warning if features are disabled. */
948
225k
    (void) sax;
949
950
    /*
951
     * Changing the SAX struct directly is still widespread practice
952
     * in internal and external code.
953
     */
954
225k
    if (ctxt == NULL) return;
955
225k
    sax = ctxt->sax;
956
225k
#ifdef LIBXML_SAX1_ENABLED
957
    /*
958
     * Only enable SAX2 if there SAX2 element handlers, except when there
959
     * are no element handlers at all.
960
     */
961
225k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
962
225k
        (sax) &&
963
225k
        (sax->initialized == XML_SAX2_MAGIC) &&
964
225k
        ((sax->startElementNs != NULL) ||
965
191k
         (sax->endElementNs != NULL) ||
966
191k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
967
191k
        ctxt->sax2 = 1;
968
#else
969
    ctxt->sax2 = 1;
970
#endif /* LIBXML_SAX1_ENABLED */
971
972
    /*
973
     * Some users replace the dictionary directly in the context struct.
974
     * We really need an API function to do that cleanly.
975
     */
976
225k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
977
225k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
978
225k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
979
225k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
980
225k
    (ctxt->str_xml_ns == NULL)) {
981
979
        xmlErrMemory(ctxt);
982
979
    }
983
225k
}
984
985
/*
 * One entry of the "attrs" array of struct _xmlDefAttrs, i.e. one
 * defaulted attribute of an element.
 * NOTE(review): the code filling these entries is not fully visible in
 * this part of the file; the field notes are inferred from names and
 * should be confirmed.
 */
typedef struct {
986
    xmlHashedString prefix; /* presumably the attribute prefix - confirm */
987
    xmlHashedString name; /* presumably the attribute local name - confirm */
988
    xmlHashedString value; /* presumably the default value - confirm */
989
    const xmlChar *valueEnd; /* presumably the end of the value string - confirm */
990
    int external; /* presumably set for externally declared defaults - confirm */
991
    int expandedSize; /* NOTE(review): unit and meaning not visible here */
992
} xmlDefAttr;
993
994
/*
 * Variable-length list of the defaulted attributes of one element: a
 * small header followed by the xmlDefAttr entries in the same
 * allocation.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
995
typedef xmlDefAttrs *xmlDefAttrsPtr;
996
struct _xmlDefAttrs {
997
    int nbAttrs;  /* number of defaulted attributes on that element */
998
    int maxAttrs;       /* the size of the array */
999
#if __STDC_VERSION__ >= 199901L
1000
    /* Using a C99 flexible array member avoids UBSan errors. */
1001
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1002
#else
1003
    xmlDefAttr attrs[1]; /* pre-C99 substitute for the flexible array member */
1004
#endif
1005
};
1006
1007
/**
1008
 * xmlAttrNormalizeSpace:
1009
 * @src: the source string
1010
 * @dst: the target string
1011
 *
1012
 * Normalize the space in non CDATA attribute values:
1013
 * If the attribute type is not CDATA, then the XML processor MUST further
1014
 * process the normalized attribute value by discarding any leading and
1015
 * trailing space (#x20) characters, and by replacing sequences of space
1016
 * (#x20) characters by a single space (#x20) character.
1017
 * Note that the size of dst need to be at least src, and if one doesn't need
1018
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1019
 * passing src as dst is just fine.
1020
 *
1021
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1022
 *         is needed.
1023
 */
1024
static xmlChar *
1025
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1026
160k
{
1027
160k
    if ((src == NULL) || (dst == NULL))
1028
0
        return(NULL);
1029
1030
173k
    while (*src == 0x20) src++;
1031
124M
    while (*src != 0) {
1032
124M
  if (*src == 0x20) {
1033
14.9M
      while (*src == 0x20) src++;
1034
121k
      if (*src != 0)
1035
110k
    *dst++ = 0x20;
1036
124M
  } else {
1037
124M
      *dst++ = *src++;
1038
124M
  }
1039
124M
    }
1040
160k
    *dst = 0;
1041
160k
    if (dst == src)
1042
141k
       return(NULL);
1043
18.7k
    return(dst);
1044
160k
}
1045
1046
/**
1047
 * xmlAddDefAttrs:
1048
 * @ctxt:  an XML parser context
1049
 * @fullname:  the element fullname
1050
 * @fullattr:  the attribute fullname
1051
 * @value:  the attribute value
1052
 *
1053
 * Add a defaulted attribute for an element
1054
 */
1055
static void
1056
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1057
               const xmlChar *fullname,
1058
               const xmlChar *fullattr,
1059
320k
               const xmlChar *value) {
1060
320k
    xmlDefAttrsPtr defaults;
1061
320k
    xmlDefAttr *attr;
1062
320k
    int len, expandedSize;
1063
320k
    xmlHashedString name;
1064
320k
    xmlHashedString prefix;
1065
320k
    xmlHashedString hvalue;
1066
320k
    const xmlChar *localname;
1067
1068
    /*
1069
     * Allows to detect attribute redefinitions
1070
     */
1071
320k
    if (ctxt->attsSpecial != NULL) {
1072
300k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1073
216k
      return;
1074
300k
    }
1075
1076
103k
    if (ctxt->attsDefault == NULL) {
1077
20.3k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1078
20.3k
  if (ctxt->attsDefault == NULL)
1079
65
      goto mem_error;
1080
20.3k
    }
1081
1082
    /*
1083
     * split the element name into prefix:localname , the string found
1084
     * are within the DTD and then not associated to namespace names.
1085
     */
1086
103k
    localname = xmlSplitQName3(fullname, &len);
1087
103k
    if (localname == NULL) {
1088
67.5k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1089
67.5k
  prefix.name = NULL;
1090
67.5k
    } else {
1091
36.2k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1092
36.2k
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1093
36.2k
        if (prefix.name == NULL)
1094
7
            goto mem_error;
1095
36.2k
    }
1096
103k
    if (name.name == NULL)
1097
6
        goto mem_error;
1098
1099
    /*
1100
     * make sure there is some storage
1101
     */
1102
103k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1103
103k
    if ((defaults == NULL) ||
1104
103k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1105
32.1k
        xmlDefAttrsPtr temp;
1106
32.1k
        int newSize;
1107
1108
32.1k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1109
32.1k
        temp = xmlRealloc(defaults,
1110
32.1k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
32.1k
  if (temp == NULL)
1112
47
      goto mem_error;
1113
32.1k
        if (defaults == NULL)
1114
23.9k
            temp->nbAttrs = 0;
1115
32.1k
  temp->maxAttrs = newSize;
1116
32.1k
        defaults = temp;
1117
32.1k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
32.1k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
32.1k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
103k
    localname = xmlSplitQName3(fullattr, &len);
1129
103k
    if (localname == NULL) {
1130
85.4k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
85.4k
  prefix.name = NULL;
1132
85.4k
    } else {
1133
18.3k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
18.3k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
18.3k
        if (prefix.name == NULL)
1136
6
            goto mem_error;
1137
18.3k
    }
1138
103k
    if (name.name == NULL)
1139
5
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
103k
    len = strlen((const char *) value);
1143
103k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
103k
    if (hvalue.name == NULL)
1145
14
        goto mem_error;
1146
1147
103k
    expandedSize = strlen((const char *) name.name);
1148
103k
    if (prefix.name != NULL)
1149
18.3k
        expandedSize += strlen((const char *) prefix.name);
1150
103k
    expandedSize += len;
1151
1152
103k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
103k
    attr->name = name;
1154
103k
    attr->prefix = prefix;
1155
103k
    attr->value = hvalue;
1156
103k
    attr->valueEnd = hvalue.name + len;
1157
103k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
103k
    attr->expandedSize = expandedSize;
1159
1160
103k
    return;
1161
1162
150
mem_error:
1163
150
    xmlErrMemory(ctxt);
1164
150
    return;
1165
103k
}
1166
1167
/**
1168
 * xmlAddSpecialAttr:
1169
 * @ctxt:  an XML parser context
1170
 * @fullname:  the element fullname
1171
 * @fullattr:  the attribute fullname
1172
 * @type:  the attribute type
1173
 *
1174
 * Register this attribute type
1175
 */
1176
static void
1177
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1178
      const xmlChar *fullname,
1179
      const xmlChar *fullattr,
1180
      int type)
1181
482k
{
1182
482k
    if (ctxt->attsSpecial == NULL) {
1183
26.9k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1184
26.9k
  if (ctxt->attsSpecial == NULL)
1185
84
      goto mem_error;
1186
26.9k
    }
1187
1188
482k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1189
482k
                    (void *) (ptrdiff_t) type) < 0)
1190
5
        goto mem_error;
1191
482k
    return;
1192
1193
482k
mem_error:
1194
89
    xmlErrMemory(ctxt);
1195
89
    return;
1196
482k
}
1197
1198
/**
1199
 * xmlCleanSpecialAttrCallback:
1200
 *
1201
 * Removes CDATA attributes from the special attribute table
1202
 */
1203
static void
1204
xmlCleanSpecialAttrCallback(void *payload, void *data,
1205
                            const xmlChar *fullname, const xmlChar *fullattr,
1206
201k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1207
201k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1208
1209
201k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1210
44.1k
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1211
44.1k
    }
1212
201k
}
1213
1214
/**
1215
 * xmlCleanSpecialAttr:
1216
 * @ctxt:  an XML parser context
1217
 *
1218
 * Trim the list of attributes defined to remove all those of type
1219
 * CDATA as they are not special. This call should be done when finishing
1220
 * to parse the DTD and before starting to parse the document root.
1221
 */
1222
static void
1223
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1224
95.8k
{
1225
95.8k
    if (ctxt->attsSpecial == NULL)
1226
69.0k
        return;
1227
1228
26.8k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1229
1230
26.8k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1231
2.21k
        xmlHashFree(ctxt->attsSpecial, NULL);
1232
2.21k
        ctxt->attsSpecial = NULL;
1233
2.21k
    }
1234
26.8k
    return;
1235
95.8k
}
1236
1237
/**
1238
 * xmlCheckLanguageID:
1239
 * @lang:  pointer to the string value
1240
 *
1241
 * DEPRECATED: Internal function, do not use.
1242
 *
1243
 * Checks that the value conforms to the LanguageID production:
1244
 *
1245
 * NOTE: this is somewhat deprecated, those productions were removed from
1246
 *       the XML Second edition.
1247
 *
1248
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1249
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1250
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1251
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1252
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1253
 * [38] Subcode ::= ([a-z] | [A-Z])+
1254
 *
1255
 * The current REC reference the successors of RFC 1766, currently 5646
1256
 *
1257
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1258
 * langtag       = language
1259
 *                 ["-" script]
1260
 *                 ["-" region]
1261
 *                 *("-" variant)
1262
 *                 *("-" extension)
1263
 *                 ["-" privateuse]
1264
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1265
 *                 ["-" extlang]       ; sometimes followed by
1266
 *                                     ; extended language subtags
1267
 *               / 4ALPHA              ; or reserved for future use
1268
 *               / 5*8ALPHA            ; or registered language subtag
1269
 *
1270
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1271
 *                 *2("-" 3ALPHA)      ; permanently reserved
1272
 *
1273
 * script        = 4ALPHA              ; ISO 15924 code
1274
 *
1275
 * region        = 2ALPHA              ; ISO 3166-1 code
1276
 *               / 3DIGIT              ; UN M.49 code
1277
 *
1278
 * variant       = 5*8alphanum         ; registered variants
1279
 *               / (DIGIT 3alphanum)
1280
 *
1281
 * extension     = singleton 1*("-" (2*8alphanum))
1282
 *
1283
 *                                     ; Single alphanumerics
1284
 *                                     ; "x" reserved for private use
1285
 * singleton     = DIGIT               ; 0 - 9
1286
 *               / %x41-57             ; A - W
1287
 *               / %x59-5A             ; Y - Z
1288
 *               / %x61-77             ; a - w
1289
 *               / %x79-7A             ; y - z
1290
 *
1291
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1292
 * The parser below doesn't try to cope with extension or privateuse
1293
 * that could be added but that's not interoperable anyway
1294
 *
1295
 * Returns 1 if correct 0 otherwise
1296
 **/
1297
int
1298
xmlCheckLanguageID(const xmlChar * lang)
1299
84.9k
{
1300
84.9k
    const xmlChar *cur = lang, *nxt;
1301
1302
84.9k
    if (cur == NULL)
1303
3.20k
        return (0);
1304
81.7k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1305
81.7k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1306
81.7k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1307
81.7k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1308
        /*
1309
         * Still allow IANA code and user code which were coming
1310
         * from the previous version of the XML-1.0 specification
1311
         * it's deprecated but we should not fail
1312
         */
1313
3.29k
        cur += 2;
1314
9.88k
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315
9.88k
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1316
6.58k
            cur++;
1317
3.29k
        return(cur[0] == 0);
1318
3.29k
    }
1319
78.4k
    nxt = cur;
1320
259k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1321
259k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1322
181k
           nxt++;
1323
78.4k
    if (nxt - cur >= 4) {
1324
        /*
1325
         * Reserved
1326
         */
1327
4.63k
        if ((nxt - cur > 8) || (nxt[0] != 0))
1328
3.18k
            return(0);
1329
1.44k
        return(1);
1330
4.63k
    }
1331
73.8k
    if (nxt - cur < 2)
1332
8.45k
        return(0);
1333
    /* we got an ISO 639 code */
1334
65.3k
    if (nxt[0] == 0)
1335
2.16k
        return(1);
1336
63.2k
    if (nxt[0] != '-')
1337
3.29k
        return(0);
1338
1339
59.9k
    nxt++;
1340
59.9k
    cur = nxt;
1341
    /* now we can have extlang or script or region or variant */
1342
59.9k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1343
12.1k
        goto region_m49;
1344
1345
209k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1346
209k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1347
161k
           nxt++;
1348
47.7k
    if (nxt - cur == 4)
1349
18.7k
        goto script;
1350
28.9k
    if (nxt - cur == 2)
1351
7.50k
        goto region;
1352
21.4k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1353
2.22k
        goto variant;
1354
19.2k
    if (nxt - cur != 3)
1355
2.25k
        return(0);
1356
    /* we parsed an extlang */
1357
16.9k
    if (nxt[0] == 0)
1358
613
        return(1);
1359
16.3k
    if (nxt[0] != '-')
1360
1.77k
        return(0);
1361
1362
14.5k
    nxt++;
1363
14.5k
    cur = nxt;
1364
    /* now we can have script or region or variant */
1365
14.5k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1366
1.19k
        goto region_m49;
1367
1368
76.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1369
76.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1370
63.1k
           nxt++;
1371
13.3k
    if (nxt - cur == 2)
1372
2.22k
        goto region;
1373
11.1k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1374
1.29k
        goto variant;
1375
9.87k
    if (nxt - cur != 4)
1376
4.72k
        return(0);
1377
    /* we parsed a script */
1378
23.9k
script:
1379
23.9k
    if (nxt[0] == 0)
1380
405
        return(1);
1381
23.5k
    if (nxt[0] != '-')
1382
4.14k
        return(0);
1383
1384
19.3k
    nxt++;
1385
19.3k
    cur = nxt;
1386
    /* now we can have region or variant */
1387
19.3k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388
7.28k
        goto region_m49;
1389
1390
67.6k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391
67.6k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392
55.5k
           nxt++;
1393
1394
12.0k
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1395
4.91k
        goto variant;
1396
7.18k
    if (nxt - cur != 2)
1397
3.59k
        return(0);
1398
    /* we parsed a region */
1399
15.1k
region:
1400
15.1k
    if (nxt[0] == 0)
1401
1.70k
        return(1);
1402
13.4k
    if (nxt[0] != '-')
1403
6.62k
        return(0);
1404
1405
6.80k
    nxt++;
1406
6.80k
    cur = nxt;
1407
    /* now we can just have a variant */
1408
44.5k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1409
44.5k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1410
37.7k
           nxt++;
1411
1412
6.80k
    if ((nxt - cur < 5) || (nxt - cur > 8))
1413
5.03k
        return(0);
1414
1415
    /* we parsed a variant */
1416
10.2k
variant:
1417
10.2k
    if (nxt[0] == 0)
1418
1.06k
        return(1);
1419
9.14k
    if (nxt[0] != '-')
1420
4.34k
        return(0);
1421
    /* extensions and private use subtags not checked */
1422
4.79k
    return (1);
1423
1424
20.6k
region_m49:
1425
20.6k
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1426
20.6k
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1427
1.81k
        nxt += 3;
1428
1.81k
        goto region;
1429
1.81k
    }
1430
18.8k
    return(0);
1431
20.6k
}
1432
1433
/************************************************************************
1434
 *                  *
1435
 *    Parser stacks related functions and macros    *
1436
 *                  *
1437
 ************************************************************************/
1438
1439
static xmlChar *
1440
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1441
1442
/**
1443
 * xmlParserNsCreate:
1444
 *
1445
 * Create a new namespace database.
1446
 *
1447
 * Returns the new obejct.
1448
 */
1449
xmlParserNsData *
1450
220k
xmlParserNsCreate(void) {
1451
220k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1452
1453
220k
    if (nsdb == NULL)
1454
81
        return(NULL);
1455
220k
    memset(nsdb, 0, sizeof(*nsdb));
1456
220k
    nsdb->defaultNsIndex = INT_MAX;
1457
1458
220k
    return(nsdb);
1459
220k
}
1460
1461
/**
1462
 * xmlParserNsFree:
1463
 * @nsdb: namespace database
1464
 *
1465
 * Free a namespace database.
1466
 */
1467
void
1468
220k
xmlParserNsFree(xmlParserNsData *nsdb) {
1469
220k
    if (nsdb == NULL)
1470
0
        return;
1471
1472
220k
    xmlFree(nsdb->extra);
1473
220k
    xmlFree(nsdb->hash);
1474
220k
    xmlFree(nsdb);
1475
220k
}
1476
1477
/**
1478
 * xmlParserNsReset:
1479
 * @nsdb: namespace database
1480
 *
1481
 * Reset a namespace database.
1482
 */
1483
static void
1484
91.8k
xmlParserNsReset(xmlParserNsData *nsdb) {
1485
91.8k
    if (nsdb == NULL)
1486
0
        return;
1487
1488
91.8k
    nsdb->hashElems = 0;
1489
91.8k
    nsdb->elementId = 0;
1490
91.8k
    nsdb->defaultNsIndex = INT_MAX;
1491
1492
91.8k
    if (nsdb->hash)
1493
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1494
91.8k
}
1495
1496
/**
1497
 * xmlParserStartElement:
1498
 * @nsdb: namespace database
1499
 *
1500
 * Signal that a new element has started.
1501
 *
1502
 * Returns 0 on success, -1 if the element counter overflowed.
1503
 */
1504
static int
1505
4.31M
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1506
4.31M
    if (nsdb->elementId == UINT_MAX)
1507
0
        return(-1);
1508
4.31M
    nsdb->elementId++;
1509
1510
4.31M
    return(0);
1511
4.31M
}
1512
1513
/**
1514
 * xmlParserNsLookup:
1515
 * @ctxt: parser context
1516
 * @prefix: namespace prefix
1517
 * @bucketPtr: optional bucket (return value)
1518
 *
1519
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1520
 * be set to the matching bucket, or the first empty bucket if no match
1521
 * was found.
1522
 *
1523
 * Returns the namespace index on success, INT_MAX if no namespace was
1524
 * found.
1525
 */
1526
static int
1527
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1528
6.88M
                  xmlParserNsBucket **bucketPtr) {
1529
6.88M
    xmlParserNsBucket *bucket;
1530
6.88M
    unsigned index, hashValue;
1531
1532
6.88M
    if (prefix->name == NULL)
1533
3.71M
        return(ctxt->nsdb->defaultNsIndex);
1534
1535
3.17M
    if (ctxt->nsdb->hashSize == 0)
1536
202k
        return(INT_MAX);
1537
1538
2.97M
    hashValue = prefix->hashValue;
1539
2.97M
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1540
2.97M
    bucket = &ctxt->nsdb->hash[index];
1541
1542
271M
    while (bucket->hashValue) {
1543
271M
        if ((bucket->hashValue == hashValue) &&
1544
271M
            (bucket->index != INT_MAX)) {
1545
2.49M
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
2.48M
                if (bucketPtr != NULL)
1547
708k
                    *bucketPtr = bucket;
1548
2.48M
                return(bucket->index);
1549
2.48M
            }
1550
2.49M
        }
1551
1552
268M
        index++;
1553
268M
        bucket++;
1554
268M
        if (index == ctxt->nsdb->hashSize) {
1555
282k
            index = 0;
1556
282k
            bucket = ctxt->nsdb->hash;
1557
282k
        }
1558
268M
    }
1559
1560
489k
    if (bucketPtr != NULL)
1561
182k
        *bucketPtr = bucket;
1562
489k
    return(INT_MAX);
1563
2.97M
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
3.96M
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
3.96M
    const xmlChar *ret;
1577
3.96M
    int nsIndex;
1578
1579
3.96M
    if (prefix->name == ctxt->str_xml)
1580
2.57k
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
3.96M
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
3.96M
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
2.77M
        return(NULL);
1589
1590
1.18M
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
1.18M
    if (ret[0] == 0)
1592
12.5k
        ret = NULL;
1593
1.18M
    return(ret);
1594
3.96M
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
1.07M
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
1.07M
    xmlHashedString hprefix;
1609
1.07M
    int nsIndex;
1610
1611
1.07M
    if (prefix == ctxt->str_xml)
1612
187k
        return(NULL);
1613
1614
882k
    hprefix.name = prefix;
1615
882k
    if (prefix != NULL)
1616
636k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
246k
    else
1618
246k
        hprefix.hashValue = 0;
1619
882k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
882k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
882k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
882k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
646k
                     void *saxData) {
1641
646k
    xmlHashedString hprefix;
1642
646k
    int nsIndex;
1643
1644
646k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
646k
    hprefix.name = prefix;
1648
646k
    if (prefix != NULL)
1649
396k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
250k
    else
1651
250k
        hprefix.hashValue = 0;
1652
646k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
646k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
646k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
646k
    return(0);
1658
646k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
47.6k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
47.6k
    const xmlChar **table;
1671
47.6k
    xmlParserNsExtra *extra;
1672
47.6k
    int newSize;
1673
1674
47.6k
    if (ctxt->nsMax > INT_MAX / 2)
1675
0
        goto error;
1676
47.6k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1677
1678
47.6k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1679
47.6k
    if (table == NULL)
1680
72
        goto error;
1681
47.5k
    ctxt->nsTab = table;
1682
1683
47.5k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1684
47.5k
    if (extra == NULL)
1685
60
        goto error;
1686
47.4k
    ctxt->nsdb->extra = extra;
1687
1688
47.4k
    ctxt->nsMax = newSize;
1689
47.4k
    return(0);
1690
1691
132
error:
1692
132
    xmlErrMemory(ctxt);
1693
132
    return(-1);
1694
47.5k
}
1695
1696
/**
1697
 * xmlParserNsPush:
1698
 * @ctxt: parser context
1699
 * @prefix: prefix with hash value
1700
 * @uri: uri with hash value
1701
 * @saxData: extra data for SAX handler
1702
 * @defAttr: whether the namespace comes from a default attribute
1703
 *
1704
 * Push a new namespace on the table.
1705
 *
1706
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1707
 * -1 if a memory allocation failed.
1708
 */
1709
static int
1710
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1711
820k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1712
820k
    xmlParserNsBucket *bucket = NULL;
1713
820k
    xmlParserNsExtra *extra;
1714
820k
    const xmlChar **ns;
1715
820k
    unsigned hashValue, nsIndex, oldIndex;
1716
1717
820k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1718
1.32k
        return(0);
1719
1720
819k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1721
132
        xmlErrMemory(ctxt);
1722
132
        return(-1);
1723
132
    }
1724
1725
    /*
1726
     * Default namespace and 'xml' namespace
1727
     */
1728
819k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1729
333k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1730
1731
333k
        if (oldIndex != INT_MAX) {
1732
294k
            extra = &ctxt->nsdb->extra[oldIndex];
1733
1734
294k
            if (extra->elementId == ctxt->nsdb->elementId) {
1735
33.7k
                if (defAttr == 0)
1736
12.8k
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1737
33.7k
                return(0);
1738
33.7k
            }
1739
1740
261k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741
261k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1742
13.2k
                return(0);
1743
261k
        }
1744
1745
286k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1746
286k
        goto populate_entry;
1747
333k
    }
1748
1749
    /*
1750
     * Hash table lookup
1751
     */
1752
485k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1753
485k
    if (oldIndex != INT_MAX) {
1754
269k
        extra = &ctxt->nsdb->extra[oldIndex];
1755
1756
        /*
1757
         * Check for duplicate definitions on the same element.
1758
         */
1759
269k
        if (extra->elementId == ctxt->nsdb->elementId) {
1760
9.38k
            if (defAttr == 0)
1761
8.93k
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1762
9.38k
            return(0);
1763
9.38k
        }
1764
1765
260k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1766
260k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1767
15.1k
            return(0);
1768
1769
244k
        bucket->index = ctxt->nsNr;
1770
244k
        goto populate_entry;
1771
260k
    }
1772
1773
    /*
1774
     * Insert new bucket
1775
     */
1776
1777
216k
    hashValue = prefix->hashValue;
1778
1779
    /*
1780
     * Grow hash table, 50% fill factor
1781
     */
1782
216k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1783
38.5k
        xmlParserNsBucket *newHash;
1784
38.5k
        unsigned newSize, i, index;
1785
1786
38.5k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1787
0
            xmlErrMemory(ctxt);
1788
0
            return(-1);
1789
0
        }
1790
38.5k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1791
38.5k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1792
38.5k
        if (newHash == NULL) {
1793
25
            xmlErrMemory(ctxt);
1794
25
            return(-1);
1795
25
        }
1796
38.5k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1797
1798
513k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1799
475k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1800
475k
            unsigned newIndex;
1801
1802
475k
            if (hv == 0)
1803
237k
                continue;
1804
237k
            newIndex = hv & (newSize - 1);
1805
1806
55.7M
            while (newHash[newIndex].hashValue != 0) {
1807
55.4M
                newIndex++;
1808
55.4M
                if (newIndex == newSize)
1809
38.6k
                    newIndex = 0;
1810
55.4M
            }
1811
1812
237k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1813
237k
        }
1814
1815
38.5k
        xmlFree(ctxt->nsdb->hash);
1816
38.5k
        ctxt->nsdb->hash = newHash;
1817
38.5k
        ctxt->nsdb->hashSize = newSize;
1818
1819
        /*
1820
         * Relookup
1821
         */
1822
38.5k
        index = hashValue & (newSize - 1);
1823
1824
201k
        while (newHash[index].hashValue != 0) {
1825
162k
            index++;
1826
162k
            if (index == newSize)
1827
1.06k
                index = 0;
1828
162k
        }
1829
1830
38.5k
        bucket = &newHash[index];
1831
38.5k
    }
1832
1833
216k
    bucket->hashValue = hashValue;
1834
216k
    bucket->index = ctxt->nsNr;
1835
216k
    ctxt->nsdb->hashElems++;
1836
216k
    oldIndex = INT_MAX;
1837
1838
747k
populate_entry:
1839
747k
    nsIndex = ctxt->nsNr;
1840
1841
747k
    ns = &ctxt->nsTab[nsIndex * 2];
1842
747k
    ns[0] = prefix ? prefix->name : NULL;
1843
747k
    ns[1] = uri->name;
1844
1845
747k
    extra = &ctxt->nsdb->extra[nsIndex];
1846
747k
    extra->saxData = saxData;
1847
747k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1848
747k
    extra->uriHashValue = uri->hashValue;
1849
747k
    extra->elementId = ctxt->nsdb->elementId;
1850
747k
    extra->oldIndex = oldIndex;
1851
1852
747k
    ctxt->nsNr++;
1853
1854
747k
    return(1);
1855
216k
}
1856
1857
/**
1858
 * xmlParserNsPop:
1859
 * @ctxt: an XML parser context
1860
 * @nr:  the number to pop
1861
 *
1862
 * Pops the top @nr namespaces and restores the hash table.
1863
 *
1864
 * Returns the number of namespaces popped.
1865
 */
1866
static int
1867
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1868
434k
{
1869
434k
    int i;
1870
1871
    /* assert(nr <= ctxt->nsNr); */
1872
1873
1.11M
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1874
685k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1875
685k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1876
1877
685k
        if (prefix == NULL) {
1878
246k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1879
438k
        } else {
1880
438k
            xmlHashedString hprefix;
1881
438k
            xmlParserNsBucket *bucket = NULL;
1882
1883
438k
            hprefix.name = prefix;
1884
438k
            hprefix.hashValue = extra->prefixHashValue;
1885
438k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1886
            /* assert(bucket && bucket->hashValue); */
1887
438k
            bucket->index = extra->oldIndex;
1888
438k
        }
1889
685k
    }
1890
1891
434k
    ctxt->nsNr -= nr;
1892
434k
    return(nr);
1893
434k
}
1894
1895
static int
1896
51.4k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1897
51.4k
    const xmlChar **atts;
1898
51.4k
    unsigned *attallocs;
1899
51.4k
    int maxatts;
1900
1901
51.4k
    if (nr + 5 > ctxt->maxatts) {
1902
51.4k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1903
51.4k
  atts = (const xmlChar **) xmlMalloc(
1904
51.4k
             maxatts * sizeof(const xmlChar *));
1905
51.4k
  if (atts == NULL) goto mem_error;
1906
51.3k
  attallocs = xmlRealloc(ctxt->attallocs,
1907
51.3k
                               (maxatts / 5) * sizeof(attallocs[0]));
1908
51.3k
  if (attallocs == NULL) {
1909
99
            xmlFree(atts);
1910
99
            goto mem_error;
1911
99
        }
1912
51.2k
        if (ctxt->maxatts > 0)
1913
2.25k
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1914
51.2k
        xmlFree(ctxt->atts);
1915
51.2k
  ctxt->atts = atts;
1916
51.2k
  ctxt->attallocs = attallocs;
1917
51.2k
  ctxt->maxatts = maxatts;
1918
51.2k
    }
1919
51.2k
    return(ctxt->maxatts);
1920
176
mem_error:
1921
176
    xmlErrMemory(ctxt);
1922
176
    return(-1);
1923
51.4k
}
1924
1925
/**
1926
 * inputPush:
1927
 * @ctxt:  an XML parser context
1928
 * @value:  the parser input
1929
 *
1930
 * Pushes a new parser input on top of the input stack
1931
 *
1932
 * Returns -1 in case of error, the index in the stack otherwise
1933
 */
1934
int
1935
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1936
883k
{
1937
883k
    if ((ctxt == NULL) || (value == NULL))
1938
0
        return(-1);
1939
883k
    if (ctxt->inputNr >= ctxt->inputMax) {
1940
503
        size_t newSize = ctxt->inputMax * 2;
1941
503
        xmlParserInputPtr *tmp;
1942
1943
503
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1944
503
                                               newSize * sizeof(*tmp));
1945
503
        if (tmp == NULL) {
1946
4
            xmlErrMemory(ctxt);
1947
4
            return (-1);
1948
4
        }
1949
499
        ctxt->inputTab = tmp;
1950
499
        ctxt->inputMax = newSize;
1951
499
    }
1952
883k
    ctxt->inputTab[ctxt->inputNr] = value;
1953
883k
    ctxt->input = value;
1954
883k
    return (ctxt->inputNr++);
1955
883k
}
1956
/**
1957
 * inputPop:
1958
 * @ctxt: an XML parser context
1959
 *
1960
 * Pops the top parser input from the input stack
1961
 *
1962
 * Returns the input just removed
1963
 */
1964
xmlParserInputPtr
1965
inputPop(xmlParserCtxtPtr ctxt)
1966
1.44M
{
1967
1.44M
    xmlParserInputPtr ret;
1968
1969
1.44M
    if (ctxt == NULL)
1970
0
        return(NULL);
1971
1.44M
    if (ctxt->inputNr <= 0)
1972
569k
        return (NULL);
1973
878k
    ctxt->inputNr--;
1974
878k
    if (ctxt->inputNr > 0)
1975
613k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1976
265k
    else
1977
265k
        ctxt->input = NULL;
1978
878k
    ret = ctxt->inputTab[ctxt->inputNr];
1979
878k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1980
878k
    return (ret);
1981
1.44M
}
1982
/**
1983
 * nodePush:
1984
 * @ctxt:  an XML parser context
1985
 * @value:  the element node
1986
 *
1987
 * DEPRECATED: Internal function, do not use.
1988
 *
1989
 * Pushes a new element node on top of the node stack
1990
 *
1991
 * Returns -1 in case of error, the index in the stack otherwise
1992
 */
1993
int
1994
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1995
4.53M
{
1996
4.53M
    int maxDepth;
1997
1998
4.53M
    if (ctxt == NULL)
1999
0
        return(0);
2000
2001
4.53M
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2002
4.53M
    if (ctxt->nodeNr > maxDepth) {
2003
69
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004
69
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2005
69
                ctxt->nodeNr);
2006
69
        xmlHaltParser(ctxt);
2007
69
        return(-1);
2008
69
    }
2009
4.53M
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2010
35.3k
        xmlNodePtr *tmp;
2011
2012
35.3k
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2013
35.3k
                                      ctxt->nodeMax * 2 *
2014
35.3k
                                      sizeof(ctxt->nodeTab[0]));
2015
35.3k
        if (tmp == NULL) {
2016
29
            xmlErrMemory(ctxt);
2017
29
            return (-1);
2018
29
        }
2019
35.2k
        ctxt->nodeTab = tmp;
2020
35.2k
  ctxt->nodeMax *= 2;
2021
35.2k
    }
2022
4.53M
    ctxt->nodeTab[ctxt->nodeNr] = value;
2023
4.53M
    ctxt->node = value;
2024
4.53M
    return (ctxt->nodeNr++);
2025
4.53M
}
2026
2027
/**
2028
 * nodePop:
2029
 * @ctxt: an XML parser context
2030
 *
2031
 * DEPRECATED: Internal function, do not use.
2032
 *
2033
 * Pops the top element node from the node stack
2034
 *
2035
 * Returns the node just removed
2036
 */
2037
xmlNodePtr
2038
nodePop(xmlParserCtxtPtr ctxt)
2039
4.15M
{
2040
4.15M
    xmlNodePtr ret;
2041
2042
4.15M
    if (ctxt == NULL) return(NULL);
2043
4.15M
    if (ctxt->nodeNr <= 0)
2044
121k
        return (NULL);
2045
4.03M
    ctxt->nodeNr--;
2046
4.03M
    if (ctxt->nodeNr > 0)
2047
3.97M
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2048
60.8k
    else
2049
60.8k
        ctxt->node = NULL;
2050
4.03M
    ret = ctxt->nodeTab[ctxt->nodeNr];
2051
4.03M
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2052
4.03M
    return (ret);
2053
4.15M
}
2054
2055
/**
2056
 * nameNsPush:
2057
 * @ctxt:  an XML parser context
2058
 * @value:  the element name
2059
 * @prefix:  the element prefix
2060
 * @URI:  the element namespace name
2061
 * @line:  the current line number for error messages
2062
 * @nsNr:  the number of namespaces pushed on the namespace table
2063
 *
2064
 * Pushes a new element name/prefix/URL on top of the name stack
2065
 *
2066
 * Returns -1 in case of error, the index in the stack otherwise
2067
 */
2068
static int
2069
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2070
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2071
3.96M
{
2072
3.96M
    xmlStartTag *tag;
2073
2074
3.96M
    if (ctxt->nameNr >= ctxt->nameMax) {
2075
35.0k
        const xmlChar * *tmp;
2076
35.0k
        xmlStartTag *tmp2;
2077
35.0k
        ctxt->nameMax *= 2;
2078
35.0k
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2079
35.0k
                                    ctxt->nameMax *
2080
35.0k
                                    sizeof(ctxt->nameTab[0]));
2081
35.0k
        if (tmp == NULL) {
2082
27
      ctxt->nameMax /= 2;
2083
27
      goto mem_error;
2084
27
        }
2085
35.0k
  ctxt->nameTab = tmp;
2086
35.0k
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2087
35.0k
                                    ctxt->nameMax *
2088
35.0k
                                    sizeof(ctxt->pushTab[0]));
2089
35.0k
        if (tmp2 == NULL) {
2090
30
      ctxt->nameMax /= 2;
2091
30
      goto mem_error;
2092
30
        }
2093
35.0k
  ctxt->pushTab = tmp2;
2094
3.92M
    } else if (ctxt->pushTab == NULL) {
2095
124k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2096
124k
                                            sizeof(ctxt->pushTab[0]));
2097
124k
        if (ctxt->pushTab == NULL)
2098
260
            goto mem_error;
2099
124k
    }
2100
3.96M
    ctxt->nameTab[ctxt->nameNr] = value;
2101
3.96M
    ctxt->name = value;
2102
3.96M
    tag = &ctxt->pushTab[ctxt->nameNr];
2103
3.96M
    tag->prefix = prefix;
2104
3.96M
    tag->URI = URI;
2105
3.96M
    tag->line = line;
2106
3.96M
    tag->nsNr = nsNr;
2107
3.96M
    return (ctxt->nameNr++);
2108
317
mem_error:
2109
317
    xmlErrMemory(ctxt);
2110
317
    return (-1);
2111
3.96M
}
2112
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Pops the top element name from the name stack. ctxt->name is set to the
 * entry below it, or to NULL when the stack becomes empty, and the vacated
 * slot is cleared. Only compiled when LIBXML_PUSH_ENABLED is defined.
 *
 * Returns the name just removed, or NULL if the stack is empty.
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2138
2139
/**
2140
 * namePush:
2141
 * @ctxt:  an XML parser context
2142
 * @value:  the element name
2143
 *
2144
 * DEPRECATED: Internal function, do not use.
2145
 *
2146
 * Pushes a new element name on top of the name stack
2147
 *
2148
 * Returns -1 in case of error, the index in the stack otherwise
2149
 */
2150
int
2151
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2152
0
{
2153
0
    if (ctxt == NULL) return (-1);
2154
2155
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2156
0
        const xmlChar * *tmp;
2157
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2158
0
                                    ctxt->nameMax * 2 *
2159
0
                                    sizeof(ctxt->nameTab[0]));
2160
0
        if (tmp == NULL) {
2161
0
      goto mem_error;
2162
0
        }
2163
0
  ctxt->nameTab = tmp;
2164
0
        ctxt->nameMax *= 2;
2165
0
    }
2166
0
    ctxt->nameTab[ctxt->nameNr] = value;
2167
0
    ctxt->name = value;
2168
0
    return (ctxt->nameNr++);
2169
0
mem_error:
2170
0
    xmlErrMemory(ctxt);
2171
0
    return (-1);
2172
0
}
2173
2174
/**
2175
 * namePop:
2176
 * @ctxt: an XML parser context
2177
 *
2178
 * DEPRECATED: Internal function, do not use.
2179
 *
2180
 * Pops the top element name from the name stack
2181
 *
2182
 * Returns the name just removed
2183
 */
2184
const xmlChar *
2185
namePop(xmlParserCtxtPtr ctxt)
2186
3.15M
{
2187
3.15M
    const xmlChar *ret;
2188
2189
3.15M
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2190
86
        return (NULL);
2191
3.14M
    ctxt->nameNr--;
2192
3.14M
    if (ctxt->nameNr > 0)
2193
3.11M
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2194
34.3k
    else
2195
34.3k
        ctxt->name = NULL;
2196
3.14M
    ret = ctxt->nameTab[ctxt->nameNr];
2197
3.14M
    ctxt->nameTab[ctxt->nameNr] = NULL;
2198
3.14M
    return (ret);
2199
3.15M
}
2200
2201
4.85M
/*
 * spacePush:
 * Pushes @val on the ctxt->spaceTab stack and points ctxt->space at the
 * new top entry. The array is doubled when it is full.
 *
 * Returns the index at which @val was stored, or -1 after reporting a
 * memory error (the recorded capacity is restored in that case).
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr++;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    return(slot);
}
2219
2220
4.40M
/*
 * spacePop:
 * Pops the top value from the ctxt->spaceTab stack. ctxt->space is moved
 * to the entry below it, or to the first array slot when the stack becomes
 * empty, and the vacated slot is overwritten with -1.
 *
 * Returns the value removed, or 0 if the stack is empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2232
2233
/*
2234
 * Macros for accessing the content. Those should be used only by the parser,
2235
 * and not exported.
2236
 *
2237
 * Dirty macros, i.e. one often needs to make assumptions about the context to
2238
 * use them
2239
 *
2240
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2241
 *           To be used with extreme caution since operations consuming
2242
 *           characters may move the input buffer to a different location !
2243
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
2244
 *           This should be used internally by the parser
2245
 *           only to compare to ASCII values otherwise it would break when
2246
 *           running with UTF-8 encoding.
2247
 *   RAW     same as CUR but in the input buffer, bypass any token
2248
 *           extraction that may have been done
2249
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2250
 *           to compare on ASCII based substring.
2251
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2252
 *           strings without newlines within the parser.
2253
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2254
 *           defined char within the parser.
2255
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2256
 *
2257
 *   NEXT    Skip to the next character, this does the proper decoding
2258
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2259
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2260
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2261
 *           to the number of xmlChars used for the encoding [0-5].
2262
 *   CUR_SCHAR  same but operate on a string instead of the context
2263
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2264
 *            the index
2265
 *   GROW, SHRINK  handling of input buffers
2266
 */
2267
2268
108M
/*
 * Raw accessors for the current input. All of them expect a variable
 * named "ctxt" in scope whose input member is non-NULL.
 *
 * RAW and CUR are defined identically: the byte at the current position.
 * NXT(val) is the byte val positions further on. CUR_PTR and BASE_PTR
 * expand to the cur and base members of the current input.
 */
#define RAW      (*ctxt->input->cur)
#define CUR      (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR  ctxt->input->cur
#define BASE_PTR ctxt->input->base
2273
2274
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char, so constants above 0x7F work
 * regardless of the signedness of plain char. Each macro builds on the
 * next smaller one, so evaluation stops at the first mismatching byte.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments (pointer arithmetic, conditionals) are cast as a whole, and
 * the cast keeps the pointee const. s is evaluated once per compared
 * byte, so it must not have side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2291
2292
19.5M
/*
 * SKIP(val): advance the current input position and the column counter
 * by val bytes, then call xmlParserGrow() if the new position holds a
 * 0 byte. Line numbers are not updated. val is evaluated twice.
 */
#define SKIP(val) do {                                                  \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)
2297
2298
223k
/*
 * SKIPL(val): advance the current input position by val bytes one byte
 * at a time, bumping the line counter and resetting the column to 1 for
 * every '\n' passed and bumping the column otherwise. Afterwards
 * xmlParserGrow() is called if the new position holds a 0 byte.
 *
 * val is parenthesized in the loop test so that expression arguments
 * such as "x & 3" are compared as a whole. It is still evaluated on
 * every iteration, so it must not have side effects.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for (skipl = 0; skipl < (val); skipl++) {                           \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == 0)                                         \
        xmlParserGrow(ctxt);                                            \
  } while (0)
2309
2310
/*
 * SHRINK: unless PARSER_PROGRESSIVE(ctxt) is set, call xmlParserShrink()
 * when more than 2 * INPUT_CHUNK bytes lie between base and cur and
 * fewer than 2 * INPUT_CHUNK bytes remain between cur and end.
 *
 * GROW: unless PARSER_PROGRESSIVE(ctxt) is set, call xmlParserGrow()
 * when fewer than INPUT_CHUNK bytes remain between cur and end.
 *
 * Both expand to a bare "if" statement that already ends in a semicolon;
 * they are not wrapped in do { } while (0), so take care when using them
 * as the body of an unbraced if/else.
 */
#define SHRINK                                                          \
    if ((!PARSER_PROGRESSIVE(ctxt)) &&                                  \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) &&     \
        (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK))        \
        xmlParserShrink(ctxt);

#define GROW                                                            \
    if ((!PARSER_PROGRESSIVE(ctxt)) &&                                  \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))            \
        xmlParserGrow(ctxt);
2320
2321
14.4M
/* Shorthands for the parser's character-level helper functions. */
#define SKIP_BLANKS     xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE  xmlSkipBlankCharsPE(ctxt)

#define NEXT            xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, bumping the column counter, then
 * call xmlParserGrow() if the new position holds a 0 byte. No newline
 * handling. Expands to a plain brace block, not do { } while (0).
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserGrow(ctxt);                                        \
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long. The
 * line counter is bumped and the column reset to 1 if the current byte
 * is '\n', otherwise the column is bumped by one. Does not grow the
 * input.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                                              \
  } while (0)

/* Decode the current character; l receives its length. */
#define CUR_CHAR(l)     xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(b, i, v): append character v to buffer b at index i and
 * advance i. Values below 0x80 are stored as a single byte, anything
 * else goes through xmlCopyCharMultiByte(). Expands to an unbraced
 * if/else without a trailing semicolon.
 */
#define COPY_BUF(b, i, v)                                               \
    if (v < 0x80) b[i++] = v;                                           \
    else i += xmlCopyCharMultiByte(&b[i],v)
2347
2348
/**
2349
 * xmlSkipBlankChars:
2350
 * @ctxt:  the XML parser context
2351
 *
2352
 * DEPRECATED: Internal function, do not use.
2353
 *
2354
 * Skip whitespace in the input stream.
2355
 *
2356
 * Returns the number of space chars skipped
2357
 */
2358
int
2359
17.3M
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2360
17.3M
    const xmlChar *cur;
2361
17.3M
    int res = 0;
2362
2363
    /*
2364
     * It's Okay to use CUR/NEXT here since all the blanks are on
2365
     * the ASCII range.
2366
     */
2367
17.3M
    cur = ctxt->input->cur;
2368
17.3M
    while (IS_BLANK_CH(*cur)) {
2369
12.8M
        if (*cur == '\n') {
2370
7.83M
            ctxt->input->line++; ctxt->input->col = 1;
2371
7.83M
        } else {
2372
5.04M
            ctxt->input->col++;
2373
5.04M
        }
2374
12.8M
        cur++;
2375
12.8M
        if (res < INT_MAX)
2376
12.8M
            res++;
2377
12.8M
        if (*cur == 0) {
2378
42.2k
            ctxt->input->cur = cur;
2379
42.2k
            xmlParserGrow(ctxt);
2380
42.2k
            cur = ctxt->input->cur;
2381
42.2k
        }
2382
12.8M
    }
2383
17.3M
    ctxt->input->cur = cur;
2384
2385
17.3M
    return(res);
2386
17.3M
}
2387
2388
static void
2389
588k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2390
588k
    unsigned long consumed;
2391
588k
    xmlEntityPtr ent;
2392
2393
588k
    ent = ctxt->input->entity;
2394
2395
588k
    ent->flags &= ~XML_ENT_EXPANDING;
2396
2397
588k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2398
23.5k
        int result;
2399
2400
        /*
2401
         * Read the rest of the stream in case of errors. We want
2402
         * to account for the whole entity size.
2403
         */
2404
34.7k
        do {
2405
34.7k
            ctxt->input->cur = ctxt->input->end;
2406
34.7k
            xmlParserShrink(ctxt);
2407
34.7k
            result = xmlParserGrow(ctxt);
2408
34.7k
        } while (result > 0);
2409
2410
23.5k
        consumed = ctxt->input->consumed;
2411
23.5k
        xmlSaturatedAddSizeT(&consumed,
2412
23.5k
                             ctxt->input->end - ctxt->input->base);
2413
2414
23.5k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2415
2416
        /*
2417
         * Add to sizeentities when parsing an external entity
2418
         * for the first time.
2419
         */
2420
23.5k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2421
10.6k
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2422
10.6k
        }
2423
2424
23.5k
        ent->flags |= XML_ENT_CHECKED;
2425
23.5k
    }
2426
2427
588k
    xmlPopInput(ctxt);
2428
2429
588k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2430
588k
}
2431
2432
/**
2433
 * xmlSkipBlankCharsPE:
2434
 * @ctxt:  the XML parser context
2435
 *
2436
 * Skip whitespace in the input stream, also handling parameter
2437
 * entities.
2438
 *
2439
 * Returns the number of space chars skipped
2440
 */
2441
static int
2442
11.3M
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2443
11.3M
    int res = 0;
2444
11.3M
    int inParam;
2445
11.3M
    int expandParam;
2446
2447
11.3M
    inParam = PARSER_IN_PE(ctxt);
2448
11.3M
    expandParam = PARSER_EXTERNAL(ctxt);
2449
2450
11.3M
    if (!inParam && !expandParam)
2451
2.90M
        return(xmlSkipBlankChars(ctxt));
2452
2453
20.1M
    while (PARSER_STOPPED(ctxt) == 0) {
2454
20.1M
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2455
9.82M
            NEXT;
2456
10.2M
        } else if (CUR == '%') {
2457
1.54M
            if ((expandParam == 0) ||
2458
1.54M
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2459
269k
                break;
2460
2461
            /*
2462
             * Expand parameter entity. We continue to consume
2463
             * whitespace at the start of the entity and possible
2464
             * even consume the whole entity and pop it. We might
2465
             * even pop multiple PEs in this loop.
2466
             */
2467
1.27M
            xmlParsePEReference(ctxt);
2468
2469
1.27M
            inParam = PARSER_IN_PE(ctxt);
2470
1.27M
            expandParam = PARSER_EXTERNAL(ctxt);
2471
8.74M
        } else if (CUR == 0) {
2472
588k
            if (inParam == 0)
2473
3.91k
                break;
2474
2475
584k
            xmlPopPE(ctxt);
2476
2477
584k
            inParam = PARSER_IN_PE(ctxt);
2478
584k
            expandParam = PARSER_EXTERNAL(ctxt);
2479
8.15M
        } else {
2480
8.15M
            break;
2481
8.15M
        }
2482
2483
        /*
2484
         * Also increase the counter when entering or exiting a PERef.
2485
         * The spec says: "When a parameter-entity reference is recognized
2486
         * in the DTD and included, its replacement text MUST be enlarged
2487
         * by the attachment of one leading and one following space (#x20)
2488
         * character."
2489
         */
2490
11.6M
        if (res < INT_MAX)
2491
11.6M
            res++;
2492
11.6M
    }
2493
2494
8.43M
    return(res);
2495
11.3M
}
2496
2497
/************************************************************************
2498
 *                  *
2499
 *    Commodity functions to handle entities      *
2500
 *                  *
2501
 ************************************************************************/
2502
2503
/**
2504
 * xmlPopInput:
2505
 * @ctxt:  an XML parser context
2506
 *
2507
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2508
 *          pop it and return the next char.
2509
 *
2510
 * Returns the current xmlChar in the parser context
2511
 */
2512
xmlChar
2513
588k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2514
588k
    xmlParserInputPtr input;
2515
2516
588k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2517
588k
    input = inputPop(ctxt);
2518
588k
    xmlFreeInputStream(input);
2519
588k
    if (*ctxt->input->cur == 0)
2520
5.18k
        xmlParserGrow(ctxt);
2521
588k
    return(CUR);
2522
588k
}
2523
2524
/**
2525
 * xmlPushInput:
2526
 * @ctxt:  an XML parser context
2527
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2528
 *
2529
 * Push an input stream onto the stack.
2530
 *
2531
 * This makes the parser use an input returned from advanced functions
2532
 * like xmlNewInputURL or xmlNewInputMemory.
2533
 *
2534
 * Returns -1 in case of error or the index in the input stack
2535
 */
2536
int
2537
805k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2538
805k
    int maxDepth;
2539
805k
    int ret;
2540
2541
805k
    if ((ctxt == NULL) || (input == NULL))
2542
186k
        return(-1);
2543
2544
618k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2545
618k
    if (ctxt->inputNr > maxDepth) {
2546
22
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2547
22
                       "Maximum entity nesting depth exceeded");
2548
22
        xmlHaltParser(ctxt);
2549
22
  return(-1);
2550
22
    }
2551
618k
    ret = inputPush(ctxt, input);
2552
618k
    GROW;
2553
618k
    return(ret);
2554
618k
}
2555
2556
/**
2557
 * xmlParseCharRef:
2558
 * @ctxt:  an XML parser context
2559
 *
2560
 * DEPRECATED: Internal function, don't use.
2561
 *
2562
 * Parse a numeric character reference. Always consumes '&'.
2563
 *
2564
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2565
 *                  '&#x' [0-9a-fA-F]+ ';'
2566
 *
2567
 * [ WFC: Legal Character ]
2568
 * Characters referred to using character references must match the
2569
 * production for Char.
2570
 *
2571
 * Returns the value parsed (as an int), 0 in case of error
2572
 */
2573
int
2574
474k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2575
474k
    int val = 0;
2576
474k
    int count = 0;
2577
2578
    /*
2579
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2580
     */
2581
474k
    if ((RAW == '&') && (NXT(1) == '#') &&
2582
474k
        (NXT(2) == 'x')) {
2583
261k
  SKIP(3);
2584
261k
  GROW;
2585
662k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2586
505k
      if (count++ > 20) {
2587
4.46k
    count = 0;
2588
4.46k
    GROW;
2589
4.46k
      }
2590
505k
      if ((RAW >= '0') && (RAW <= '9'))
2591
235k
          val = val * 16 + (CUR - '0');
2592
269k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2593
46.2k
          val = val * 16 + (CUR - 'a') + 10;
2594
223k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2595
118k
          val = val * 16 + (CUR - 'A') + 10;
2596
104k
      else {
2597
104k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2598
104k
    val = 0;
2599
104k
    break;
2600
104k
      }
2601
400k
      if (val > 0x110000)
2602
52.9k
          val = 0x110000;
2603
2604
400k
      NEXT;
2605
400k
      count++;
2606
400k
  }
2607
261k
  if (RAW == ';') {
2608
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2609
157k
      ctxt->input->col++;
2610
157k
      ctxt->input->cur++;
2611
157k
  }
2612
261k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2613
212k
  SKIP(2);
2614
212k
  GROW;
2615
704k
  while (RAW != ';') { /* loop blocked by count */
2616
545k
      if (count++ > 20) {
2617
10.2k
    count = 0;
2618
10.2k
    GROW;
2619
10.2k
      }
2620
545k
      if ((RAW >= '0') && (RAW <= '9'))
2621
492k
          val = val * 10 + (CUR - '0');
2622
53.4k
      else {
2623
53.4k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2624
53.4k
    val = 0;
2625
53.4k
    break;
2626
53.4k
      }
2627
492k
      if (val > 0x110000)
2628
109k
          val = 0x110000;
2629
2630
492k
      NEXT;
2631
492k
      count++;
2632
492k
  }
2633
212k
  if (RAW == ';') {
2634
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2635
158k
      ctxt->input->col++;
2636
158k
      ctxt->input->cur++;
2637
158k
  }
2638
212k
    } else {
2639
0
        if (RAW == '&')
2640
0
            SKIP(1);
2641
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2642
0
    }
2643
2644
    /*
2645
     * [ WFC: Legal Character ]
2646
     * Characters referred to using character references must match the
2647
     * production for Char.
2648
     */
2649
474k
    if (val >= 0x110000) {
2650
2.95k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2651
2.95k
                "xmlParseCharRef: character reference out of bounds\n",
2652
2.95k
          val);
2653
471k
    } else if (IS_CHAR(val)) {
2654
302k
        return(val);
2655
302k
    } else {
2656
169k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2657
169k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2658
169k
                    val);
2659
169k
    }
2660
172k
    return(0);
2661
474k
}
2662
2663
/**
2664
 * xmlParseStringCharRef:
2665
 * @ctxt:  an XML parser context
2666
 * @str:  a pointer to an index in the string
2667
 *
2668
 * parse Reference declarations, variant parsing from a string rather
2669
 * than an an input flow.
2670
 *
2671
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2672
 *                  '&#x' [0-9a-fA-F]+ ';'
2673
 *
2674
 * [ WFC: Legal Character ]
2675
 * Characters referred to using character references must match the
2676
 * production for Char.
2677
 *
2678
 * Returns the value parsed (as an int), 0 in case of error, str will be
2679
 *         updated to the current value of the index
2680
 */
2681
static int
2682
270k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2683
270k
    const xmlChar *ptr;
2684
270k
    xmlChar cur;
2685
270k
    int val = 0;
2686
2687
270k
    if ((str == NULL) || (*str == NULL)) return(0);
2688
270k
    ptr = *str;
2689
270k
    cur = *ptr;
2690
270k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2691
50.1k
  ptr += 3;
2692
50.1k
  cur = *ptr;
2693
201k
  while (cur != ';') { /* Non input consuming loop */
2694
157k
      if ((cur >= '0') && (cur <= '9'))
2695
64.8k
          val = val * 16 + (cur - '0');
2696
92.2k
      else if ((cur >= 'a') && (cur <= 'f'))
2697
31.3k
          val = val * 16 + (cur - 'a') + 10;
2698
60.9k
      else if ((cur >= 'A') && (cur <= 'F'))
2699
55.6k
          val = val * 16 + (cur - 'A') + 10;
2700
5.33k
      else {
2701
5.33k
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2702
5.33k
    val = 0;
2703
5.33k
    break;
2704
5.33k
      }
2705
151k
      if (val > 0x110000)
2706
51.8k
          val = 0x110000;
2707
2708
151k
      ptr++;
2709
151k
      cur = *ptr;
2710
151k
  }
2711
50.1k
  if (cur == ';')
2712
44.7k
      ptr++;
2713
220k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2714
220k
  ptr += 2;
2715
220k
  cur = *ptr;
2716
702k
  while (cur != ';') { /* Non input consuming loops */
2717
489k
      if ((cur >= '0') && (cur <= '9'))
2718
482k
          val = val * 10 + (cur - '0');
2719
7.16k
      else {
2720
7.16k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2721
7.16k
    val = 0;
2722
7.16k
    break;
2723
7.16k
      }
2724
482k
      if (val > 0x110000)
2725
29.2k
          val = 0x110000;
2726
2727
482k
      ptr++;
2728
482k
      cur = *ptr;
2729
482k
  }
2730
220k
  if (cur == ';')
2731
213k
      ptr++;
2732
220k
    } else {
2733
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2734
0
  return(0);
2735
0
    }
2736
270k
    *str = ptr;
2737
2738
    /*
2739
     * [ WFC: Legal Character ]
2740
     * Characters referred to using character references must match the
2741
     * production for Char.
2742
     */
2743
270k
    if (val >= 0x110000) {
2744
2.62k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2745
2.62k
                "xmlParseStringCharRef: character reference out of bounds\n",
2746
2.62k
                val);
2747
267k
    } else if (IS_CHAR(val)) {
2748
251k
        return(val);
2749
251k
    } else {
2750
16.2k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2751
16.2k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2752
16.2k
        val);
2753
16.2k
    }
2754
18.9k
    return(0);
2755
270k
}
2756
2757
/**
2758
 * xmlParserHandlePEReference:
2759
 * @ctxt:  the parser context
2760
 *
2761
 * DEPRECATED: Internal function, do not use.
2762
 *
2763
 * [69] PEReference ::= '%' Name ';'
2764
 *
2765
 * [ WFC: No Recursion ]
2766
 * A parsed entity must not contain a recursive
2767
 * reference to itself, either directly or indirectly.
2768
 *
2769
 * [ WFC: Entity Declared ]
2770
 * In a document without any DTD, a document with only an internal DTD
2771
 * subset which contains no parameter entity references, or a document
2772
 * with "standalone='yes'", ...  ... The declaration of a parameter
2773
 * entity must precede any reference to it...
2774
 *
2775
 * [ VC: Entity Declared ]
2776
 * In a document with an external subset or external parameter entities
2777
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2778
 * must precede any reference to it...
2779
 *
2780
 * [ WFC: In DTD ]
2781
 * Parameter-entity references may only appear in the DTD.
2782
 * NOTE: misleading but this is handled.
2783
 *
2784
 * A PEReference may have been detected in the current input stream
2785
 * the handling is done accordingly to
2786
 *      http://www.w3.org/TR/REC-xml#entproc
2787
 * i.e.
2788
 *   - Included in literal in entity values
2789
 *   - Included as Parameter Entity reference within DTDs
2790
 */
2791
void
2792
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2793
0
    xmlParsePEReference(ctxt);
2794
0
}
2795
2796
/**
2797
 * xmlStringLenDecodeEntities:
2798
 * @ctxt:  the parser context
2799
 * @str:  the input string
2800
 * @len: the string length
2801
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @end:  an end marker xmlChar, 0 if none
2803
 * @end2:  an end marker xmlChar, 0 if none
2804
 * @end3:  an end marker xmlChar, 0 if none
2805
 *
2806
 * DEPRECATED: Internal function, don't use.
2807
 *
2808
 * Returns A newly allocated string with the substitution done. The caller
2809
 *      must deallocate it !
2810
 */
2811
xmlChar *
2812
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813
                           int what ATTRIBUTE_UNUSED,
2814
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2815
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2816
0
        return(NULL);
2817
2818
0
    if ((str[len] != 0) ||
2819
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2820
0
        return(NULL);
2821
2822
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2823
0
}
2824
2825
/**
2826
 * xmlStringDecodeEntities:
2827
 * @ctxt:  the parser context
2828
 * @str:  the input string
2829
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2830
 * @end:  an end marker xmlChar, 0 if none
2831
 * @end2:  an end marker xmlChar, 0 if none
2832
 * @end3:  an end marker xmlChar, 0 if none
2833
 *
2834
 * DEPRECATED: Internal function, don't use.
2835
 *
2836
 * Returns A newly allocated string with the substitution done. The caller
2837
 *      must deallocate it !
2838
 */
2839
xmlChar *
2840
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2841
                        int what ATTRIBUTE_UNUSED,
2842
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2843
0
    if ((ctxt == NULL) || (str == NULL))
2844
0
        return(NULL);
2845
2846
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2847
0
        return(NULL);
2848
2849
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2850
0
}
2851
2852
/************************************************************************
2853
 *                  *
2854
 *    Commodity functions, cleanup needed ?     *
2855
 *                  *
2856
 ************************************************************************/
2857
2858
/**
2859
 * areBlanks:
2860
 * @ctxt:  an XML parser context
2861
 * @str:  a xmlChar *
2862
 * @len:  the size of @str
2863
 * @blank_chars: we know the chars are blanks
2864
 *
2865
 * Is this a sequence of blank chars that one can ignore ?
2866
 *
2867
 * Returns 1 if ignorable 0 otherwise.
2868
 */
2869
2870
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871
1.87M
                     int blank_chars) {
2872
1.87M
    int i;
2873
1.87M
    xmlNodePtr lastChild;
2874
2875
    /*
2876
     * Don't spend time trying to differentiate them, the same callback is
2877
     * used !
2878
     */
2879
1.87M
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880
1.00M
  return(0);
2881
2882
    /*
2883
     * Check for xml:space value.
2884
     */
2885
874k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886
874k
        (*(ctxt->space) == -2))
2887
712k
  return(0);
2888
2889
    /*
2890
     * Check that the string is made of blanks
2891
     */
2892
161k
    if (blank_chars == 0) {
2893
717k
  for (i = 0;i < len;i++)
2894
684k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2895
129k
    }
2896
2897
    /*
2898
     * Look if the element is mixed content in the DTD if available
2899
     */
2900
65.4k
    if (ctxt->node == NULL) return(0);
2901
65.4k
    if (ctxt->myDoc != NULL) {
2902
65.4k
        xmlElementPtr elemDecl = NULL;
2903
65.4k
        xmlDocPtr doc = ctxt->myDoc;
2904
65.4k
        const xmlChar *prefix = NULL;
2905
2906
65.4k
        if (ctxt->node->ns)
2907
9.46k
            prefix = ctxt->node->ns->prefix;
2908
65.4k
        if (doc->intSubset != NULL)
2909
25.5k
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2910
25.5k
                                      prefix);
2911
65.4k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2912
3.76k
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2913
3.76k
                                      prefix);
2914
65.4k
        if (elemDecl != NULL) {
2915
8.49k
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2916
5.46k
                return(1);
2917
3.03k
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2918
3.03k
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2919
1.20k
                return(0);
2920
3.03k
        }
2921
65.4k
    }
2922
2923
    /*
2924
     * Otherwise, heuristic :-\
2925
     */
2926
58.7k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2927
52.6k
    if ((ctxt->node->children == NULL) &&
2928
52.6k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2929
2930
48.5k
    lastChild = xmlGetLastChild(ctxt->node);
2931
48.5k
    if (lastChild == NULL) {
2932
17.6k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2933
17.6k
            (ctxt->node->content != NULL)) return(0);
2934
30.8k
    } else if (xmlNodeIsText(lastChild))
2935
2.41k
        return(0);
2936
28.4k
    else if ((ctxt->node->children != NULL) &&
2937
28.4k
             (xmlNodeIsText(ctxt->node->children)))
2938
2.19k
        return(0);
2939
43.9k
    return(1);
2940
48.5k
}
2941
2942
/************************************************************************
2943
 *                  *
2944
 *    Extra stuff for namespace support     *
2945
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2946
 *                  *
2947
 ************************************************************************/
2948
2949
/**
2950
 * xmlSplitQName:
2951
 * @ctxt:  an XML parser context
2952
 * @name:  an XML parser context
2953
 * @prefixOut:  a xmlChar **
2954
 *
2955
 * parse an UTF8 encoded XML qualified name string
2956
 *
2957
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2958
 *
2959
 * [NS 6] Prefix ::= NCName
2960
 *
2961
 * [NS 7] LocalPart ::= NCName
2962
 *
2963
 * Returns the local part, and prefix is updated
2964
 *   to get the Prefix if any.
2965
 */
2966
2967
xmlChar *
2968
1.13M
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2969
1.13M
    xmlChar buf[XML_MAX_NAMELEN + 5];
2970
1.13M
    xmlChar *buffer = NULL;
2971
1.13M
    int len = 0;
2972
1.13M
    int max = XML_MAX_NAMELEN;
2973
1.13M
    xmlChar *ret = NULL;
2974
1.13M
    xmlChar *prefix;
2975
1.13M
    const xmlChar *cur = name;
2976
1.13M
    int c;
2977
2978
1.13M
    if (prefixOut == NULL) return(NULL);
2979
1.13M
    *prefixOut = NULL;
2980
2981
1.13M
    if (cur == NULL) return(NULL);
2982
2983
#ifndef XML_XML_NAMESPACE
2984
    /* xml: prefix is not really a namespace */
2985
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986
        (cur[2] == 'l') && (cur[3] == ':'))
2987
  return(xmlStrdup(name));
2988
#endif
2989
2990
    /* nasty but well=formed */
2991
1.13M
    if (cur[0] == ':')
2992
8.93k
  return(xmlStrdup(name));
2993
2994
1.12M
    c = *cur++;
2995
5.76M
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996
4.64M
  buf[len++] = c;
2997
4.64M
  c = *cur++;
2998
4.64M
    }
2999
1.12M
    if (len >= max) {
3000
  /*
3001
   * Okay someone managed to make a huge name, so he's ready to pay
3002
   * for the processing speed.
3003
   */
3004
7.60k
  max = len * 2;
3005
3006
7.60k
  buffer = (xmlChar *) xmlMallocAtomic(max);
3007
7.60k
  if (buffer == NULL) {
3008
12
      xmlErrMemory(ctxt);
3009
12
      return(NULL);
3010
12
  }
3011
7.59k
  memcpy(buffer, buf, len);
3012
2.09M
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013
2.08M
      if (len + 10 > max) {
3014
7.22k
          xmlChar *tmp;
3015
3016
7.22k
    max *= 2;
3017
7.22k
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3018
7.22k
    if (tmp == NULL) {
3019
16
        xmlFree(buffer);
3020
16
        xmlErrMemory(ctxt);
3021
16
        return(NULL);
3022
16
    }
3023
7.20k
    buffer = tmp;
3024
7.20k
      }
3025
2.08M
      buffer[len++] = c;
3026
2.08M
      c = *cur++;
3027
2.08M
  }
3028
7.58k
  buffer[len] = 0;
3029
7.58k
    }
3030
3031
1.12M
    if ((c == ':') && (*cur == 0)) {
3032
16.3k
        if (buffer != NULL)
3033
876
      xmlFree(buffer);
3034
16.3k
  return(xmlStrdup(name));
3035
16.3k
    }
3036
3037
1.10M
    if (buffer == NULL) {
3038
1.09M
  ret = xmlStrndup(buf, len);
3039
1.09M
        if (ret == NULL) {
3040
519
      xmlErrMemory(ctxt);
3041
519
      return(NULL);
3042
519
        }
3043
1.09M
    } else {
3044
6.70k
  ret = buffer;
3045
6.70k
  buffer = NULL;
3046
6.70k
  max = XML_MAX_NAMELEN;
3047
6.70k
    }
3048
3049
3050
1.10M
    if (c == ':') {
3051
212k
  c = *cur;
3052
212k
        prefix = ret;
3053
212k
  if (c == 0) {
3054
0
      ret = xmlStrndup(BAD_CAST "", 0);
3055
0
            if (ret == NULL) {
3056
0
                xmlFree(prefix);
3057
0
                return(NULL);
3058
0
            }
3059
0
            *prefixOut = prefix;
3060
0
            return(ret);
3061
0
  }
3062
212k
  len = 0;
3063
3064
  /*
3065
   * Check that the first character is proper to start
3066
   * a new name
3067
   */
3068
212k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3069
212k
        ((c >= 0x41) && (c <= 0x5A)) ||
3070
212k
        (c == '_') || (c == ':'))) {
3071
28.3k
      int l;
3072
28.3k
      int first = CUR_SCHAR(cur, l);
3073
3074
28.3k
      if (!IS_LETTER(first) && (first != '_')) {
3075
17.8k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3076
17.8k
          "Name %s is not XML Namespace compliant\n",
3077
17.8k
          name);
3078
17.8k
      }
3079
28.3k
  }
3080
212k
  cur++;
3081
3082
2.05M
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3083
1.84M
      buf[len++] = c;
3084
1.84M
      c = *cur++;
3085
1.84M
  }
3086
212k
  if (len >= max) {
3087
      /*
3088
       * Okay someone managed to make a huge name, so he's ready to pay
3089
       * for the processing speed.
3090
       */
3091
8.06k
      max = len * 2;
3092
3093
8.06k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3094
8.06k
      if (buffer == NULL) {
3095
13
          xmlErrMemory(ctxt);
3096
13
                xmlFree(prefix);
3097
13
    return(NULL);
3098
13
      }
3099
8.05k
      memcpy(buffer, buf, len);
3100
3.24M
      while (c != 0) { /* tested bigname2.xml */
3101
3.24M
    if (len + 10 > max) {
3102
5.27k
        xmlChar *tmp;
3103
3104
5.27k
        max *= 2;
3105
5.27k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3106
5.27k
        if (tmp == NULL) {
3107
17
      xmlErrMemory(ctxt);
3108
17
                        xmlFree(prefix);
3109
17
      xmlFree(buffer);
3110
17
      return(NULL);
3111
17
        }
3112
5.26k
        buffer = tmp;
3113
5.26k
    }
3114
3.24M
    buffer[len++] = c;
3115
3.24M
    c = *cur++;
3116
3.24M
      }
3117
8.03k
      buffer[len] = 0;
3118
8.03k
  }
3119
3120
212k
  if (buffer == NULL) {
3121
203k
      ret = xmlStrndup(buf, len);
3122
203k
            if (ret == NULL) {
3123
116
                xmlFree(prefix);
3124
116
                return(NULL);
3125
116
            }
3126
203k
  } else {
3127
8.03k
      ret = buffer;
3128
8.03k
  }
3129
3130
211k
        *prefixOut = prefix;
3131
211k
    }
3132
3133
1.10M
    return(ret);
3134
1.10M
}
3135
3136
/************************************************************************
3137
 *                  *
3138
 *      The parser itself       *
3139
 *  Relates to http://www.w3.org/TR/REC-xml       *
3140
 *                  *
3141
 ************************************************************************/
3142
3143
/************************************************************************
3144
 *                  *
3145
 *  Routines to parse Name, NCName and NmToken      *
3146
 *                  *
3147
 ************************************************************************/
3148
3149
/*
3150
 * The two following functions are related to the change of accepted
3151
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3152
 * They correspond to the modified production [4] and the new production [4a]
3153
 * changes in that revision. Also note that the macros used for the
3154
 * productions Letter, Digit, CombiningChar and Extender are not needed
3155
 * anymore.
3156
 * We still keep compatibility to pre-revision5 parsing semantic if the
3157
 * new XML_PARSE_OLD10 option is given to the parser.
3158
 */
3159
static int
3160
3.43M
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3161
3.43M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162
        /*
3163
   * Use the new checks of production [4] [4a] amd [5] of the
3164
   * Update 5 of XML-1.0
3165
   */
3166
2.80M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3167
2.80M
      (((c >= 'a') && (c <= 'z')) ||
3168
2.79M
       ((c >= 'A') && (c <= 'Z')) ||
3169
2.79M
       (c == '_') || (c == ':') ||
3170
2.79M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3171
2.79M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3172
2.79M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3173
2.79M
       ((c >= 0x370) && (c <= 0x37D)) ||
3174
2.79M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175
2.79M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3176
2.79M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3177
2.79M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178
2.79M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179
2.79M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180
2.79M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181
2.79M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3182
2.06M
      return(1);
3183
2.80M
    } else {
3184
634k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3185
485k
      return(1);
3186
634k
    }
3187
887k
    return(0);
3188
3.43M
}
3189
3190
static int
3191
42.2M
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3192
42.2M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3193
        /*
3194
   * Use the new checks of production [4] [4a] amd [5] of the
3195
   * Update 5 of XML-1.0
3196
   */
3197
37.8M
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3198
37.8M
      (((c >= 'a') && (c <= 'z')) ||
3199
37.8M
       ((c >= 'A') && (c <= 'Z')) ||
3200
37.8M
       ((c >= '0') && (c <= '9')) || /* !start */
3201
37.8M
       (c == '_') || (c == ':') ||
3202
37.8M
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3203
37.8M
       ((c >= 0xC0) && (c <= 0xD6)) ||
3204
37.8M
       ((c >= 0xD8) && (c <= 0xF6)) ||
3205
37.8M
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3206
37.8M
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3207
37.8M
       ((c >= 0x370) && (c <= 0x37D)) ||
3208
37.8M
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3209
37.8M
       ((c >= 0x200C) && (c <= 0x200D)) ||
3210
37.8M
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3211
37.8M
       ((c >= 0x2070) && (c <= 0x218F)) ||
3212
37.8M
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3213
37.8M
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3214
37.8M
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3215
37.8M
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3216
37.8M
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3217
35.8M
       return(1);
3218
37.8M
    } else {
3219
4.36M
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3220
4.36M
            (c == '.') || (c == '-') ||
3221
4.36M
      (c == '_') || (c == ':') ||
3222
4.36M
      (IS_COMBINING(c)) ||
3223
4.36M
      (IS_EXTENDER(c)))
3224
3.86M
      return(1);
3225
4.36M
    }
3226
2.57M
    return(0);
3227
42.2M
}
3228
3229
static const xmlChar *
3230
1.36M
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3231
1.36M
    const xmlChar *ret;
3232
1.36M
    int len = 0, l;
3233
1.36M
    int c;
3234
1.36M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3235
386k
                    XML_MAX_TEXT_LENGTH :
3236
1.36M
                    XML_MAX_NAME_LENGTH;
3237
3238
    /*
3239
     * Handler for more complex cases
3240
     */
3241
1.36M
    c = CUR_CHAR(l);
3242
1.36M
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243
        /*
3244
   * Use the new checks of production [4] [4a] amd [5] of the
3245
   * Update 5 of XML-1.0
3246
   */
3247
1.24M
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248
1.24M
      (!(((c >= 'a') && (c <= 'z')) ||
3249
1.20M
         ((c >= 'A') && (c <= 'Z')) ||
3250
1.20M
         (c == '_') || (c == ':') ||
3251
1.20M
         ((c >= 0xC0) && (c <= 0xD6)) ||
3252
1.20M
         ((c >= 0xD8) && (c <= 0xF6)) ||
3253
1.20M
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3254
1.20M
         ((c >= 0x370) && (c <= 0x37D)) ||
3255
1.20M
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256
1.20M
         ((c >= 0x200C) && (c <= 0x200D)) ||
3257
1.20M
         ((c >= 0x2070) && (c <= 0x218F)) ||
3258
1.20M
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259
1.20M
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260
1.20M
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261
1.20M
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262
1.20M
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263
741k
      return(NULL);
3264
741k
  }
3265
501k
  len += l;
3266
501k
  NEXTL(l);
3267
501k
  c = CUR_CHAR(l);
3268
29.0M
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269
29.0M
         (((c >= 'a') && (c <= 'z')) ||
3270
28.9M
          ((c >= 'A') && (c <= 'Z')) ||
3271
28.9M
          ((c >= '0') && (c <= '9')) || /* !start */
3272
28.9M
          (c == '_') || (c == ':') ||
3273
28.9M
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274
28.9M
          ((c >= 0xC0) && (c <= 0xD6)) ||
3275
28.9M
          ((c >= 0xD8) && (c <= 0xF6)) ||
3276
28.9M
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3277
28.9M
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278
28.9M
          ((c >= 0x370) && (c <= 0x37D)) ||
3279
28.9M
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280
28.9M
          ((c >= 0x200C) && (c <= 0x200D)) ||
3281
28.9M
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282
28.9M
          ((c >= 0x2070) && (c <= 0x218F)) ||
3283
28.9M
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284
28.9M
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285
28.9M
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286
28.9M
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287
28.9M
          ((c >= 0x10000) && (c <= 0xEFFFF))
3288
28.9M
    )) {
3289
28.5M
            if (len <= INT_MAX - l)
3290
28.5M
          len += l;
3291
28.5M
      NEXTL(l);
3292
28.5M
      c = CUR_CHAR(l);
3293
28.5M
  }
3294
501k
    } else {
3295
125k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296
125k
      (!IS_LETTER(c) && (c != '_') &&
3297
116k
       (c != ':'))) {
3298
87.6k
      return(NULL);
3299
87.6k
  }
3300
37.4k
  len += l;
3301
37.4k
  NEXTL(l);
3302
37.4k
  c = CUR_CHAR(l);
3303
3304
2.31M
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305
2.31M
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306
2.31M
    (c == '.') || (c == '-') ||
3307
2.31M
    (c == '_') || (c == ':') ||
3308
2.31M
    (IS_COMBINING(c)) ||
3309
2.31M
    (IS_EXTENDER(c)))) {
3310
2.28M
            if (len <= INT_MAX - l)
3311
2.28M
          len += l;
3312
2.28M
      NEXTL(l);
3313
2.28M
      c = CUR_CHAR(l);
3314
2.28M
  }
3315
37.4k
    }
3316
539k
    if (len > maxLength) {
3317
326
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318
326
        return(NULL);
3319
326
    }
3320
538k
    if (ctxt->input->cur - ctxt->input->base < len) {
3321
        /*
3322
         * There were a couple of bugs where PERefs lead to to a change
3323
         * of the buffer. Check the buffer size to avoid passing an invalid
3324
         * pointer to xmlDictLookup.
3325
         */
3326
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3327
0
                    "unexpected change of input buffer");
3328
0
        return (NULL);
3329
0
    }
3330
538k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3331
1.29k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3332
537k
    else
3333
537k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3334
538k
    if (ret == NULL)
3335
20
        xmlErrMemory(ctxt);
3336
538k
    return(ret);
3337
538k
}
3338
3339
/**
3340
 * xmlParseName:
3341
 * @ctxt:  an XML parser context
3342
 *
3343
 * DEPRECATED: Internal function, don't use.
3344
 *
3345
 * parse an XML name.
3346
 *
3347
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3348
 *                  CombiningChar | Extender
3349
 *
3350
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351
 *
3352
 * [6] Names ::= Name (#x20 Name)*
3353
 *
3354
 * Returns the Name parsed or NULL
3355
 */
3356
3357
const xmlChar *
3358
6.64M
xmlParseName(xmlParserCtxtPtr ctxt) {
3359
6.64M
    const xmlChar *in;
3360
6.64M
    const xmlChar *ret;
3361
6.64M
    size_t count = 0;
3362
6.64M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3363
1.58M
                       XML_MAX_TEXT_LENGTH :
3364
6.64M
                       XML_MAX_NAME_LENGTH;
3365
3366
6.64M
    GROW;
3367
3368
    /*
3369
     * Accelerator for simple ASCII names
3370
     */
3371
6.64M
    in = ctxt->input->cur;
3372
6.64M
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
6.64M
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3374
6.64M
  (*in == '_') || (*in == ':')) {
3375
5.55M
  in++;
3376
27.9M
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377
27.9M
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3378
27.9M
         ((*in >= 0x30) && (*in <= 0x39)) ||
3379
27.9M
         (*in == '_') || (*in == '-') ||
3380
27.9M
         (*in == ':') || (*in == '.'))
3381
22.4M
      in++;
3382
5.55M
  if ((*in > 0) && (*in < 0x80)) {
3383
5.27M
      count = in - ctxt->input->cur;
3384
5.27M
            if (count > maxLength) {
3385
57
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386
57
                return(NULL);
3387
57
            }
3388
5.27M
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389
5.27M
      ctxt->input->cur = in;
3390
5.27M
      ctxt->input->col += count;
3391
5.27M
      if (ret == NULL)
3392
32
          xmlErrMemory(ctxt);
3393
5.27M
      return(ret);
3394
5.27M
  }
3395
5.55M
    }
3396
    /* accelerator for special cases */
3397
1.36M
    return(xmlParseNameComplex(ctxt));
3398
6.64M
}
3399
3400
static xmlHashedString
3401
1.47M
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402
1.47M
    xmlHashedString ret;
3403
1.47M
    int len = 0, l;
3404
1.47M
    int c;
3405
1.47M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406
506k
                    XML_MAX_TEXT_LENGTH :
3407
1.47M
                    XML_MAX_NAME_LENGTH;
3408
1.47M
    size_t startPosition = 0;
3409
3410
1.47M
    ret.name = NULL;
3411
1.47M
    ret.hashValue = 0;
3412
3413
    /*
3414
     * Handler for more complex cases
3415
     */
3416
1.47M
    startPosition = CUR_PTR - BASE_PTR;
3417
1.47M
    c = CUR_CHAR(l);
3418
1.47M
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
1.47M
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
1.06M
  return(ret);
3421
1.06M
    }
3422
3423
21.8M
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
21.8M
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
21.4M
        if (len <= INT_MAX - l)
3426
21.4M
      len += l;
3427
21.4M
  NEXTL(l);
3428
21.4M
  c = CUR_CHAR(l);
3429
21.4M
    }
3430
408k
    if (len > maxLength) {
3431
281
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432
281
        return(ret);
3433
281
    }
3434
408k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3435
408k
    if (ret.name == NULL)
3436
20
        xmlErrMemory(ctxt);
3437
408k
    return(ret);
3438
408k
}
3439
3440
/**
3441
 * xmlParseNCName:
3442
 * @ctxt:  an XML parser context
3443
 * @len:  length of the string parsed
3444
 *
3445
 * parse an XML name.
3446
 *
3447
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3448
 *                      CombiningChar | Extender
3449
 *
3450
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3451
 *
3452
 * Returns the Name parsed or NULL
3453
 */
3454
3455
static xmlHashedString
3456
8.83M
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3457
8.83M
    const xmlChar *in, *e;
3458
8.83M
    xmlHashedString ret;
3459
8.83M
    size_t count = 0;
3460
8.83M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3461
3.36M
                       XML_MAX_TEXT_LENGTH :
3462
8.83M
                       XML_MAX_NAME_LENGTH;
3463
3464
8.83M
    ret.name = NULL;
3465
3466
    /*
3467
     * Accelerator for simple ASCII names
3468
     */
3469
8.83M
    in = ctxt->input->cur;
3470
8.83M
    e = ctxt->input->end;
3471
8.83M
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472
8.83M
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3473
8.83M
   (*in == '_')) && (in < e)) {
3474
7.56M
  in++;
3475
28.9M
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476
28.9M
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3477
28.9M
          ((*in >= 0x30) && (*in <= 0x39)) ||
3478
28.9M
          (*in == '_') || (*in == '-') ||
3479
28.9M
          (*in == '.')) && (in < e))
3480
21.3M
      in++;
3481
7.56M
  if (in >= e)
3482
7.38k
      goto complex;
3483
7.55M
  if ((*in > 0) && (*in < 0x80)) {
3484
7.36M
      count = in - ctxt->input->cur;
3485
7.36M
            if (count > maxLength) {
3486
34
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3487
34
                return(ret);
3488
34
            }
3489
7.36M
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3490
7.36M
      ctxt->input->cur = in;
3491
7.36M
      ctxt->input->col += count;
3492
7.36M
      if (ret.name == NULL) {
3493
36
          xmlErrMemory(ctxt);
3494
36
      }
3495
7.36M
      return(ret);
3496
7.36M
  }
3497
7.55M
    }
3498
1.47M
complex:
3499
1.47M
    return(xmlParseNCNameComplex(ctxt));
3500
8.83M
}
3501
3502
/**
3503
 * xmlParseNameAndCompare:
3504
 * @ctxt:  an XML parser context
3505
 *
3506
 * parse an XML name and compares for match
3507
 * (specialized for endtag parsing)
3508
 *
3509
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3510
 * and the name for mismatch
3511
 */
3512
3513
static const xmlChar *
3514
942k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3515
942k
    register const xmlChar *cmp = other;
3516
942k
    register const xmlChar *in;
3517
942k
    const xmlChar *ret;
3518
3519
942k
    GROW;
3520
3521
942k
    in = ctxt->input->cur;
3522
2.67M
    while (*in != 0 && *in == *cmp) {
3523
1.72M
  ++in;
3524
1.72M
  ++cmp;
3525
1.72M
    }
3526
942k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3527
  /* success */
3528
876k
  ctxt->input->col += in - ctxt->input->cur;
3529
876k
  ctxt->input->cur = in;
3530
876k
  return (const xmlChar*) 1;
3531
876k
    }
3532
    /* failure (or end of input buffer), check with full function */
3533
66.1k
    ret = xmlParseName (ctxt);
3534
    /* strings coming from the dictionary direct compare possible */
3535
66.1k
    if (ret == other) {
3536
6.20k
  return (const xmlChar*) 1;
3537
6.20k
    }
3538
59.9k
    return ret;
3539
66.1k
}
3540
3541
/**
3542
 * xmlParseStringName:
3543
 * @ctxt:  an XML parser context
3544
 * @str:  a pointer to the string pointer (IN/OUT)
3545
 *
3546
 * parse an XML name.
3547
 *
3548
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3549
 *                  CombiningChar | Extender
3550
 *
3551
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3552
 *
3553
 * [6] Names ::= Name (#x20 Name)*
3554
 *
3555
 * Returns the Name parsed or NULL. The @str pointer
3556
 * is updated to the current location in the string.
3557
 */
3558
3559
static xmlChar *
3560
2.05M
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3561
2.05M
    xmlChar buf[XML_MAX_NAMELEN + 5];
3562
2.05M
    xmlChar *ret;
3563
2.05M
    const xmlChar *cur = *str;
3564
2.05M
    int len = 0, l;
3565
2.05M
    int c;
3566
2.05M
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3567
583k
                    XML_MAX_TEXT_LENGTH :
3568
2.05M
                    XML_MAX_NAME_LENGTH;
3569
3570
2.05M
    c = CUR_SCHAR(cur, l);
3571
2.05M
    if (!xmlIsNameStartChar(ctxt, c)) {
3572
10.3k
  return(NULL);
3573
10.3k
    }
3574
3575
2.04M
    COPY_BUF(buf, len, c);
3576
2.04M
    cur += l;
3577
2.04M
    c = CUR_SCHAR(cur, l);
3578
8.82M
    while (xmlIsNameChar(ctxt, c)) {
3579
6.79M
  COPY_BUF(buf, len, c);
3580
6.79M
  cur += l;
3581
6.79M
  c = CUR_SCHAR(cur, l);
3582
6.79M
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583
      /*
3584
       * Okay someone managed to make a huge name, so he's ready to pay
3585
       * for the processing speed.
3586
       */
3587
8.88k
      xmlChar *buffer;
3588
8.88k
      int max = len * 2;
3589
3590
8.88k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3591
8.88k
      if (buffer == NULL) {
3592
13
          xmlErrMemory(ctxt);
3593
13
    return(NULL);
3594
13
      }
3595
8.87k
      memcpy(buffer, buf, len);
3596
1.98M
      while (xmlIsNameChar(ctxt, c)) {
3597
1.97M
    if (len + 10 > max) {
3598
4.82k
        xmlChar *tmp;
3599
3600
4.82k
        max *= 2;
3601
4.82k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3602
4.82k
        if (tmp == NULL) {
3603
8
      xmlErrMemory(ctxt);
3604
8
      xmlFree(buffer);
3605
8
      return(NULL);
3606
8
        }
3607
4.81k
        buffer = tmp;
3608
4.81k
    }
3609
1.97M
    COPY_BUF(buffer, len, c);
3610
1.97M
    cur += l;
3611
1.97M
    c = CUR_SCHAR(cur, l);
3612
1.97M
                if (len > maxLength) {
3613
44
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614
44
                    xmlFree(buffer);
3615
44
                    return(NULL);
3616
44
                }
3617
1.97M
      }
3618
8.81k
      buffer[len] = 0;
3619
8.81k
      *str = cur;
3620
8.81k
      return(buffer);
3621
8.87k
  }
3622
6.79M
    }
3623
2.03M
    if (len > maxLength) {
3624
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3625
0
        return(NULL);
3626
0
    }
3627
2.03M
    *str = cur;
3628
2.03M
    ret = xmlStrndup(buf, len);
3629
2.03M
    if (ret == NULL)
3630
217
        xmlErrMemory(ctxt);
3631
2.03M
    return(ret);
3632
2.03M
}
3633
3634
/**
3635
 * xmlParseNmtoken:
3636
 * @ctxt:  an XML parser context
3637
 *
3638
 * DEPRECATED: Internal function, don't use.
3639
 *
3640
 * parse an XML Nmtoken.
3641
 *
3642
 * [7] Nmtoken ::= (NameChar)+
3643
 *
3644
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3645
 *
3646
 * Returns the Nmtoken parsed or NULL
3647
 */
3648
3649
xmlChar *
3650
280k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3651
280k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3652
280k
    xmlChar *ret;
3653
280k
    int len = 0, l;
3654
280k
    int c;
3655
280k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3656
66.2k
                    XML_MAX_TEXT_LENGTH :
3657
280k
                    XML_MAX_NAME_LENGTH;
3658
3659
280k
    c = CUR_CHAR(l);
3660
3661
1.05M
    while (xmlIsNameChar(ctxt, c)) {
3662
777k
  COPY_BUF(buf, len, c);
3663
777k
  NEXTL(l);
3664
777k
  c = CUR_CHAR(l);
3665
777k
  if (len >= XML_MAX_NAMELEN) {
3666
      /*
3667
       * Okay someone managed to make a huge token, so he's ready to pay
3668
       * for the processing speed.
3669
       */
3670
4.70k
      xmlChar *buffer;
3671
4.70k
      int max = len * 2;
3672
3673
4.70k
      buffer = (xmlChar *) xmlMallocAtomic(max);
3674
4.70k
      if (buffer == NULL) {
3675
16
          xmlErrMemory(ctxt);
3676
16
    return(NULL);
3677
16
      }
3678
4.68k
      memcpy(buffer, buf, len);
3679
8.69M
      while (xmlIsNameChar(ctxt, c)) {
3680
8.68M
    if (len + 10 > max) {
3681
9.57k
        xmlChar *tmp;
3682
3683
9.57k
        max *= 2;
3684
9.57k
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3685
9.57k
        if (tmp == NULL) {
3686
12
      xmlErrMemory(ctxt);
3687
12
      xmlFree(buffer);
3688
12
      return(NULL);
3689
12
        }
3690
9.55k
        buffer = tmp;
3691
9.55k
    }
3692
8.68M
    COPY_BUF(buffer, len, c);
3693
8.68M
                if (len > maxLength) {
3694
340
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695
340
                    xmlFree(buffer);
3696
340
                    return(NULL);
3697
340
                }
3698
8.68M
    NEXTL(l);
3699
8.68M
    c = CUR_CHAR(l);
3700
8.68M
      }
3701
4.33k
      buffer[len] = 0;
3702
4.33k
      return(buffer);
3703
4.68k
  }
3704
777k
    }
3705
276k
    if (len == 0)
3706
41.2k
        return(NULL);
3707
234k
    if (len > maxLength) {
3708
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709
0
        return(NULL);
3710
0
    }
3711
234k
    ret = xmlStrndup(buf, len);
3712
234k
    if (ret == NULL)
3713
59
        xmlErrMemory(ctxt);
3714
234k
    return(ret);
3715
234k
}
3716
3717
/**
3718
 * xmlExpandPEsInEntityValue:
3719
 * @ctxt:  parser context
3720
 * @buf:  string buffer
3721
 * @str:  entity value
3722
 * @length:  size of entity value
3723
 * @depth:  nesting depth
3724
 *
3725
 * Validate an entity value and expand parameter entities.
3726
 */
3727
static void
3728
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3729
233k
                          const xmlChar *str, int length, int depth) {
3730
233k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3731
233k
    const xmlChar *end, *chunk;
3732
233k
    int c, l;
3733
3734
233k
    if (str == NULL)
3735
25.6k
        return;
3736
3737
208k
    depth += 1;
3738
208k
    if (depth > maxDepth) {
3739
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3740
0
                       "Maximum entity nesting depth exceeded");
3741
0
  return;
3742
0
    }
3743
3744
208k
    end = str + length;
3745
208k
    chunk = str;
3746
3747
330M
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3748
330M
        c = *str;
3749
3750
330M
        if (c >= 0x80) {
3751
192M
            l = xmlUTF8MultibyteLen(ctxt, str,
3752
192M
                    "invalid character in entity value\n");
3753
192M
            if (l == 0) {
3754
64.2M
                if (chunk < str)
3755
319k
                    xmlSBufAddString(buf, chunk, str - chunk);
3756
64.2M
                xmlSBufAddReplChar(buf);
3757
64.2M
                str += 1;
3758
64.2M
                chunk = str;
3759
128M
            } else {
3760
128M
                str += l;
3761
128M
            }
3762
192M
        } else if (c == '&') {
3763
259k
            if (str[1] == '#') {
3764
71.2k
                if (chunk < str)
3765
37.6k
                    xmlSBufAddString(buf, chunk, str - chunk);
3766
3767
71.2k
                c = xmlParseStringCharRef(ctxt, &str);
3768
71.2k
                if (c == 0)
3769
18.8k
                    return;
3770
3771
52.3k
                xmlSBufAddChar(buf, c);
3772
3773
52.3k
                chunk = str;
3774
187k
            } else {
3775
187k
                xmlChar *name;
3776
3777
                /*
3778
                 * General entity references are checked for
3779
                 * syntactic validity.
3780
                 */
3781
187k
                str++;
3782
187k
                name = xmlParseStringName(ctxt, &str);
3783
3784
187k
                if ((name == NULL) || (*str++ != ';')) {
3785
8.48k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3786
8.48k
                            "EntityValue: '&' forbidden except for entities "
3787
8.48k
                            "references\n");
3788
8.48k
                    xmlFree(name);
3789
8.48k
                    return;
3790
8.48k
                }
3791
3792
179k
                xmlFree(name);
3793
179k
            }
3794
137M
        } else if (c == '%') {
3795
95.7k
            xmlEntityPtr ent;
3796
3797
95.7k
            if (chunk < str)
3798
54.4k
                xmlSBufAddString(buf, chunk, str - chunk);
3799
3800
95.7k
            ent = xmlParseStringPEReference(ctxt, &str);
3801
95.7k
            if (ent == NULL)
3802
20.4k
                return;
3803
3804
75.2k
            if (!PARSER_EXTERNAL(ctxt)) {
3805
770
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3806
770
                return;
3807
770
            }
3808
3809
74.4k
            if (ent->content == NULL) {
3810
                /*
3811
                 * Note: external parsed entities will not be loaded,
3812
                 * it is not required for a non-validating parser to
3813
                 * complete external PEReferences coming from the
3814
                 * internal subset
3815
                 */
3816
26.2k
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3817
26.2k
                    ((ctxt->replaceEntities) ||
3818
26.2k
                     (ctxt->validate))) {
3819
24.5k
                    xmlLoadEntityContent(ctxt, ent);
3820
24.5k
                } else {
3821
1.68k
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3822
1.68k
                                  "not validating will not read content for "
3823
1.68k
                                  "PE entity %s\n", ent->name, NULL);
3824
1.68k
                }
3825
26.2k
            }
3826
3827
            /*
3828
             * TODO: Skip if ent->content is still NULL.
3829
             */
3830
3831
74.4k
            if (xmlParserEntityCheck(ctxt, ent->length))
3832
87
                return;
3833
3834
74.3k
            if (ent->flags & XML_ENT_EXPANDING) {
3835
117
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3836
117
                xmlHaltParser(ctxt);
3837
117
                return;
3838
117
            }
3839
3840
74.2k
            ent->flags |= XML_ENT_EXPANDING;
3841
74.2k
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3842
74.2k
                                      depth);
3843
74.2k
            ent->flags &= ~XML_ENT_EXPANDING;
3844
3845
74.2k
            chunk = str;
3846
137M
        } else {
3847
            /* Normal ASCII char */
3848
137M
            if (!IS_BYTE_CHAR(c)) {
3849
445k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3850
445k
                        "invalid character in entity value\n");
3851
445k
                if (chunk < str)
3852
52.2k
                    xmlSBufAddString(buf, chunk, str - chunk);
3853
445k
                xmlSBufAddReplChar(buf);
3854
445k
                str += 1;
3855
445k
                chunk = str;
3856
137M
            } else {
3857
137M
                str += 1;
3858
137M
            }
3859
137M
        }
3860
330M
    }
3861
3862
159k
    if (chunk < str)
3863
125k
        xmlSBufAddString(buf, chunk, str - chunk);
3864
3865
159k
    return;
3866
208k
}
3867
3868
/**
3869
 * xmlParseEntityValue:
3870
 * @ctxt:  an XML parser context
3871
 * @orig:  if non-NULL store a copy of the original entity value
3872
 *
3873
 * DEPRECATED: Internal function, don't use.
3874
 *
3875
 * parse a value for ENTITY declarations
3876
 *
3877
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3878
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3879
 *
3880
 * Returns the EntityValue parsed with reference substituted or NULL
3881
 */
3882
xmlChar *
3883
160k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3884
160k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3885
42.3k
                         XML_MAX_HUGE_LENGTH :
3886
160k
                         XML_MAX_TEXT_LENGTH;
3887
160k
    xmlSBuf buf;
3888
160k
    const xmlChar *start;
3889
160k
    int quote, length;
3890
3891
160k
    xmlSBufInit(&buf, maxLength);
3892
3893
160k
    GROW;
3894
3895
160k
    quote = CUR;
3896
160k
    if ((quote != '"') && (quote != '\'')) {
3897
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898
0
  return(NULL);
3899
0
    }
3900
160k
    CUR_PTR++;
3901
3902
160k
    length = 0;
3903
3904
    /*
3905
     * Copy raw content of the entity into a buffer
3906
     */
3907
357M
    while (1) {
3908
357M
        int c;
3909
3910
357M
        if (PARSER_STOPPED(ctxt))
3911
31
            goto error;
3912
3913
357M
        if (CUR_PTR >= ctxt->input->end) {
3914
484
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3915
484
            goto error;
3916
484
        }
3917
3918
357M
        c = CUR;
3919
3920
357M
        if (c == 0) {
3921
206
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3922
206
                    "invalid character in entity value\n");
3923
206
            goto error;
3924
206
        }
3925
357M
        if (c == quote)
3926
159k
            break;
3927
357M
        NEXTL(1);
3928
357M
        length += 1;
3929
3930
        /*
3931
         * TODO: Check growth threshold
3932
         */
3933
357M
        if (ctxt->input->end - CUR_PTR < 10)
3934
96.5k
            GROW;
3935
357M
    }
3936
3937
159k
    start = CUR_PTR - length;
3938
3939
159k
    if (orig != NULL) {
3940
159k
        *orig = xmlStrndup(start, length);
3941
159k
        if (*orig == NULL)
3942
79
            xmlErrMemory(ctxt);
3943
159k
    }
3944
3945
159k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3946
3947
159k
    NEXTL(1);
3948
3949
159k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3950
3951
721
error:
3952
721
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3953
721
    return(NULL);
3954
160k
}
3955
3956
/**
3957
 * xmlCheckEntityInAttValue:
3958
 * @ctxt:  parser context
3959
 * @pent:  entity
3960
 * @depth:  nesting depth
3961
 *
3962
 * Check an entity reference in an attribute value for validity
3963
 * without expanding it.
3964
 */
3965
static void
3966
11.0k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3967
11.0k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3968
11.0k
    const xmlChar *str;
3969
11.0k
    unsigned long expandedSize = pent->length;
3970
11.0k
    int c, flags;
3971
3972
11.0k
    depth += 1;
3973
11.0k
    if (depth > maxDepth) {
3974
63
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3975
63
                       "Maximum entity nesting depth exceeded");
3976
63
  return;
3977
63
    }
3978
3979
11.0k
    if (pent->flags & XML_ENT_EXPANDING) {
3980
74
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3981
74
        xmlHaltParser(ctxt);
3982
74
        return;
3983
74
    }
3984
3985
    /*
3986
     * If we're parsing a default attribute value in DTD content,
3987
     * the entity might reference other entities which weren't
3988
     * defined yet, so the check isn't reliable.
3989
     */
3990
10.9k
    if (ctxt->inSubset == 0)
3991
10.8k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3992
97
    else
3993
97
        flags = XML_ENT_VALIDATED;
3994
3995
10.9k
    str = pent->content;
3996
10.9k
    if (str == NULL)
3997
0
        goto done;
3998
3999
    /*
4000
     * Note that entity values are already validated. We only check
4001
     * for illegal less-than signs and compute the expanded size
4002
     * of the entity. No special handling for multi-byte characters
4003
     * is needed.
4004
     */
4005
32.2M
    while (!PARSER_STOPPED(ctxt)) {
4006
32.2M
        c = *str;
4007
4008
32.2M
  if (c != '&') {
4009
32.1M
            if (c == 0)
4010
10.3k
                break;
4011
4012
32.1M
            if (c == '<')
4013
10.7k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4014
10.7k
                        "'<' in entity '%s' is not allowed in attributes "
4015
10.7k
                        "values\n", pent->name);
4016
4017
32.1M
            str += 1;
4018
32.1M
        } else if (str[1] == '#') {
4019
1.52k
            int val;
4020
4021
1.52k
      val = xmlParseStringCharRef(ctxt, &str);
4022
1.52k
      if (val == 0) {
4023
30
                pent->content[0] = 0;
4024
30
                break;
4025
30
            }
4026
24.0k
  } else {
4027
24.0k
            xmlChar *name;
4028
24.0k
            xmlEntityPtr ent;
4029
4030
24.0k
      name = xmlParseStringEntityRef(ctxt, &str);
4031
24.0k
      if (name == NULL) {
4032
153
                pent->content[0] = 0;
4033
153
                break;
4034
153
            }
4035
4036
23.8k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4037
23.8k
            xmlFree(name);
4038
4039
23.8k
            if ((ent != NULL) &&
4040
23.8k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4041
15.5k
                if ((ent->flags & flags) != flags) {
4042
7.74k
                    pent->flags |= XML_ENT_EXPANDING;
4043
7.74k
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4044
7.74k
                    pent->flags &= ~XML_ENT_EXPANDING;
4045
7.74k
                }
4046
4047
15.5k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4048
15.5k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4049
15.5k
            }
4050
23.8k
        }
4051
32.2M
    }
4052
4053
10.9k
done:
4054
10.9k
    if (ctxt->inSubset == 0)
4055
10.8k
        pent->expandedSize = expandedSize;
4056
4057
10.9k
    pent->flags |= flags;
4058
10.9k
}
4059
4060
/**
4061
 * xmlExpandEntityInAttValue:
4062
 * @ctxt:  parser context
4063
 * @buf:  string buffer
4064
 * @str:  entity or attribute value
4065
 * @pent:  entity for entity value, NULL for attribute values
4066
 * @normalize:  whether to collapse whitespace
4067
 * @inSpace:  whitespace state
4068
 * @depth:  nesting depth
4069
 * @check:  whether to check for amplification
4070
 *
4071
 * Expand general entity references in an entity or attribute value.
4072
 * Perform attribute value normalization.
4073
 */
4074
static void
4075
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4076
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4077
1.54M
                          int *inSpace, int depth, int check) {
4078
1.54M
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4079
1.54M
    int c, chunkSize;
4080
4081
1.54M
    if (str == NULL)
4082
0
        return;
4083
4084
1.54M
    depth += 1;
4085
1.54M
    if (depth > maxDepth) {
4086
121k
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4087
121k
                       "Maximum entity nesting depth exceeded");
4088
121k
  return;
4089
121k
    }
4090
4091
1.42M
    if (pent != NULL) {
4092
1.37M
        if (pent->flags & XML_ENT_EXPANDING) {
4093
32
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4094
32
            xmlHaltParser(ctxt);
4095
32
            return;
4096
32
        }
4097
4098
1.37M
        if (check) {
4099
1.35M
            if (xmlParserEntityCheck(ctxt, pent->length))
4100
131
                return;
4101
1.35M
        }
4102
1.37M
    }
4103
4104
1.42M
    chunkSize = 0;
4105
4106
    /*
4107
     * Note that entity values are already validated. No special
4108
     * handling for multi-byte characters is needed.
4109
     */
4110
634M
    while (!PARSER_STOPPED(ctxt)) {
4111
634M
        c = *str;
4112
4113
634M
  if (c != '&') {
4114
632M
            if (c == 0)
4115
1.28M
                break;
4116
4117
            /*
4118
             * If this function is called without an entity, it is used to
4119
             * expand entities in an attribute content where less-than was
4120
             * already unscaped and is allowed.
4121
             */
4122
631M
            if ((pent != NULL) && (c == '<')) {
4123
137k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4124
137k
                        "'<' in entity '%s' is not allowed in attributes "
4125
137k
                        "values\n", pent->name);
4126
137k
                break;
4127
137k
            }
4128
4129
631M
            if (c <= 0x20) {
4130
13.5M
                if ((normalize) && (*inSpace)) {
4131
                    /* Skip char */
4132
547k
                    if (chunkSize > 0) {
4133
104k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4134
104k
                        chunkSize = 0;
4135
104k
                    }
4136
13.0M
                } else if (c < 0x20) {
4137
6.92M
                    if (chunkSize > 0) {
4138
249k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4139
249k
                        chunkSize = 0;
4140
249k
                    }
4141
4142
6.92M
                    xmlSBufAddCString(buf, " ", 1);
4143
6.92M
                } else {
4144
6.11M
                    chunkSize += 1;
4145
6.11M
                }
4146
4147
13.5M
                *inSpace = 1;
4148
617M
            } else {
4149
617M
                chunkSize += 1;
4150
617M
                *inSpace = 0;
4151
617M
            }
4152
4153
631M
            str += 1;
4154
631M
        } else if (str[1] == '#') {
4155
197k
            int val;
4156
4157
197k
            if (chunkSize > 0) {
4158
180k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4159
180k
                chunkSize = 0;
4160
180k
            }
4161
4162
197k
      val = xmlParseStringCharRef(ctxt, &str);
4163
197k
      if (val == 0) {
4164
39
                if (pent != NULL)
4165
39
                    pent->content[0] = 0;
4166
39
                break;
4167
39
            }
4168
4169
197k
            if (val == ' ') {
4170
7.89k
                if ((!normalize) || (!*inSpace))
4171
7.79k
                    xmlSBufAddCString(buf, " ", 1);
4172
7.89k
                *inSpace = 1;
4173
189k
            } else {
4174
189k
                xmlSBufAddChar(buf, val);
4175
189k
                *inSpace = 0;
4176
189k
            }
4177
1.74M
  } else {
4178
1.74M
            xmlChar *name;
4179
1.74M
            xmlEntityPtr ent;
4180
4181
1.74M
            if (chunkSize > 0) {
4182
416k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4183
416k
                chunkSize = 0;
4184
416k
            }
4185
4186
1.74M
      name = xmlParseStringEntityRef(ctxt, &str);
4187
1.74M
            if (name == NULL) {
4188
178
                if (pent != NULL)
4189
167
                    pent->content[0] = 0;
4190
178
                break;
4191
178
            }
4192
4193
1.74M
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4194
1.74M
            xmlFree(name);
4195
4196
1.74M
      if ((ent != NULL) &&
4197
1.74M
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4198
220k
    if (ent->content == NULL) {
4199
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4200
0
          "predefined entity has no content\n");
4201
0
                    break;
4202
0
                }
4203
4204
220k
                xmlSBufAddString(buf, ent->content, ent->length);
4205
4206
220k
                *inSpace = 0;
4207
1.52M
      } else if ((ent != NULL) && (ent->content != NULL)) {
4208
1.30M
                if (pent != NULL)
4209
1.30M
                    pent->flags |= XML_ENT_EXPANDING;
4210
1.30M
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4211
1.30M
                                          normalize, inSpace, depth, check);
4212
1.30M
                if (pent != NULL)
4213
1.30M
                    pent->flags &= ~XML_ENT_EXPANDING;
4214
1.30M
      }
4215
1.74M
        }
4216
634M
    }
4217
4218
1.42M
    if (chunkSize > 0)
4219
694k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4220
4221
1.42M
    return;
4222
1.42M
}
4223
4224
/**
4225
 * xmlExpandEntitiesInAttValue:
4226
 * @ctxt:  parser context
4227
 * @str:  entity or attribute value
4228
 * @normalize:  whether to collapse whitespace
4229
 *
4230
 * Expand general entity references in an entity or attribute value.
4231
 * Perform attribute value normalization.
4232
 *
4233
 * Returns the expanded attribtue value.
4234
 */
4235
xmlChar *
4236
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4237
53.7k
                            int normalize) {
4238
53.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4239
24.6k
                         XML_MAX_HUGE_LENGTH :
4240
53.7k
                         XML_MAX_TEXT_LENGTH;
4241
53.7k
    xmlSBuf buf;
4242
53.7k
    int inSpace = 1;
4243
4244
53.7k
    xmlSBufInit(&buf, maxLength);
4245
4246
53.7k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4247
53.7k
                              ctxt->inputNr, /* check */ 0);
4248
4249
53.7k
    if ((normalize) && (inSpace) && (buf.size > 0))
4250
0
        buf.size--;
4251
4252
53.7k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4253
53.7k
}
4254
4255
/**
4256
 * xmlParseAttValueInternal:
4257
 * @ctxt:  an XML parser context
4258
 * @len:  attribute len result
4259
 * @alloc:  whether the attribute was reallocated as a new string
4260
 * @normalize:  if 1 then further non-CDATA normalization must be done
4261
 *
4262
 * parse a value for an attribute.
4263
 * NOTE: if no normalization is needed, the routine will return pointers
4264
 *       directly from the data buffer.
4265
 *
4266
 * 3.3.3 Attribute-Value Normalization:
4267
 * Before the value of an attribute is passed to the application or
4268
 * checked for validity, the XML processor must normalize it as follows:
4269
 * - a character reference is processed by appending the referenced
4270
 *   character to the attribute value
4271
 * - an entity reference is processed by recursively processing the
4272
 *   replacement text of the entity
4273
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274
 *   appending #x20 to the normalized value, except that only a single
4275
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4276
 *   parsed entity or the literal entity value of an internal parsed entity
4277
 * - other characters are processed by appending them to the normalized value
4278
 * If the declared value is not CDATA, then the XML processor must further
4279
 * process the normalized attribute value by discarding any leading and
4280
 * trailing space (#x20) characters, and by replacing sequences of space
4281
 * (#x20) characters by a single space (#x20) character.
4282
 * All attributes for which no declaration has been read should be treated
4283
 * by a non-validating parser as if declared CDATA.
4284
 *
4285
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4286
 *     caller if it was copied, this can be detected by val[*len] == 0.
4287
 */
4288
static xmlChar *
4289
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4290
2.76M
                         int normalize) {
4291
2.76M
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4292
929k
                         XML_MAX_HUGE_LENGTH :
4293
2.76M
                         XML_MAX_TEXT_LENGTH;
4294
2.76M
    xmlSBuf buf;
4295
2.76M
    xmlChar *ret;
4296
2.76M
    int c, l, quote, flags, chunkSize;
4297
2.76M
    int inSpace = 1;
4298
4299
2.76M
    xmlSBufInit(&buf, maxLength);
4300
4301
2.76M
    GROW;
4302
4303
2.76M
    quote = CUR;
4304
2.76M
    if ((quote != '"') && (quote != '\'')) {
4305
66.5k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306
66.5k
  return(NULL);
4307
66.5k
    }
4308
2.69M
    NEXTL(1);
4309
4310
2.69M
    if (ctxt->inSubset == 0)
4311
2.34M
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312
354k
    else
4313
354k
        flags = XML_ENT_VALIDATED;
4314
4315
2.69M
    inSpace = 1;
4316
2.69M
    chunkSize = 0;
4317
4318
300M
    while (1) {
4319
300M
        if (PARSER_STOPPED(ctxt))
4320
1.62k
            goto error;
4321
4322
300M
        if (CUR_PTR >= ctxt->input->end) {
4323
31.5k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324
31.5k
                           "AttValue: ' expected\n");
4325
31.5k
            goto error;
4326
31.5k
        }
4327
4328
        /*
4329
         * TODO: Check growth threshold
4330
         */
4331
300M
        if (ctxt->input->end - CUR_PTR < 10)
4332
413k
            GROW;
4333
4334
300M
        c = CUR;
4335
4336
300M
        if (c >= 0x80) {
4337
128M
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338
128M
                    "invalid character in attribute value\n");
4339
128M
            if (l == 0) {
4340
58.4M
                if (chunkSize > 0) {
4341
578k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342
578k
                    chunkSize = 0;
4343
578k
                }
4344
58.4M
                xmlSBufAddReplChar(&buf);
4345
58.4M
                NEXTL(1);
4346
70.0M
            } else {
4347
70.0M
                chunkSize += l;
4348
70.0M
                NEXTL(l);
4349
70.0M
            }
4350
4351
128M
            inSpace = 0;
4352
172M
        } else if (c != '&') {
4353
170M
            if (c > 0x20) {
4354
61.5M
                if (c == quote)
4355
2.64M
                    break;
4356
4357
58.8M
                if (c == '<')
4358
813k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359
4360
58.8M
                chunkSize += 1;
4361
58.8M
                inSpace = 0;
4362
108M
            } else if (!IS_BYTE_CHAR(c)) {
4363
75.5M
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364
75.5M
                        "invalid character in attribute value\n");
4365
75.5M
                if (chunkSize > 0) {
4366
197k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367
197k
                    chunkSize = 0;
4368
197k
                }
4369
75.5M
                xmlSBufAddReplChar(&buf);
4370
75.5M
                inSpace = 0;
4371
75.5M
            } else {
4372
                /* Whitespace */
4373
33.4M
                if ((normalize) && (inSpace)) {
4374
                    /* Skip char */
4375
2.20M
                    if (chunkSize > 0) {
4376
41.6k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377
41.6k
                        chunkSize = 0;
4378
41.6k
                    }
4379
31.2M
                } else if (c < 0x20) {
4380
                    /* Convert to space */
4381
29.4M
                    if (chunkSize > 0) {
4382
694k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383
694k
                        chunkSize = 0;
4384
694k
                    }
4385
4386
29.4M
                    xmlSBufAddCString(&buf, " ", 1);
4387
29.4M
                } else {
4388
1.82M
                    chunkSize += 1;
4389
1.82M
                }
4390
4391
33.4M
                inSpace = 1;
4392
4393
33.4M
                if ((c == 0xD) && (NXT(1) == 0xA))
4394
56.5k
                    CUR_PTR++;
4395
33.4M
            }
4396
4397
167M
            NEXTL(1);
4398
167M
        } else if (NXT(1) == '#') {
4399
189k
            int val;
4400
4401
189k
            if (chunkSize > 0) {
4402
113k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403
113k
                chunkSize = 0;
4404
113k
            }
4405
4406
189k
            val = xmlParseCharRef(ctxt);
4407
189k
            if (val == 0)
4408
17.1k
                goto error;
4409
4410
172k
            if ((val == '&') && (!ctxt->replaceEntities)) {
4411
                /*
4412
                 * The reparsing will be done in xmlStringGetNodeList()
4413
                 * called by the attribute() function in SAX.c
4414
                 */
4415
8.23k
                xmlSBufAddCString(&buf, "&#38;", 5);
4416
8.23k
                inSpace = 0;
4417
164k
            } else if (val == ' ') {
4418
17.2k
                if ((!normalize) || (!inSpace))
4419
15.5k
                    xmlSBufAddCString(&buf, " ", 1);
4420
17.2k
                inSpace = 1;
4421
147k
            } else {
4422
147k
                xmlSBufAddChar(&buf, val);
4423
147k
                inSpace = 0;
4424
147k
            }
4425
1.35M
        } else {
4426
1.35M
            const xmlChar *name;
4427
1.35M
            xmlEntityPtr ent;
4428
4429
1.35M
            if (chunkSize > 0) {
4430
738k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431
738k
                chunkSize = 0;
4432
738k
            }
4433
4434
1.35M
            name = xmlParseEntityRefInternal(ctxt);
4435
1.35M
            if (name == NULL) {
4436
                /*
4437
                 * Probably a literal '&' which wasn't escaped.
4438
                 * TODO: Handle gracefully in recovery mode.
4439
                 */
4440
145k
                continue;
4441
145k
            }
4442
4443
1.20M
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444
1.20M
            if (ent == NULL)
4445
73.9k
                continue;
4446
4447
1.13M
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448
561k
                if ((ent->content[0] == '&') && (!ctxt->replaceEntities))
4449
13.3k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4450
548k
                else
4451
548k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4452
561k
                inSpace = 0;
4453
569k
            } else if (ctxt->replaceEntities) {
4454
186k
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455
186k
                                          normalize, &inSpace, ctxt->inputNr,
4456
186k
                                          /* check */ 1);
4457
382k
            } else {
4458
382k
                if ((ent->flags & flags) != flags)
4459
3.34k
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460
4461
382k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462
210
                    ent->content[0] = 0;
4463
210
                    goto error;
4464
210
                }
4465
4466
                /*
4467
                 * Just output the reference
4468
                 */
4469
382k
                xmlSBufAddCString(&buf, "&", 1);
4470
382k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471
382k
                xmlSBufAddCString(&buf, ";", 1);
4472
4473
382k
                inSpace = 0;
4474
382k
            }
4475
1.13M
  }
4476
300M
    }
4477
4478
2.64M
    if ((buf.mem == NULL) && (alloc != NULL)) {
4479
1.78M
        ret = (xmlChar *) CUR_PTR - chunkSize;
4480
4481
1.78M
        if (attlen != NULL)
4482
1.78M
            *attlen = chunkSize;
4483
1.78M
        if ((normalize) && (inSpace) && (chunkSize > 0))
4484
973
            *attlen -= 1;
4485
1.78M
        *alloc = 0;
4486
4487
        /* Report potential error */
4488
1.78M
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489
1.78M
    } else {
4490
858k
        if (chunkSize > 0)
4491
644k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492
4493
858k
        if ((normalize) && (inSpace) && (buf.size > 0))
4494
5.66k
            buf.size--;
4495
4496
858k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497
4498
858k
        if (ret != NULL) {
4499
858k
            if (attlen != NULL)
4500
360k
                *attlen = buf.size;
4501
858k
            if (alloc != NULL)
4502
360k
                *alloc = 1;
4503
858k
        }
4504
858k
    }
4505
4506
2.64M
    NEXTL(1);
4507
4508
2.64M
    return(ret);
4509
4510
50.5k
error:
4511
50.5k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512
50.5k
    return(NULL);
4513
2.69M
}
4514
4515
/**
4516
 * xmlParseAttValue:
4517
 * @ctxt:  an XML parser context
4518
 *
4519
 * DEPRECATED: Internal function, don't use.
4520
 *
4521
 * parse a value for an attribute
4522
 * Note: the parser won't do substitution of entities here, this
4523
 * will be handled later in xmlStringGetNodeList
4524
 *
4525
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526
 *                   "'" ([^<&'] | Reference)* "'"
4527
 *
4528
 * 3.3.3 Attribute-Value Normalization:
4529
 * Before the value of an attribute is passed to the application or
4530
 * checked for validity, the XML processor must normalize it as follows:
4531
 * - a character reference is processed by appending the referenced
4532
 *   character to the attribute value
4533
 * - an entity reference is processed by recursively processing the
4534
 *   replacement text of the entity
4535
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536
 *   appending #x20 to the normalized value, except that only a single
4537
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4538
 *   parsed entity or the literal entity value of an internal parsed entity
4539
 * - other characters are processed by appending them to the normalized value
4540
 * If the declared value is not CDATA, then the XML processor must further
4541
 * process the normalized attribute value by discarding any leading and
4542
 * trailing space (#x20) characters, and by replacing sequences of space
4543
 * (#x20) characters by a single space (#x20) character.
4544
 * All attributes for which no declaration has been read should be treated
4545
 * by a non-validating parser as if declared CDATA.
4546
 *
4547
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548
 */
4549
4550
4551
xmlChar *
4552
549k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4553
549k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4554
549k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4555
549k
}
4556
4557
/**
4558
 * xmlParseSystemLiteral:
4559
 * @ctxt:  an XML parser context
4560
 *
4561
 * DEPRECATED: Internal function, don't use.
4562
 *
4563
 * parse an XML Literal
4564
 *
4565
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566
 *
4567
 * Returns the SystemLiteral parsed or NULL
4568
 */
4569
4570
xmlChar *
4571
99.7k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4572
99.7k
    xmlChar *buf = NULL;
4573
99.7k
    int len = 0;
4574
99.7k
    int size = XML_PARSER_BUFFER_SIZE;
4575
99.7k
    int cur, l;
4576
99.7k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577
16.7k
                    XML_MAX_TEXT_LENGTH :
4578
99.7k
                    XML_MAX_NAME_LENGTH;
4579
99.7k
    xmlChar stop;
4580
4581
99.7k
    if (RAW == '"') {
4582
44.6k
        NEXT;
4583
44.6k
  stop = '"';
4584
55.1k
    } else if (RAW == '\'') {
4585
49.3k
        NEXT;
4586
49.3k
  stop = '\'';
4587
49.3k
    } else {
4588
5.73k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589
5.73k
  return(NULL);
4590
5.73k
    }
4591
4592
94.0k
    buf = (xmlChar *) xmlMallocAtomic(size);
4593
94.0k
    if (buf == NULL) {
4594
71
        xmlErrMemory(ctxt);
4595
71
  return(NULL);
4596
71
    }
4597
93.9k
    cur = CUR_CHAR(l);
4598
16.1M
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4599
16.0M
  if (len + 5 >= size) {
4600
45.7k
      xmlChar *tmp;
4601
4602
45.7k
      size *= 2;
4603
45.7k
      tmp = (xmlChar *) xmlRealloc(buf, size);
4604
45.7k
      if (tmp == NULL) {
4605
19
          xmlFree(buf);
4606
19
    xmlErrMemory(ctxt);
4607
19
    return(NULL);
4608
19
      }
4609
45.7k
      buf = tmp;
4610
45.7k
  }
4611
16.0M
  COPY_BUF(buf, len, cur);
4612
16.0M
        if (len > maxLength) {
4613
13
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614
13
            xmlFree(buf);
4615
13
            return(NULL);
4616
13
        }
4617
16.0M
  NEXTL(l);
4618
16.0M
  cur = CUR_CHAR(l);
4619
16.0M
    }
4620
93.9k
    buf[len] = 0;
4621
93.9k
    if (!IS_CHAR(cur)) {
4622
28.8k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623
65.0k
    } else {
4624
65.0k
  NEXT;
4625
65.0k
    }
4626
93.9k
    return(buf);
4627
93.9k
}
4628
4629
/**
4630
 * xmlParsePubidLiteral:
4631
 * @ctxt:  an XML parser context
4632
 *
4633
 * DEPRECATED: Internal function, don't use.
4634
 *
4635
 * parse an XML public literal
4636
 *
4637
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638
 *
4639
 * Returns the PubidLiteral parsed or NULL.
4640
 */
4641
4642
xmlChar *
4643
34.1k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4644
34.1k
    xmlChar *buf = NULL;
4645
34.1k
    int len = 0;
4646
34.1k
    int size = XML_PARSER_BUFFER_SIZE;
4647
34.1k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648
6.48k
                    XML_MAX_TEXT_LENGTH :
4649
34.1k
                    XML_MAX_NAME_LENGTH;
4650
34.1k
    xmlChar cur;
4651
34.1k
    xmlChar stop;
4652
4653
34.1k
    if (RAW == '"') {
4654
9.44k
        NEXT;
4655
9.44k
  stop = '"';
4656
24.7k
    } else if (RAW == '\'') {
4657
23.2k
        NEXT;
4658
23.2k
  stop = '\'';
4659
23.2k
    } else {
4660
1.47k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661
1.47k
  return(NULL);
4662
1.47k
    }
4663
32.6k
    buf = (xmlChar *) xmlMallocAtomic(size);
4664
32.6k
    if (buf == NULL) {
4665
38
  xmlErrMemory(ctxt);
4666
38
  return(NULL);
4667
38
    }
4668
32.6k
    cur = CUR;
4669
590k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670
590k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4671
557k
  if (len + 1 >= size) {
4672
566
      xmlChar *tmp;
4673
4674
566
      size *= 2;
4675
566
      tmp = (xmlChar *) xmlRealloc(buf, size);
4676
566
      if (tmp == NULL) {
4677
6
    xmlErrMemory(ctxt);
4678
6
    xmlFree(buf);
4679
6
    return(NULL);
4680
6
      }
4681
560
      buf = tmp;
4682
560
  }
4683
557k
  buf[len++] = cur;
4684
557k
        if (len > maxLength) {
4685
6
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686
6
            xmlFree(buf);
4687
6
            return(NULL);
4688
6
        }
4689
557k
  NEXT;
4690
557k
  cur = CUR;
4691
557k
    }
4692
32.6k
    buf[len] = 0;
4693
32.6k
    if (cur != stop) {
4694
6.05k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695
26.5k
    } else {
4696
26.5k
  NEXTL(1);
4697
26.5k
    }
4698
32.6k
    return(buf);
4699
32.6k
}
4700
4701
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702
4703
/*
 * Lookup table used by the inner loop of the fast character data scan.
 * A non-zero entry means the byte can be skipped over directly; the
 * entry then holds the byte value itself. Entries are zero for the
 * control characters other than TAB, for '&', '<' and ']', and for
 * every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 */ 0, '\t', 0, 0, 0, 0, 0, 0,   /* TAB only; LF and CR are zero */
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ ' ', '!', '"', '#', '$', '%', 0, '\'',   /* no '&' */
    /* 0x28 */ '(', ')', '*', '+', ',', '-', '.', '/',
    /* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7',
    /* 0x38 */ '8', '9', ':', ';', 0, '=', '>', '?',    /* no '<' */
    /* 0x40 */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
    /* 0x48 */ 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    /* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
    /* 0x58 */ 'X', 'Y', 'Z', '[', '\\', 0, '^', '_',   /* no ']' */
    /* 0x60 */ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
    /* 0x68 */ 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    /* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
    /* 0x78 */ 'x', 'y', 'z', '{', '|', '}', '~', 0x7F,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0,                  /* non-ascii */
    /* 0x88 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x98 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE8 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF8 */ 0, 0, 0, 0, 0, 0, 0, 0
};
4740
4741
/**
4742
 * xmlParseCharDataInternal:
4743
 * @ctxt:  an XML parser context
4744
 * @partial:  buffer may contain partial UTF-8 sequences
4745
 *
4746
 * Parse character data. Always makes progress if the first char isn't
4747
 * '<' or '&'.
4748
 *
4749
 * The right angle bracket (>) may be represented using the string "&gt;",
4750
 * and must, for compatibility, be escaped using "&gt;" or a character
4751
 * reference when it appears in the string "]]>" in content, when that
4752
 * string is not marking the end of a CDATA section.
4753
 *
4754
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4755
 */
4756
static void
4757
9.90M
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4758
9.90M
    const xmlChar *in;
4759
9.90M
    int nbchar = 0;
4760
9.90M
    int line = ctxt->input->line;
4761
9.90M
    int col = ctxt->input->col;
4762
9.90M
    int ccol;
4763
4764
9.90M
    GROW;
4765
    /*
4766
     * Accelerated common case where input don't need to be
4767
     * modified before passing it to the handler.
4768
     */
4769
9.90M
    in = ctxt->input->cur;
4770
10.1M
    do {
4771
10.7M
get_more_space:
4772
12.4M
        while (*in == 0x20) { in++; ctxt->input->col++; }
4773
10.7M
        if (*in == 0xA) {
4774
11.5M
            do {
4775
11.5M
                ctxt->input->line++; ctxt->input->col = 1;
4776
11.5M
                in++;
4777
11.5M
            } while (*in == 0xA);
4778
616k
            goto get_more_space;
4779
616k
        }
4780
10.1M
        if (*in == '<') {
4781
583k
            nbchar = in - ctxt->input->cur;
4782
583k
            if (nbchar > 0) {
4783
583k
                const xmlChar *tmp = ctxt->input->cur;
4784
583k
                ctxt->input->cur = in;
4785
4786
583k
                if ((ctxt->sax != NULL) &&
4787
583k
                    (ctxt->disableSAX == 0) &&
4788
583k
                    (ctxt->sax->ignorableWhitespace !=
4789
569k
                     ctxt->sax->characters)) {
4790
45.8k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791
25.7k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4792
25.7k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4793
25.7k
                                                   tmp, nbchar);
4794
25.7k
                    } else {
4795
20.1k
                        if (ctxt->sax->characters != NULL)
4796
20.1k
                            ctxt->sax->characters(ctxt->userData,
4797
20.1k
                                                  tmp, nbchar);
4798
20.1k
                        if (*ctxt->space == -1)
4799
7.10k
                            *ctxt->space = -2;
4800
20.1k
                    }
4801
537k
                } else if ((ctxt->sax != NULL) &&
4802
537k
                           (ctxt->disableSAX == 0) &&
4803
537k
                           (ctxt->sax->characters != NULL)) {
4804
523k
                    ctxt->sax->characters(ctxt->userData,
4805
523k
                                          tmp, nbchar);
4806
523k
                }
4807
583k
            }
4808
583k
            return;
4809
583k
        }
4810
4811
10.1M
get_more:
4812
10.1M
        ccol = ctxt->input->col;
4813
39.1M
        while (test_char_data[*in]) {
4814
29.0M
            in++;
4815
29.0M
            ccol++;
4816
29.0M
        }
4817
10.1M
        ctxt->input->col = ccol;
4818
10.1M
        if (*in == 0xA) {
4819
6.21M
            do {
4820
6.21M
                ctxt->input->line++; ctxt->input->col = 1;
4821
6.21M
                in++;
4822
6.21M
            } while (*in == 0xA);
4823
451k
            goto get_more;
4824
451k
        }
4825
9.66M
        if (*in == ']') {
4826
149k
            if ((in[1] == ']') && (in[2] == '>')) {
4827
22.3k
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828
22.3k
                ctxt->input->cur = in + 1;
4829
22.3k
                return;
4830
22.3k
            }
4831
127k
            in++;
4832
127k
            ctxt->input->col++;
4833
127k
            goto get_more;
4834
149k
        }
4835
9.51M
        nbchar = in - ctxt->input->cur;
4836
9.51M
        if (nbchar > 0) {
4837
2.36M
            if ((ctxt->sax != NULL) &&
4838
2.36M
                (ctxt->disableSAX == 0) &&
4839
2.36M
                (ctxt->sax->ignorableWhitespace !=
4840
2.14M
                 ctxt->sax->characters) &&
4841
2.36M
                (IS_BLANK_CH(*ctxt->input->cur))) {
4842
78.2k
                const xmlChar *tmp = ctxt->input->cur;
4843
78.2k
                ctxt->input->cur = in;
4844
4845
78.2k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846
21.6k
                    if (ctxt->sax->ignorableWhitespace != NULL)
4847
21.6k
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4848
21.6k
                                                       tmp, nbchar);
4849
56.5k
                } else {
4850
56.5k
                    if (ctxt->sax->characters != NULL)
4851
56.5k
                        ctxt->sax->characters(ctxt->userData,
4852
56.5k
                                              tmp, nbchar);
4853
56.5k
                    if (*ctxt->space == -1)
4854
25.9k
                        *ctxt->space = -2;
4855
56.5k
                }
4856
78.2k
                line = ctxt->input->line;
4857
78.2k
                col = ctxt->input->col;
4858
2.28M
            } else if ((ctxt->sax != NULL) &&
4859
2.28M
                       (ctxt->disableSAX == 0)) {
4860
2.06M
                if (ctxt->sax->characters != NULL)
4861
2.06M
                    ctxt->sax->characters(ctxt->userData,
4862
2.06M
                                          ctxt->input->cur, nbchar);
4863
2.06M
                line = ctxt->input->line;
4864
2.06M
                col = ctxt->input->col;
4865
2.06M
            }
4866
2.36M
        }
4867
9.51M
        ctxt->input->cur = in;
4868
9.51M
        if (*in == 0xD) {
4869
267k
            in++;
4870
267k
            if (*in == 0xA) {
4871
219k
                ctxt->input->cur = in;
4872
219k
                in++;
4873
219k
                ctxt->input->line++; ctxt->input->col = 1;
4874
219k
                continue; /* while */
4875
219k
            }
4876
48.1k
            in--;
4877
48.1k
        }
4878
9.29M
        if (*in == '<') {
4879
1.39M
            return;
4880
1.39M
        }
4881
7.90M
        if (*in == '&') {
4882
286k
            return;
4883
286k
        }
4884
7.61M
        SHRINK;
4885
7.61M
        GROW;
4886
7.61M
        in = ctxt->input->cur;
4887
7.83M
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888
7.83M
             (*in == 0x09) || (*in == 0x0a));
4889
7.61M
    ctxt->input->line = line;
4890
7.61M
    ctxt->input->col = col;
4891
7.61M
    xmlParseCharDataComplex(ctxt, partial);
4892
7.61M
}
4893
4894
/**
4895
 * xmlParseCharDataComplex:
4896
 * @ctxt:  an XML parser context
4897
 * @cdata:  int indicating whether we are within a CDATA section
4898
 *
4899
 * Always makes progress if the first char isn't '<' or '&'.
4900
 *
4901
 * parse a CharData section.this is the fallback function
4902
 * of xmlParseCharData() when the parsing requires handling
4903
 * of non-ASCII characters.
4904
 */
4905
static void
4906
7.61M
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4907
7.61M
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908
7.61M
    int nbchar = 0;
4909
7.61M
    int cur, l;
4910
4911
7.61M
    cur = CUR_CHAR(l);
4912
140M
    while ((cur != '<') && /* checked */
4913
140M
           (cur != '&') &&
4914
140M
     (IS_CHAR(cur))) {
4915
132M
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916
15.6k
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917
15.6k
  }
4918
132M
  COPY_BUF(buf, nbchar, cur);
4919
  /* move current position before possible calling of ctxt->sax->characters */
4920
132M
  NEXTL(l);
4921
132M
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922
1.04M
      buf[nbchar] = 0;
4923
4924
      /*
4925
       * OK the segment is to be consumed as chars.
4926
       */
4927
1.04M
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928
1.02M
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4929
372
        if (ctxt->sax->ignorableWhitespace != NULL)
4930
372
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4931
372
                                     buf, nbchar);
4932
1.02M
    } else {
4933
1.02M
        if (ctxt->sax->characters != NULL)
4934
1.02M
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935
1.02M
        if ((ctxt->sax->characters !=
4936
1.02M
             ctxt->sax->ignorableWhitespace) &&
4937
1.02M
      (*ctxt->space == -1))
4938
2.18k
      *ctxt->space = -2;
4939
1.02M
    }
4940
1.02M
      }
4941
1.04M
      nbchar = 0;
4942
1.04M
            SHRINK;
4943
1.04M
  }
4944
132M
  cur = CUR_CHAR(l);
4945
132M
    }
4946
7.61M
    if (nbchar != 0) {
4947
806k
        buf[nbchar] = 0;
4948
  /*
4949
   * OK the segment is to be consumed as chars.
4950
   */
4951
806k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952
723k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4953
1.63k
    if (ctxt->sax->ignorableWhitespace != NULL)
4954
1.63k
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955
721k
      } else {
4956
721k
    if (ctxt->sax->characters != NULL)
4957
721k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958
721k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959
721k
        (*ctxt->space == -1))
4960
77.2k
        *ctxt->space = -2;
4961
721k
      }
4962
723k
  }
4963
806k
    }
4964
    /*
4965
     * cur == 0 can mean
4966
     *
4967
     * - End of buffer.
4968
     * - An actual 0 character.
4969
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970
     */
4971
7.61M
    if (ctxt->input->cur < ctxt->input->end) {
4972
7.58M
        if ((cur == 0) && (CUR != 0)) {
4973
9.67k
            if (partial == 0) {
4974
6.27k
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975
6.27k
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976
6.27k
                NEXTL(1);
4977
6.27k
            }
4978
7.57M
        } else if ((cur != '<') && (cur != '&')) {
4979
            /* Generate the error and skip the offending character */
4980
6.98M
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981
6.98M
                              "PCDATA invalid Char value %d\n", cur);
4982
6.98M
            NEXTL(l);
4983
6.98M
        }
4984
7.58M
    }
4985
7.61M
}
4986
4987
/**
4988
 * xmlParseCharData:
4989
 * @ctxt:  an XML parser context
4990
 * @cdata:  unused
4991
 *
4992
 * DEPRECATED: Internal function, don't use.
4993
 */
4994
void
4995
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4996
0
    xmlParseCharDataInternal(ctxt, 0);
4997
0
}
4998
4999
/**
5000
 * xmlParseExternalID:
5001
 * @ctxt:  an XML parser context
5002
 * @publicID:  a xmlChar** receiving PubidLiteral
5003
 * @strict: indicate whether we should restrict parsing to only
5004
 *          production [75], see NOTE below
5005
 *
5006
 * DEPRECATED: Internal function, don't use.
5007
 *
5008
 * Parse an External ID or a Public ID
5009
 *
5010
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5012
 *
5013
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5015
 *
5016
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017
 *
5018
 * Returns the function returns SystemLiteral and in the second
5019
 *                case publicID receives PubidLiteral, is strict is off
5020
 *                it is possible to return NULL and have publicID set.
5021
 */
5022
5023
xmlChar *
5024
202k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5025
202k
    xmlChar *URI = NULL;
5026
5027
202k
    *publicID = NULL;
5028
202k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029
73.8k
        SKIP(6);
5030
73.8k
  if (SKIP_BLANKS == 0) {
5031
2.19k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032
2.19k
                     "Space required after 'SYSTEM'\n");
5033
2.19k
  }
5034
73.8k
  URI = xmlParseSystemLiteral(ctxt);
5035
73.8k
  if (URI == NULL) {
5036
2.09k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037
2.09k
        }
5038
128k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039
34.1k
        SKIP(6);
5040
34.1k
  if (SKIP_BLANKS == 0) {
5041
13.6k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042
13.6k
        "Space required after 'PUBLIC'\n");
5043
13.6k
  }
5044
34.1k
  *publicID = xmlParsePubidLiteral(ctxt);
5045
34.1k
  if (*publicID == NULL) {
5046
1.52k
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047
1.52k
  }
5048
34.1k
  if (strict) {
5049
      /*
5050
       * We don't handle [83] so "S SystemLiteral" is required.
5051
       */
5052
25.5k
      if (SKIP_BLANKS == 0) {
5053
13.6k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054
13.6k
      "Space required after the Public Identifier\n");
5055
13.6k
      }
5056
25.5k
  } else {
5057
      /*
5058
       * We handle [83] so we return immediately, if
5059
       * "S SystemLiteral" is not detected. We skip blanks if no
5060
             * system literal was found, but this is harmless since we must
5061
             * be at the end of a NotationDecl.
5062
       */
5063
8.65k
      if (SKIP_BLANKS == 0) return(NULL);
5064
1.88k
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065
1.88k
  }
5066
25.9k
  URI = xmlParseSystemLiteral(ctxt);
5067
25.9k
  if (URI == NULL) {
5068
3.74k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069
3.74k
        }
5070
25.9k
    }
5071
193k
    return(URI);
5072
202k
}
5073
5074
/**
5075
 * xmlParseCommentComplex:
5076
 * @ctxt:  an XML parser context
5077
 * @buf:  the already parsed part of the buffer
5078
 * @len:  number of bytes in the buffer
5079
 * @size:  allocated size of the buffer
5080
 *
5081
 * Skip an XML (SGML) comment <!-- .... -->
5082
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5083
 *  must not occur within comments. "
5084
 * This is the slow routine in case the accelerator for ascii didn't work
5085
 *
5086
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087
 */
5088
static void
5089
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090
382k
                       size_t len, size_t size) {
5091
382k
    int q, ql;
5092
382k
    int r, rl;
5093
382k
    int cur, l;
5094
382k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095
49.7k
                       XML_MAX_HUGE_LENGTH :
5096
382k
                       XML_MAX_TEXT_LENGTH;
5097
5098
382k
    if (buf == NULL) {
5099
237k
        len = 0;
5100
237k
  size = XML_PARSER_BUFFER_SIZE;
5101
237k
  buf = (xmlChar *) xmlMallocAtomic(size);
5102
237k
  if (buf == NULL) {
5103
33
      xmlErrMemory(ctxt);
5104
33
      return;
5105
33
  }
5106
237k
    }
5107
382k
    q = CUR_CHAR(ql);
5108
382k
    if (q == 0)
5109
16.1k
        goto not_terminated;
5110
366k
    if (!IS_CHAR(q)) {
5111
4.82k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112
4.82k
                          "xmlParseComment: invalid xmlChar value %d\n",
5113
4.82k
                    q);
5114
4.82k
  xmlFree (buf);
5115
4.82k
  return;
5116
4.82k
    }
5117
361k
    NEXTL(ql);
5118
361k
    r = CUR_CHAR(rl);
5119
361k
    if (r == 0)
5120
28.5k
        goto not_terminated;
5121
332k
    if (!IS_CHAR(r)) {
5122
2.90k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123
2.90k
                          "xmlParseComment: invalid xmlChar value %d\n",
5124
2.90k
                    r);
5125
2.90k
  xmlFree (buf);
5126
2.90k
  return;
5127
2.90k
    }
5128
330k
    NEXTL(rl);
5129
330k
    cur = CUR_CHAR(l);
5130
330k
    if (cur == 0)
5131
189k
        goto not_terminated;
5132
16.9M
    while (IS_CHAR(cur) && /* checked */
5133
16.9M
           ((cur != '>') ||
5134
16.8M
      (r != '-') || (q != '-'))) {
5135
16.7M
  if ((r == '-') && (q == '-')) {
5136
233k
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137
233k
  }
5138
16.7M
  if (len + 5 >= size) {
5139
31.3k
      xmlChar *new_buf;
5140
31.3k
            size_t new_size;
5141
5142
31.3k
      new_size = size * 2;
5143
31.3k
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144
31.3k
      if (new_buf == NULL) {
5145
24
    xmlFree (buf);
5146
24
    xmlErrMemory(ctxt);
5147
24
    return;
5148
24
      }
5149
31.3k
      buf = new_buf;
5150
31.3k
            size = new_size;
5151
31.3k
  }
5152
16.7M
  COPY_BUF(buf, len, q);
5153
16.7M
        if (len > maxLength) {
5154
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155
0
                         "Comment too big found", NULL);
5156
0
            xmlFree (buf);
5157
0
            return;
5158
0
        }
5159
5160
16.7M
  q = r;
5161
16.7M
  ql = rl;
5162
16.7M
  r = cur;
5163
16.7M
  rl = l;
5164
5165
16.7M
  NEXTL(l);
5166
16.7M
  cur = CUR_CHAR(l);
5167
5168
16.7M
    }
5169
140k
    buf[len] = 0;
5170
140k
    if (cur == 0) {
5171
24.8k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172
24.8k
                       "Comment not terminated \n<!--%.50s\n", buf);
5173
115k
    } else if (!IS_CHAR(cur)) {
5174
6.89k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175
6.89k
                          "xmlParseComment: invalid xmlChar value %d\n",
5176
6.89k
                    cur);
5177
108k
    } else {
5178
108k
        NEXT;
5179
108k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180
108k
      (!ctxt->disableSAX))
5181
54.5k
      ctxt->sax->comment(ctxt->userData, buf);
5182
108k
    }
5183
140k
    xmlFree(buf);
5184
140k
    return;
5185
234k
not_terminated:
5186
234k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187
234k
       "Comment not terminated\n", NULL);
5188
234k
    xmlFree(buf);
5189
234k
    return;
5190
140k
}
5191
5192
/**
5193
 * xmlParseComment:
5194
 * @ctxt:  an XML parser context
5195
 *
5196
 * DEPRECATED: Internal function, don't use.
5197
 *
5198
 * Parse an XML (SGML) comment. Always consumes '<!'.
5199
 *
5200
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5201
 *  must not occur within comments. "
5202
 *
5203
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5204
 */
5205
void
5206
4.67M
xmlParseComment(xmlParserCtxtPtr ctxt) {
5207
4.67M
    xmlChar *buf = NULL;
5208
4.67M
    size_t size = XML_PARSER_BUFFER_SIZE;
5209
4.67M
    size_t len = 0;
5210
4.67M
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211
1.00M
                       XML_MAX_HUGE_LENGTH :
5212
4.67M
                       XML_MAX_TEXT_LENGTH;
5213
4.67M
    const xmlChar *in;
5214
4.67M
    size_t nbchar = 0;
5215
4.67M
    int ccol;
5216
5217
    /*
5218
     * Check that there is a comment right here.
5219
     */
5220
4.67M
    if ((RAW != '<') || (NXT(1) != '!'))
5221
0
        return;
5222
4.67M
    SKIP(2);
5223
4.67M
    if ((RAW != '-') || (NXT(1) != '-'))
5224
88
        return;
5225
4.67M
    SKIP(2);
5226
4.67M
    GROW;
5227
5228
    /*
5229
     * Accelerated common case where input don't need to be
5230
     * modified before passing it to the handler.
5231
     */
5232
4.67M
    in = ctxt->input->cur;
5233
4.67M
    do {
5234
4.67M
  if (*in == 0xA) {
5235
79.0k
      do {
5236
79.0k
    ctxt->input->line++; ctxt->input->col = 1;
5237
79.0k
    in++;
5238
79.0k
      } while (*in == 0xA);
5239
26.9k
  }
5240
6.35M
get_more:
5241
6.35M
        ccol = ctxt->input->col;
5242
40.6M
  while (((*in > '-') && (*in <= 0x7F)) ||
5243
40.6M
         ((*in >= 0x20) && (*in < '-')) ||
5244
40.6M
         (*in == 0x09)) {
5245
34.2M
        in++;
5246
34.2M
        ccol++;
5247
34.2M
  }
5248
6.35M
  ctxt->input->col = ccol;
5249
6.35M
  if (*in == 0xA) {
5250
305k
      do {
5251
305k
    ctxt->input->line++; ctxt->input->col = 1;
5252
305k
    in++;
5253
305k
      } while (*in == 0xA);
5254
105k
      goto get_more;
5255
105k
  }
5256
6.24M
  nbchar = in - ctxt->input->cur;
5257
  /*
5258
   * save current set of data
5259
   */
5260
6.24M
  if (nbchar > 0) {
5261
1.91M
            if (buf == NULL) {
5262
453k
                if ((*in == '-') && (in[1] == '-'))
5263
279k
                    size = nbchar + 1;
5264
174k
                else
5265
174k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5266
453k
                buf = (xmlChar *) xmlMallocAtomic(size);
5267
453k
                if (buf == NULL) {
5268
63
                    xmlErrMemory(ctxt);
5269
63
                    return;
5270
63
                }
5271
453k
                len = 0;
5272
1.46M
            } else if (len + nbchar + 1 >= size) {
5273
65.3k
                xmlChar *new_buf;
5274
65.3k
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275
65.3k
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5276
65.3k
                if (new_buf == NULL) {
5277
21
                    xmlFree (buf);
5278
21
                    xmlErrMemory(ctxt);
5279
21
                    return;
5280
21
                }
5281
65.3k
                buf = new_buf;
5282
65.3k
            }
5283
1.91M
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5284
1.91M
            len += nbchar;
5285
1.91M
            buf[len] = 0;
5286
1.91M
  }
5287
6.24M
        if (len > maxLength) {
5288
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289
0
                         "Comment too big found", NULL);
5290
0
            xmlFree (buf);
5291
0
            return;
5292
0
        }
5293
6.24M
  ctxt->input->cur = in;
5294
6.24M
  if (*in == 0xA) {
5295
0
      in++;
5296
0
      ctxt->input->line++; ctxt->input->col = 1;
5297
0
  }
5298
6.24M
  if (*in == 0xD) {
5299
220k
      in++;
5300
220k
      if (*in == 0xA) {
5301
218k
    ctxt->input->cur = in;
5302
218k
    in++;
5303
218k
    ctxt->input->line++; ctxt->input->col = 1;
5304
218k
    goto get_more;
5305
218k
      }
5306
2.60k
      in--;
5307
2.60k
  }
5308
6.02M
  SHRINK;
5309
6.02M
  GROW;
5310
6.02M
  in = ctxt->input->cur;
5311
6.02M
  if (*in == '-') {
5312
5.64M
      if (in[1] == '-') {
5313
5.26M
          if (in[2] == '>') {
5314
4.29M
        SKIP(3);
5315
4.29M
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316
4.29M
            (!ctxt->disableSAX)) {
5317
2.90M
      if (buf != NULL)
5318
259k
          ctxt->sax->comment(ctxt->userData, buf);
5319
2.64M
      else
5320
2.64M
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321
2.90M
        }
5322
4.29M
        if (buf != NULL)
5323
308k
            xmlFree(buf);
5324
4.29M
        return;
5325
4.29M
    }
5326
974k
    if (buf != NULL) {
5327
898k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328
898k
                          "Double hyphen within comment: "
5329
898k
                                      "<!--%.50s\n",
5330
898k
              buf);
5331
898k
    } else
5332
75.3k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333
75.3k
                          "Double hyphen within comment\n", NULL);
5334
974k
    in++;
5335
974k
    ctxt->input->col++;
5336
974k
      }
5337
1.34M
      in++;
5338
1.34M
      ctxt->input->col++;
5339
1.34M
      goto get_more;
5340
5.64M
  }
5341
6.02M
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5342
382k
    xmlParseCommentComplex(ctxt, buf, len, size);
5343
382k
    return;
5344
4.67M
}
5345
5346
5347
/**
5348
 * xmlParsePITarget:
5349
 * @ctxt:  an XML parser context
5350
 *
5351
 * DEPRECATED: Internal function, don't use.
5352
 *
5353
 * parse the name of a PI
5354
 *
5355
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356
 *
5357
 * Returns the PITarget name or NULL
5358
 */
5359
5360
const xmlChar *
5361
184k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5362
184k
    const xmlChar *name;
5363
5364
184k
    name = xmlParseName(ctxt);
5365
184k
    if ((name != NULL) &&
5366
184k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5367
184k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5368
184k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5369
24.5k
  int i;
5370
24.5k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5371
24.5k
      (name[2] == 'l') && (name[3] == 0)) {
5372
10.8k
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373
10.8k
     "XML declaration allowed only at the start of the document\n");
5374
10.8k
      return(name);
5375
13.6k
  } else if (name[3] == 0) {
5376
4.40k
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377
4.40k
      return(name);
5378
4.40k
  }
5379
25.9k
  for (i = 0;;i++) {
5380
25.9k
      if (xmlW3CPIs[i] == NULL) break;
5381
17.8k
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382
1.11k
          return(name);
5383
17.8k
  }
5384
8.13k
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385
8.13k
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5386
8.13k
          NULL, NULL);
5387
8.13k
    }
5388
168k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389
5.50k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390
5.50k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391
5.50k
    }
5392
168k
    return(name);
5393
184k
}
5394
5395
#ifdef LIBXML_CATALOG_ENABLED
5396
/**
5397
 * xmlParseCatalogPI:
5398
 * @ctxt:  an XML parser context
5399
 * @catalog:  the PI value string
5400
 *
5401
 * parse an XML Catalog Processing Instruction.
5402
 *
5403
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5404
 *
5405
 * Occurs only if allowed by the user and if happening in the Misc
5406
 * part of the document before any doctype information
5407
 * This will add the given catalog to the parsing context in order
5408
 * to be used if there is a resolution need further down in the document
5409
 */
5410
5411
static void
5412
0
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5413
0
    xmlChar *URL = NULL;
5414
0
    const xmlChar *tmp, *base;
5415
0
    xmlChar marker;
5416
5417
0
    tmp = catalog;
5418
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5419
0
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5420
0
  goto error;
5421
0
    tmp += 7;
5422
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5423
0
    if (*tmp != '=') {
5424
0
  return;
5425
0
    }
5426
0
    tmp++;
5427
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5428
0
    marker = *tmp;
5429
0
    if ((marker != '\'') && (marker != '"'))
5430
0
  goto error;
5431
0
    tmp++;
5432
0
    base = tmp;
5433
0
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5434
0
    if (*tmp == 0)
5435
0
  goto error;
5436
0
    URL = xmlStrndup(base, tmp - base);
5437
0
    tmp++;
5438
0
    while (IS_BLANK_CH(*tmp)) tmp++;
5439
0
    if (*tmp != 0)
5440
0
  goto error;
5441
5442
0
    if (URL != NULL) {
5443
        /*
5444
         * Unfortunately, the catalog API doesn't report OOM errors.
5445
         * xmlGetLastError isn't very helpful since we don't know
5446
         * where the last error came from. We'd have to reset it
5447
         * before this call and restore it afterwards.
5448
         */
5449
0
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5450
0
  xmlFree(URL);
5451
0
    }
5452
0
    return;
5453
5454
0
error:
5455
0
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5456
0
            "Catalog PI syntax error: %s\n",
5457
0
      catalog, NULL);
5458
0
    if (URL != NULL)
5459
0
  xmlFree(URL);
5460
0
}
5461
#endif
5462
5463
/**
5464
 * xmlParsePI:
5465
 * @ctxt:  an XML parser context
5466
 *
5467
 * DEPRECATED: Internal function, don't use.
5468
 *
5469
 * parse an XML Processing Instruction.
5470
 *
5471
 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5472
 *
5473
 * The processing is transferred to SAX once parsed.
5474
 */
5475
5476
void
5477
184k
xmlParsePI(xmlParserCtxtPtr ctxt) {
5478
184k
    xmlChar *buf = NULL;
5479
184k
    size_t len = 0;
5480
184k
    size_t size = XML_PARSER_BUFFER_SIZE;
5481
184k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5482
84.5k
                       XML_MAX_HUGE_LENGTH :
5483
184k
                       XML_MAX_TEXT_LENGTH;
5484
184k
    int cur, l;
5485
184k
    const xmlChar *target;
5486
5487
184k
    if ((RAW == '<') && (NXT(1) == '?')) {
5488
  /*
5489
   * this is a Processing Instruction.
5490
   */
5491
184k
  SKIP(2);
5492
5493
  /*
5494
   * Parse the target name and check for special support like
5495
   * namespace.
5496
   */
5497
184k
        target = xmlParsePITarget(ctxt);
5498
184k
  if (target != NULL) {
5499
165k
      if ((RAW == '?') && (NXT(1) == '>')) {
5500
59.2k
    SKIP(2);
5501
5502
    /*
5503
     * SAX: PI detected.
5504
     */
5505
59.2k
    if ((ctxt->sax) && (!ctxt->disableSAX) &&
5506
59.2k
        (ctxt->sax->processingInstruction != NULL))
5507
56.1k
        ctxt->sax->processingInstruction(ctxt->userData,
5508
56.1k
                                         target, NULL);
5509
59.2k
    return;
5510
59.2k
      }
5511
105k
      buf = (xmlChar *) xmlMallocAtomic(size);
5512
105k
      if (buf == NULL) {
5513
108
    xmlErrMemory(ctxt);
5514
</