Coverage Report

Created: 2024-02-11 06:23

/src/libprotobuf-mutator/build/examples/libxml2/external.libxml2/src/external.libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of characters are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
58.4k
#define NS_INDEX_EMPTY  INT_MAX
78
32.6k
#define NS_INDEX_XML    (INT_MAX - 1)
79
16.0k
#define URI_HASH_EMPTY  0xD943A04E
80
2.40k
#define URI_HASH_XML    0xF0451F02
81
82
/*
 * Record of four values: a prefix string, a URI string, a line number
 * and an integer named nsNr.
 * NOTE(review): the code that fills and reads this struct is not part of
 * this excerpt; presumably one record is kept per open start tag and nsNr
 * counts namespace declarations - confirm against the users.
 */
struct _xmlStartTag {
83
    const xmlChar *prefix;
84
    const xmlChar *URI;
85
    int line;
86
    int nsNr;
87
};
88
89
/*
 * Extra per-namespace bookkeeping: an opaque SAX data pointer, two hash
 * values (prefix and URI), an element id and a saved index.
 * NOTE(review): users of this type are outside this excerpt; the exact
 * meaning of elementId and oldIndex should be confirmed there.
 */
typedef struct {
90
    void *saxData;
91
    unsigned prefixHashValue;
92
    unsigned uriHashValue;
93
    unsigned elementId;
94
    int oldIndex;
95
} xmlParserNsExtra;
96
97
/*
 * One slot of the hash table referenced by _xmlParserNsData.hash: a hash
 * value paired with an integer index.
 * NOTE(review): presumably the index refers to an entry of the parser's
 * namespace table - confirm against the lookup code.
 */
typedef struct {
98
    unsigned hashValue;
99
    int index;
100
} xmlParserNsBucket;
101
102
/*
 * Namespace bookkeeping state: a pointer to xmlParserNsExtra records, a
 * bucket array with its size and element count, plus an element id and
 * two indices.
 * NOTE(review): the functions maintaining this state are outside this
 * excerpt; presumably "extra" is an array parallel to the namespace
 * table and "hash" has hashSize buckets - confirm.
 */
struct _xmlParserNsData {
103
    xmlParserNsExtra *extra;
104
105
    unsigned hashSize;
106
    unsigned hashElems;
107
    xmlParserNsBucket *hash;
108
109
    unsigned elementId;
110
    int defaultNsIndex;
111
    int minNsIndex;
112
};
113
114
/*
 * Hash bucket holding a single integer index.
 * NOTE(review): presumably an index into the attribute array used for
 * duplicate detection - the users are outside this excerpt, confirm.
 */
struct _xmlAttrHashBucket {
115
    int index;
116
};
117
118
static int
119
xmlParseElementStart(xmlParserCtxtPtr ctxt);
120
121
static void
122
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123
124
static xmlEntityPtr
125
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126
127
static const xmlChar *
128
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129
130
/************************************************************************
131
 *                  *
132
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
133
 *                  *
134
 ************************************************************************/
135
136
#define XML_PARSER_BIG_ENTITY 1000
137
#define XML_PARSER_LOT_ENTITY 5000
138
139
/*
140
 * Constants for protection against abusive entity expansion
141
 * ("billion laughs").
142
 */
143
144
/*
145
 * A certain amount of entity expansion which is always allowed.
146
 */
147
134k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
148
149
/*
150
 * Fixed cost for each entity reference. This crudely models processing time
151
 * as well to protect, for example, against exponential expansion of empty
152
 * or very short entities.
153
 */
154
143k
#define XML_ENT_FIXED_COST 20
155
156
/**
157
 * xmlParserMaxDepth:
158
 *
159
 * arbitrary depth limit for the XML documents that we allow to
160
 * process. This is not a limitation of the parser but a safety
161
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
162
 * parser option.
163
 */
164
const unsigned int xmlParserMaxDepth = 256;
165
166
167
168
51.3k
#define XML_PARSER_BIG_BUFFER_SIZE 300
169
64.8k
#define XML_PARSER_BUFFER_SIZE 100
170
25.9k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171
172
/**
173
 * XML_PARSER_CHUNK_SIZE
174
 *
175
 * When calling GROW that's the minimal amount of data
176
 * the parser expects to have received. It is not a hard
177
 * limit but an optimization when reading strings like Names
178
 * It is not strictly needed as long as the input's available characters
179
 * are followed by 0, which should be provided by the I/O level
180
 */
181
#define XML_PARSER_CHUNK_SIZE 100
182
183
/**
184
 * xmlParserVersion:
185
 *
186
 * Constant string describing the internal version of the library
187
 */
188
const char *const
189
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190
191
/*
192
 * List of XML prefixed PI allowed by W3C specs
193
 */
194
195
static const char* const xmlW3CPIs[] = {
196
    "xml-stylesheet",
197
    "xml-model",
198
    NULL
199
};
200
201
202
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204
                                              const xmlChar **str);
205
206
static void
207
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208
209
static int
210
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211
212
/************************************************************************
213
 *                  *
214
 *    Some factorized error routines        *
215
 *                  *
216
 ************************************************************************/
217
218
/*
 * xmlErrMemory:
 * @ctxt:  an XML parser context
 *
 * Thin wrapper that forwards to xmlCtxtErrMemory() for @ctxt.
 */
static void
219
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
220
0
    xmlCtxtErrMemory(ctxt);
221
0
}
222
223
/**
224
 * xmlErrAttributeDup:
225
 * @ctxt:  an XML parser context
226
 * @prefix:  the attribute prefix, or NULL if the attribute has none
227
 * @localname:  the attribute localname
228
 *
229
 * Report a fatal XML_ERR_ATTRIBUTE_REDEFINED error for a redefined
 * attribute. The message names the attribute as "prefix:localname" when
 * @prefix is non-NULL and as "localname" otherwise.
230
 */
231
static void
232
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233
                   const xmlChar * localname)
234
5.44k
{
235
5.44k
    if (prefix == NULL)
236
5.13k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
5.13k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
238
5.13k
                   "Attribute %s redefined\n", localname);
239
309
    else
240
309
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
309
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
242
309
                   "Attribute %s:%s redefined\n", prefix, localname);
243
5.44k
}
244
245
/**
246
 * xmlFatalErrMsg:
247
 * @ctxt:  an XML parser context
248
 * @error:  the error number
249
 * @msg:  the error message
250
 *
251
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * @msg is emitted through a fixed "%s" format, so it is treated as plain
 * text and never interpreted as a format string.
252
 */
253
static void LIBXML_ATTR_FORMAT(3,0)
254
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255
               const char *msg)
256
304k
{
257
304k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258
304k
               NULL, NULL, NULL, 0, "%s", msg);
259
304k
}
260
261
/**
262
 * xmlWarningMsg:
263
 * @ctxt:  an XML parser context
264
 * @error:  the error number
265
 * @msg:  the error message, used as the format string
266
 * @str1:  extra data, first argument of @msg
267
 * @str2:  extra data, second argument of @msg
268
 *
269
 * Handle a warning: the message is reported with level XML_ERR_WARNING
 * in the XML_FROM_PARSER domain. Unlike most helpers in this section
 * this function is not declared static.
270
 */
271
void LIBXML_ATTR_FORMAT(3,0)
272
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273
              const char *msg, const xmlChar *str1, const xmlChar *str2)
274
4.70k
{
275
4.70k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276
4.70k
               str1, str2, NULL, 0, msg, str1, str2);
277
4.70k
}
278
279
/**
280
 * xmlValidityError:
281
 * @ctxt:  an XML parser context
282
 * @error:  the error number
283
 * @msg:  the error message, used as the format string
284
 * @str1:  extra data, first argument of @msg
 * @str2:  extra data, second argument of @msg
285
 *
286
 * Handle a validity error: clears ctxt->valid and reports the message
 * with level XML_ERR_ERROR in the XML_FROM_DTD domain.
287
 */
288
static void LIBXML_ATTR_FORMAT(3,0)
289
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290
              const char *msg, const xmlChar *str1, const xmlChar *str2)
291
1.53k
{
    /* Mark the document as invalid before reporting. */
292
1.53k
    ctxt->valid = 0;
293
294
1.53k
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295
1.53k
               str1, str2, NULL, 0, msg, str1, str2);
296
1.53k
}
297
298
/**
299
 * xmlFatalErrMsgInt:
300
 * @ctxt:  an XML parser context
301
 * @error:  the error number
302
 * @msg:  the error message, used as the format string
303
 * @val:  an integer value, the single argument of @msg
304
 *
305
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * @val is passed to xmlCtxtErr() twice: once as the integer detail of
 * the error and once as the format argument.
306
 */
307
static void LIBXML_ATTR_FORMAT(3,0)
308
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309
                  const char *msg, int val)
310
11.4k
{
311
11.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312
11.4k
               NULL, NULL, NULL, val, msg, val);
313
11.4k
}
314
315
/**
316
 * xmlFatalErrMsgStrIntStr:
317
 * @ctxt:  an XML parser context
318
 * @error:  the error number
319
 * @msg:  the error message, used as the format string
320
 * @str1:  a string info, first argument of @msg
321
 * @val:  an integer value, second argument of @msg
322
 * @str2:  a string info, third argument of @msg
323
 *
324
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * The format arguments are supplied in the order @str1, @val, @str2.
325
 */
326
static void LIBXML_ATTR_FORMAT(3,0)
327
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328
                  const char *msg, const xmlChar *str1, int val,
329
      const xmlChar *str2)
330
110k
{
331
110k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332
110k
               str1, str2, NULL, val, msg, str1, val, str2);
333
110k
}
334
335
/**
336
 * xmlFatalErrMsgStr:
337
 * @ctxt:  an XML parser context
338
 * @error:  the error number
339
 * @msg:  the error message, used as the format string
340
 * @val:  a string value, the single argument of @msg
341
 *
342
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
 *
 * Same call shape as xmlErrMsgStr() but reported with XML_ERR_FATAL.
343
 */
344
static void LIBXML_ATTR_FORMAT(3,0)
345
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346
                  const char *msg, const xmlChar * val)
347
101k
{
348
101k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349
101k
               val, NULL, NULL, 0, msg, val);
350
101k
}
351
352
/**
353
 * xmlErrMsgStr:
354
 * @ctxt:  an XML parser context
355
 * @error:  the error number
356
 * @msg:  the error message, used as the format string
357
 * @val:  a string value, the single argument of @msg
358
 *
359
 * Handle a non fatal parser error: the message is reported with level
 * XML_ERR_ERROR (not XML_ERR_FATAL) in the XML_FROM_PARSER domain.
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
10.6k
{
365
10.6k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
10.6k
               val, NULL, NULL, 0, msg, val);
367
10.6k
}
368
369
/**
370
 * xmlNsErr:
371
 * @ctxt:  an XML parser context
372
 * @error:  the error number
373
 * @msg:  the message, used as the format string
374
 * @info1:  extra information string, first argument of @msg
375
 * @info2:  extra information string, second argument of @msg
 * @info3:  extra information string, third argument of @msg
376
 *
377
 * Handle a namespace error: clears ctxt->nsWellFormed and reports the
 * message in the XML_FROM_NAMESPACE domain. Note that the level used is
 * XML_ERR_ERROR, not XML_ERR_FATAL.
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
48.9k
{
    /* Record that the document is not namespace-well-formed. */
385
48.9k
    ctxt->nsWellFormed = 0;
386
387
48.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
48.9k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
48.9k
}
390
391
/**
392
 * xmlNsWarn:
393
 * @ctxt:  an XML parser context
394
 * @error:  the error number
395
 * @msg:  the message, used as the format string
396
 * @info1:  extra information string, first argument of @msg
397
 * @info2:  extra information string, second argument of @msg
 * @info3:  extra information string, third argument of @msg
398
 *
399
 * Handle a namespace warning: the message is reported with level
 * XML_ERR_WARNING in the XML_FROM_NAMESPACE domain. Unlike xmlNsErr(),
 * ctxt->nsWellFormed is left untouched.
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
998
{
407
998
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
998
               info1, info2, info3, 0, msg, info1, info2, info3);
409
998
}
410
411
/*
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst without wrapping around: if the sum would exceed
 * ULONG_MAX, *@dst is set to ULONG_MAX instead.
 */
static void
412
423k
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Rearranged overflow test: "*dst + val > ULONG_MAX" cannot be
     * evaluated directly without wrapping. */
413
423k
    if (val > ULONG_MAX - *dst)
414
0
        *dst = ULONG_MAX;
415
423k
    else
416
423k
        *dst += val;
417
423k
}
418
419
/*
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result at ULONG_MAX instead of
 * wrapping around.
 *
 * NOTE(review): despite the "SizeT" suffix, @val is declared as
 * unsigned long in this listing, which makes the body identical to
 * xmlSaturatedAdd(). Presumably the name reflects callers passing
 * size_t-like values (e.g. pointer differences) - confirm.
 */
static void
420
137k
xmlSaturatedAddSizeT(unsigned long *dst, unsigned long val) {
421
137k
    if (val > ULONG_MAX - *dst)
422
0
        *dst = ULONG_MAX;
423
137k
    else
424
137k
        *dst += val;
425
137k
}
426
427
/**
428
 * xmlParserEntityCheck:
429
 * @ctxt:  parser context
430
 * @extra:  sum of unexpanded entity sizes
431
 *
432
 * Check for non-linear entity expansion behaviour.
433
 *
434
 * In some cases like xmlExpandEntityInAttValue, this function is called
435
 * for each, possibly nested entity and its unexpanded content length.
436
 *
437
 * In other cases like xmlParseReference, it's only called for each
438
 * top-level entity with its unexpanded content length plus the sum of
439
 * the unexpanded content lengths (plus fixed cost) of all nested
440
 * entities.
441
 *
442
 * Summing the unexpanded lengths also adds the length of the reference.
443
 * This is by design. Taking the length of the entity name into account
444
 * discourages attacks that try to waste CPU time with abusively long
445
 * entity names. See test/recurse/lol6.xml for example. Each call also
446
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447
 * short entities.
448
 *
449
 * Returns 1 on error, 0 on success.
450
 */
451
static int
452
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453
170k
{
454
170k
    unsigned long consumed;
455
170k
    unsigned long *expandedSize;
456
170k
    xmlParserInputPtr input = ctxt->input;
457
170k
    xmlEntityPtr entity = input->entity;
458
    /* Nothing to account for when the current input belongs to an entity
     * already flagged XML_ENT_CHECKED. */
459
170k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
460
36.1k
        return(0);
461
462
    /*
463
     * Compute total consumed bytes so far, including input streams of
464
     * external entities.
465
     */
466
134k
    consumed = input->consumed;
467
134k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468
134k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469
    /* The cost is charged to the entity owning the current input, or to
     * the context-wide counter when the input has no entity. */
470
134k
    if (entity)
471
3.59k
        expandedSize = &entity->expandedSize;
472
131k
    else
473
131k
        expandedSize = &ctxt->sizeentcopy;
474
475
    /*
476
     * Add extra cost and some fixed cost.
477
     */
478
134k
    xmlSaturatedAdd(expandedSize, extra);
479
134k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480
481
    /*
482
     * It's important to always use saturation arithmetic when tracking
483
     * entity sizes to make the size checks reliable. If "sizeentcopy"
484
     * overflows, we have to abort.
485
     */
    /* Below XML_PARSER_ALLOWED_EXPANSION nothing is rejected; above it,
     * fail if the counter saturated or if expanded/maxAmpl exceeds the
     * number of bytes consumed. */
486
134k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487
134k
        ((*expandedSize >= ULONG_MAX) ||
488
17
         (*expandedSize / ctxt->maxAmpl > consumed))) {
489
17
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490
17
                       "Maximum entity amplification factor exceeded, see "
491
17
                       "xmlCtxtSetMaxAmplification.\n");
492
17
        xmlHaltParser(ctxt);
493
17
        return(1);
494
17
    }
495
496
134k
    return(0);
497
134k
}
498
499
/************************************************************************
500
 *                  *
501
 *    Library wide options          *
502
 *                  *
503
 ************************************************************************/
504
505
/**
506
  * xmlHasFeature:
507
  * @feature: the feature to be examined
508
  *
509
  * Examines if the library has been compiled with a given feature.
510
  *
  * Every known feature is answered from a compile-time configuration
  * macro, so the result is fixed for a given build of the library.
  *
511
  * Returns 1 if the feature was compiled in. Returns zero (0) if the
512
  * feature was not compiled in or if an unknown feature value is
513
  * requested.
514
  */
515
int
516
xmlHasFeature(xmlFeature feature)
517
0
{
518
0
    switch (feature) {
519
0
  case XML_WITH_THREAD:
520
0
#ifdef LIBXML_THREAD_ENABLED
521
0
      return(1);
522
#else
523
      return(0);
524
#endif
525
0
        case XML_WITH_TREE:
526
0
#ifdef LIBXML_TREE_ENABLED
527
0
            return(1);
528
#else
529
            return(0);
530
#endif
531
0
        case XML_WITH_OUTPUT:
532
0
#ifdef LIBXML_OUTPUT_ENABLED
533
0
            return(1);
534
#else
535
            return(0);
536
#endif
537
0
        case XML_WITH_PUSH:
538
0
#ifdef LIBXML_PUSH_ENABLED
539
0
            return(1);
540
#else
541
            return(0);
542
#endif
543
0
        case XML_WITH_READER:
544
0
#ifdef LIBXML_READER_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_PATTERN:
550
0
#ifdef LIBXML_PATTERN_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_WRITER:
556
0
#ifdef LIBXML_WRITER_ENABLED
557
0
            return(1);
558
#else
559
            return(0);
560
#endif
561
0
        case XML_WITH_SAX1:
562
0
#ifdef LIBXML_SAX1_ENABLED
563
0
            return(1);
564
#else
565
            return(0);
566
#endif
567
0
        case XML_WITH_FTP:
568
#ifdef LIBXML_FTP_ENABLED
569
            return(1);
570
#else
571
0
            return(0);
572
0
#endif
573
0
        case XML_WITH_HTTP:
574
0
#ifdef LIBXML_HTTP_ENABLED
575
0
            return(1);
576
#else
577
            return(0);
578
#endif
579
0
        case XML_WITH_VALID:
580
0
#ifdef LIBXML_VALID_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_HTML:
586
0
#ifdef LIBXML_HTML_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_LEGACY:
592
#ifdef LIBXML_LEGACY_ENABLED
593
            return(1);
594
#else
595
0
            return(0);
596
0
#endif
597
0
        case XML_WITH_C14N:
598
0
#ifdef LIBXML_C14N_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_CATALOG:
604
0
#ifdef LIBXML_CATALOG_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_XPATH:
610
0
#ifdef LIBXML_XPATH_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_XPTR:
616
0
#ifdef LIBXML_XPTR_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_XINCLUDE:
622
0
#ifdef LIBXML_XINCLUDE_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_ICONV:
628
0
#ifdef LIBXML_ICONV_ENABLED
629
0
            return(1);
630
#else
631
            return(0);
632
#endif
633
0
        case XML_WITH_ISO8859X:
634
0
#ifdef LIBXML_ISO8859X_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_UNICODE:
640
0
#ifdef LIBXML_UNICODE_ENABLED
641
0
            return(1);
642
#else
643
            return(0);
644
#endif
645
0
        case XML_WITH_REGEXP:
646
0
#ifdef LIBXML_REGEXP_ENABLED
647
0
            return(1);
648
#else
649
            return(0);
650
#endif
651
0
        case XML_WITH_AUTOMATA:
652
0
#ifdef LIBXML_AUTOMATA_ENABLED
653
0
            return(1);
654
#else
655
            return(0);
656
#endif
657
0
        case XML_WITH_EXPR:
658
#ifdef LIBXML_EXPR_ENABLED
659
            return(1);
660
#else
661
0
            return(0);
662
0
#endif
663
0
        case XML_WITH_SCHEMAS:
664
0
#ifdef LIBXML_SCHEMAS_ENABLED
665
0
            return(1);
666
#else
667
            return(0);
668
#endif
669
0
        case XML_WITH_SCHEMATRON:
670
0
#ifdef LIBXML_SCHEMATRON_ENABLED
671
0
            return(1);
672
#else
673
            return(0);
674
#endif
675
0
        case XML_WITH_MODULES:
676
0
#ifdef LIBXML_MODULES_ENABLED
677
0
            return(1);
678
#else
679
            return(0);
680
#endif
681
0
        case XML_WITH_DEBUG:
682
0
#ifdef LIBXML_DEBUG_ENABLED
683
0
            return(1);
684
#else
685
            return(0);
686
#endif
687
0
        case XML_WITH_DEBUG_MEM:
688
#ifdef DEBUG_MEMORY_LOCATION
689
            return(1);
690
#else
691
0
            return(0);
692
0
#endif
693
0
        case XML_WITH_ZLIB:
694
0
#ifdef LIBXML_ZLIB_ENABLED
695
0
            return(1);
696
#else
697
            return(0);
698
#endif
699
0
        case XML_WITH_LZMA:
700
0
#ifdef LIBXML_LZMA_ENABLED
701
0
            return(1);
702
#else
703
            return(0);
704
#endif
705
0
        case XML_WITH_ICU:
706
#ifdef LIBXML_ICU_ENABLED
707
            return(1);
708
#else
709
0
            return(0);
710
0
#endif
        /* Unknown feature values leave the switch and report 0 below. */
711
0
        default:
712
0
      break;
713
0
     }
714
0
     return(0);
715
0
}
716
717
/************************************************************************
718
 *                  *
719
 *      Simple string buffer        *
720
 *                  *
721
 ************************************************************************/
722
723
/*
 * Growable byte buffer with a size limit and a sticky error code.
 *
 * mem:  heap buffer obtained through xmlRealloc()/xmlMalloc(), NULL
 *       until the first allocation
 * size: number of bytes currently stored
 * cap:  number of bytes allocated for mem
 * max:  upper bound for size; additions that would exceed it set code
 *       to XML_ERR_RESOURCE_LIMIT
 * code: XML_ERR_OK, or the error recorded by a failed operation
 */
typedef struct {
724
    xmlChar *mem;
725
    unsigned size;
726
    unsigned cap; /* size < cap */
727
    unsigned max; /* size <= max */
728
    xmlParserErrors code;
729
} xmlSBuf;
730
731
/*
 * xmlSBufInit:
 * @buf:  the buffer to initialize
 * @max:  maximum number of bytes the buffer may hold
 *
 * Reset @buf to the empty state: no memory, zero size and capacity,
 * the given limit and no error. Nothing is allocated here.
 */
static void
732
110k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
733
110k
    buf->mem = NULL;
734
110k
    buf->size = 0;
735
110k
    buf->cap = 0;
736
110k
    buf->max = max;
737
110k
    buf->code = XML_ERR_OK;
738
110k
}
739
740
/*
 * xmlSBufGrow:
 * @buf:  the buffer
 * @len:  number of additional bytes about to be stored
 *
 * Reallocate the buffer to twice (size + @len) bytes, with a minimum
 * capacity of 240 bytes.
 *
 * Returns 0 on success. Returns -1 and sets buf->code to
 * XML_ERR_RESOURCE_LIMIT if the new capacity would not fit in an
 * unsigned, or to XML_ERR_NO_MEMORY if the reallocation fails; in both
 * cases buf->mem and buf->cap are left unchanged.
 */
static int
741
77.2k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742
77.2k
    xmlChar *mem;
743
77.2k
    unsigned cap;
744
    /* Guarantees that (size + len) * 2 below cannot wrap around. */
745
77.2k
    if (len >= UINT_MAX / 2 - buf->size) {
746
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
747
0
        return(-1);
748
0
    }
749
750
77.2k
    cap = (buf->size + len) * 2;
751
77.2k
    if (cap < 240)
752
69.5k
        cap = 240;
753
    /* The result goes to a temporary so the old block is not lost if
     * the reallocation fails. */
754
77.2k
    mem = xmlRealloc(buf->mem, cap);
755
77.2k
    if (mem == NULL) {
756
0
        buf->code = XML_ERR_NO_MEMORY;
757
0
        return(-1);
758
0
    }
759
760
77.2k
    buf->mem = mem;
761
77.2k
    buf->cap = cap;
762
763
77.2k
    return(0);
764
77.2k
}
765
766
/*
 * xmlSBufAddString:
 * @buf:  the buffer
 * @str:  bytes to append
 * @len:  number of bytes to copy from @str
 *
 * Append @len bytes to the buffer. If this would exceed buf->max, or
 * if growing the buffer fails, nothing is appended and the failure is
 * recorded in buf->code; no error is reported to the caller directly.
 */
static void
767
1.27M
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
768
1.27M
    if (buf->max - buf->size < len) {
769
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
770
0
        return;
771
0
    }
772
    /* "<=" keeps at least one spare byte after the data, which
     * xmlSBufFinish() uses for the terminating 0. */
773
1.27M
    if (buf->cap - buf->size <= len) {
774
74.9k
        if (xmlSBufGrow(buf, len) < 0)
775
0
            return;
776
74.9k
    }
777
    /* Skip memcpy entirely for empty input. */
778
1.27M
    if (len > 0)
779
1.27M
        memcpy(buf->mem + buf->size, str, len);
780
1.27M
    buf->size += len;
781
1.27M
}
782
783
/*
 * xmlSBufAddCString:
 * @buf:  the buffer
 * @str:  bytes to append, given as plain char
 * @len:  number of bytes to copy from @str
 *
 * Convenience wrapper around xmlSBufAddString() that casts @str to
 * const xmlChar *.
 */
static void
784
431k
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
785
431k
    xmlSBufAddString(buf, (const xmlChar *) str, len);
786
431k
}
787
788
/*
 * xmlSBufAddChar:
 * @buf:  the buffer
 * @c:  the character value to append
 *
 * Append a single character. Values below 0x80 are stored as one byte;
 * anything else is written by xmlCopyCharMultiByte(), whose return
 * value is added to the size (presumably the number of bytes written -
 * confirm against its definition).
 *
 * Room for 4 bytes is always required, regardless of how many bytes the
 * character actually needs. On failure nothing is appended and the
 * error is recorded in buf->code.
 */
static void
789
17.9k
xmlSBufAddChar(xmlSBuf *buf, int c) {
790
17.9k
    xmlChar *end;
791
792
17.9k
    if (buf->max - buf->size < 4) {
793
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
794
0
        return;
795
0
    }
796
797
17.9k
    if (buf->cap - buf->size <= 4) {
798
2.36k
        if (xmlSBufGrow(buf, 4) < 0)
799
0
            return;
800
2.36k
    }
801
802
17.9k
    end = buf->mem + buf->size;
803
804
17.9k
    if (c < 0x80) {
805
11.3k
        *end = (xmlChar) c;
806
11.3k
        buf->size += 1;
807
11.3k
    } else {
808
6.60k
        buf->size += xmlCopyCharMultiByte(end, c);
809
6.60k
    }
810
17.9k
}
811
812
/*
 * xmlSBufAddReplChar:
 * @buf:  the buffer
 *
 * Append the three bytes EF BF BD, the UTF-8 encoding of U+FFFD
 * (REPLACEMENT CHARACTER).
 */
static void
813
18.2k
xmlSBufAddReplChar(xmlSBuf *buf) {
814
18.2k
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
815
18.2k
}
816
817
/*
 * xmlSBufReportError:
 * @buf:  the buffer whose error code is reported
 * @ctxt:  an XML parser context
 * @errMsg:  message passed along for errors other than out-of-memory
 *
 * Report the error stored in buf->code: XML_ERR_NO_MEMORY goes to
 * xmlCtxtErrMemory(), any other code to xmlFatalErr() together with
 * @errMsg. The function does not check for XML_ERR_OK itself; callers
 * in this file only invoke it when an error was recorded.
 */
static void
818
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
819
0
    if (buf->code == XML_ERR_NO_MEMORY)
820
0
        xmlCtxtErrMemory(ctxt);
821
0
    else
822
0
        xmlFatalErr(ctxt, buf->code, errMsg);
823
0
}
824
825
/*
 * xmlSBufFinish:
 * @buf:  the buffer
 * @sizeOut:  if non-NULL, receives the number of bytes stored (0 on error)
 * @ctxt:  an XML parser context, used for error reporting
 * @errMsg:  message used when reporting a non-memory error
 *
 * Terminate the buffer contents with a 0 byte and hand out the memory.
 * An empty buffer that never allocated yields a freshly allocated
 * one-byte empty string.
 *
 * Returns buf->mem on success. If an error was recorded in buf->code
 * (including a failed allocation here), the error is reported, the
 * memory is freed and NULL is returned. buf->mem is not reset after
 * being freed, so @buf must not be used again without
 * re-initialization.
 */
static xmlChar *
826
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
827
78.6k
              const char *errMsg) {
828
78.6k
    if (buf->mem == NULL) {
829
12.5k
        buf->mem = xmlMalloc(1);
830
12.5k
        if (buf->mem == NULL) {
831
0
            buf->code = XML_ERR_NO_MEMORY;
832
12.5k
        } else {
833
12.5k
            buf->mem[0] = 0;
834
12.5k
        }
835
66.0k
    } else {
        /* Safe because the add functions keep size < cap. */
836
66.0k
        buf->mem[buf->size] = 0;
837
66.0k
    }
838
839
78.6k
    if (buf->code == XML_ERR_OK) {
840
78.6k
        if (sizeOut != NULL)
841
2.19k
            *sizeOut = buf->size;
842
78.6k
        return(buf->mem);
843
78.6k
    }
844
845
0
    xmlSBufReportError(buf, ctxt, errMsg);
846
847
0
    xmlFree(buf->mem);
848
849
0
    if (sizeOut != NULL)
850
0
        *sizeOut = 0;
851
0
    return(NULL);
852
78.6k
}
853
854
/*
 * xmlSBufCleanup:
 * @buf:  the buffer
 * @ctxt:  an XML parser context, used for error reporting
 * @errMsg:  message used when reporting a non-memory error
 *
 * Discard the buffer: report any error recorded in buf->code, then
 * free the memory. buf->mem is not reset afterwards.
 */
static void
855
27.4k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
856
27.4k
    if (buf->code != XML_ERR_OK)
857
0
        xmlSBufReportError(buf, ctxt, errMsg);
858
859
27.4k
    xmlFree(buf->mem);
860
27.4k
}
861
862
/*
 * xmlUTF8MultibyteLen:
 * @ctxt:  an XML parser context
 * @str:  pointer to the lead byte of a multi-byte UTF-8 sequence
 * @errMsg:  message used when U+FFFE or U+FFFF is found
 *
 * Validate the multi-byte UTF-8 sequence starting at @str and determine
 * its length. Overlong encodings, surrogates and values above 0x10FFFF
 * are rejected. Lead bytes below 0xC2 are rejected as well, so the
 * function is presumably only meant to be called for bytes >= 0x80 -
 * confirm against the callers.
 *
 * Each following byte is only read after the previous one passed the
 * continuation-byte check, so a 0 terminator stops further reads.
 *
 * U+FFFE and U+FFFF raise a fatal XML_ERR_INVALID_CHAR error with
 * @errMsg, but the sequence length 3 is still returned.
 *
 * Returns 2, 3 or 4 for a well-formed sequence, or 0 on an encoding
 * error. Only the first encoding error per input is reported, tracked
 * with the XML_INPUT_ENCODING_ERROR flag.
 */
static int
863
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
864
218k
                    const char *errMsg) {
865
218k
    int c = str[0];
866
218k
    int c1 = str[1];
867
868
218k
    if ((c1 & 0xC0) != 0x80)
869
7.13k
        goto encoding_error;
870
871
211k
    if (c < 0xE0) {
872
        /* 2-byte sequence */
873
10.0k
        if (c < 0xC2)
874
3.84k
            goto encoding_error;
875
876
6.23k
        return(2);
877
201k
    } else {
878
201k
        int c2 = str[2];
879
880
201k
        if ((c2 & 0xC0) != 0x80)
881
90
            goto encoding_error;
882
883
201k
        if (c < 0xF0) {
884
            /* 3-byte sequence */
885
199k
            if (c == 0xE0) {
886
                /* overlong */
887
154k
                if (c1 < 0xA0)
888
68
                    goto encoding_error;
889
154k
            } else if (c == 0xED) {
890
                /* surrogate */
891
332
                if (c1 >= 0xA0)
892
67
                    goto encoding_error;
893
44.0k
            } else if (c == 0xEF) {
894
                /* U+FFFE and U+FFFF are invalid Chars */
895
40.0k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
896
249
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
897
40.0k
            }
898
899
199k
            return(3);
900
199k
        } else {
901
            /* 4-byte sequence */
902
2.13k
            if ((str[3] & 0xC0) != 0x80)
903
79
                goto encoding_error;
904
2.05k
            if (c == 0xF0) {
905
                /* overlong */
906
696
                if (c1 < 0x90)
907
66
                    goto encoding_error;
908
1.36k
            } else if (c >= 0xF4) {
909
                /* greater than 0x10FFFF */
910
530
                if ((c > 0xF4) || (c1 >= 0x90))
911
136
                    goto encoding_error;
912
530
            }
913
914
1.85k
            return(4);
915
2.05k
        }
916
201k
    }
917
918
11.4k
encoding_error:
919
    /* Only report the first error */
920
11.4k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
921
708
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
922
708
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
923
708
    }
924
925
11.4k
    return(0);
926
211k
}
927
928
/************************************************************************
929
 *                  *
930
 *    SAX2 defaulted attributes handling      *
931
 *                  *
932
 ************************************************************************/
933
934
/**
935
 * xmlCtxtInitializeLate:
936
 * @ctxt:  an XML parser context, may be NULL (in which case nothing is done)
937
 *
938
 * Final initialization of the parser context before starting to parse.
939
 *
940
 * This accounts for users modifying struct members of parser context
941
 * directly.
 *
 * Two things are (re)computed here: the sax2 flag, derived from the
 * current SAX handler and parser options, and the interned strings
 * "xml", "xmlns" and the XML namespace URI, looked up in the context's
 * current dictionary. A failed lookup is reported as a memory error.
942
 */
943
static void
944
22.1k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
945
22.1k
    xmlSAXHandlerPtr sax;
946
947
    /* Avoid unused variable warning if features are disabled. */
948
22.1k
    (void) sax;
949
950
    /*
951
     * Changing the SAX struct directly is still widespread practice
952
     * in internal and external code.
953
     */
954
22.1k
    if (ctxt == NULL) return;
955
22.1k
    sax = ctxt->sax;
956
22.1k
#ifdef LIBXML_SAX1_ENABLED
957
    /*
958
     * Only enable SAX2 if there are SAX2 element handlers, except when
959
     * there are no element handlers at all.
960
     */
961
22.1k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
962
22.1k
        (sax) &&
963
22.1k
        (sax->initialized == XML_SAX2_MAGIC) &&
964
22.1k
        ((sax->startElementNs != NULL) ||
965
18.0k
         (sax->endElementNs != NULL) ||
966
18.0k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
967
18.0k
        ctxt->sax2 = 1;
968
#else
969
    ctxt->sax2 = 1;
970
#endif /* LIBXML_SAX1_ENABLED */
971
972
    /*
973
     * Some users replace the dictionary directly in the context struct.
974
     * We really need an API function to do that cleanly.
975
     */
976
22.1k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
977
22.1k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
978
22.1k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
979
22.1k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
980
22.1k
    (ctxt->str_xml_ns == NULL)) {
981
0
        xmlErrMemory(ctxt);
982
0
    }
983
22.1k
}
984
985
/*
 * One defaulted attribute: hashed prefix, name and value strings, a
 * pointer named valueEnd, an "external" flag and an expanded size.
 * NOTE(review): the code that fills these fields lies beyond the end of
 * this excerpt; presumably valueEnd points at the end of value.name and
 * external marks declarations from the external subset - confirm.
 */
typedef struct {
986
    xmlHashedString prefix;
987
    xmlHashedString name;
988
    xmlHashedString value;
989
    const xmlChar *valueEnd;
990
    int external;
991
    int expandedSize;
992
} xmlDefAttr;
993
994
/*
 * Variable-length list of defaulted attributes for one element: a
 * header with the used and allocated counts, followed by the entries.
 * With C99 or later the entries are a flexible array member; older
 * compilers get a one-element array instead.
 */
typedef struct _xmlDefAttrs xmlDefAttrs;
995
typedef xmlDefAttrs *xmlDefAttrsPtr;
996
struct _xmlDefAttrs {
997
    int nbAttrs;  /* number of defaulted attributes on that element */
998
    int maxAttrs;       /* the size of the array */
999
#if __STDC_VERSION__ >= 199901L
1000
    /* Using a C99 flexible array member avoids UBSan errors. */
1001
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1002
#else
1003
    xmlDefAttr attrs[1];
1004
#endif
1005
};
1006
1007
/**
1008
 * xmlAttrNormalizeSpace:
1009
 * @src: the source string
1010
 * @dst: the target string
1011
 *
1012
 * Normalize the space in non CDATA attribute values:
1013
 * If the attribute type is not CDATA, then the XML processor MUST further
1014
 * process the normalized attribute value by discarding any leading and
1015
 * trailing space (#x20) characters, and by replacing sequences of space
1016
 * (#x20) characters by a single space (#x20) character.
1017
 * Note that the size of dst need to be at least src, and if one doesn't need
1018
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1019
 * passing src as dst is just fine.
1020
 *
1021
 * Returns NULL if @src or @dst is NULL, or if the final write position
 *         equals the final read position (for an in-place call this
 *         means that nothing was removed). Otherwise returns a pointer
1022
 *         to the terminating 0 written into @dst, i.e. the END of the
 *         normalized value, not its start.
1023
 */
1024
static xmlChar *
1025
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1026
21.8k
{
1027
21.8k
    if ((src == NULL) || (dst == NULL))
1028
0
        return(NULL);
1029
    /* Drop leading spaces. */
1030
28.6k
    while (*src == 0x20) src++;
1031
142k
    while (*src != 0) {
1032
120k
  if (*src == 0x20) {
        /* Collapse a run of spaces; emit a single space only if more
         * content follows, which also drops trailing spaces. */
1033
22.9k
      while (*src == 0x20) src++;
1034
10.6k
      if (*src != 0)
1035
9.01k
    *dst++ = 0x20;
1036
109k
  } else {
1037
109k
      *dst++ = *src++;
1038
109k
  }
1039
120k
    }
1040
21.8k
    *dst = 0;
1041
21.8k
    if (dst == src)
1042
13.9k
       return(NULL);
1043
7.95k
    return(dst);
1044
21.8k
}
1045
1046
/**
1047
 * xmlAddDefAttrs:
1048
 * @ctxt:  an XML parser context
1049
 * @fullname:  the element fullname
1050
 * @fullattr:  the attribute fullname
1051
 * @value:  the attribute value
1052
 *
1053
 * Add a defaulted attribute for an element
1054
 */
1055
static void
1056
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1057
               const xmlChar *fullname,
1058
               const xmlChar *fullattr,
1059
20.8k
               const xmlChar *value) {
1060
20.8k
    xmlDefAttrsPtr defaults;
1061
20.8k
    xmlDefAttr *attr;
1062
20.8k
    int len, expandedSize;
1063
20.8k
    xmlHashedString name;
1064
20.8k
    xmlHashedString prefix;
1065
20.8k
    xmlHashedString hvalue;
1066
20.8k
    const xmlChar *localname;
1067
1068
    /*
1069
     * Allows to detect attribute redefinitions
1070
     */
1071
20.8k
    if (ctxt->attsSpecial != NULL) {
1072
18.2k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1073
14.8k
      return;
1074
18.2k
    }
1075
1076
6.02k
    if (ctxt->attsDefault == NULL) {
1077
2.60k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1078
2.60k
  if (ctxt->attsDefault == NULL)
1079
0
      goto mem_error;
1080
2.60k
    }
1081
1082
    /*
1083
     * split the element name into prefix:localname , the string found
1084
     * are within the DTD and then not associated to namespace names.
1085
     */
1086
6.02k
    localname = xmlSplitQName3(fullname, &len);
1087
6.02k
    if (localname == NULL) {
1088
5.72k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1089
5.72k
  prefix.name = NULL;
1090
5.72k
    } else {
1091
304
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1092
304
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1093
304
        if (prefix.name == NULL)
1094
0
            goto mem_error;
1095
304
    }
1096
6.02k
    if (name.name == NULL)
1097
0
        goto mem_error;
1098
1099
    /*
1100
     * make sure there is some storage
1101
     */
1102
6.02k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1103
6.02k
    if ((defaults == NULL) ||
1104
6.02k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1105
3.09k
        xmlDefAttrsPtr temp;
1106
3.09k
        int newSize;
1107
1108
3.09k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1109
3.09k
        temp = xmlRealloc(defaults,
1110
3.09k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
3.09k
  if (temp == NULL)
1112
0
      goto mem_error;
1113
3.09k
        if (defaults == NULL)
1114
2.69k
            temp->nbAttrs = 0;
1115
3.09k
  temp->maxAttrs = newSize;
1116
3.09k
        defaults = temp;
1117
3.09k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
3.09k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
3.09k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
6.02k
    localname = xmlSplitQName3(fullattr, &len);
1129
6.02k
    if (localname == NULL) {
1130
4.52k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
4.52k
  prefix.name = NULL;
1132
4.52k
    } else {
1133
1.50k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
1.50k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
1.50k
        if (prefix.name == NULL)
1136
0
            goto mem_error;
1137
1.50k
    }
1138
6.02k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
6.02k
    len = strlen((const char *) value);
1143
6.02k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
6.02k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
6.02k
    expandedSize = strlen((const char *) name.name);
1148
6.02k
    if (prefix.name != NULL)
1149
1.50k
        expandedSize += strlen((const char *) prefix.name);
1150
6.02k
    expandedSize += len;
1151
1152
6.02k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
6.02k
    attr->name = name;
1154
6.02k
    attr->prefix = prefix;
1155
6.02k
    attr->value = hvalue;
1156
6.02k
    attr->valueEnd = hvalue.name + len;
1157
6.02k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
6.02k
    attr->expandedSize = expandedSize;
1159
1160
6.02k
    return;
1161
1162
0
mem_error:
1163
0
    xmlErrMemory(ctxt);
1164
0
    return;
1165
6.02k
}
1166
1167
/**
1168
 * xmlAddSpecialAttr:
1169
 * @ctxt:  an XML parser context
1170
 * @fullname:  the element fullname
1171
 * @fullattr:  the attribute fullname
1172
 * @type:  the attribute type
1173
 *
1174
 * Register this attribute type
1175
 */
1176
static void
1177
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1178
      const xmlChar *fullname,
1179
      const xmlChar *fullattr,
1180
      int type)
1181
32.4k
{
1182
32.4k
    if (ctxt->attsSpecial == NULL) {
1183
3.11k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1184
3.11k
  if (ctxt->attsSpecial == NULL)
1185
0
      goto mem_error;
1186
3.11k
    }
1187
1188
32.4k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1189
32.4k
                    (void *) (ptrdiff_t) type) < 0)
1190
0
        goto mem_error;
1191
32.4k
    return;
1192
1193
32.4k
mem_error:
1194
0
    xmlErrMemory(ctxt);
1195
0
    return;
1196
32.4k
}
1197
1198
/**
1199
 * xmlCleanSpecialAttrCallback:
1200
 *
1201
 * Removes CDATA attributes from the special attribute table
1202
 */
1203
static void
1204
xmlCleanSpecialAttrCallback(void *payload, void *data,
1205
                            const xmlChar *fullname, const xmlChar *fullattr,
1206
6.71k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1207
6.71k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1208
1209
6.71k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1210
813
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1211
813
    }
1212
6.71k
}
1213
1214
/**
1215
 * xmlCleanSpecialAttr:
1216
 * @ctxt:  an XML parser context
1217
 *
1218
 * Trim the list of attributes defined to remove all those of type
1219
 * CDATA as they are not special. This call should be done when finishing
1220
 * to parse the DTD and before starting to parse the document root.
1221
 */
1222
static void
1223
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1224
10.2k
{
1225
10.2k
    if (ctxt->attsSpecial == NULL)
1226
7.11k
        return;
1227
1228
3.11k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1229
1230
3.11k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1231
191
        xmlHashFree(ctxt->attsSpecial, NULL);
1232
191
        ctxt->attsSpecial = NULL;
1233
191
    }
1234
3.11k
    return;
1235
10.2k
}
1236
1237
/**
1238
 * xmlCheckLanguageID:
1239
 * @lang:  pointer to the string value
1240
 *
1241
 * DEPRECATED: Internal function, do not use.
1242
 *
1243
 * Checks that the value conforms to the LanguageID production:
1244
 *
1245
 * NOTE: this is somewhat deprecated, those productions were removed from
1246
 *       the XML Second edition.
1247
 *
1248
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1249
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1250
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1251
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1252
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1253
 * [38] Subcode ::= ([a-z] | [A-Z])+
1254
 *
1255
 * The current REC reference the successors of RFC 1766, currently 5646
1256
 *
1257
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1258
 * langtag       = language
1259
 *                 ["-" script]
1260
 *                 ["-" region]
1261
 *                 *("-" variant)
1262
 *                 *("-" extension)
1263
 *                 ["-" privateuse]
1264
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1265
 *                 ["-" extlang]       ; sometimes followed by
1266
 *                                     ; extended language subtags
1267
 *               / 4ALPHA              ; or reserved for future use
1268
 *               / 5*8ALPHA            ; or registered language subtag
1269
 *
1270
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1271
 *                 *2("-" 3ALPHA)      ; permanently reserved
1272
 *
1273
 * script        = 4ALPHA              ; ISO 15924 code
1274
 *
1275
 * region        = 2ALPHA              ; ISO 3166-1 code
1276
 *               / 3DIGIT              ; UN M.49 code
1277
 *
1278
 * variant       = 5*8alphanum         ; registered variants
1279
 *               / (DIGIT 3alphanum)
1280
 *
1281
 * extension     = singleton 1*("-" (2*8alphanum))
1282
 *
1283
 *                                     ; Single alphanumerics
1284
 *                                     ; "x" reserved for private use
1285
 * singleton     = DIGIT               ; 0 - 9
1286
 *               / %x41-57             ; A - W
1287
 *               / %x59-5A             ; Y - Z
1288
 *               / %x61-77             ; a - w
1289
 *               / %x79-7A             ; y - z
1290
 *
1291
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1292
 * The parser below doesn't try to cope with extension or privateuse
1293
 * that could be added but that's not interoperable anyway
1294
 *
1295
 * Returns 1 if correct 0 otherwise
1296
 **/
1297
int
1298
xmlCheckLanguageID(const xmlChar * lang)
1299
2.21k
{
1300
2.21k
    const xmlChar *cur = lang, *nxt;
1301
1302
2.21k
    if (cur == NULL)
1303
112
        return (0);
1304
2.09k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1305
2.09k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1306
2.09k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1307
2.09k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1308
        /*
1309
         * Still allow IANA code and user code which were coming
1310
         * from the previous version of the XML-1.0 specification
1311
         * it's deprecated but we should not fail
1312
         */
1313
244
        cur += 2;
1314
804
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315
804
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1316
560
            cur++;
1317
244
        return(cur[0] == 0);
1318
244
    }
1319
1.85k
    nxt = cur;
1320
6.59k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1321
6.59k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1322
4.74k
           nxt++;
1323
1.85k
    if (nxt - cur >= 4) {
1324
        /*
1325
         * Reserved
1326
         */
1327
151
        if ((nxt - cur > 8) || (nxt[0] != 0))
1328
107
            return(0);
1329
44
        return(1);
1330
151
    }
1331
1.70k
    if (nxt - cur < 2)
1332
119
        return(0);
1333
    /* we got an ISO 639 code */
1334
1.58k
    if (nxt[0] == 0)
1335
90
        return(1);
1336
1.49k
    if (nxt[0] != '-')
1337
73
        return(0);
1338
1339
1.42k
    nxt++;
1340
1.42k
    cur = nxt;
1341
    /* now we can have extlang or script or region or variant */
1342
1.42k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1343
258
        goto region_m49;
1344
1345
5.44k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1346
5.44k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1347
4.28k
           nxt++;
1348
1.16k
    if (nxt - cur == 4)
1349
302
        goto script;
1350
862
    if (nxt - cur == 2)
1351
202
        goto region;
1352
660
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1353
110
        goto variant;
1354
550
    if (nxt - cur != 3)
1355
83
        return(0);
1356
    /* we parsed an extlang */
1357
467
    if (nxt[0] == 0)
1358
137
        return(1);
1359
330
    if (nxt[0] != '-')
1360
68
        return(0);
1361
1362
262
    nxt++;
1363
262
    cur = nxt;
1364
    /* now we can have script or region or variant */
1365
262
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1366
36
        goto region_m49;
1367
1368
1.43k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1369
1.43k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1370
1.20k
           nxt++;
1371
226
    if (nxt - cur == 2)
1372
36
        goto region;
1373
190
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1374
35
        goto variant;
1375
155
    if (nxt - cur != 4)
1376
134
        return(0);
1377
    /* we parsed a script */
1378
323
script:
1379
323
    if (nxt[0] == 0)
1380
43
        return(1);
1381
280
    if (nxt[0] != '-')
1382
53
        return(0);
1383
1384
227
    nxt++;
1385
227
    cur = nxt;
1386
    /* now we can have region or variant */
1387
227
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388
36
        goto region_m49;
1389
1390
1.23k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391
1.23k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392
1.04k
           nxt++;
1393
1394
191
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1395
38
        goto variant;
1396
153
    if (nxt - cur != 2)
1397
117
        return(0);
1398
    /* we parsed a region */
1399
317
region:
1400
317
    if (nxt[0] == 0)
1401
88
        return(1);
1402
229
    if (nxt[0] != '-')
1403
105
        return(0);
1404
1405
124
    nxt++;
1406
124
    cur = nxt;
1407
    /* now we can just have a variant */
1408
1.20k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1409
1.20k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1410
1.08k
           nxt++;
1411
1412
124
    if ((nxt - cur < 5) || (nxt - cur > 8))
1413
82
        return(0);
1414
1415
    /* we parsed a variant */
1416
225
variant:
1417
225
    if (nxt[0] == 0)
1418
115
        return(1);
1419
110
    if (nxt[0] != '-')
1420
69
        return(0);
1421
    /* extensions and private use subtags not checked */
1422
41
    return (1);
1423
1424
330
region_m49:
1425
330
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1426
330
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1427
43
        nxt += 3;
1428
43
        goto region;
1429
43
    }
1430
287
    return(0);
1431
330
}
1432
1433
/************************************************************************
1434
 *                  *
1435
 *    Parser stacks related functions and macros    *
1436
 *                  *
1437
 ************************************************************************/
1438
1439
static xmlChar *
1440
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1441
1442
/**
1443
 * xmlParserNsCreate:
1444
 *
1445
 * Create a new namespace database.
1446
 *
1447
 * Returns the new obejct.
1448
 */
1449
xmlParserNsData *
1450
22.1k
xmlParserNsCreate(void) {
1451
22.1k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1452
1453
22.1k
    if (nsdb == NULL)
1454
0
        return(NULL);
1455
22.1k
    memset(nsdb, 0, sizeof(*nsdb));
1456
22.1k
    nsdb->defaultNsIndex = INT_MAX;
1457
1458
22.1k
    return(nsdb);
1459
22.1k
}
1460
1461
/**
1462
 * xmlParserNsFree:
1463
 * @nsdb: namespace database
1464
 *
1465
 * Free a namespace database.
1466
 */
1467
void
1468
22.1k
xmlParserNsFree(xmlParserNsData *nsdb) {
1469
22.1k
    if (nsdb == NULL)
1470
0
        return;
1471
1472
22.1k
    xmlFree(nsdb->extra);
1473
22.1k
    xmlFree(nsdb->hash);
1474
22.1k
    xmlFree(nsdb);
1475
22.1k
}
1476
1477
/**
1478
 * xmlParserNsReset:
1479
 * @nsdb: namespace database
1480
 *
1481
 * Reset a namespace database.
1482
 */
1483
static void
1484
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1485
0
    if (nsdb == NULL)
1486
0
        return;
1487
1488
0
    nsdb->hashElems = 0;
1489
0
    nsdb->elementId = 0;
1490
0
    nsdb->defaultNsIndex = INT_MAX;
1491
1492
0
    if (nsdb->hash)
1493
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1494
0
}
1495
1496
/**
1497
 * xmlParserStartElement:
1498
 * @nsdb: namespace database
1499
 *
1500
 * Signal that a new element has started.
1501
 *
1502
 * Returns 0 on success, -1 if the element counter overflowed.
1503
 */
1504
static int
1505
103k
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1506
103k
    if (nsdb->elementId == UINT_MAX)
1507
0
        return(-1);
1508
103k
    nsdb->elementId++;
1509
1510
103k
    return(0);
1511
103k
}
1512
1513
/**
1514
 * xmlParserNsLookup:
1515
 * @ctxt: parser context
1516
 * @prefix: namespace prefix
1517
 * @bucketPtr: optional bucket (return value)
1518
 *
1519
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1520
 * be set to the matching bucket, or the first empty bucket if no match
1521
 * was found.
1522
 *
1523
 * Returns the namespace index on success, INT_MAX if no namespace was
1524
 * found.
1525
 */
1526
static int
1527
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1528
183k
                  xmlParserNsBucket **bucketPtr) {
1529
183k
    xmlParserNsBucket *bucket;
1530
183k
    unsigned index, hashValue;
1531
1532
183k
    if (prefix->name == NULL)
1533
98.2k
        return(ctxt->nsdb->defaultNsIndex);
1534
1535
85.4k
    if (ctxt->nsdb->hashSize == 0)
1536
8.12k
        return(INT_MAX);
1537
1538
77.3k
    hashValue = prefix->hashValue;
1539
77.3k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1540
77.3k
    bucket = &ctxt->nsdb->hash[index];
1541
1542
3.69M
    while (bucket->hashValue) {
1543
3.68M
        if ((bucket->hashValue == hashValue) &&
1544
3.68M
            (bucket->index != INT_MAX)) {
1545
60.4k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
60.4k
                if (bucketPtr != NULL)
1547
49.4k
                    *bucketPtr = bucket;
1548
60.4k
                return(bucket->index);
1549
60.4k
            }
1550
60.4k
        }
1551
1552
3.62M
        index++;
1553
3.62M
        bucket++;
1554
3.62M
        if (index == ctxt->nsdb->hashSize) {
1555
13.2k
            index = 0;
1556
13.2k
            bucket = ctxt->nsdb->hash;
1557
13.2k
        }
1558
3.62M
    }
1559
1560
16.8k
    if (bucketPtr != NULL)
1561
11.9k
        *bucketPtr = bucket;
1562
16.8k
    return(INT_MAX);
1563
77.3k
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
86.5k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
86.5k
    const xmlChar *ret;
1577
86.5k
    int nsIndex;
1578
1579
86.5k
    if (prefix->name == ctxt->str_xml)
1580
631
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
85.8k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
85.8k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
63.3k
        return(NULL);
1589
1590
22.4k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
22.4k
    if (ret[0] == 0)
1592
1.43k
        ret = NULL;
1593
22.4k
    return(ret);
1594
85.8k
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
6.57k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
6.57k
    xmlHashedString hprefix;
1609
6.57k
    int nsIndex;
1610
1611
6.57k
    if (prefix == ctxt->str_xml)
1612
4.03k
        return(NULL);
1613
1614
2.53k
    hprefix.name = prefix;
1615
2.53k
    if (prefix != NULL)
1616
724
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
1.81k
    else
1618
1.81k
        hprefix.hashValue = 0;
1619
2.53k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
2.53k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
2.53k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
2.53k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
22.5k
                     void *saxData) {
1641
22.5k
    xmlHashedString hprefix;
1642
22.5k
    int nsIndex;
1643
1644
22.5k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
22.5k
    hprefix.name = prefix;
1648
22.5k
    if (prefix != NULL)
1649
7.30k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
15.2k
    else
1651
15.2k
        hprefix.hashValue = 0;
1652
22.5k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
22.5k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
22.5k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
22.5k
    return(0);
1658
22.5k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
3.02k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
3.02k
    const xmlChar **table;
1671
3.02k
    xmlParserNsExtra *extra;
1672
3.02k
    int newSize;
1673
1674
3.02k
    if (ctxt->nsMax > INT_MAX / 2)
1675
0
        goto error;
1676
3.02k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1677
1678
3.02k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1679
3.02k
    if (table == NULL)
1680
0
        goto error;
1681
3.02k
    ctxt->nsTab = table;
1682
1683
3.02k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1684
3.02k
    if (extra == NULL)
1685
0
        goto error;
1686
3.02k
    ctxt->nsdb->extra = extra;
1687
1688
3.02k
    ctxt->nsMax = newSize;
1689
3.02k
    return(0);
1690
1691
0
error:
1692
0
    xmlErrMemory(ctxt);
1693
0
    return(-1);
1694
3.02k
}
1695
1696
/**
1697
 * xmlParserNsPush:
1698
 * @ctxt: parser context
1699
 * @prefix: prefix with hash value
1700
 * @uri: uri with hash value
1701
 * @saxData: extra data for SAX handler
1702
 * @defAttr: whether the namespace comes from a default attribute
1703
 *
1704
 * Push a new namespace on the table.
1705
 *
1706
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1707
 * -1 if a memory allocation failed.
1708
 */
1709
static int
1710
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1711
51.7k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1712
51.7k
    xmlParserNsBucket *bucket = NULL;
1713
51.7k
    xmlParserNsExtra *extra;
1714
51.7k
    const xmlChar **ns;
1715
51.7k
    unsigned hashValue, nsIndex, oldIndex;
1716
1717
51.7k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1718
195
        return(0);
1719
1720
51.5k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1721
0
        xmlErrMemory(ctxt);
1722
0
        return(-1);
1723
0
    }
1724
1725
    /*
1726
     * Default namespace and 'xml' namespace
1727
     */
1728
51.5k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1729
20.0k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1730
1731
20.0k
        if (oldIndex != INT_MAX) {
1732
7.67k
            extra = &ctxt->nsdb->extra[oldIndex];
1733
1734
7.67k
            if (extra->elementId == ctxt->nsdb->elementId) {
1735
433
                if (defAttr == 0)
1736
264
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1737
433
                return(0);
1738
433
            }
1739
1740
7.24k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741
7.24k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1742
228
                return(0);
1743
7.24k
        }
1744
1745
19.3k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1746
19.3k
        goto populate_entry;
1747
20.0k
    }
1748
1749
    /*
1750
     * Hash table lookup
1751
     */
1752
31.5k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1753
31.5k
    if (oldIndex != INT_MAX) {
1754
18.5k
        extra = &ctxt->nsdb->extra[oldIndex];
1755
1756
        /*
1757
         * Check for duplicate definitions on the same element.
1758
         */
1759
18.5k
        if (extra->elementId == ctxt->nsdb->elementId) {
1760
127
            if (defAttr == 0)
1761
117
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1762
127
            return(0);
1763
127
        }
1764
1765
18.4k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1766
18.4k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1767
211
            return(0);
1768
1769
18.2k
        bucket->index = ctxt->nsNr;
1770
18.2k
        goto populate_entry;
1771
18.4k
    }
1772
1773
    /*
1774
     * Insert new bucket
1775
     */
1776
1777
12.9k
    hashValue = prefix->hashValue;
1778
1779
    /*
1780
     * Grow hash table, 50% fill factor
1781
     */
1782
12.9k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1783
1.42k
        xmlParserNsBucket *newHash;
1784
1.42k
        unsigned newSize, i, index;
1785
1786
1.42k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1787
0
            xmlErrMemory(ctxt);
1788
0
            return(-1);
1789
0
        }
1790
1.42k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1791
1.42k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1792
1.42k
        if (newHash == NULL) {
1793
0
            xmlErrMemory(ctxt);
1794
0
            return(-1);
1795
0
        }
1796
1.42k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1797
1798
37.0k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1799
35.6k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1800
35.6k
            unsigned newIndex;
1801
1802
35.6k
            if (hv == 0)
1803
17.8k
                continue;
1804
17.8k
            newIndex = hv & (newSize - 1);
1805
1806
1.92M
            while (newHash[newIndex].hashValue != 0) {
1807
1.90M
                newIndex++;
1808
1.90M
                if (newIndex == newSize)
1809
3.71k
                    newIndex = 0;
1810
1.90M
            }
1811
1812
17.8k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1813
17.8k
        }
1814
1815
1.42k
        xmlFree(ctxt->nsdb->hash);
1816
1.42k
        ctxt->nsdb->hash = newHash;
1817
1.42k
        ctxt->nsdb->hashSize = newSize;
1818
1819
        /*
1820
         * Relookup
1821
         */
1822
1.42k
        index = hashValue & (newSize - 1);
1823
1824
10.9k
        while (newHash[index].hashValue != 0) {
1825
9.49k
            index++;
1826
9.49k
            if (index == newSize)
1827
156
                index = 0;
1828
9.49k
        }
1829
1830
1.42k
        bucket = &newHash[index];
1831
1.42k
    }
1832
1833
12.9k
    bucket->hashValue = hashValue;
1834
12.9k
    bucket->index = ctxt->nsNr;
1835
12.9k
    ctxt->nsdb->hashElems++;
1836
12.9k
    oldIndex = INT_MAX;
1837
1838
50.5k
populate_entry:
1839
50.5k
    nsIndex = ctxt->nsNr;
1840
1841
50.5k
    ns = &ctxt->nsTab[nsIndex * 2];
1842
50.5k
    ns[0] = prefix ? prefix->name : NULL;
1843
50.5k
    ns[1] = uri->name;
1844
1845
50.5k
    extra = &ctxt->nsdb->extra[nsIndex];
1846
50.5k
    extra->saxData = saxData;
1847
50.5k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1848
50.5k
    extra->uriHashValue = uri->hashValue;
1849
50.5k
    extra->elementId = ctxt->nsdb->elementId;
1850
50.5k
    extra->oldIndex = oldIndex;
1851
1852
50.5k
    ctxt->nsNr++;
1853
1854
50.5k
    return(1);
1855
12.9k
}
1856
1857
/**
1858
 * xmlParserNsPop:
1859
 * @ctxt: an XML parser context
1860
 * @nr:  the number to pop
1861
 *
1862
 * Pops the top @nr namespaces and restores the hash table.
1863
 *
1864
 * Returns the number of namespaces popped.
1865
 */
1866
static int
1867
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1868
32.5k
{
1869
32.5k
    int i;
1870
1871
    /* assert(nr <= ctxt->nsNr); */
1872
1873
82.7k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1874
50.1k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1875
50.1k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1876
1877
50.1k
        if (prefix == NULL) {
1878
19.2k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1879
30.9k
        } else {
1880
30.9k
            xmlHashedString hprefix;
1881
30.9k
            xmlParserNsBucket *bucket = NULL;
1882
1883
30.9k
            hprefix.name = prefix;
1884
30.9k
            hprefix.hashValue = extra->prefixHashValue;
1885
30.9k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1886
            /* assert(bucket && bucket->hashValue); */
1887
30.9k
            bucket->index = extra->oldIndex;
1888
30.9k
        }
1889
50.1k
    }
1890
1891
32.5k
    ctxt->nsNr -= nr;
1892
32.5k
    return(nr);
1893
32.5k
}
1894
1895
static int
1896
2.43k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1897
2.43k
    const xmlChar **atts;
1898
2.43k
    unsigned *attallocs;
1899
2.43k
    int maxatts;
1900
1901
2.43k
    if (nr + 5 > ctxt->maxatts) {
1902
2.43k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1903
2.43k
  atts = (const xmlChar **) xmlMalloc(
1904
2.43k
             maxatts * sizeof(const xmlChar *));
1905
2.43k
  if (atts == NULL) goto mem_error;
1906
2.43k
  attallocs = xmlRealloc(ctxt->attallocs,
1907
2.43k
                               (maxatts / 5) * sizeof(attallocs[0]));
1908
2.43k
  if (attallocs == NULL) {
1909
0
            xmlFree(atts);
1910
0
            goto mem_error;
1911
0
        }
1912
2.43k
        if (ctxt->maxatts > 0)
1913
145
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1914
2.43k
        xmlFree(ctxt->atts);
1915
2.43k
  ctxt->atts = atts;
1916
2.43k
  ctxt->attallocs = attallocs;
1917
2.43k
  ctxt->maxatts = maxatts;
1918
2.43k
    }
1919
2.43k
    return(ctxt->maxatts);
1920
0
mem_error:
1921
0
    xmlErrMemory(ctxt);
1922
0
    return(-1);
1923
2.43k
}
1924
1925
/**
1926
 * inputPush:
1927
 * @ctxt:  an XML parser context
1928
 * @value:  the parser input
1929
 *
1930
 * Pushes a new parser input on top of the input stack
1931
 *
1932
 * Returns -1 in case of error, the index in the stack otherwise
1933
 */
1934
int
1935
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1936
80.3k
{
1937
80.3k
    if ((ctxt == NULL) || (value == NULL))
1938
0
        return(-1);
1939
80.3k
    if (ctxt->inputNr >= ctxt->inputMax) {
1940
0
        size_t newSize = ctxt->inputMax * 2;
1941
0
        xmlParserInputPtr *tmp;
1942
1943
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1944
0
                                               newSize * sizeof(*tmp));
1945
0
        if (tmp == NULL) {
1946
0
            xmlErrMemory(ctxt);
1947
0
            return (-1);
1948
0
        }
1949
0
        ctxt->inputTab = tmp;
1950
0
        ctxt->inputMax = newSize;
1951
0
    }
1952
80.3k
    ctxt->inputTab[ctxt->inputNr] = value;
1953
80.3k
    ctxt->input = value;
1954
80.3k
    return (ctxt->inputNr++);
1955
80.3k
}
1956
/**
1957
 * inputPop:
1958
 * @ctxt: an XML parser context
1959
 *
1960
 * Pops the top parser input from the input stack
1961
 *
1962
 * Returns the input just removed
1963
 */
1964
xmlParserInputPtr
1965
inputPop(xmlParserCtxtPtr ctxt)
1966
124k
{
1967
124k
    xmlParserInputPtr ret;
1968
1969
124k
    if (ctxt == NULL)
1970
0
        return(NULL);
1971
124k
    if (ctxt->inputNr <= 0)
1972
44.3k
        return (NULL);
1973
80.3k
    ctxt->inputNr--;
1974
80.3k
    if (ctxt->inputNr > 0)
1975
58.2k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1976
22.1k
    else
1977
22.1k
        ctxt->input = NULL;
1978
80.3k
    ret = ctxt->inputTab[ctxt->inputNr];
1979
80.3k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1980
80.3k
    return (ret);
1981
124k
}
1982
/**
1983
 * nodePush:
1984
 * @ctxt:  an XML parser context
1985
 * @value:  the element node
1986
 *
1987
 * DEPRECATED: Internal function, do not use.
1988
 *
1989
 * Pushes a new element node on top of the node stack
1990
 *
1991
 * Returns -1 in case of error, the index in the stack otherwise
1992
 */
1993
int
1994
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1995
95.0k
{
1996
95.0k
    int maxDepth;
1997
1998
95.0k
    if (ctxt == NULL)
1999
0
        return(0);
2000
2001
95.0k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2002
95.0k
    if (ctxt->nodeNr > maxDepth) {
2003
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2005
0
                ctxt->nodeNr);
2006
0
        xmlHaltParser(ctxt);
2007
0
        return(-1);
2008
0
    }
2009
95.0k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2010
336
        xmlNodePtr *tmp;
2011
2012
336
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2013
336
                                      ctxt->nodeMax * 2 *
2014
336
                                      sizeof(ctxt->nodeTab[0]));
2015
336
        if (tmp == NULL) {
2016
0
            xmlErrMemory(ctxt);
2017
0
            return (-1);
2018
0
        }
2019
336
        ctxt->nodeTab = tmp;
2020
336
  ctxt->nodeMax *= 2;
2021
336
    }
2022
95.0k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2023
95.0k
    ctxt->node = value;
2024
95.0k
    return (ctxt->nodeNr++);
2025
95.0k
}
2026
2027
/**
2028
 * nodePop:
2029
 * @ctxt: an XML parser context
2030
 *
2031
 * DEPRECATED: Internal function, do not use.
2032
 *
2033
 * Pops the top element node from the node stack
2034
 *
2035
 * Returns the node just removed
2036
 */
2037
xmlNodePtr
2038
nodePop(xmlParserCtxtPtr ctxt)
2039
117k
{
2040
117k
    xmlNodePtr ret;
2041
2042
117k
    if (ctxt == NULL) return(NULL);
2043
117k
    if (ctxt->nodeNr <= 0)
2044
25.3k
        return (NULL);
2045
91.8k
    ctxt->nodeNr--;
2046
91.8k
    if (ctxt->nodeNr > 0)
2047
87.4k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2048
4.37k
    else
2049
4.37k
        ctxt->node = NULL;
2050
91.8k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2051
91.8k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2052
91.8k
    return (ret);
2053
117k
}
2054
2055
/**
2056
 * nameNsPush:
2057
 * @ctxt:  an XML parser context
2058
 * @value:  the element name
2059
 * @prefix:  the element prefix
2060
 * @URI:  the element namespace name
2061
 * @line:  the current line number for error messages
2062
 * @nsNr:  the number of namespaces pushed on the namespace table
2063
 *
2064
 * Pushes a new element name/prefix/URL on top of the name stack
2065
 *
2066
 * Returns -1 in case of error, the index in the stack otherwise
2067
 */
2068
static int
2069
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2070
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2071
140k
{
2072
140k
    xmlStartTag *tag;
2073
2074
140k
    if (ctxt->nameNr >= ctxt->nameMax) {
2075
611
        const xmlChar * *tmp;
2076
611
        xmlStartTag *tmp2;
2077
611
        ctxt->nameMax *= 2;
2078
611
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2079
611
                                    ctxt->nameMax *
2080
611
                                    sizeof(ctxt->nameTab[0]));
2081
611
        if (tmp == NULL) {
2082
0
      ctxt->nameMax /= 2;
2083
0
      goto mem_error;
2084
0
        }
2085
611
  ctxt->nameTab = tmp;
2086
611
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2087
611
                                    ctxt->nameMax *
2088
611
                                    sizeof(ctxt->pushTab[0]));
2089
611
        if (tmp2 == NULL) {
2090
0
      ctxt->nameMax /= 2;
2091
0
      goto mem_error;
2092
0
        }
2093
611
  ctxt->pushTab = tmp2;
2094
139k
    } else if (ctxt->pushTab == NULL) {
2095
13.5k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2096
13.5k
                                            sizeof(ctxt->pushTab[0]));
2097
13.5k
        if (ctxt->pushTab == NULL)
2098
0
            goto mem_error;
2099
13.5k
    }
2100
140k
    ctxt->nameTab[ctxt->nameNr] = value;
2101
140k
    ctxt->name = value;
2102
140k
    tag = &ctxt->pushTab[ctxt->nameNr];
2103
140k
    tag->prefix = prefix;
2104
140k
    tag->URI = URI;
2105
140k
    tag->line = line;
2106
140k
    tag->nsNr = nsNr;
2107
140k
    return (ctxt->nameNr++);
2108
0
mem_error:
2109
0
    xmlErrMemory(ctxt);
2110
0
    return (-1);
2111
140k
}
2112
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost element name from the name stack. The entry below
 * it, if any, becomes the current name; otherwise ctxt->name is set to
 * NULL. The vacated stack slot is cleared.
 *
 * Returns the removed name, or NULL if the stack is empty.
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;

    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2138
2139
/**
2140
 * namePush:
2141
 * @ctxt:  an XML parser context
2142
 * @value:  the element name
2143
 *
2144
 * DEPRECATED: Internal function, do not use.
2145
 *
2146
 * Pushes a new element name on top of the name stack
2147
 *
2148
 * Returns -1 in case of error, the index in the stack otherwise
2149
 */
2150
int
2151
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2152
0
{
2153
0
    if (ctxt == NULL) return (-1);
2154
2155
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2156
0
        const xmlChar * *tmp;
2157
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2158
0
                                    ctxt->nameMax * 2 *
2159
0
                                    sizeof(ctxt->nameTab[0]));
2160
0
        if (tmp == NULL) {
2161
0
      goto mem_error;
2162
0
        }
2163
0
  ctxt->nameTab = tmp;
2164
0
        ctxt->nameMax *= 2;
2165
0
    }
2166
0
    ctxt->nameTab[ctxt->nameNr] = value;
2167
0
    ctxt->name = value;
2168
0
    return (ctxt->nameNr++);
2169
0
mem_error:
2170
0
    xmlErrMemory(ctxt);
2171
0
    return (-1);
2172
0
}
2173
2174
/**
2175
 * namePop:
2176
 * @ctxt: an XML parser context
2177
 *
2178
 * DEPRECATED: Internal function, do not use.
2179
 *
2180
 * Pops the top element name from the name stack
2181
 *
2182
 * Returns the name just removed
2183
 */
2184
const xmlChar *
2185
namePop(xmlParserCtxtPtr ctxt)
2186
135k
{
2187
135k
    const xmlChar *ret;
2188
2189
135k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2190
0
        return (NULL);
2191
135k
    ctxt->nameNr--;
2192
135k
    if (ctxt->nameNr > 0)
2193
126k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2194
8.80k
    else
2195
8.80k
        ctxt->name = NULL;
2196
135k
    ret = ctxt->nameTab[ctxt->nameNr];
2197
135k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2198
135k
    return (ret);
2199
135k
}
2200
2201
163k
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to store
 *
 * Places @val on top of the ctxt->spaceTab stack and points ctxt->space
 * at the new entry. A full stack is doubled in size first.
 *
 * Returns the stack index used, or -1 after reporting a memory error.
 */
static int
spacePush(xmlParserCtxtPtr ctxt, int val) {
    int slot;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        /* The capacity is bumped up front and rolled back on failure. */
        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    slot = ctxt->spaceNr;
    ctxt->spaceTab[slot] = val;
    ctxt->space = &ctxt->spaceTab[slot];
    ctxt->spaceNr = slot + 1;

    return(slot);
}
2219
2220
158k
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Removes the top entry of the ctxt->spaceTab stack. ctxt->space is
 * repointed at the entry below it, or at slot 0 when the stack becomes
 * empty. The vacated slot is overwritten with -1.
 *
 * Returns the removed value, or 0 if the stack was already empty.
 */
static int
spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;

    return(popped);
}
2232
2233
/*
2234
 * Macros for accessing the content. Those should be used only by the parser,
2235
 * and not exported.
2236
 *
2237
 * Dirty macros, i.e. one often need to make assumption on the context to
2238
 * use them
2239
 *
2240
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2241
 *           To be used with extreme caution since operations consuming
2242
 *           characters may move the input buffer to a different location !
2243
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2244
 *           This should be used internally by the parser
2245
 *           only to compare to ASCII values otherwise it would break when
2246
 *           running with UTF-8 encoding.
2247
 *   RAW     same as CUR but in the input buffer, bypass any token
2248
 *           extraction that may have been done
2249
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2250
 *           to compare on ASCII based substring.
2251
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2252
 *           strings without newlines within the parser.
2253
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2254
 *           defined char within the parser.
2255
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2256
 *
2257
 *   NEXT    Skip to the next character, this does the proper decoding
2258
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2259
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2260
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2261
 *           to the number of xmlChars used for the encoding [0-5].
2262
 *   CUR_SCHAR  same but operate on a string instead of the context
2263
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2264
 *            the index
2265
 *   GROW, SHRINK  handling of input buffers
2266
 */
2267
2268
2.50M
/* Current byte of the active input. RAW and CUR expand identically. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* Byte located val positions after the current one. */
#define NXT(val) ctxt->input->cur[(val)]
/* Current read position and start of the active input buffer. */
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes at s, read as
 * unsigned char, equal c1 ... cn. Comparison stops at the first mismatch.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) s)[ 0 ] == c1 && ((unsigned char *) s)[ 1 ] == c2 && \
    ((unsigned char *) s)[ 2 ] == c3 && ((unsigned char *) s)[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) s)[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) s)[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) s)[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) s)[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) s)[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) s)[ 9 ] == c10 )

/*
 * Advance position and column by val bytes without looking at the bytes
 * skipped; refill the input when a 0 byte is reached.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * Advance val bytes one at a time, keeping line and column in sync with
 * any newlines passed; refill the input when a 0 byte is reached.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<val; skipl++) {					\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * Outside progressive parsing, call xmlParserShrink() once more than
 * 2 * INPUT_CHUNK bytes lie behind the cursor and fewer than
 * 2 * INPUT_CHUNK lie ahead. Expands to an if statement ending in ';'.
 */
#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

/*
 * Outside progressive parsing, call xmlParserGrow() when fewer than
 * INPUT_CHUNK bytes remain ahead of the cursor. Expands to an if
 * statement ending in ';'.
 */
#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

/* Shorthands for the whitespace skipping and character stepping helpers. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Step over exactly one byte, counting it as one column; refill the
 * input when a 0 byte is reached. Expands to a brace-enclosed block.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

/*
 * Step over one character that is l bytes long, updating line and column
 * according to whether the current byte is a newline.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += l;				\
  } while (0)

/* Decode the character at the cursor, or at s; its length is stored in l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character v to buffer b at index i and advance i: a single byte
 * for values below 0x80, otherwise whatever xmlCopyCharMultiByte() writes.
 * Expands to an unbraced if/else, so use it as a full statement.
 */
#define COPY_BUF(b, i, v)						\
    if (v < 0x80) b[i++] = v;						\
    else i += xmlCopyCharMultiByte(&b[i],v)
2347
2348
/**
2349
 * xmlSkipBlankChars:
2350
 * @ctxt:  the XML parser context
2351
 *
2352
 * DEPRECATED: Internal function, do not use.
2353
 *
2354
 * Skip whitespace in the input stream.
2355
 *
2356
 * Returns the number of space chars skipped
2357
 */
2358
int
2359
586k
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2360
586k
    const xmlChar *cur;
2361
586k
    int res = 0;
2362
2363
    /*
2364
     * It's Okay to use CUR/NEXT here since all the blanks are on
2365
     * the ASCII range.
2366
     */
2367
586k
    cur = ctxt->input->cur;
2368
586k
    while (IS_BLANK_CH(*cur)) {
2369
116k
        if (*cur == '\n') {
2370
5.90k
            ctxt->input->line++; ctxt->input->col = 1;
2371
110k
        } else {
2372
110k
            ctxt->input->col++;
2373
110k
        }
2374
116k
        cur++;
2375
116k
        if (res < INT_MAX)
2376
116k
            res++;
2377
116k
        if (*cur == 0) {
2378
1.64k
            ctxt->input->cur = cur;
2379
1.64k
            xmlParserGrow(ctxt);
2380
1.64k
            cur = ctxt->input->cur;
2381
1.64k
        }
2382
116k
    }
2383
586k
    ctxt->input->cur = cur;
2384
2385
586k
    return(res);
2386
586k
}
2387
2388
static void
2389
57.4k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2390
57.4k
    unsigned long consumed;
2391
57.4k
    xmlEntityPtr ent;
2392
2393
57.4k
    ent = ctxt->input->entity;
2394
2395
57.4k
    ent->flags &= ~XML_ENT_EXPANDING;
2396
2397
57.4k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2398
2.31k
        int result;
2399
2400
        /*
2401
         * Read the rest of the stream in case of errors. We want
2402
         * to account for the whole entity size.
2403
         */
2404
2.32k
        do {
2405
2.32k
            ctxt->input->cur = ctxt->input->end;
2406
2.32k
            xmlParserShrink(ctxt);
2407
2.32k
            result = xmlParserGrow(ctxt);
2408
2.32k
        } while (result > 0);
2409
2410
2.31k
        consumed = ctxt->input->consumed;
2411
2.31k
        xmlSaturatedAddSizeT(&consumed,
2412
2.31k
                             ctxt->input->end - ctxt->input->base);
2413
2414
2.31k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2415
2416
        /*
2417
         * Add to sizeentities when parsing an external entity
2418
         * for the first time.
2419
         */
2420
2.31k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2421
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2422
0
        }
2423
2424
2.31k
        ent->flags |= XML_ENT_CHECKED;
2425
2.31k
    }
2426
2427
57.4k
    xmlPopInput(ctxt);
2428
2429
57.4k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2430
57.4k
}
2431
2432
/**
2433
 * xmlSkipBlankCharsPE:
2434
 * @ctxt:  the XML parser context
2435
 *
2436
 * Skip whitespace in the input stream, also handling parameter
2437
 * entities.
2438
 *
2439
 * Returns the number of space chars skipped
2440
 */
2441
static int
2442
562k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2443
562k
    int res = 0;
2444
562k
    int inParam;
2445
562k
    int expandParam;
2446
2447
562k
    inParam = PARSER_IN_PE(ctxt);
2448
562k
    expandParam = PARSER_EXTERNAL(ctxt);
2449
2450
562k
    if (!inParam && !expandParam)
2451
126k
        return(xmlSkipBlankChars(ctxt));
2452
2453
635k
    while (PARSER_STOPPED(ctxt) == 0) {
2454
635k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2455
142k
            NEXT;
2456
493k
        } else if (CUR == '%') {
2457
56.3k
            if ((expandParam == 0) ||
2458
56.3k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2459
56.3k
                break;
2460
2461
            /*
2462
             * Expand parameter entity. We continue to consume
2463
             * whitespace at the start of the entity and possible
2464
             * even consume the whole entity and pop it. We might
2465
             * even pop multiple PEs in this loop.
2466
             */
2467
0
            xmlParsePEReference(ctxt);
2468
2469
0
            inParam = PARSER_IN_PE(ctxt);
2470
0
            expandParam = PARSER_EXTERNAL(ctxt);
2471
436k
        } else if (CUR == 0) {
2472
57.4k
            if (inParam == 0)
2473
3
                break;
2474
2475
57.4k
            xmlPopPE(ctxt);
2476
2477
57.4k
            inParam = PARSER_IN_PE(ctxt);
2478
57.4k
            expandParam = PARSER_EXTERNAL(ctxt);
2479
379k
        } else {
2480
379k
            break;
2481
379k
        }
2482
2483
        /*
2484
         * Also increase the counter when entering or exiting a PERef.
2485
         * The spec says: "When a parameter-entity reference is recognized
2486
         * in the DTD and included, its replacement text MUST be enlarged
2487
         * by the attachment of one leading and one following space (#x20)
2488
         * character."
2489
         */
2490
199k
        if (res < INT_MAX)
2491
199k
            res++;
2492
199k
    }
2493
2494
435k
    return(res);
2495
562k
}
2496
2497
/************************************************************************
2498
 *                  *
2499
 *    Commodity functions to handle entities      *
2500
 *                  *
2501
 ************************************************************************/
2502
2503
/**
2504
 * xmlPopInput:
2505
 * @ctxt:  an XML parser context
2506
 *
2507
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2508
 *          pop it and return the next char.
2509
 *
2510
 * Returns the current xmlChar in the parser context
2511
 */
2512
xmlChar
2513
57.4k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2514
57.4k
    xmlParserInputPtr input;
2515
2516
57.4k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2517
57.4k
    input = inputPop(ctxt);
2518
57.4k
    xmlFreeInputStream(input);
2519
57.4k
    if (*ctxt->input->cur == 0)
2520
323
        xmlParserGrow(ctxt);
2521
57.4k
    return(CUR);
2522
57.4k
}
2523
2524
/**
2525
 * xmlPushInput:
2526
 * @ctxt:  an XML parser context
2527
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2528
 *
2529
 * Push an input stream onto the stack.
2530
 *
2531
 * This makes the parser use an input returned from advanced functions
2532
 * like xmlNewInputURL or xmlNewInputMemory.
2533
 *
2534
 * Returns -1 in case of error or the index in the input stack
2535
 */
2536
int
2537
58.2k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2538
58.2k
    int maxDepth;
2539
58.2k
    int ret;
2540
2541
58.2k
    if ((ctxt == NULL) || (input == NULL))
2542
0
        return(-1);
2543
2544
58.2k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2545
58.2k
    if (ctxt->inputNr > maxDepth) {
2546
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2547
0
                       "Maximum entity nesting depth exceeded");
2548
0
        xmlHaltParser(ctxt);
2549
0
  return(-1);
2550
0
    }
2551
58.2k
    ret = inputPush(ctxt, input);
2552
58.2k
    GROW;
2553
58.2k
    return(ret);
2554
58.2k
}
2555
2556
/**
2557
 * xmlParseCharRef:
2558
 * @ctxt:  an XML parser context
2559
 *
2560
 * DEPRECATED: Internal function, don't use.
2561
 *
2562
 * Parse a numeric character reference. Always consumes '&'.
2563
 *
2564
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2565
 *                  '&#x' [0-9a-fA-F]+ ';'
2566
 *
2567
 * [ WFC: Legal Character ]
2568
 * Characters referred to using character references must match the
2569
 * production for Char.
2570
 *
2571
 * Returns the value parsed (as an int), 0 in case of error
2572
 */
2573
int
2574
13.0k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2575
13.0k
    int val = 0;
2576
13.0k
    int count = 0;
2577
2578
    /*
2579
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2580
     */
2581
13.0k
    if ((RAW == '&') && (NXT(1) == '#') &&
2582
13.0k
        (NXT(2) == 'x')) {
2583
6.74k
  SKIP(3);
2584
6.74k
  GROW;
2585
27.3k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2586
21.4k
      if (count++ > 20) {
2587
480
    count = 0;
2588
480
    GROW;
2589
480
      }
2590
21.4k
      if ((RAW >= '0') && (RAW <= '9'))
2591
8.65k
          val = val * 16 + (CUR - '0');
2592
12.8k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2593
3.83k
          val = val * 16 + (CUR - 'a') + 10;
2594
8.97k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2595
8.12k
          val = val * 16 + (CUR - 'A') + 10;
2596
845
      else {
2597
845
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2598
845
    val = 0;
2599
845
    break;
2600
845
      }
2601
20.6k
      if (val > 0x110000)
2602
5.74k
          val = 0x110000;
2603
2604
20.6k
      NEXT;
2605
20.6k
      count++;
2606
20.6k
  }
2607
6.74k
  if (RAW == ';') {
2608
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2609
5.89k
      ctxt->input->col++;
2610
5.89k
      ctxt->input->cur++;
2611
5.89k
  }
2612
6.74k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2613
6.31k
  SKIP(2);
2614
6.31k
  GROW;
2615
23.2k
  while (RAW != ';') { /* loop blocked by count */
2616
18.2k
      if (count++ > 20) {
2617
345
    count = 0;
2618
345
    GROW;
2619
345
      }
2620
18.2k
      if ((RAW >= '0') && (RAW <= '9'))
2621
16.9k
          val = val * 10 + (CUR - '0');
2622
1.33k
      else {
2623
1.33k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2624
1.33k
    val = 0;
2625
1.33k
    break;
2626
1.33k
      }
2627
16.9k
      if (val > 0x110000)
2628
3.19k
          val = 0x110000;
2629
2630
16.9k
      NEXT;
2631
16.9k
      count++;
2632
16.9k
  }
2633
6.31k
  if (RAW == ';') {
2634
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2635
4.97k
      ctxt->input->col++;
2636
4.97k
      ctxt->input->cur++;
2637
4.97k
  }
2638
6.31k
    } else {
2639
0
        if (RAW == '&')
2640
0
            SKIP(1);
2641
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2642
0
    }
2643
2644
    /*
2645
     * [ WFC: Legal Character ]
2646
     * Characters referred to using character references must match the
2647
     * production for Char.
2648
     */
2649
13.0k
    if (val >= 0x110000) {
2650
269
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2651
269
                "xmlParseCharRef: character reference out of bounds\n",
2652
269
          val);
2653
12.7k
    } else if (IS_CHAR(val)) {
2654
10.0k
        return(val);
2655
10.0k
    } else {
2656
2.72k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2657
2.72k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2658
2.72k
                    val);
2659
2.72k
    }
2660
2.99k
    return(0);
2661
13.0k
}
2662
2663
/**
2664
 * xmlParseStringCharRef:
2665
 * @ctxt:  an XML parser context
2666
 * @str:  a pointer to an index in the string
2667
 *
2668
 * parse Reference declarations, variant parsing from a string rather
2669
 * than an an input flow.
2670
 *
2671
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2672
 *                  '&#x' [0-9a-fA-F]+ ';'
2673
 *
2674
 * [ WFC: Legal Character ]
2675
 * Characters referred to using character references must match the
2676
 * production for Char.
2677
 *
2678
 * Returns the value parsed (as an int), 0 in case of error, str will be
2679
 *         updated to the current value of the index
2680
 */
2681
static int
2682
16.9k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2683
16.9k
    const xmlChar *ptr;
2684
16.9k
    xmlChar cur;
2685
16.9k
    int val = 0;
2686
2687
16.9k
    if ((str == NULL) || (*str == NULL)) return(0);
2688
16.9k
    ptr = *str;
2689
16.9k
    cur = *ptr;
2690
16.9k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2691
5.23k
  ptr += 3;
2692
5.23k
  cur = *ptr;
2693
20.5k
  while (cur != ';') { /* Non input consuming loop */
2694
15.7k
      if ((cur >= '0') && (cur <= '9'))
2695
4.73k
          val = val * 16 + (cur - '0');
2696
11.0k
      else if ((cur >= 'a') && (cur <= 'f'))
2697
4.06k
          val = val * 16 + (cur - 'a') + 10;
2698
6.99k
      else if ((cur >= 'A') && (cur <= 'F'))
2699
6.51k
          val = val * 16 + (cur - 'A') + 10;
2700
484
      else {
2701
484
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2702
484
    val = 0;
2703
484
    break;
2704
484
      }
2705
15.3k
      if (val > 0x110000)
2706
883
          val = 0x110000;
2707
2708
15.3k
      ptr++;
2709
15.3k
      cur = *ptr;
2710
15.3k
  }
2711
5.23k
  if (cur == ';')
2712
4.75k
      ptr++;
2713
11.6k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2714
11.6k
  ptr += 2;
2715
11.6k
  cur = *ptr;
2716
37.4k
  while (cur != ';') { /* Non input consuming loops */
2717
26.7k
      if ((cur >= '0') && (cur <= '9'))
2718
25.7k
          val = val * 10 + (cur - '0');
2719
1.02k
      else {
2720
1.02k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2721
1.02k
    val = 0;
2722
1.02k
    break;
2723
1.02k
      }
2724
25.7k
      if (val > 0x110000)
2725
545
          val = 0x110000;
2726
2727
25.7k
      ptr++;
2728
25.7k
      cur = *ptr;
2729
25.7k
  }
2730
11.6k
  if (cur == ';')
2731
10.6k
      ptr++;
2732
11.6k
    } else {
2733
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2734
0
  return(0);
2735
0
    }
2736
16.9k
    *str = ptr;
2737
2738
    /*
2739
     * [ WFC: Legal Character ]
2740
     * Characters referred to using character references must match the
2741
     * production for Char.
2742
     */
2743
16.9k
    if (val >= 0x110000) {
2744
202
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2745
202
                "xmlParseStringCharRef: character reference out of bounds\n",
2746
202
                val);
2747
16.7k
    } else if (IS_CHAR(val)) {
2748
14.3k
        return(val);
2749
14.3k
    } else {
2750
2.41k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2751
2.41k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2752
2.41k
        val);
2753
2.41k
    }
2754
2.61k
    return(0);
2755
16.9k
}
2756
2757
/**
2758
 * xmlParserHandlePEReference:
2759
 * @ctxt:  the parser context
2760
 *
2761
 * DEPRECATED: Internal function, do not use.
2762
 *
2763
 * [69] PEReference ::= '%' Name ';'
2764
 *
2765
 * [ WFC: No Recursion ]
2766
 * A parsed entity must not contain a recursive
2767
 * reference to itself, either directly or indirectly.
2768
 *
2769
 * [ WFC: Entity Declared ]
2770
 * In a document without any DTD, a document with only an internal DTD
2771
 * subset which contains no parameter entity references, or a document
2772
 * with "standalone='yes'", ...  ... The declaration of a parameter
2773
 * entity must precede any reference to it...
2774
 *
2775
 * [ VC: Entity Declared ]
2776
 * In a document with an external subset or external parameter entities
2777
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2778
 * must precede any reference to it...
2779
 *
2780
 * [ WFC: In DTD ]
2781
 * Parameter-entity references may only appear in the DTD.
2782
 * NOTE: misleading but this is handled.
2783
 *
2784
 * A PEReference may have been detected in the current input stream
2785
 * the handling is done accordingly to
2786
 *      http://www.w3.org/TR/REC-xml#entproc
2787
 * i.e.
2788
 *   - Included in literal in entity values
2789
 *   - Included as Parameter Entity reference within DTDs
2790
 */
2791
void
2792
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2793
0
    xmlParsePEReference(ctxt);
2794
0
}
2795
2796
/**
2797
 * xmlStringLenDecodeEntities:
2798
 * @ctxt:  the parser context
2799
 * @str:  the input string
2800
 * @len: the string length
2801
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @end:  an end marker xmlChar, 0 if none
2803
 * @end2:  an end marker xmlChar, 0 if none
2804
 * @end3:  an end marker xmlChar, 0 if none
2805
 *
2806
 * DEPRECATED: Internal function, don't use.
2807
 *
2808
 * Returns A newly allocated string with the substitution done. The caller
2809
 *      must deallocate it !
2810
 */
2811
xmlChar *
2812
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813
                           int what ATTRIBUTE_UNUSED,
2814
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2815
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2816
0
        return(NULL);
2817
2818
0
    if ((str[len] != 0) ||
2819
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2820
0
        return(NULL);
2821
2822
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2823
0
}
2824
2825
/**
2826
 * xmlStringDecodeEntities:
2827
 * @ctxt:  the parser context
2828
 * @str:  the input string
2829
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2830
 * @end:  an end marker xmlChar, 0 if none
2831
 * @end2:  an end marker xmlChar, 0 if none
2832
 * @end3:  an end marker xmlChar, 0 if none
2833
 *
2834
 * DEPRECATED: Internal function, don't use.
2835
 *
2836
 * Returns A newly allocated string with the substitution done. The caller
2837
 *      must deallocate it !
2838
 */
2839
xmlChar *
2840
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2841
                        int what ATTRIBUTE_UNUSED,
2842
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2843
0
    if ((ctxt == NULL) || (str == NULL))
2844
0
        return(NULL);
2845
2846
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2847
0
        return(NULL);
2848
2849
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2850
0
}
2851
2852
/************************************************************************
2853
 *                  *
2854
 *    Commodity functions, cleanup needed ?     *
2855
 *                  *
2856
 ************************************************************************/
2857
2858
/**
2859
 * areBlanks:
2860
 * @ctxt:  an XML parser context
2861
 * @str:  a xmlChar *
2862
 * @len:  the size of @str
2863
 * @blank_chars: we know the chars are blanks
2864
 *
2865
 * Is this a sequence of blank chars that one can ignore ?
2866
 *
2867
 * Returns 1 if ignorable 0 otherwise.
2868
 */
2869
2870
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871
7.09k
                     int blank_chars) {
2872
7.09k
    int i;
2873
7.09k
    xmlNodePtr lastChild;
2874
2875
    /*
2876
     * Don't spend time trying to differentiate them, the same callback is
2877
     * used !
2878
     */
2879
7.09k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880
1.53k
  return(0);
2881
2882
    /*
2883
     * Check for xml:space value.
2884
     */
2885
5.55k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886
5.55k
        (*(ctxt->space) == -2))
2887
1.65k
  return(0);
2888
2889
    /*
2890
     * Check that the string is made of blanks
2891
     */
2892
3.90k
    if (blank_chars == 0) {
2893
7.62k
  for (i = 0;i < len;i++)
2894
6.21k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2895
2.22k
    }
2896
2897
    /*
2898
     * Look if the element is mixed content in the DTD if available
2899
     */
2900
3.08k
    if (ctxt->node == NULL) return(0);
2901
3.08k
    if (ctxt->myDoc != NULL) {
2902
3.08k
        xmlElementPtr elemDecl = NULL;
2903
3.08k
        xmlDocPtr doc = ctxt->myDoc;
2904
3.08k
        const xmlChar *prefix = NULL;
2905
2906
3.08k
        if (ctxt->node->ns)
2907
209
            prefix = ctxt->node->ns->prefix;
2908
3.08k
        if (doc->intSubset != NULL)
2909
882
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2910
882
                                      prefix);
2911
3.08k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2912
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2913
0
                                      prefix);
2914
3.08k
        if (elemDecl != NULL) {
2915
483
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2916
194
                return(1);
2917
289
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2918
289
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2919
82
                return(0);
2920
289
        }
2921
3.08k
    }
2922
2923
    /*
2924
     * Otherwise, heuristic :-\
2925
     */
2926
2.81k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2927
2.44k
    if ((ctxt->node->children == NULL) &&
2928
2.44k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2929
2930
2.18k
    lastChild = xmlGetLastChild(ctxt->node);
2931
2.18k
    if (lastChild == NULL) {
2932
1.39k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2933
1.39k
            (ctxt->node->content != NULL)) return(0);
2934
1.39k
    } else if (xmlNodeIsText(lastChild))
2935
228
        return(0);
2936
561
    else if ((ctxt->node->children != NULL) &&
2937
561
             (xmlNodeIsText(ctxt->node->children)))
2938
75
        return(0);
2939
1.88k
    return(1);
2940
2.18k
}
2941
2942
/************************************************************************
2943
 *                  *
2944
 *    Extra stuff for namespace support     *
2945
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2946
 *                  *
2947
 ************************************************************************/
2948
2949
/**
2950
 * xmlSplitQName:
2951
 * @ctxt:  an XML parser context
2952
 * @name:  the UTF-8 encoded qualified name (QName) to split
2953
 * @prefixOut:  a xmlChar **
2954
 *
2955
 * parse an UTF8 encoded XML qualified name string
2956
 *
2957
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2958
 *
2959
 * [NS 6] Prefix ::= NCName
2960
 *
2961
 * [NS 7] LocalPart ::= NCName
2962
 *
2963
 * Returns the local part, and prefix is updated
2964
 *   to get the Prefix if any.
2965
 */
2966
2967
xmlChar *
2968
121k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2969
121k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2970
121k
    xmlChar *buffer = NULL;
2971
121k
    int len = 0;
2972
121k
    int max = XML_MAX_NAMELEN;
2973
121k
    xmlChar *ret = NULL;
2974
121k
    xmlChar *prefix;
2975
121k
    const xmlChar *cur = name;
2976
121k
    int c;
2977
2978
121k
    if (prefixOut == NULL) return(NULL);
2979
121k
    *prefixOut = NULL;
2980
2981
121k
    if (cur == NULL) return(NULL);
2982
2983
#ifndef XML_XML_NAMESPACE
2984
    /* xml: prefix is not really a namespace */
2985
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986
        (cur[2] == 'l') && (cur[3] == ':'))
2987
  return(xmlStrdup(name));
2988
#endif
2989
2990
    /* nasty but well-formed */
2991
121k
    if (cur[0] == ':')
2992
11.2k
  return(xmlStrdup(name));
2993
2994
109k
    c = *cur++;
2995
465k
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996
355k
  buf[len++] = c;
2997
355k
  c = *cur++;
2998
355k
    }
2999
109k
    if (len >= max) {
3000
  /*
3001
   * Okay someone managed to make a huge name, so he's ready to pay
3002
   * for the processing speed.
3003
   */
3004
748
  max = len * 2;
3005
3006
748
  buffer = (xmlChar *) xmlMallocAtomic(max);
3007
748
  if (buffer == NULL) {
3008
0
      xmlErrMemory(ctxt);
3009
0
      return(NULL);
3010
0
  }
3011
748
  memcpy(buffer, buf, len);
3012
48.8k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013
48.0k
      if (len + 10 > max) {
3014
340
          xmlChar *tmp;
3015
3016
340
    max *= 2;
3017
340
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3018
340
    if (tmp == NULL) {
3019
0
        xmlFree(buffer);
3020
0
        xmlErrMemory(ctxt);
3021
0
        return(NULL);
3022
0
    }
3023
340
    buffer = tmp;
3024
340
      }
3025
48.0k
      buffer[len++] = c;
3026
48.0k
      c = *cur++;
3027
48.0k
  }
3028
748
  buffer[len] = 0;
3029
748
    }
3030
3031
109k
    if ((c == ':') && (*cur == 0)) {
3032
872
        if (buffer != NULL)
3033
205
      xmlFree(buffer);
3034
872
  return(xmlStrdup(name));
3035
872
    }
3036
3037
109k
    if (buffer == NULL) {
3038
108k
  ret = xmlStrndup(buf, len);
3039
108k
        if (ret == NULL) {
3040
0
      xmlErrMemory(ctxt);
3041
0
      return(NULL);
3042
0
        }
3043
108k
    } else {
3044
543
  ret = buffer;
3045
543
  buffer = NULL;
3046
543
  max = XML_MAX_NAMELEN;
3047
543
    }
3048
3049
3050
109k
    if (c == ':') {
3051
14.1k
  c = *cur;
3052
14.1k
        prefix = ret;
3053
14.1k
  if (c == 0) {
3054
0
      ret = xmlStrndup(BAD_CAST "", 0);
3055
0
            if (ret == NULL) {
3056
0
                xmlFree(prefix);
3057
0
                return(NULL);
3058
0
            }
3059
0
            *prefixOut = prefix;
3060
0
            return(ret);
3061
0
  }
3062
14.1k
  len = 0;
3063
3064
  /*
3065
   * Check that the first character is proper to start
3066
   * a new name
3067
   */
3068
14.1k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3069
14.1k
        ((c >= 0x41) && (c <= 0x5A)) ||
3070
14.1k
        (c == '_') || (c == ':'))) {
3071
3.94k
      int l;
3072
3.94k
      int first = CUR_SCHAR(cur, l);
3073
3074
3.94k
      if (!IS_LETTER(first) && (first != '_')) {
3075
2.58k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3076
2.58k
          "Name %s is not XML Namespace compliant\n",
3077
2.58k
          name);
3078
2.58k
      }
3079
3.94k
  }
3080
14.1k
  cur++;
3081
3082
138k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3083
124k
      buf[len++] = c;
3084
124k
      c = *cur++;
3085
124k
  }
3086
14.1k
  if (len >= max) {
3087
      /*
3088
       * Okay someone managed to make a huge name, so he's ready to pay
3089
       * for the processing speed.
3090
       */
3091
782
      max = len * 2;
3092
3093
782
      buffer = (xmlChar *) xmlMallocAtomic(max);
3094
782
      if (buffer == NULL) {
3095
0
          xmlErrMemory(ctxt);
3096
0
                xmlFree(prefix);
3097
0
    return(NULL);
3098
0
      }
3099
782
      memcpy(buffer, buf, len);
3100
45.9k
      while (c != 0) { /* tested bigname2.xml */
3101
45.1k
    if (len + 10 > max) {
3102
373
        xmlChar *tmp;
3103
3104
373
        max *= 2;
3105
373
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3106
373
        if (tmp == NULL) {
3107
0
      xmlErrMemory(ctxt);
3108
0
                        xmlFree(prefix);
3109
0
      xmlFree(buffer);
3110
0
      return(NULL);
3111
0
        }
3112
373
        buffer = tmp;
3113
373
    }
3114
45.1k
    buffer[len++] = c;
3115
45.1k
    c = *cur++;
3116
45.1k
      }
3117
782
      buffer[len] = 0;
3118
782
  }
3119
3120
14.1k
  if (buffer == NULL) {
3121
13.3k
      ret = xmlStrndup(buf, len);
3122
13.3k
            if (ret == NULL) {
3123
0
                xmlFree(prefix);
3124
0
                return(NULL);
3125
0
            }
3126
13.3k
  } else {
3127
782
      ret = buffer;
3128
782
  }
3129
3130
14.1k
        *prefixOut = prefix;
3131
14.1k
    }
3132
3133
109k
    return(ret);
3134
109k
}
3135
3136
/************************************************************************
3137
 *                  *
3138
 *      The parser itself       *
3139
 *  Relates to http://www.w3.org/TR/REC-xml       *
3140
 *                  *
3141
 ************************************************************************/
3142
3143
/************************************************************************
3144
 *                  *
3145
 *  Routines to parse Name, NCName and NmToken      *
3146
 *                  *
3147
 ************************************************************************/
3148
3149
/*
3150
 * The two following functions are related to the change of accepted
3151
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3152
 * They correspond to the modified production [4] and the new production [4a]
3153
 * changes in that revision. Also note that the macros used for the
3154
 * productions Letter, Digit, CombiningChar and Extender are not needed
3155
 * anymore.
3156
 * We still keep compatibility to pre-revision5 parsing semantic if the
3157
 * new XML_PARSE_OLD10 option is given to the parser.
3158
 */
3159
static int
3160
423k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3161
423k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162
        /*
3163
   * Use the new checks of production [4] [4a] and [5] of the
3164
   * Update 5 of XML-1.0
3165
   */
3166
407k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3167
407k
      (((c >= 'a') && (c <= 'z')) ||
3168
406k
       ((c >= 'A') && (c <= 'Z')) ||
3169
406k
       (c == '_') || (c == ':') ||
3170
406k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3171
406k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3172
406k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3173
406k
       ((c >= 0x370) && (c <= 0x37D)) ||
3174
406k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175
406k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3176
406k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3177
406k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178
406k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179
406k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180
406k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181
406k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3182
350k
      return(1);
3183
407k
    } else {
3184
16.8k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3185
14.0k
      return(1);
3186
16.8k
    }
3187
59.1k
    return(0);
3188
423k
}
3189
3190
static int
3191
808k
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3192
808k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3193
        /*
3194
   * Use the new checks of production [4] [4a] and [5] of the
3195
   * Update 5 of XML-1.0
3196
   */
3197
783k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3198
783k
      (((c >= 'a') && (c <= 'z')) ||
3199
772k
       ((c >= 'A') && (c <= 'Z')) ||
3200
772k
       ((c >= '0') && (c <= '9')) || /* !start */
3201
772k
       (c == '_') || (c == ':') ||
3202
772k
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3203
772k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3204
772k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3205
772k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3206
772k
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3207
772k
       ((c >= 0x370) && (c <= 0x37D)) ||
3208
772k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3209
772k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3210
772k
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3211
772k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3212
772k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3213
772k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3214
772k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3215
772k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3216
772k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3217
422k
       return(1);
3218
783k
    } else {
3219
25.8k
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3220
25.8k
            (c == '.') || (c == '-') ||
3221
25.8k
      (c == '_') || (c == ':') ||
3222
25.8k
      (IS_COMBINING(c)) ||
3223
25.8k
      (IS_EXTENDER(c)))
3224
10.7k
      return(1);
3225
25.8k
    }
3226
375k
    return(0);
3227
808k
}
3228
3229
static const xmlChar *
3230
113k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3231
113k
    const xmlChar *ret;
3232
113k
    int len = 0, l;
3233
113k
    int c;
3234
113k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3235
0
                    XML_MAX_TEXT_LENGTH :
3236
113k
                    XML_MAX_NAME_LENGTH;
3237
3238
    /*
3239
     * Handler for more complex cases
3240
     */
3241
113k
    c = CUR_CHAR(l);
3242
113k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243
        /*
3244
   * Use the new checks of production [4] [4a] and [5] of the
3245
   * Update 5 of XML-1.0
3246
   */
3247
104k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248
104k
      (!(((c >= 'a') && (c <= 'z')) ||
3249
91.5k
         ((c >= 'A') && (c <= 'Z')) ||
3250
91.5k
         (c == '_') || (c == ':') ||
3251
91.5k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3252
91.5k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3253
91.5k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3254
91.5k
         ((c >= 0x370) && (c <= 0x37D)) ||
3255
91.5k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256
91.5k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3257
91.5k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3258
91.5k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259
91.5k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260
91.5k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261
91.5k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262
95.0k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263
95.0k
      return(NULL);
3264
95.0k
  }
3265
9.60k
  len += l;
3266
9.60k
  NEXTL(l);
3267
9.60k
  c = CUR_CHAR(l);
3268
172k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269
172k
         (((c >= 'a') && (c <= 'z')) ||
3270
169k
          ((c >= 'A') && (c <= 'Z')) ||
3271
169k
          ((c >= '0') && (c <= '9')) || /* !start */
3272
169k
          (c == '_') || (c == ':') ||
3273
169k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274
169k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3275
169k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3276
169k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3277
169k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278
169k
          ((c >= 0x370) && (c <= 0x37D)) ||
3279
169k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280
169k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3281
169k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282
169k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3283
169k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284
169k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285
169k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286
169k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287
169k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3288
169k
    )) {
3289
163k
            if (len <= INT_MAX - l)
3290
163k
          len += l;
3291
163k
      NEXTL(l);
3292
163k
      c = CUR_CHAR(l);
3293
163k
  }
3294
9.60k
    } else {
3295
9.12k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296
9.12k
      (!IS_LETTER(c) && (c != '_') &&
3297
7.95k
       (c != ':'))) {
3298
5.87k
      return(NULL);
3299
5.87k
  }
3300
3.25k
  len += l;
3301
3.25k
  NEXTL(l);
3302
3.25k
  c = CUR_CHAR(l);
3303
3304
12.9k
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305
12.9k
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306
12.0k
    (c == '.') || (c == '-') ||
3307
12.0k
    (c == '_') || (c == ':') ||
3308
12.0k
    (IS_COMBINING(c)) ||
3309
12.0k
    (IS_EXTENDER(c)))) {
3310
9.70k
            if (len <= INT_MAX - l)
3311
9.70k
          len += l;
3312
9.70k
      NEXTL(l);
3313
9.70k
      c = CUR_CHAR(l);
3314
9.70k
  }
3315
3.25k
    }
3316
12.8k
    if (len > maxLength) {
3317
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318
0
        return(NULL);
3319
0
    }
3320
12.8k
    if (ctxt->input->cur - ctxt->input->base < len) {
3321
        /*
3322
         * There were a couple of bugs where PERefs led to a change
3323
         * of the buffer. Check the buffer size to avoid passing an invalid
3324
         * pointer to xmlDictLookup.
3325
         */
3326
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3327
0
                    "unexpected change of input buffer");
3328
0
        return (NULL);
3329
0
    }
3330
12.8k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3331
194
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3332
12.6k
    else
3333
12.6k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3334
12.8k
    if (ret == NULL)
3335
0
        xmlErrMemory(ctxt);
3336
12.8k
    return(ret);
3337
12.8k
}
3338
3339
/**
3340
 * xmlParseName:
3341
 * @ctxt:  an XML parser context
3342
 *
3343
 * DEPRECATED: Internal function, don't use.
3344
 *
3345
 * parse an XML name.
3346
 *
3347
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3348
 *                  CombiningChar | Extender
3349
 *
3350
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351
 *
3352
 * [6] Names ::= Name (#x20 Name)*
3353
 *
3354
 * Returns the Name parsed or NULL
3355
 */
3356
3357
const xmlChar *
3358
457k
xmlParseName(xmlParserCtxtPtr ctxt) {
3359
457k
    const xmlChar *in;
3360
457k
    const xmlChar *ret;
3361
457k
    size_t count = 0;
3362
457k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3363
0
                       XML_MAX_TEXT_LENGTH :
3364
457k
                       XML_MAX_NAME_LENGTH;
3365
3366
457k
    GROW;
3367
3368
    /*
3369
     * Accelerator for simple ASCII names
3370
     */
3371
457k
    in = ctxt->input->cur;
3372
457k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
457k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3374
457k
  (*in == '_') || (*in == ':')) {
3375
349k
  in++;
3376
670k
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377
670k
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3378
670k
         ((*in >= 0x30) && (*in <= 0x39)) ||
3379
670k
         (*in == '_') || (*in == '-') ||
3380
670k
         (*in == ':') || (*in == '.'))
3381
321k
      in++;
3382
349k
  if ((*in > 0) && (*in < 0x80)) {
3383
343k
      count = in - ctxt->input->cur;
3384
343k
            if (count > maxLength) {
3385
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386
0
                return(NULL);
3387
0
            }
3388
343k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389
343k
      ctxt->input->cur = in;
3390
343k
      ctxt->input->col += count;
3391
343k
      if (ret == NULL)
3392
0
          xmlErrMemory(ctxt);
3393
343k
      return(ret);
3394
343k
  }
3395
349k
    }
3396
    /* accelerator for special cases */
3397
113k
    return(xmlParseNameComplex(ctxt));
3398
457k
}
3399
3400
static xmlHashedString
3401
98.8k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402
98.8k
    xmlHashedString ret;
3403
98.8k
    int len = 0, l;
3404
98.8k
    int c;
3405
98.8k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406
0
                    XML_MAX_TEXT_LENGTH :
3407
98.8k
                    XML_MAX_NAME_LENGTH;
3408
98.8k
    size_t startPosition = 0;
3409
3410
98.8k
    ret.name = NULL;
3411
98.8k
    ret.hashValue = 0;
3412
3413
    /*
3414
     * Handler for more complex cases
3415
     */
3416
98.8k
    startPosition = CUR_PTR - BASE_PTR;
3417
98.8k
    c = CUR_CHAR(l);
3418
98.8k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
98.8k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
92.3k
  return(ret);
3421
92.3k
    }
3422
3423
77.3k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
77.3k
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
70.8k
        if (len <= INT_MAX - l)
3426
70.8k
      len += l;
3427
70.8k
  NEXTL(l);
3428
70.8k
  c = CUR_CHAR(l);
3429
70.8k
    }
3430
6.45k
    if (len > maxLength) {
3431
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432
0
        return(ret);
3433
0
    }
3434
6.45k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3435
6.45k
    if (ret.name == NULL)
3436
0
        xmlErrMemory(ctxt);
3437
6.45k
    return(ret);
3438
6.45k
}
3439
3440
/**
3441
 * xmlParseNCName:
3442
 * @ctxt:  an XML parser context
3443
 * @len:  length of the string parsed
3444
 *
3445
 * parse an XML name.
3446
 *
3447
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3448
 *                      CombiningChar | Extender
3449
 *
3450
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3451
 *
3452
 * Returns the NCName parsed as a hashed string; its name field is NULL on failure
3453
 */
3454
3455
static xmlHashedString
3456
189k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3457
189k
    const xmlChar *in, *e;
3458
189k
    xmlHashedString ret;
3459
189k
    size_t count = 0;
3460
189k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3461
0
                       XML_MAX_TEXT_LENGTH :
3462
189k
                       XML_MAX_NAME_LENGTH;
3463
3464
189k
    ret.name = NULL;
3465
3466
    /*
3467
     * Accelerator for simple ASCII names
3468
     */
3469
189k
    in = ctxt->input->cur;
3470
189k
    e = ctxt->input->end;
3471
189k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472
189k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3473
189k
   (*in == '_')) && (in < e)) {
3474
92.2k
  in++;
3475
163k
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476
163k
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3477
163k
          ((*in >= 0x30) && (*in <= 0x39)) ||
3478
163k
          (*in == '_') || (*in == '-') ||
3479
163k
          (*in == '.')) && (in < e))
3480
71.4k
      in++;
3481
92.2k
  if (in >= e)
3482
33
      goto complex;
3483
92.2k
  if ((*in > 0) && (*in < 0x80)) {
3484
90.9k
      count = in - ctxt->input->cur;
3485
90.9k
            if (count > maxLength) {
3486
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3487
0
                return(ret);
3488
0
            }
3489
90.9k
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3490
90.9k
      ctxt->input->cur = in;
3491
90.9k
      ctxt->input->col += count;
3492
90.9k
      if (ret.name == NULL) {
3493
0
          xmlErrMemory(ctxt);
3494
0
      }
3495
90.9k
      return(ret);
3496
90.9k
  }
3497
92.2k
    }
3498
98.8k
complex:
3499
98.8k
    return(xmlParseNCNameComplex(ctxt));
3500
189k
}
3501
3502
/**
3503
 * xmlParseNameAndCompare:
3504
 * @ctxt:  an XML parser context
3505
 *
3506
 * parse an XML name and compares for match
3507
 * (specialized for endtag parsing)
3508
 *
3509
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3510
 * and the name for mismatch
3511
 */
3512
3513
static const xmlChar *
3514
14.0k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3515
14.0k
    register const xmlChar *cmp = other;
3516
14.0k
    register const xmlChar *in;
3517
14.0k
    const xmlChar *ret;
3518
3519
14.0k
    GROW;
3520
3521
14.0k
    in = ctxt->input->cur;
3522
29.5k
    while (*in != 0 && *in == *cmp) {
3523
15.5k
  ++in;
3524
15.5k
  ++cmp;
3525
15.5k
    }
3526
14.0k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3527
  /* success */
3528
2.39k
  ctxt->input->col += in - ctxt->input->cur;
3529
2.39k
  ctxt->input->cur = in;
3530
2.39k
  return (const xmlChar*) 1;
3531
2.39k
    }
3532
    /* failure (or end of input buffer), check with full function */
3533
11.6k
    ret = xmlParseName (ctxt);
3534
    /* strings coming from the dictionary direct compare possible */
3535
11.6k
    if (ret == other) {
3536
702
  return (const xmlChar*) 1;
3537
702
    }
3538
10.9k
    return ret;
3539
11.6k
}
3540
3541
/**
3542
 * xmlParseStringName:
3543
 * @ctxt:  an XML parser context
3544
 * @str:  a pointer to the string pointer (IN/OUT)
3545
 *
3546
 * parse an XML name.
3547
 *
3548
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3549
 *                  CombiningChar | Extender
3550
 *
3551
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3552
 *
3553
 * [6] Names ::= Name (#x20 Name)*
3554
 *
3555
 * Returns the Name parsed or NULL. The @str pointer
3556
 * is updated to the current location in the string.
3557
 */
3558
3559
static xmlChar *
3560
333k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3561
333k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3562
333k
    xmlChar *ret;
3563
333k
    const xmlChar *cur = *str;
3564
333k
    int len = 0, l;
3565
333k
    int c;
3566
333k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3567
0
                    XML_MAX_TEXT_LENGTH :
3568
333k
                    XML_MAX_NAME_LENGTH;
3569
3570
333k
    c = CUR_SCHAR(cur, l);
3571
333k
    if (!xmlIsNameStartChar(ctxt, c)) {
3572
1.74k
  return(NULL);
3573
1.74k
    }
3574
3575
331k
    COPY_BUF(buf, len, c);
3576
331k
    cur += l;
3577
331k
    c = CUR_SCHAR(cur, l);
3578
520k
    while (xmlIsNameChar(ctxt, c)) {
3579
189k
  COPY_BUF(buf, len, c);
3580
189k
  cur += l;
3581
189k
  c = CUR_SCHAR(cur, l);
3582
189k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583
      /*
3584
       * Okay someone managed to make a huge name, so he's ready to pay
3585
       * for the processing speed.
3586
       */
3587
427
      xmlChar *buffer;
3588
427
      int max = len * 2;
3589
3590
427
      buffer = (xmlChar *) xmlMallocAtomic(max);
3591
427
      if (buffer == NULL) {
3592
0
          xmlErrMemory(ctxt);
3593
0
    return(NULL);
3594
0
      }
3595
427
      memcpy(buffer, buf, len);
3596
22.3k
      while (xmlIsNameChar(ctxt, c)) {
3597
21.9k
    if (len + 10 > max) {
3598
202
        xmlChar *tmp;
3599
3600
202
        max *= 2;
3601
202
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3602
202
        if (tmp == NULL) {
3603
0
      xmlErrMemory(ctxt);
3604
0
      xmlFree(buffer);
3605
0
      return(NULL);
3606
0
        }
3607
202
        buffer = tmp;
3608
202
    }
3609
21.9k
    COPY_BUF(buffer, len, c);
3610
21.9k
    cur += l;
3611
21.9k
    c = CUR_SCHAR(cur, l);
3612
21.9k
                if (len > maxLength) {
3613
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614
0
                    xmlFree(buffer);
3615
0
                    return(NULL);
3616
0
                }
3617
21.9k
      }
3618
427
      buffer[len] = 0;
3619
427
      *str = cur;
3620
427
      return(buffer);
3621
427
  }
3622
189k
    }
3623
330k
    if (len > maxLength) {
3624
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3625
0
        return(NULL);
3626
0
    }
3627
330k
    *str = cur;
3628
330k
    ret = xmlStrndup(buf, len);
3629
330k
    if (ret == NULL)
3630
0
        xmlErrMemory(ctxt);
3631
330k
    return(ret);
3632
330k
}
3633
3634
/**
3635
 * xmlParseNmtoken:
3636
 * @ctxt:  an XML parser context
3637
 *
3638
 * DEPRECATED: Internal function, don't use.
3639
 *
3640
 * parse an XML Nmtoken.
3641
 *
3642
 * [7] Nmtoken ::= (NameChar)+
3643
 *
3644
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3645
 *
3646
 * Returns the Nmtoken parsed or NULL
3647
 */
3648
3649
xmlChar *
3650
39.3k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3651
39.3k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3652
39.3k
    xmlChar *ret;
3653
39.3k
    int len = 0, l;
3654
39.3k
    int c;
3655
39.3k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3656
0
                    XML_MAX_TEXT_LENGTH :
3657
39.3k
                    XML_MAX_NAME_LENGTH;
3658
3659
39.3k
    c = CUR_CHAR(l);
3660
3661
114k
    while (xmlIsNameChar(ctxt, c)) {
3662
76.2k
  COPY_BUF(buf, len, c);
3663
76.2k
  NEXTL(l);
3664
76.2k
  c = CUR_CHAR(l);
3665
76.2k
  if (len >= XML_MAX_NAMELEN) {
3666
      /*
3667
       * Okay someone managed to make a huge token, so he's ready to pay
3668
       * for the processing speed.
3669
       */
3670
785
      xmlChar *buffer;
3671
785
      int max = len * 2;
3672
3673
785
      buffer = (xmlChar *) xmlMallocAtomic(max);
3674
785
      if (buffer == NULL) {
3675
0
          xmlErrMemory(ctxt);
3676
0
    return(NULL);
3677
0
      }
3678
785
      memcpy(buffer, buf, len);
3679
74.9k
      while (xmlIsNameChar(ctxt, c)) {
3680
74.1k
    if (len + 10 > max) {
3681
908
        xmlChar *tmp;
3682
3683
908
        max *= 2;
3684
908
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3685
908
        if (tmp == NULL) {
3686
0
      xmlErrMemory(ctxt);
3687
0
      xmlFree(buffer);
3688
0
      return(NULL);
3689
0
        }
3690
908
        buffer = tmp;
3691
908
    }
3692
74.1k
    COPY_BUF(buffer, len, c);
3693
74.1k
                if (len > maxLength) {
3694
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695
0
                    xmlFree(buffer);
3696
0
                    return(NULL);
3697
0
                }
3698
74.1k
    NEXTL(l);
3699
74.1k
    c = CUR_CHAR(l);
3700
74.1k
      }
3701
785
      buffer[len] = 0;
3702
785
      return(buffer);
3703
785
  }
3704
76.2k
    }
3705
38.5k
    if (len == 0)
3706
1.53k
        return(NULL);
3707
36.9k
    if (len > maxLength) {
3708
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709
0
        return(NULL);
3710
0
    }
3711
36.9k
    ret = xmlStrndup(buf, len);
3712
36.9k
    if (ret == NULL)
3713
0
        xmlErrMemory(ctxt);
3714
36.9k
    return(ret);
3715
36.9k
}
3716
3717
/**
3718
 * xmlExpandPEsInEntityValue:
3719
 * @ctxt:  parser context
3720
 * @buf:  string buffer
3721
 * @str:  entity value
3722
 * @length:  size of entity value
3723
 * @depth:  nesting depth
3724
 *
3725
 * Validate an entity value and expand parameter entities.
3726
 */
3727
static void
3728
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3729
13.2k
                          const xmlChar *str, int length, int depth) {
3730
13.2k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3731
13.2k
    const xmlChar *end, *chunk;
3732
13.2k
    int c, l;
3733
3734
13.2k
    if (str == NULL)
3735
0
        return;
3736
3737
13.2k
    depth += 1;
3738
13.2k
    if (depth > maxDepth) {
3739
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3740
0
                       "Maximum entity nesting depth exceeded");
3741
0
  return;
3742
0
    }
3743
3744
13.2k
    end = str + length;
3745
13.2k
    chunk = str;
3746
3747
318k
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3748
311k
        c = *str;
3749
3750
311k
        if (c >= 0x80) {
3751
92.6k
            l = xmlUTF8MultibyteLen(ctxt, str,
3752
92.6k
                    "invalid character in entity value\n");
3753
92.6k
            if (l == 0) {
3754
4.50k
                if (chunk < str)
3755
1.20k
                    xmlSBufAddString(buf, chunk, str - chunk);
3756
4.50k
                xmlSBufAddReplChar(buf);
3757
4.50k
                str += 1;
3758
4.50k
                chunk = str;
3759
88.1k
            } else {
3760
88.1k
                str += l;
3761
88.1k
            }
3762
219k
        } else if (c == '&') {
3763
19.3k
            if (str[1] == '#') {
3764
5.43k
                if (chunk < str)
3765
2.73k
                    xmlSBufAddString(buf, chunk, str - chunk);
3766
3767
5.43k
                c = xmlParseStringCharRef(ctxt, &str);
3768
5.43k
                if (c == 0)
3769
2.61k
                    return;
3770
3771
2.82k
                xmlSBufAddChar(buf, c);
3772
3773
2.82k
                chunk = str;
3774
13.8k
            } else {
3775
13.8k
                xmlChar *name;
3776
3777
                /*
3778
                 * General entity references are checked for
3779
                 * syntactic validity.
3780
                 */
3781
13.8k
                str++;
3782
13.8k
                name = xmlParseStringName(ctxt, &str);
3783
3784
13.8k
                if ((name == NULL) || (*str++ != ';')) {
3785
1.21k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3786
1.21k
                            "EntityValue: '&' forbidden except for entities "
3787
1.21k
                            "references\n");
3788
1.21k
                    xmlFree(name);
3789
1.21k
                    return;
3790
1.21k
                }
3791
3792
12.6k
                xmlFree(name);
3793
12.6k
            }
3794
199k
        } else if (c == '%') {
3795
2.44k
            xmlEntityPtr ent;
3796
3797
2.44k
            if (chunk < str)
3798
1.61k
                xmlSBufAddString(buf, chunk, str - chunk);
3799
3800
2.44k
            ent = xmlParseStringPEReference(ctxt, &str);
3801
2.44k
            if (ent == NULL)
3802
2.21k
                return;
3803
3804
229
            if (!PARSER_EXTERNAL(ctxt)) {
3805
229
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3806
229
                return;
3807
229
            }
3808
3809
0
            if (ent->content == NULL) {
3810
                /*
3811
                 * Note: external parsed entities will not be loaded,
3812
                 * it is not required for a non-validating parser to
3813
                 * complete external PEReferences coming from the
3814
                 * internal subset
3815
                 */
3816
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3817
0
                    ((ctxt->replaceEntities) ||
3818
0
                     (ctxt->validate))) {
3819
0
                    xmlLoadEntityContent(ctxt, ent);
3820
0
                } else {
3821
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3822
0
                                  "not validating will not read content for "
3823
0
                                  "PE entity %s\n", ent->name, NULL);
3824
0
                }
3825
0
            }
3826
3827
            /*
3828
             * TODO: Skip if ent->content is still NULL.
3829
             */
3830
3831
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3832
0
                return;
3833
3834
0
            if (ent->flags & XML_ENT_EXPANDING) {
3835
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3836
0
                xmlHaltParser(ctxt);
3837
0
                return;
3838
0
            }
3839
3840
0
            ent->flags |= XML_ENT_EXPANDING;
3841
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3842
0
                                      depth);
3843
0
            ent->flags &= ~XML_ENT_EXPANDING;
3844
3845
0
            chunk = str;
3846
197k
        } else {
3847
            /* Normal ASCII char */
3848
197k
            if (!IS_BYTE_CHAR(c)) {
3849
2.75k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3850
2.75k
                        "invalid character in entity value\n");
3851
2.75k
                if (chunk < str)
3852
417
                    xmlSBufAddString(buf, chunk, str - chunk);
3853
2.75k
                xmlSBufAddReplChar(buf);
3854
2.75k
                str += 1;
3855
2.75k
                chunk = str;
3856
194k
            } else {
3857
194k
                str += 1;
3858
194k
            }
3859
197k
        }
3860
311k
    }
3861
3862
7.01k
    if (chunk < str)
3863
6.40k
        xmlSBufAddString(buf, chunk, str - chunk);
3864
3865
7.01k
    return;
3866
13.2k
}
3867
3868
/**
3869
 * xmlParseEntityValue:
3870
 * @ctxt:  an XML parser context
3871
 * @orig:  if non-NULL store a copy of the original entity value
3872
 *
3873
 * DEPRECATED: Internal function, don't use.
3874
 *
3875
 * parse a value for ENTITY declarations
3876
 *
3877
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3878
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3879
 *
3880
 * Returns the EntityValue parsed with reference substituted or NULL
3881
 */
3882
xmlChar *
3883
14.2k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3884
14.2k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3885
0
                         XML_MAX_HUGE_LENGTH :
3886
14.2k
                         XML_MAX_TEXT_LENGTH;
3887
14.2k
    xmlSBuf buf;
3888
14.2k
    const xmlChar *start;
3889
14.2k
    int quote, length;
3890
3891
14.2k
    xmlSBufInit(&buf, maxLength);
3892
3893
14.2k
    GROW;
3894
3895
14.2k
    quote = CUR;
3896
14.2k
    if ((quote != '"') && (quote != '\'')) {
3897
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898
0
  return(NULL);
3899
0
    }
3900
14.2k
    CUR_PTR++;
3901
3902
14.2k
    length = 0;
3903
3904
    /*
3905
     * Copy raw content of the entity into a buffer
3906
     */
3907
594k
    while (1) {
3908
594k
        int c;
3909
3910
594k
        if (PARSER_STOPPED(ctxt))
3911
0
            goto error;
3912
3913
594k
        if (CUR_PTR >= ctxt->input->end) {
3914
996
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3915
996
            goto error;
3916
996
        }
3917
3918
593k
        c = CUR;
3919
3920
593k
        if (c == 0) {
3921
2
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3922
2
                    "invalid character in entity value\n");
3923
2
            goto error;
3924
2
        }
3925
593k
        if (c == quote)
3926
13.2k
            break;
3927
580k
        NEXTL(1);
3928
580k
        length += 1;
3929
3930
        /*
3931
         * TODO: Check growth threshold
3932
         */
3933
580k
        if (ctxt->input->end - CUR_PTR < 10)
3934
28.2k
            GROW;
3935
580k
    }
3936
3937
13.2k
    start = CUR_PTR - length;
3938
3939
13.2k
    if (orig != NULL) {
3940
13.2k
        *orig = xmlStrndup(start, length);
3941
13.2k
        if (*orig == NULL)
3942
0
            xmlErrMemory(ctxt);
3943
13.2k
    }
3944
3945
13.2k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3946
3947
13.2k
    NEXTL(1);
3948
3949
13.2k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3950
3951
998
error:
3952
998
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3953
998
    return(NULL);
3954
14.2k
}
3955
3956
/**
3957
 * xmlCheckEntityInAttValue:
3958
 * @ctxt:  parser context
3959
 * @pent:  entity
3960
 * @depth:  nesting depth
3961
 *
3962
 * Check an entity reference in an attribute value for validity
3963
 * without expanding it.
3964
 */
3965
static void
3966
1.14k
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3967
1.14k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3968
1.14k
    const xmlChar *str;
3969
1.14k
    unsigned long expandedSize = pent->length;
3970
1.14k
    int c, flags;
3971
3972
1.14k
    depth += 1;
3973
1.14k
    if (depth > maxDepth) {
3974
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3975
0
                       "Maximum entity nesting depth exceeded");
3976
0
  return;
3977
0
    }
3978
3979
1.14k
    if (pent->flags & XML_ENT_EXPANDING) {
3980
15
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3981
15
        xmlHaltParser(ctxt);
3982
15
        return;
3983
15
    }
3984
3985
    /*
3986
     * If we're parsing a default attribute value in DTD content,
3987
     * the entity might reference other entities which weren't
3988
     * defined yet, so the check isn't reliable.
3989
     */
3990
1.12k
    if (ctxt->inSubset == 0)
3991
1.08k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3992
40
    else
3993
40
        flags = XML_ENT_VALIDATED;
3994
3995
1.12k
    str = pent->content;
3996
1.12k
    if (str == NULL)
3997
0
        goto done;
3998
3999
    /*
4000
     * Note that entity values are already validated. We only check
4001
     * for illegal less-than signs and compute the expanded size
4002
     * of the entity. No special handling for multi-byte characters
4003
     * is needed.
4004
     */
4005
68.7k
    while (!PARSER_STOPPED(ctxt)) {
4006
68.7k
        c = *str;
4007
4008
68.7k
  if (c != '&') {
4009
58.3k
            if (c == 0)
4010
1.09k
                break;
4011
4012
57.2k
            if (c == '<')
4013
483
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4014
483
                        "'<' in entity '%s' is not allowed in attributes "
4015
483
                        "values\n", pent->name);
4016
4017
57.2k
            str += 1;
4018
57.2k
        } else if (str[1] == '#') {
4019
341
            int val;
4020
4021
341
      val = xmlParseStringCharRef(ctxt, &str);
4022
341
      if (val == 0) {
4023
3
                pent->content[0] = 0;
4024
3
                break;
4025
3
            }
4026
10.1k
  } else {
4027
10.1k
            xmlChar *name;
4028
10.1k
            xmlEntityPtr ent;
4029
4030
10.1k
      name = xmlParseStringEntityRef(ctxt, &str);
4031
10.1k
      if (name == NULL) {
4032
4
                pent->content[0] = 0;
4033
4
                break;
4034
4
            }
4035
4036
10.0k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4037
10.0k
            xmlFree(name);
4038
4039
10.0k
            if ((ent != NULL) &&
4040
10.0k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4041
8.53k
                if ((ent->flags & flags) != flags) {
4042
410
                    pent->flags |= XML_ENT_EXPANDING;
4043
410
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4044
410
                    pent->flags &= ~XML_ENT_EXPANDING;
4045
410
                }
4046
4047
8.53k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4048
8.53k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4049
8.53k
            }
4050
10.0k
        }
4051
68.7k
    }
4052
4053
1.12k
done:
4054
1.12k
    if (ctxt->inSubset == 0)
4055
1.08k
        pent->expandedSize = expandedSize;
4056
4057
1.12k
    pent->flags |= flags;
4058
1.12k
}
4059
4060
/**
4061
 * xmlExpandEntityInAttValue:
4062
 * @ctxt:  parser context
4063
 * @buf:  string buffer
4064
 * @str:  entity or attribute value
4065
 * @pent:  entity for entity value, NULL for attribute values
4066
 * @normalize:  whether to collapse whitespace
4067
 * @inSpace:  whitespace state
4068
 * @depth:  nesting depth
4069
 * @check:  whether to check for amplification
4070
 *
4071
 * Expand general entity references in an entity or attribute value.
4072
 * Perform attribute value normalization.
4073
 */
4074
static void
4075
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4076
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4077
262k
                          int *inSpace, int depth, int check) {
4078
262k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4079
262k
    int c, chunkSize;
4080
4081
262k
    if (str == NULL)
4082
0
        return;
4083
4084
262k
    depth += 1;
4085
262k
    if (depth > maxDepth) {
4086
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4087
0
                       "Maximum entity nesting depth exceeded");
4088
0
  return;
4089
0
    }
4090
4091
262k
    if (pent != NULL) {
4092
229k
        if (pent->flags & XML_ENT_EXPANDING) {
4093
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4094
0
            xmlHaltParser(ctxt);
4095
0
            return;
4096
0
        }
4097
4098
229k
        if (check) {
4099
0
            if (xmlParserEntityCheck(ctxt, pent->length))
4100
0
                return;
4101
0
        }
4102
229k
    }
4103
4104
262k
    chunkSize = 0;
4105
4106
    /*
4107
     * Note that entity values are already validated. No special
4108
     * handling for multi-byte characters is needed.
4109
     */
4110
41.3M
    while (!PARSER_STOPPED(ctxt)) {
4111
41.3M
        c = *str;
4112
4113
41.3M
  if (c != '&') {
4114
41.0M
            if (c == 0)
4115
255k
                break;
4116
4117
            /*
4118
             * If this function is called without an entity, it is used to
4119
             * expand entities in an attribute content where less-than was
4120
             * already unscaped and is allowed.
4121
             */
4122
40.7M
            if ((pent != NULL) && (c == '<')) {
4123
6.44k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4124
6.44k
                        "'<' in entity '%s' is not allowed in attributes "
4125
6.44k
                        "values\n", pent->name);
4126
6.44k
                break;
4127
6.44k
            }
4128
4129
40.7M
            if (c <= 0x20) {
4130
1.77M
                if ((normalize) && (*inSpace)) {
4131
                    /* Skip char */
4132
0
                    if (chunkSize > 0) {
4133
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4134
0
                        chunkSize = 0;
4135
0
                    }
4136
1.77M
                } else if (c < 0x20) {
4137
299k
                    if (chunkSize > 0) {
4138
296k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4139
296k
                        chunkSize = 0;
4140
296k
                    }
4141
4142
299k
                    xmlSBufAddCString(buf, " ", 1);
4143
1.47M
                } else {
4144
1.47M
                    chunkSize += 1;
4145
1.47M
                }
4146
4147
1.77M
                *inSpace = 1;
4148
38.9M
            } else {
4149
38.9M
                chunkSize += 1;
4150
38.9M
                *inSpace = 0;
4151
38.9M
            }
4152
4153
40.7M
            str += 1;
4154
40.7M
        } else if (str[1] == '#') {
4155
11.1k
            int val;
4156
4157
11.1k
            if (chunkSize > 0) {
4158
7.84k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4159
7.84k
                chunkSize = 0;
4160
7.84k
            }
4161
4162
11.1k
      val = xmlParseStringCharRef(ctxt, &str);
4163
11.1k
      if (val == 0) {
4164
0
                if (pent != NULL)
4165
0
                    pent->content[0] = 0;
4166
0
                break;
4167
0
            }
4168
4169
11.1k
            if (val == ' ') {
4170
1.10k
                if ((!normalize) || (!*inSpace))
4171
1.10k
                    xmlSBufAddCString(buf, " ", 1);
4172
1.10k
                *inSpace = 1;
4173
10.0k
            } else {
4174
10.0k
                xmlSBufAddChar(buf, val);
4175
10.0k
                *inSpace = 0;
4176
10.0k
            }
4177
306k
  } else {
4178
306k
            xmlChar *name;
4179
306k
            xmlEntityPtr ent;
4180
4181
306k
            if (chunkSize > 0) {
4182
125k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4183
125k
                chunkSize = 0;
4184
125k
            }
4185
4186
306k
      name = xmlParseStringEntityRef(ctxt, &str);
4187
306k
            if (name == NULL) {
4188
0
                if (pent != NULL)
4189
0
                    pent->content[0] = 0;
4190
0
                break;
4191
0
            }
4192
4193
306k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4194
306k
            xmlFree(name);
4195
4196
306k
      if ((ent != NULL) &&
4197
306k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4198
33.5k
    if (ent->content == NULL) {
4199
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4200
0
          "predefined entity has no content\n");
4201
0
                    break;
4202
0
                }
4203
4204
33.5k
                xmlSBufAddString(buf, ent->content, ent->length);
4205
4206
33.5k
                *inSpace = 0;
4207
273k
      } else if ((ent != NULL) && (ent->content != NULL)) {
4208
229k
                if (pent != NULL)
4209
220k
                    pent->flags |= XML_ENT_EXPANDING;
4210
229k
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4211
229k
                                          normalize, inSpace, depth, check);
4212
229k
                if (pent != NULL)
4213
220k
                    pent->flags &= ~XML_ENT_EXPANDING;
4214
229k
      }
4215
306k
        }
4216
41.3M
    }
4217
4218
262k
    if (chunkSize > 0)
4219
259k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4220
4221
262k
    return;
4222
262k
}
4223
4224
/**
4225
 * xmlExpandEntitiesInAttValue:
4226
 * @ctxt:  parser context
4227
 * @str:  entity or attribute value
4228
 * @normalize:  whether to collapse whitespace
4229
 *
4230
 * Expand general entity references in an entity or attribute value.
4231
 * Perform attribute value normalization.
4232
 *
4233
 * Returns the expanded attribtue value.
4234
 */
4235
xmlChar *
4236
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4237
32.5k
                            int normalize) {
4238
32.5k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4239
0
                         XML_MAX_HUGE_LENGTH :
4240
32.5k
                         XML_MAX_TEXT_LENGTH;
4241
32.5k
    xmlSBuf buf;
4242
32.5k
    int inSpace = 1;
4243
4244
32.5k
    xmlSBufInit(&buf, maxLength);
4245
4246
32.5k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4247
32.5k
                              ctxt->inputNr, /* check */ 0);
4248
4249
32.5k
    if ((normalize) && (inSpace) && (buf.size > 0))
4250
0
        buf.size--;
4251
4252
32.5k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4253
32.5k
}
4254
4255
/**
4256
 * xmlParseAttValueInternal:
4257
 * @ctxt:  an XML parser context
4258
 * @len:  attribute len result
4259
 * @alloc:  whether the attribute was reallocated as a new string
4260
 * @normalize:  if 1 then further non-CDATA normalization must be done
4261
 *
4262
 * parse a value for an attribute.
4263
 * NOTE: if no normalization is needed, the routine will return pointers
4264
 *       directly from the data buffer.
4265
 *
4266
 * 3.3.3 Attribute-Value Normalization:
4267
 * Before the value of an attribute is passed to the application or
4268
 * checked for validity, the XML processor must normalize it as follows:
4269
 * - a character reference is processed by appending the referenced
4270
 *   character to the attribute value
4271
 * - an entity reference is processed by recursively processing the
4272
 *   replacement text of the entity
4273
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274
 *   appending #x20 to the normalized value, except that only a single
4275
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4276
 *   parsed entity or the literal entity value of an internal parsed entity
4277
 * - other characters are processed by appending them to the normalized value
4278
 * If the declared value is not CDATA, then the XML processor must further
4279
 * process the normalized attribute value by discarding any leading and
4280
 * trailing space (#x20) characters, and by replacing sequences of space
4281
 * (#x20) characters by a single space (#x20) character.
4282
 * All attributes for which no declaration has been read should be treated
4283
 * by a non-validating parser as if declared CDATA.
4284
 *
4285
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4286
 *     caller if it was copied, this can be detected by val[*len] == 0.
4287
 */
4288
static xmlChar *
4289
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4290
63.9k
                         int normalize) {
4291
63.9k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4292
0
                         XML_MAX_HUGE_LENGTH :
4293
63.9k
                         XML_MAX_TEXT_LENGTH;
4294
63.9k
    xmlSBuf buf;
4295
63.9k
    xmlChar *ret;
4296
63.9k
    int c, l, quote, flags, chunkSize;
4297
63.9k
    int inSpace = 1;
4298
4299
63.9k
    xmlSBufInit(&buf, maxLength);
4300
4301
63.9k
    GROW;
4302
4303
63.9k
    quote = CUR;
4304
63.9k
    if ((quote != '"') && (quote != '\'')) {
4305
4.66k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306
4.66k
  return(NULL);
4307
4.66k
    }
4308
59.2k
    CUR_PTR++;
4309
4310
59.2k
    if (ctxt->inSubset == 0)
4311
24.9k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312
34.3k
    else
4313
34.3k
        flags = XML_ENT_VALIDATED;
4314
4315
59.2k
    inSpace = 1;
4316
59.2k
    chunkSize = 0;
4317
4318
807k
    while (1) {
4319
807k
        if (PARSER_STOPPED(ctxt))
4320
15
            goto error;
4321
4322
807k
        if (CUR_PTR >= ctxt->input->end) {
4323
9.79k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324
9.79k
                           "AttValue: ' expected\n");
4325
9.79k
            goto error;
4326
9.79k
        }
4327
4328
        /*
4329
         * TODO: Check growth threshold
4330
         */
4331
797k
        if (ctxt->input->end - CUR_PTR < 10)
4332
99.7k
            GROW;
4333
4334
797k
        c = CUR;
4335
4336
797k
        if (c >= 0x80) {
4337
126k
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338
126k
                    "invalid character in attribute value\n");
4339
126k
            if (l == 0) {
4340
6.98k
                if (chunkSize > 0) {
4341
1.13k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342
1.13k
                    chunkSize = 0;
4343
1.13k
                }
4344
6.98k
                xmlSBufAddReplChar(&buf);
4345
6.98k
                NEXTL(1);
4346
119k
            } else {
4347
119k
                chunkSize += l;
4348
119k
                NEXTL(l);
4349
119k
            }
4350
4351
126k
            inSpace = 0;
4352
671k
        } else if (c != '&') {
4353
592k
            if (c > 0x20) {
4354
453k
                if (c == quote)
4355
48.1k
                    break;
4356
4357
405k
                if (c == '<')
4358
26.6k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359
4360
405k
                chunkSize += 1;
4361
405k
                inSpace = 0;
4362
405k
            } else if (!IS_BYTE_CHAR(c)) {
4363
3.97k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364
3.97k
                        "invalid character in attribute value\n");
4365
3.97k
                if (chunkSize > 0) {
4366
799
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367
799
                    chunkSize = 0;
4368
799
                }
4369
3.97k
                xmlSBufAddReplChar(&buf);
4370
3.97k
                inSpace = 0;
4371
134k
            } else {
4372
                /* Whitespace */
4373
134k
                if ((normalize) && (inSpace)) {
4374
                    /* Skip char */
4375
1.13k
                    if (chunkSize > 0) {
4376
412
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377
412
                        chunkSize = 0;
4378
412
                    }
4379
133k
                } else if (c < 0x20) {
4380
                    /* Convert to space */
4381
5.33k
                    if (chunkSize > 0) {
4382
3.21k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383
3.21k
                        chunkSize = 0;
4384
3.21k
                    }
4385
4386
5.33k
                    xmlSBufAddCString(&buf, " ", 1);
4387
128k
                } else {
4388
128k
                    chunkSize += 1;
4389
128k
                }
4390
4391
134k
                inSpace = 1;
4392
4393
134k
                if ((c == 0xD) && (NXT(1) == 0xA))
4394
201
                    CUR_PTR++;
4395
134k
            }
4396
4397
544k
            NEXTL(1);
4398
544k
        } else if (NXT(1) == '#') {
4399
8.82k
            int val;
4400
4401
8.82k
            if (chunkSize > 0) {
4402
5.26k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403
5.26k
                chunkSize = 0;
4404
5.26k
            }
4405
4406
8.82k
            val = xmlParseCharRef(ctxt);
4407
8.82k
            if (val == 0)
4408
1.29k
                goto error;
4409
4410
7.52k
            if ((val == '&') && (!ctxt->replaceEntities)) {
4411
                /*
4412
                 * The reparsing will be done in xmlStringGetNodeList()
4413
                 * called by the attribute() function in SAX.c
4414
                 */
4415
1.29k
                xmlSBufAddCString(&buf, "&#38;", 5);
4416
1.29k
                inSpace = 0;
4417
6.22k
            } else if (val == ' ') {
4418
1.10k
                if ((!normalize) || (!inSpace))
4419
1.03k
                    xmlSBufAddCString(&buf, " ", 1);
4420
1.10k
                inSpace = 1;
4421
5.11k
            } else {
4422
5.11k
                xmlSBufAddChar(&buf, val);
4423
5.11k
                inSpace = 0;
4424
5.11k
            }
4425
70.5k
        } else {
4426
70.5k
            const xmlChar *name;
4427
70.5k
            xmlEntityPtr ent;
4428
4429
70.5k
            if (chunkSize > 0) {
4430
18.0k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431
18.0k
                chunkSize = 0;
4432
18.0k
            }
4433
4434
70.5k
            name = xmlParseEntityRefInternal(ctxt);
4435
70.5k
            if (name == NULL) {
4436
                /*
4437
                 * Probably a literal '&' which wasn't escaped.
4438
                 * TODO: Handle gracefully in recovery mode.
4439
                 */
4440
5.79k
                continue;
4441
5.79k
            }
4442
4443
64.7k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444
64.7k
            if (ent == NULL)
4445
10.3k
                continue;
4446
4447
54.3k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448
3.16k
                if ((ent->content[0] == '&') && (!ctxt->replaceEntities))
4449
1.99k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4450
1.17k
                else
4451
1.17k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4452
3.16k
                inSpace = 0;
4453
51.2k
            } else if (ctxt->replaceEntities) {
4454
0
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455
0
                                          normalize, &inSpace, ctxt->inputNr,
4456
0
                                          /* check */ 1);
4457
51.2k
            } else {
4458
51.2k
                if ((ent->flags & flags) != flags)
4459
730
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460
4461
51.2k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462
17
                    ent->content[0] = 0;
4463
17
                    goto error;
4464
17
                }
4465
4466
                /*
4467
                 * Just output the reference
4468
                 */
4469
51.2k
                xmlSBufAddCString(&buf, "&", 1);
4470
51.2k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471
51.2k
                xmlSBufAddCString(&buf, ";", 1);
4472
4473
51.2k
                inSpace = 0;
4474
51.2k
            }
4475
54.3k
  }
4476
797k
    }
4477
4478
48.1k
    if ((buf.mem == NULL) && (alloc != NULL)) {
4479
15.3k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4480
4481
15.3k
        if (attlen != NULL)
4482
15.3k
            *attlen = chunkSize;
4483
15.3k
        if ((normalize) && (inSpace) && (chunkSize > 0))
4484
90
            *attlen -= 1;
4485
15.3k
        *alloc = 0;
4486
4487
        /* Report potential error */
4488
15.3k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489
32.7k
    } else {
4490
32.7k
        if (chunkSize > 0)
4491
23.0k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492
4493
32.7k
        if ((normalize) && (inSpace) && (buf.size > 0))
4494
100
            buf.size--;
4495
4496
32.7k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497
4498
32.7k
        if (ret != NULL) {
4499
32.7k
            if (attlen != NULL)
4500
2.19k
                *attlen = buf.size;
4501
32.7k
            if (alloc != NULL)
4502
2.19k
                *alloc = 1;
4503
32.7k
        }
4504
32.7k
    }
4505
4506
48.1k
    NEXTL(1);
4507
4508
48.1k
    return(ret);
4509
4510
11.1k
error:
4511
11.1k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512
11.1k
    return(NULL);
4513
59.2k
}
4514
4515
/**
4516
 * xmlParseAttValue:
4517
 * @ctxt:  an XML parser context
4518
 *
4519
 * DEPRECATED: Internal function, don't use.
4520
 *
4521
 * parse a value for an attribute
4522
 * Note: the parser won't do substitution of entities here, this
4523
 * will be handled later in xmlStringGetNodeList
4524
 *
4525
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526
 *                   "'" ([^<&'] | Reference)* "'"
4527
 *
4528
 * 3.3.3 Attribute-Value Normalization:
4529
 * Before the value of an attribute is passed to the application or
4530
 * checked for validity, the XML processor must normalize it as follows:
4531
 * - a character reference is processed by appending the referenced
4532
 *   character to the attribute value
4533
 * - an entity reference is processed by recursively processing the
4534
 *   replacement text of the entity
4535
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536
 *   appending #x20 to the normalized value, except that only a single
4537
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4538
 *   parsed entity or the literal entity value of an internal parsed entity
4539
 * - other characters are processed by appending them to the normalized value
4540
 * If the declared value is not CDATA, then the XML processor must further
4541
 * process the normalized attribute value by discarding any leading and
4542
 * trailing space (#x20) characters, and by replacing sequences of space
4543
 * (#x20) characters by a single space (#x20) character.
4544
 * All attributes for which no declaration has been read should be treated
4545
 * by a non-validating parser as if declared CDATA.
4546
 *
4547
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548
 */
4549
4550
4551
xmlChar *
4552
45.0k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4553
45.0k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4554
45.0k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4555
45.0k
}
4556
4557
/**
4558
 * xmlParseSystemLiteral:
4559
 * @ctxt:  an XML parser context
4560
 *
4561
 * DEPRECATED: Internal function, don't use.
4562
 *
4563
 * parse an XML Literal
4564
 *
4565
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566
 *
4567
 * Returns the SystemLiteral parsed or NULL
4568
 */
4569
4570
xmlChar *
4571
5.14k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4572
5.14k
    xmlChar *buf = NULL;
4573
5.14k
    int len = 0;
4574
5.14k
    int size = XML_PARSER_BUFFER_SIZE;
4575
5.14k
    int cur, l;
4576
5.14k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577
0
                    XML_MAX_TEXT_LENGTH :
4578
5.14k
                    XML_MAX_NAME_LENGTH;
4579
5.14k
    xmlChar stop;
4580
4581
5.14k
    if (RAW == '"') {
4582
2.85k
        NEXT;
4583
2.85k
  stop = '"';
4584
2.85k
    } else if (RAW == '\'') {
4585
464
        NEXT;
4586
464
  stop = '\'';
4587
1.82k
    } else {
4588
1.82k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589
1.82k
  return(NULL);
4590
1.82k
    }
4591
4592
3.32k
    buf = (xmlChar *) xmlMallocAtomic(size);
4593
3.32k
    if (buf == NULL) {
4594
0
        xmlErrMemory(ctxt);
4595
0
  return(NULL);
4596
0
    }
4597
3.32k
    cur = CUR_CHAR(l);
4598
43.7k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4599
40.4k
  if (len + 5 >= size) {
4600
227
      xmlChar *tmp;
4601
4602
227
      size *= 2;
4603
227
      tmp = (xmlChar *) xmlRealloc(buf, size);
4604
227
      if (tmp == NULL) {
4605
0
          xmlFree(buf);
4606
0
    xmlErrMemory(ctxt);
4607
0
    return(NULL);
4608
0
      }
4609
227
      buf = tmp;
4610
227
  }
4611
40.4k
  COPY_BUF(buf, len, cur);
4612
40.4k
        if (len > maxLength) {
4613
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614
0
            xmlFree(buf);
4615
0
            return(NULL);
4616
0
        }
4617
40.4k
  NEXTL(l);
4618
40.4k
  cur = CUR_CHAR(l);
4619
40.4k
    }
4620
3.32k
    buf[len] = 0;
4621
3.32k
    if (!IS_CHAR(cur)) {
4622
957
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623
2.36k
    } else {
4624
2.36k
  NEXT;
4625
2.36k
    }
4626
3.32k
    return(buf);
4627
3.32k
}
4628
4629
/**
4630
 * xmlParsePubidLiteral:
4631
 * @ctxt:  an XML parser context
4632
 *
4633
 * DEPRECATED: Internal function, don't use.
4634
 *
4635
 * parse an XML public literal
4636
 *
4637
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638
 *
4639
 * Returns the PubidLiteral parsed or NULL.
4640
 */
4641
4642
xmlChar *
4643
3.73k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4644
3.73k
    xmlChar *buf = NULL;
4645
3.73k
    int len = 0;
4646
3.73k
    int size = XML_PARSER_BUFFER_SIZE;
4647
3.73k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648
0
                    XML_MAX_TEXT_LENGTH :
4649
3.73k
                    XML_MAX_NAME_LENGTH;
4650
3.73k
    xmlChar cur;
4651
3.73k
    xmlChar stop;
4652
4653
3.73k
    if (RAW == '"') {
4654
2.00k
        NEXT;
4655
2.00k
  stop = '"';
4656
2.00k
    } else if (RAW == '\'') {
4657
1.07k
        NEXT;
4658
1.07k
  stop = '\'';
4659
1.07k
    } else {
4660
651
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661
651
  return(NULL);
4662
651
    }
4663
3.07k
    buf = (xmlChar *) xmlMallocAtomic(size);
4664
3.07k
    if (buf == NULL) {
4665
0
  xmlErrMemory(ctxt);
4666
0
  return(NULL);
4667
0
    }
4668
3.07k
    cur = CUR;
4669
42.8k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670
42.8k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4671
39.7k
  if (len + 1 >= size) {
4672
194
      xmlChar *tmp;
4673
4674
194
      size *= 2;
4675
194
      tmp = (xmlChar *) xmlRealloc(buf, size);
4676
194
      if (tmp == NULL) {
4677
0
    xmlErrMemory(ctxt);
4678
0
    xmlFree(buf);
4679
0
    return(NULL);
4680
0
      }
4681
194
      buf = tmp;
4682
194
  }
4683
39.7k
  buf[len++] = cur;
4684
39.7k
        if (len > maxLength) {
4685
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686
0
            xmlFree(buf);
4687
0
            return(NULL);
4688
0
        }
4689
39.7k
  NEXT;
4690
39.7k
  cur = CUR;
4691
39.7k
    }
4692
3.07k
    buf[len] = 0;
4693
3.07k
    if (cur != stop) {
4694
2.45k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695
2.45k
    } else {
4696
620
  NEXTL(1);
4697
620
    }
4698
3.07k
    return(buf);
4699
3.07k
}
4700
4701
/* Forward declaration: slow-path scanner that xmlParseCharDataInternal() falls back to. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702
4703
/*
4704
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by a raw input byte. A non-zero entry (the byte
 * value itself) means the inner scanning loop may step over that byte;
 * a zero entry makes the loop stop. Zero entries are: the control
 * characters other than 0x09 (so 0x0A and 0x0D stop the loop too),
 * '&' (0x26), '<' (0x3C), ']' (0x5D) and every byte >= 0x80.
4705
 */
4706
static const unsigned char test_char_data[256] = {
4707
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4708
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4709
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4710
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4711
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4712
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4713
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4714
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4715
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4716
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4717
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4718
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4719
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4720
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4721
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4722
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4723
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4724
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4725
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4726
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4727
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4728
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4729
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4730
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4731
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4732
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4733
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4734
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4735
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4736
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4737
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4738
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4739
};
4740
4741
/**
4742
 * xmlParseCharDataInternal:
4743
 * @ctxt:  an XML parser context
4744
 * @partial:  buffer may contain partial UTF-8 sequences
4745
 *
4746
 * Parse character data. Always makes progress if the first char isn't
4747
 * '<' or '&'.
4748
 *
4749
 * The right angle bracket (>) may be represented using the string "&gt;",
4750
 * and must, for compatibility, be escaped using "&gt;" or a character
4751
 * reference when it appears in the string "]]>" in content, when that
4752
 * string is not marking the end of a CDATA section.
4753
 *
4754
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * Implementation notes: the loop below is a fast path that scans plain
 * ASCII directly in the input buffer and hands slices of that buffer
 * to the SAX callbacks without copying. When the SAX handler has
 * distinct ignorableWhitespace and characters callbacks, areBlanks()
 * is consulted to choose between them. The function returns when it
 * reaches '<' or '&', or after reporting a misplaced "]]>". When the
 * scan stops on a byte outside 0x20-0x7F, 0x09 and 0x0A, the saved
 * line/column are restored and the rest is handled by
 * xmlParseCharDataComplex(), to which @partial is passed through.
4755
 */
4756
static void
4757
49.1k
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4758
49.1k
    const xmlChar *in;
4759
49.1k
    int nbchar = 0;
4760
49.1k
    int line = ctxt->input->line;
4761
49.1k
    int col = ctxt->input->col;
4762
49.1k
    int ccol;
4763
4764
49.1k
    GROW;
4765
    /*
4766
     * Accelerated common case where input don't need to be
4767
     * modified before passing it to the handler.
4768
     */
4769
49.1k
    in = ctxt->input->cur;
4770
49.6k
    do {
4771
49.9k
get_more_space:
4772
57.2k
        while (*in == 0x20) { in++; ctxt->input->col++; }
4773
49.9k
        if (*in == 0xA) {
4774
848
            do {
4775
848
                ctxt->input->line++; ctxt->input->col = 1;
4776
848
                in++;
4777
848
            } while (*in == 0xA);
4778
285
            goto get_more_space;
4779
285
        }
        /*
         * Only spaces and line feeds were skipped since input->cur:
         * if '<' follows, deliver that run (if any) and return.
         */
4780
49.6k
        if (*in == '<') {
4781
3.37k
            nbchar = in - ctxt->input->cur;
4782
3.37k
            if (nbchar > 0) {
4783
3.37k
                const xmlChar *tmp = ctxt->input->cur;
4784
3.37k
                ctxt->input->cur = in;
4785
4786
3.37k
                if ((ctxt->sax != NULL) &&
4787
3.37k
                    (ctxt->disableSAX == 0) &&
4788
3.37k
                    (ctxt->sax->ignorableWhitespace !=
4789
2.75k
                     ctxt->sax->characters)) {
4790
1.98k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791
1.07k
                        if (ctxt->sax->ignorableWhitespace != NULL)
4792
1.07k
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4793
1.07k
                                                   tmp, nbchar);
4794
1.07k
                    } else {
4795
909
                        if (ctxt->sax->characters != NULL)
4796
909
                            ctxt->sax->characters(ctxt->userData,
4797
909
                                                  tmp, nbchar);
4798
909
                        if (*ctxt->space == -1)
4799
427
                            *ctxt->space = -2;
4800
909
                    }
4801
1.98k
                } else if ((ctxt->sax != NULL) &&
4802
1.38k
                           (ctxt->disableSAX == 0) &&
4803
1.38k
                           (ctxt->sax->characters != NULL)) {
4804
772
                    ctxt->sax->characters(ctxt->userData,
4805
772
                                          tmp, nbchar);
4806
772
                }
4807
3.37k
            }
4808
3.37k
            return;
4809
3.37k
        }
4810
        /* Step over every byte that test_char_data accepts, counting columns. */
4811
49.7k
get_more:
4812
49.7k
        ccol = ctxt->input->col;
4813
140k
        while (test_char_data[*in]) {
4814
90.9k
            in++;
4815
90.9k
            ccol++;
4816
90.9k
        }
4817
49.7k
        ctxt->input->col = ccol;
4818
49.7k
        if (*in == 0xA) {
4819
567
            do {
4820
567
                ctxt->input->line++; ctxt->input->col = 1;
4821
567
                in++;
4822
567
            } while (*in == 0xA);
4823
265
            goto get_more;
4824
265
        }
        /* A lone ']' is ordinary data; "]]>" is an error in content. */
4825
49.4k
        if (*in == ']') {
4826
3.47k
            if ((in[1] == ']') && (in[2] == '>')) {
4827
303
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828
303
                ctxt->input->cur = in + 1;
4829
303
                return;
4830
303
            }
4831
3.16k
            in++;
4832
3.16k
            ctxt->input->col++;
4833
3.16k
            goto get_more;
4834
3.47k
        }
4835
45.9k
        nbchar = in - ctxt->input->cur;
4836
45.9k
        if (nbchar > 0) {
4837
36.1k
            if ((ctxt->sax != NULL) &&
4838
36.1k
                (ctxt->disableSAX == 0) &&
4839
36.1k
                (ctxt->sax->ignorableWhitespace !=
4840
22.2k
                 ctxt->sax->characters) &&
4841
36.1k
                (IS_BLANK_CH(*ctxt->input->cur))) {
4842
1.72k
                const xmlChar *tmp = ctxt->input->cur;
4843
1.72k
                ctxt->input->cur = in;
4844
4845
1.72k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846
527
                    if (ctxt->sax->ignorableWhitespace != NULL)
4847
527
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4848
527
                                                       tmp, nbchar);
4849
1.20k
                } else {
4850
1.20k
                    if (ctxt->sax->characters != NULL)
4851
1.20k
                        ctxt->sax->characters(ctxt->userData,
4852
1.20k
                                              tmp, nbchar);
4853
1.20k
                    if (*ctxt->space == -1)
4854
646
                        *ctxt->space = -2;
4855
1.20k
                }
4856
1.72k
                line = ctxt->input->line;
4857
1.72k
                col = ctxt->input->col;
4858
34.4k
            } else if ((ctxt->sax != NULL) &&
4859
34.4k
                       (ctxt->disableSAX == 0)) {
4860
20.4k
                if (ctxt->sax->characters != NULL)
4861
20.4k
                    ctxt->sax->characters(ctxt->userData,
4862
20.4k
                                          ctxt->input->cur, nbchar);
4863
20.4k
                line = ctxt->input->line;
4864
20.4k
                col = ctxt->input->col;
4865
20.4k
            }
4866
36.1k
        }
4867
45.9k
        ctxt->input->cur = in;
        /*
         * CR LF: leave input->cur on the LF so that the CR is not part
         * of the next chunk, then re-test the loop condition.
         */
4868
45.9k
        if (*in == 0xD) {
4869
1.61k
            in++;
4870
1.61k
            if (*in == 0xA) {
4871
522
                ctxt->input->cur = in;
4872
522
                in++;
4873
522
                ctxt->input->line++; ctxt->input->col = 1;
4874
522
                continue; /* while */
4875
522
            }
4876
1.09k
            in--;
4877
1.09k
        }
4878
45.4k
        if (*in == '<') {
4879
28.8k
            return;
4880
28.8k
        }
4881
16.6k
        if (*in == '&') {
4882
4.64k
            return;
4883
4.64k
        }
4884
11.9k
        SHRINK;
4885
11.9k
        GROW;
4886
11.9k
        in = ctxt->input->cur;
4887
12.5k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888
12.5k
             (*in == 0x09) || (*in == 0x0a));
    /*
     * The next byte is outside the fast-path range: restore the saved
     * line/column and let the slow routine continue from input->cur.
     */
4889
11.9k
    ctxt->input->line = line;
4890
11.9k
    ctxt->input->col = col;
4891
11.9k
    xmlParseCharDataComplex(ctxt, partial);
4892
11.9k
}
4893
4894
/**
4895
 * xmlParseCharDataComplex:
4896
 * @ctxt:  an XML parser context
4897
 * @cdata:  int indicating whether we are within a CDATA section
4898
 *
4899
 * Always makes progress if the first char isn't '<' or '&'.
4900
 *
4901
 * parse a CharData section.this is the fallback function
4902
 * of xmlParseCharData() when the parsing requires handling
4903
 * of non-ASCII characters.
4904
 */
4905
static void
4906
11.9k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4907
11.9k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908
11.9k
    int nbchar = 0;
4909
11.9k
    int cur, l;
4910
4911
11.9k
    cur = CUR_CHAR(l);
4912
63.3k
    while ((cur != '<') && /* checked */
4913
63.3k
           (cur != '&') &&
4914
63.3k
     (IS_CHAR(cur))) {
4915
51.3k
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916
246
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917
246
  }
4918
51.3k
  COPY_BUF(buf, nbchar, cur);
4919
  /* move current position before possible calling of ctxt->sax->characters */
4920
51.3k
  NEXTL(l);
4921
51.3k
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922
271
      buf[nbchar] = 0;
4923
4924
      /*
4925
       * OK the segment is to be consumed as chars.
4926
       */
4927
271
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928
137
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4929
10
        if (ctxt->sax->ignorableWhitespace != NULL)
4930
10
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4931
10
                                     buf, nbchar);
4932
127
    } else {
4933
127
        if (ctxt->sax->characters != NULL)
4934
127
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935
127
        if ((ctxt->sax->characters !=
4936
127
             ctxt->sax->ignorableWhitespace) &&
4937
127
      (*ctxt->space == -1))
4938
27
      *ctxt->space = -2;
4939
127
    }
4940
137
      }
4941
271
      nbchar = 0;
4942
271
            SHRINK;
4943
271
  }
4944
51.3k
  cur = CUR_CHAR(l);
4945
51.3k
    }
4946
11.9k
    if (nbchar != 0) {
4947
5.47k
        buf[nbchar] = 0;
4948
  /*
4949
   * OK the segment is to be consumed as chars.
4950
   */
4951
5.47k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952
3.24k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4953
462
    if (ctxt->sax->ignorableWhitespace != NULL)
4954
462
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955
2.77k
      } else {
4956
2.77k
    if (ctxt->sax->characters != NULL)
4957
2.77k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958
2.77k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959
2.77k
        (*ctxt->space == -1))
4960
378
        *ctxt->space = -2;
4961
2.77k
      }
4962
3.24k
  }
4963
5.47k
    }
4964
    /*
4965
     * cur == 0 can mean
4966
     *
4967
     * - End of buffer.
4968
     * - An actual 0 character.
4969
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970
     */
4971
11.9k
    if (ctxt->input->cur < ctxt->input->end) {
4972
10.6k
        if ((cur == 0) && (CUR != 0)) {
4973
7
            if (partial == 0) {
4974
7
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975
7
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976
7
                NEXTL(1);
4977
7
            }
4978
10.6k
        } else if ((cur != '<') && (cur != '&')) {
4979
            /* Generate the error and skip the offending character */
4980
5.39k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981
5.39k
                              "PCDATA invalid Char value %d\n", cur);
4982
5.39k
            NEXTL(l);
4983
5.39k
        }
4984
10.6k
    }
4985
11.9k
}
4986
4987
/**
4988
 * xmlParseCharData:
4989
 * @ctxt:  an XML parser context
4990
 * @cdata:  unused
4991
 *
4992
 * DEPRECATED: Internal function, don't use.
4993
 */
4994
void
4995
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4996
0
    xmlParseCharDataInternal(ctxt, 0);
4997
0
}
4998
4999
/**
5000
 * xmlParseExternalID:
5001
 * @ctxt:  an XML parser context
5002
 * @publicID:  a xmlChar** receiving PubidLiteral
5003
 * @strict: indicate whether we should restrict parsing to only
5004
 *          production [75], see NOTE below
5005
 *
5006
 * DEPRECATED: Internal function, don't use.
5007
 *
5008
 * Parse an External ID or a Public ID
5009
 *
5010
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5012
 *
5013
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5015
 *
5016
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017
 *
5018
 * Returns the function returns SystemLiteral and in the second
5019
 *                case publicID receives PubidLiteral, is strict is off
5020
 *                it is possible to return NULL and have publicID set.
5021
 */
5022
5023
xmlChar *
5024
19.5k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5025
19.5k
    xmlChar *URI = NULL;
5026
5027
19.5k
    *publicID = NULL;
5028
19.5k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029
3.31k
        SKIP(6);
5030
3.31k
  if (SKIP_BLANKS == 0) {
5031
2.94k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032
2.94k
                     "Space required after 'SYSTEM'\n");
5033
2.94k
  }
5034
3.31k
  URI = xmlParseSystemLiteral(ctxt);
5035
3.31k
  if (URI == NULL) {
5036
531
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037
531
        }
5038
16.2k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039
3.73k
        SKIP(6);
5040
3.73k
  if (SKIP_BLANKS == 0) {
5041
2.90k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042
2.90k
        "Space required after 'PUBLIC'\n");
5043
2.90k
  }
5044
3.73k
  *publicID = xmlParsePubidLiteral(ctxt);
5045
3.73k
  if (*publicID == NULL) {
5046
651
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047
651
  }
5048
3.73k
  if (strict) {
5049
      /*
5050
       * We don't handle [83] so "S SystemLiteral" is required.
5051
       */
5052
1.35k
      if (SKIP_BLANKS == 0) {
5053
1.15k
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054
1.15k
      "Space required after the Public Identifier\n");
5055
1.15k
      }
5056
2.38k
  } else {
5057
      /*
5058
       * We handle [83] so we return immediately, if
5059
       * "S SystemLiteral" is not detected. We skip blanks if no
5060
             * system literal was found, but this is harmless since we must
5061
             * be at the end of a NotationDecl.
5062
       */
5063
2.38k
      if (SKIP_BLANKS == 0) return(NULL);
5064
675
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065
675
  }
5066
1.83k
  URI = xmlParseSystemLiteral(ctxt);
5067
1.83k
  if (URI == NULL) {
5068
1.29k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069
1.29k
        }
5070
1.83k
    }
5071
17.6k
    return(URI);
5072
19.5k
}
5073
5074
/**
5075
 * xmlParseCommentComplex:
5076
 * @ctxt:  an XML parser context
5077
 * @buf:  the already parsed part of the buffer
5078
 * @len:  number of bytes in the buffer
5079
 * @size:  allocated size of the buffer
5080
 *
5081
 * Skip an XML (SGML) comment <!-- .... -->
5082
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5083
 *  must not occur within comments. "
5084
 * This is the slow routine in case the accelerator for ascii didn't work
5085
 *
5086
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087
 */
5088
static void
5089
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090
12.1k
                       size_t len, size_t size) {
5091
12.1k
    int q, ql;
5092
12.1k
    int r, rl;
5093
12.1k
    int cur, l;
5094
12.1k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095
0
                       XML_MAX_HUGE_LENGTH :
5096
12.1k
                       XML_MAX_TEXT_LENGTH;
5097
5098
12.1k
    if (buf == NULL) {
5099
4.94k
        len = 0;
5100
4.94k
  size = XML_PARSER_BUFFER_SIZE;
5101
4.94k
  buf = (xmlChar *) xmlMallocAtomic(size);
5102
4.94k
  if (buf == NULL) {
5103
0
      xmlErrMemory(ctxt);
5104
0
      return;
5105
0
  }
5106
4.94k
    }
5107
12.1k
    q = CUR_CHAR(ql);
5108
12.1k
    if (q == 0)
5109
7.85k
        goto not_terminated;
5110
4.31k
    if (!IS_CHAR(q)) {
5111
101
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112
101
                          "xmlParseComment: invalid xmlChar value %d\n",
5113
101
                    q);
5114
101
  xmlFree (buf);
5115
101
  return;
5116
101
    }
5117
4.20k
    NEXTL(ql);
5118
4.20k
    r = CUR_CHAR(rl);
5119
4.20k
    if (r == 0)
5120
194
        goto not_terminated;
5121
4.01k
    if (!IS_CHAR(r)) {
5122
136
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123
136
                          "xmlParseComment: invalid xmlChar value %d\n",
5124
136
                    r);
5125
136
  xmlFree (buf);
5126
136
  return;
5127
136
    }
5128
3.87k
    NEXTL(rl);
5129
3.87k
    cur = CUR_CHAR(l);
5130
3.87k
    if (cur == 0)
5131
202
        goto not_terminated;
5132
48.2k
    while (IS_CHAR(cur) && /* checked */
5133
48.2k
           ((cur != '>') ||
5134
47.0k
      (r != '-') || (q != '-'))) {
5135
44.5k
  if ((r == '-') && (q == '-')) {
5136
505
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137
505
  }
5138
44.5k
  if (len + 5 >= size) {
5139
541
      xmlChar *new_buf;
5140
541
            size_t new_size;
5141
5142
541
      new_size = size * 2;
5143
541
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144
541
      if (new_buf == NULL) {
5145
0
    xmlFree (buf);
5146
0
    xmlErrMemory(ctxt);
5147
0
    return;
5148
0
      }
5149
541
      buf = new_buf;
5150
541
            size = new_size;
5151
541
  }
5152
44.5k
  COPY_BUF(buf, len, q);
5153
44.5k
        if (len > maxLength) {
5154
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155
0
                         "Comment too big found", NULL);
5156
0
            xmlFree (buf);
5157
0
            return;
5158
0
        }
5159
5160
44.5k
  q = r;
5161
44.5k
  ql = rl;
5162
44.5k
  r = cur;
5163
44.5k
  rl = l;
5164
5165
44.5k
  NEXTL(l);
5166
44.5k
  cur = CUR_CHAR(l);
5167
5168
44.5k
    }
5169
3.67k
    buf[len] = 0;
5170
3.67k
    if (cur == 0) {
5171
1.05k
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172
1.05k
                       "Comment not terminated \n<!--%.50s\n", buf);
5173
2.62k
    } else if (!IS_CHAR(cur)) {
5174
141
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175
141
                          "xmlParseComment: invalid xmlChar value %d\n",
5176
141
                    cur);
5177
2.48k
    } else {
5178
2.48k
        NEXT;
5179
2.48k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180
2.48k
      (!ctxt->disableSAX))
5181
2.25k
      ctxt->sax->comment(ctxt->userData, buf);
5182
2.48k
    }
5183
3.67k
    xmlFree(buf);
5184
3.67k
    return;
5185
8.25k
not_terminated:
5186
8.25k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187
8.25k
       "Comment not terminated\n", NULL);
5188
8.25k
    xmlFree(buf);
5189
8.25k
    return;
5190
3.67k
}
5191
5192
/**
5193
 * xmlParseComment:
5194
 * @ctxt:  an XML parser context
5195
 *
5196
 * DEPRECATED: Internal function, don't use.
5197
 *
5198
 * Parse an XML (SGML) comment. Always consumes '<!'.
5199
 *
5200
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5201
 *  must not occur within comments. "
5202
 *
5203
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 *
 * Implementation notes: the loop below is a fast path that copies runs
 * of plain ASCII from the input into a growing buffer with memcpy().
 * On "-->" the buffer (or an empty string when nothing was collected)
 * is passed to the SAX comment callback and the function returns. A
 * "--" that is not followed by '>' is reported as
 * XML_ERR_HYPHEN_IN_COMMENT and scanning continues. When the scan
 * stops on a byte outside 0x20-0x7F, 0x09 and 0x0A, the buffer, its
 * length and its size are handed to xmlParseCommentComplex().
5204
 */
5205
void
5206
15.1k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5207
15.1k
    xmlChar *buf = NULL;
5208
15.1k
    size_t size = XML_PARSER_BUFFER_SIZE;
5209
15.1k
    size_t len = 0;
5210
15.1k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211
0
                       XML_MAX_HUGE_LENGTH :
5212
15.1k
                       XML_MAX_TEXT_LENGTH;
5213
15.1k
    const xmlChar *in;
5214
15.1k
    size_t nbchar = 0;
5215
15.1k
    int ccol;
5216
5217
    /*
5218
     * Check that there is a comment right here.
5219
     */
5220
15.1k
    if ((RAW != '<') || (NXT(1) != '!'))
5221
0
        return;
5222
15.1k
    SKIP(2);
5223
15.1k
    if ((RAW != '-') || (NXT(1) != '-'))
5224
4
        return;
5225
15.1k
    SKIP(2);
5226
15.1k
    GROW;
5227
5228
    /*
5229
     * Accelerated common case where input don't need to be
5230
     * modified before passing it to the handler.
5231
     */
5232
15.1k
    in = ctxt->input->cur;
5233
15.1k
    do {
5234
15.1k
  if (*in == 0xA) {
5235
417
      do {
5236
417
    ctxt->input->line++; ctxt->input->col = 1;
5237
417
    in++;
5238
417
      } while (*in == 0xA);
5239
221
  }
5240
26.3k
get_more:
5241
26.3k
        ccol = ctxt->input->col;
  /* Step over printable ASCII and tabs; '-' (and anything else) stops the scan. */
5242
99.8k
  while (((*in > '-') && (*in <= 0x7F)) ||
5243
99.8k
         ((*in >= 0x20) && (*in < '-')) ||
5244
99.8k
         (*in == 0x09)) {
5245
73.4k
        in++;
5246
73.4k
        ccol++;
5247
73.4k
  }
5248
26.3k
  ctxt->input->col = ccol;
5249
26.3k
  if (*in == 0xA) {
5250
412
      do {
5251
412
    ctxt->input->line++; ctxt->input->col = 1;
5252
412
    in++;
5253
412
      } while (*in == 0xA);
5254
218
      goto get_more;
5255
218
  }
5256
26.1k
  nbchar = in - ctxt->input->cur;
5257
  /*
5258
   * save current set of data
5259
   */
5260
26.1k
  if (nbchar > 0) {
5261
15.2k
            if (buf == NULL) {
                /*
                 * First chunk: allocate exactly nbchar + 1 bytes when
                 * "--" follows immediately, otherwise leave headroom.
                 */
5262
8.30k
                if ((*in == '-') && (in[1] == '-'))
5263
1.25k
                    size = nbchar + 1;
5264
7.05k
                else
5265
7.05k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5266
8.30k
                buf = (xmlChar *) xmlMallocAtomic(size);
5267
8.30k
                if (buf == NULL) {
5268
0
                    xmlErrMemory(ctxt);
5269
0
                    return;
5270
0
                }
5271
8.30k
                len = 0;
5272
8.30k
            } else if (len + nbchar + 1 >= size) {
5273
604
                xmlChar *new_buf;
5274
604
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275
604
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5276
604
                if (new_buf == NULL) {
5277
0
                    xmlFree (buf);
5278
0
                    xmlErrMemory(ctxt);
5279
0
                    return;
5280
0
                }
5281
604
                buf = new_buf;
5282
604
            }
5283
15.2k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5284
15.2k
            len += nbchar;
5285
15.2k
            buf[len] = 0;
5286
15.2k
  }
5287
26.1k
        if (len > maxLength) {
5288
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289
0
                         "Comment too big found", NULL);
5290
0
            xmlFree (buf);
5291
0
            return;
5292
0
        }
5293
26.1k
  ctxt->input->cur = in;
5294
26.1k
  if (*in == 0xA) {
5295
0
      in++;
5296
0
      ctxt->input->line++; ctxt->input->col = 1;
5297
0
  }
  /* CR LF: move input->cur onto the LF so the CR is not copied, then rescan. */
5298
26.1k
  if (*in == 0xD) {
5299
1.12k
      in++;
5300
1.12k
      if (*in == 0xA) {
5301
200
    ctxt->input->cur = in;
5302
200
    in++;
5303
200
    ctxt->input->line++; ctxt->input->col = 1;
5304
200
    goto get_more;
5305
200
      }
5306
929
      in--;
5307
929
  }
5308
25.9k
  SHRINK;
5309
25.9k
  GROW;
5310
25.9k
  in = ctxt->input->cur;
5311
25.9k
  if (*in == '-') {
5312
13.7k
      if (in[1] == '-') {
5313
8.11k
          if (in[2] == '>') {
        /* "-->": end of comment, report it and release the buffer. */
5314
2.94k
        SKIP(3);
5315
2.94k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316
2.94k
            (!ctxt->disableSAX)) {
5317
2.24k
      if (buf != NULL)
5318
878
          ctxt->sax->comment(ctxt->userData, buf);
5319
1.36k
      else
5320
1.36k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321
2.24k
        }
5322
2.94k
        if (buf != NULL)
5323
1.08k
            xmlFree(buf);
5324
2.94k
        return;
5325
2.94k
    }
5326
5.16k
    if (buf != NULL) {
5327
1.28k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328
1.28k
                          "Double hyphen within comment: "
5329
1.28k
                                      "<!--%.50s\n",
5330
1.28k
              buf);
5331
1.28k
    } else
5332
3.87k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333
3.87k
                          "Double hyphen within comment\n", NULL);
5334
5.16k
    in++;
5335
5.16k
    ctxt->input->col++;
5336
5.16k
      }
5337
10.8k
      in++;
5338
10.8k
      ctxt->input->col++;
5339
10.8k
      goto get_more;
5340
13.7k
  }
5341
25.9k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
    /*
     * The next byte is outside the fast-path range: the slow routine
     * continues with buf (which may still be NULL) and releases it.
     */
5342
12.1k
    xmlParseCommentComplex(ctxt, buf, len, size);
5343
12.1k
    return;
5344
15.1k
}
5345
5346
5347
/**
5348
 * xmlParsePITarget:
5349
 * @ctxt:  an XML parser context
5350
 *
5351
 * DEPRECATED: Internal function, don't use.
5352
 *
5353
 * parse the name of a PI
5354
 *
5355
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356
 *
5357
 * Returns the PITarget name or NULL
5358
 */
5359
5360
const xmlChar *
5361
25.9k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5362
25.9k
    const xmlChar *name;
5363
5364
25.9k
    name = xmlParseName(ctxt);
5365
25.9k
    if ((name != NULL) &&
5366
25.9k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5367
25.9k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5368
25.9k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5369
1.30k
  int i;
5370
1.30k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5371
1.30k
      (name[2] == 'l') && (name[3] == 0)) {
5372
200
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373
200
     "XML declaration allowed only at the start of the document\n");
5374
200
      return(name);
5375
1.10k
  } else if (name[3] == 0) {
5376
634
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377
634
      return(name);
5378
634
  }
5379
1.22k
  for (i = 0;;i++) {
5380
1.22k
      if (xmlW3CPIs[i] == NULL) break;
5381
948
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382
194
          return(name);
5383
948
  }
5384
280
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385
280
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5386
280
          NULL, NULL);
5387
280
    }
5388
24.8k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389
1.24k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390
1.24k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391
1.24k
    }
5392
24.8k
    return(name);
5393
25.9k
}
5394
5395
#ifdef LIBXML_CATALOG_ENABLED
5396
/**
5397
 * xmlParseCatalogPI:
5398
 * @ctxt:  an XML parser context
5399
 * @catalog:  the PI value string
5400
 *
5401
 * parse an XML Catalog Processing Instruction.
5402
 *
5403
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5404
 *
5405
 * Occurs only if allowed by the user and if happening in the Misc
5406
 * part of the document before any doctype information
5407
 * This will add the given catalog to the parsing context in order
5408
 * to be used if there is a resolution need further down in the document
5409
 */
5410
5411
static void
5412
597
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5413
597
    xmlChar *URL = NULL;
5414
597
    const xmlChar *tmp, *base;
5415
597
    xmlChar marker;
5416
5417
597
    tmp = catalog;
5418
597
    while (IS_BLANK_CH(*tmp)) tmp++;
5419
597
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5420
132
  goto error;
5421
465
    tmp += 7;
5422
468
    while (IS_BLANK_CH(*tmp)) tmp++;
5423
465
    if (*tmp != '=') {
5424
84
  return;
5425
84
    }
5426
381
    tmp++;
5427
445
    while (IS_BLANK_CH(*tmp)) tmp++;
5428
381
    marker = *tmp;
5429
381
    if ((marker != '\'') && (marker != '"'))
5430
74
  goto error;
5431
307
    tmp++;
5432
307
    base = tmp;
5433
789
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5434
307
    if (*tmp == 0)
5435
103
  goto error;
5436
204
    URL = xmlStrndup(base, tmp - base);
5437
204
    tmp++;
5438
423
    while (IS_BLANK_CH(*tmp)) tmp++;
5439
204
    if (*tmp != 0)
5440
52
  goto error;
5441
5442
152
    if (URL != NULL) {
5443
        /*
5444
         * Unfortunately, the catalog API doesn't report OOM errors.
5445
         * xmlGetLastError isn't very helpful since we don't know
5446
         * where the last error came from. We'd have to reset it
5447
         * before this call and restore it afterwards.
5448
         */
5449
152
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5450
152
  xmlFree(URL);
5451
152
    }
5452
152
    return;
5453
5454
361
error:
5455
361
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5456
361
            "Catalog PI syntax error: %s\n",
5457
361
      catalog, NULL);
5458
361
    if (URL != NULL)
5459
52
  xmlFree(URL);
5460
361
}
5461
#endif
5462
5463
/**
5464
 * xmlP