Coverage Report

Created: 2024-02-25 06:11

/src/libprotobuf-mutator/build/examples/libxml2/external.libxml2/src/external.libxml2/parser.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
 *            implemented on top of the SAX interfaces
4
 *
5
 * References:
6
 *   The XML specification:
7
 *     http://www.w3.org/TR/REC-xml
8
 *   Original 1.0 version:
9
 *     http://www.w3.org/TR/1998/REC-xml-19980210
10
 *   XML second edition working draft
11
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
 *
13
 * Okay this is a big file, the parser core is around 7000 lines, then it
14
 * is followed by the progressive parser top routines, then the various
15
 * high level APIs to call the parser and a few miscellaneous functions.
16
 * A number of helper functions and deprecated ones have been moved to
17
 * parserInternals.c to reduce this file size.
18
 * As much as possible the functions are associated with their relative
19
 * production in the XML specification. A few productions defining the
20
 * different ranges of character are actually implemented either in
21
 * parserInternals.h or parserInternals.c
22
 * The DOM tree build is realized from the default SAX callbacks in
23
 * the module SAX.c.
24
 * The routines doing the validation checks are in valid.c and called either
25
 * from the SAX callbacks or as standalone functions using a preparsed
26
 * document.
27
 *
28
 * See Copyright for the status of this software.
29
 *
30
 * daniel@veillard.com
31
 */
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
45.4k
#define NS_INDEX_EMPTY  INT_MAX
78
24.2k
#define NS_INDEX_XML    (INT_MAX - 1)
79
13.0k
#define URI_HASH_EMPTY  0xD943A04E
80
1.83k
#define URI_HASH_XML    0xF0451F02
81
82
struct _xmlStartTag {
83
    const xmlChar *prefix;
84
    const xmlChar *URI;
85
    int line;
86
    int nsNr;
87
};
88
89
/*
 * Extra per-namespace data stored next to the namespace table.
 *
 * NOTE(review): field semantics are inferred from the names; the code
 * using this type is outside this view — confirm before relying on them.
 */
typedef struct {
    void *saxData;              /* opaque pointer; owner not visible here */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the URI */
    unsigned elementId;         /* presumably identifies the owning element */
    int oldIndex;               /* presumably the index of a shadowed entry */
} xmlParserNsExtra;
96
97
/*
 * One slot of the namespace hash table (see the "hash" member of
 * struct _xmlParserNsData).
 *
 * NOTE(review): field semantics inferred from names — confirm.
 */
typedef struct {
    unsigned hashValue;     /* presumably the hash stored for this slot */
    int index;              /* presumably an index into the namespace table */
} xmlParserNsBucket;
101
102
struct _xmlParserNsData {
103
    xmlParserNsExtra *extra;
104
105
    unsigned hashSize;
106
    unsigned hashElems;
107
    xmlParserNsBucket *hash;
108
109
    unsigned elementId;
110
    int defaultNsIndex;
111
    int minNsIndex;
112
};
113
114
/*
 * One slot of an attribute hash table.
 *
 * NOTE(review): usage is outside this view; "index" presumably refers to
 * a position in an attribute array — confirm.
 */
struct _xmlAttrHashBucket {
    int index;
};
117
118
static int
119
xmlParseElementStart(xmlParserCtxtPtr ctxt);
120
121
static void
122
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
123
124
static xmlEntityPtr
125
xmlLookupGeneralEntity(xmlParserCtxtPtr ctxt, const xmlChar *name, int inAttr);
126
127
static const xmlChar *
128
xmlParseEntityRefInternal(xmlParserCtxtPtr ctxt);
129
130
/************************************************************************
131
 *                  *
132
 *  Arbitrary limits set in the parser. See XML_PARSE_HUGE    *
133
 *                  *
134
 ************************************************************************/
135
136
#define XML_PARSER_BIG_ENTITY 1000
137
#define XML_PARSER_LOT_ENTITY 5000
138
139
/*
140
 * Constants for protection against abusive entity expansion
141
 * ("billion laughs").
142
 */
143
144
/*
145
 * A certain amount of entity expansion which is always allowed.
146
 */
147
130k
#define XML_PARSER_ALLOWED_EXPANSION 1000000
148
149
/*
150
 * Fixed cost for each entity reference. This crudely models processing time
151
 * as well to protect, for example, against exponential expansion of empty
152
 * or very short entities.
153
 */
154
134k
#define XML_ENT_FIXED_COST 20
155
156
/**
157
 * xmlParserMaxDepth:
158
 *
159
 * arbitrary depth limit for the XML documents that we allow to
160
 * process. This is not a limitation of the parser but a safety
161
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
162
 * parser option.
163
 */
164
const unsigned int xmlParserMaxDepth = 256;
165
166
167
168
58.7k
#define XML_PARSER_BIG_BUFFER_SIZE 300
169
56.8k
#define XML_PARSER_BUFFER_SIZE 100
170
23.4k
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
171
172
/**
173
 * XML_PARSER_CHUNK_SIZE
174
 *
175
 * When calling GROW, this is the minimal amount of data
176
 * the parser expects to have received. It is not a hard
177
 * limit but an optimization when reading strings like Names.
178
 * It is not strictly needed as long as the input's available characters
179
 * are followed by a 0, which should be provided by the I/O level.
180
 */
181
#define XML_PARSER_CHUNK_SIZE 100
182
183
/**
184
 * xmlParserVersion:
185
 *
186
 * Constant string describing the internal version of the library
187
 */
188
const char *const
189
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
190
191
/*
192
 * List of XML prefixed PI allowed by W3C specs
193
 */
194
195
static const char* const xmlW3CPIs[] = {
196
    "xml-stylesheet",
197
    "xml-model",
198
    NULL
199
};
200
201
202
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
203
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
204
                                              const xmlChar **str);
205
206
static void
207
xmlCtxtParseEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr ent);
208
209
static int
210
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
211
212
/************************************************************************
213
 *                  *
214
 *    Some factorized error routines        *
215
 *                  *
216
 ************************************************************************/
217
218
static void
219
0
xmlErrMemory(xmlParserCtxtPtr ctxt) {
220
0
    xmlCtxtErrMemory(ctxt);
221
0
}
222
223
/**
224
 * xmlErrAttributeDup:
225
 * @ctxt:  an XML parser context
226
 * @prefix:  the attribute prefix
227
 * @localname:  the attribute localname
228
 *
229
 * Handle a redefinition of attribute error
230
 */
231
static void
232
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
233
                   const xmlChar * localname)
234
4.84k
{
235
4.84k
    if (prefix == NULL)
236
4.59k
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
237
4.59k
                   XML_ERR_FATAL, localname, NULL, NULL, 0,
238
4.59k
                   "Attribute %s redefined\n", localname);
239
243
    else
240
243
        xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, XML_ERR_ATTRIBUTE_REDEFINED,
241
243
                   XML_ERR_FATAL, prefix, localname, NULL, 0,
242
243
                   "Attribute %s:%s redefined\n", prefix, localname);
243
4.84k
}
244
245
/**
246
 * xmlFatalErrMsg:
247
 * @ctxt:  an XML parser context
248
 * @error:  the error number
249
 * @msg:  the error message
250
 *
251
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
252
 */
253
static void LIBXML_ATTR_FORMAT(3,0)
254
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
255
               const char *msg)
256
288k
{
257
288k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
258
288k
               NULL, NULL, NULL, 0, "%s", msg);
259
288k
}
260
261
/**
262
 * xmlWarningMsg:
263
 * @ctxt:  an XML parser context
264
 * @error:  the error number
265
 * @msg:  the error message
266
 * @str1:  extra data
267
 * @str2:  extra data
268
 *
269
 * Handle a warning.
270
 */
271
void LIBXML_ATTR_FORMAT(3,0)
272
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
273
              const char *msg, const xmlChar *str1, const xmlChar *str2)
274
4.75k
{
275
4.75k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_WARNING,
276
4.75k
               str1, str2, NULL, 0, msg, str1, str2);
277
4.75k
}
278
279
/**
280
 * xmlValidityError:
281
 * @ctxt:  an XML parser context
282
 * @error:  the error number
283
 * @msg:  the error message
284
 * @str1:  extra data
285
 *
286
 * Handle a validity error.
287
 */
288
static void LIBXML_ATTR_FORMAT(3,0)
289
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
290
              const char *msg, const xmlChar *str1, const xmlChar *str2)
291
1.57k
{
292
1.57k
    ctxt->valid = 0;
293
294
1.57k
    xmlCtxtErr(ctxt, NULL, XML_FROM_DTD, error, XML_ERR_ERROR,
295
1.57k
               str1, str2, NULL, 0, msg, str1, str2);
296
1.57k
}
297
298
/**
299
 * xmlFatalErrMsgInt:
300
 * @ctxt:  an XML parser context
301
 * @error:  the error number
302
 * @msg:  the error message
303
 * @val:  an integer value
304
 *
305
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
306
 */
307
static void LIBXML_ATTR_FORMAT(3,0)
308
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
309
                  const char *msg, int val)
310
11.2k
{
311
11.2k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
312
11.2k
               NULL, NULL, NULL, val, msg, val);
313
11.2k
}
314
315
/**
316
 * xmlFatalErrMsgStrIntStr:
317
 * @ctxt:  an XML parser context
318
 * @error:  the error number
319
 * @msg:  the error message
320
 * @str1:  an string info
321
 * @val:  an integer value
322
 * @str2:  an string info
323
 *
324
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
325
 */
326
static void LIBXML_ATTR_FORMAT(3,0)
327
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
328
                  const char *msg, const xmlChar *str1, int val,
329
      const xmlChar *str2)
330
107k
{
331
107k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
332
107k
               str1, str2, NULL, val, msg, str1, val, str2);
333
107k
}
334
335
/**
336
 * xmlFatalErrMsgStr:
337
 * @ctxt:  an XML parser context
338
 * @error:  the error number
339
 * @msg:  the error message
340
 * @val:  a string value
341
 *
342
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
343
 */
344
static void LIBXML_ATTR_FORMAT(3,0)
345
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
346
                  const char *msg, const xmlChar * val)
347
77.2k
{
348
77.2k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
349
77.2k
               val, NULL, NULL, 0, msg, val);
350
77.2k
}
351
352
/**
353
 * xmlErrMsgStr:
354
 * @ctxt:  an XML parser context
355
 * @error:  the error number
356
 * @msg:  the error message
357
 * @val:  a string value
358
 *
359
 * Handle a non fatal parser error
360
 */
361
static void LIBXML_ATTR_FORMAT(3,0)
362
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
363
                  const char *msg, const xmlChar * val)
364
13.9k
{
365
13.9k
    xmlCtxtErr(ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_ERROR,
366
13.9k
               val, NULL, NULL, 0, msg, val);
367
13.9k
}
368
369
/**
370
 * xmlNsErr:
371
 * @ctxt:  an XML parser context
372
 * @error:  the error number
373
 * @msg:  the message
374
 * @info1:  extra information string
375
 * @info2:  extra information string
376
 *
377
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
378
 */
379
static void LIBXML_ATTR_FORMAT(3,0)
380
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
381
         const char *msg,
382
         const xmlChar * info1, const xmlChar * info2,
383
         const xmlChar * info3)
384
40.4k
{
385
40.4k
    ctxt->nsWellFormed = 0;
386
387
40.4k
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_ERROR,
388
40.4k
               info1, info2, info3, 0, msg, info1, info2, info3);
389
40.4k
}
390
391
/**
392
 * xmlNsWarn
393
 * @ctxt:  an XML parser context
394
 * @error:  the error number
395
 * @msg:  the message
396
 * @info1:  extra information string
397
 * @info2:  extra information string
398
 *
399
 * Handle a namespace warning error
400
 */
401
static void LIBXML_ATTR_FORMAT(3,0)
402
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
403
         const char *msg,
404
         const xmlChar * info1, const xmlChar * info2,
405
         const xmlChar * info3)
406
770
{
407
770
    xmlCtxtErr(ctxt, NULL, XML_FROM_NAMESPACE, error, XML_ERR_WARNING,
408
770
               info1, info2, info3, 0, msg, info1, info2, info3);
409
770
}
410
411
/**
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result at ULONG_MAX instead of
 * wrapping around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    /* Headroom left before the accumulator would overflow. */
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : (*dst + val);
}
418
419
/**
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result at ULONG_MAX instead of
 * wrapping around.
 *
 * @val is taken as size_t so that, on targets where size_t is wider than
 * unsigned long (e.g. 64-bit Windows), large values saturate the
 * accumulator instead of being silently truncated at the call.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    /* The comparison is done in the wider of the two unsigned types. */
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;
}
426
427
/**
428
 * xmlParserEntityCheck:
429
 * @ctxt:  parser context
430
 * @extra:  sum of unexpanded entity sizes
431
 *
432
 * Check for non-linear entity expansion behaviour.
433
 *
434
 * In some cases like xmlExpandEntityInAttValue, this function is called
435
 * for each, possibly nested entity and its unexpanded content length.
436
 *
437
 * In other cases like xmlParseReference, it's only called for each
438
 * top-level entity with its unexpanded content length plus the sum of
439
 * the unexpanded content lengths (plus fixed cost) of all nested
440
 * entities.
441
 *
442
 * Summing the unexpanded lengths also adds the length of the reference.
443
 * This is by design. Taking the length of the entity name into account
444
 * discourages attacks that try to waste CPU time with abusively long
445
 * entity names. See test/recurse/lol6.xml for example. Each call also
446
 * adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
447
 * short entities.
448
 *
449
 * Returns 1 on error, 0 on success.
450
 */
451
static int
452
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
453
173k
{
454
173k
    unsigned long consumed;
455
173k
    unsigned long *expandedSize;
456
173k
    xmlParserInputPtr input = ctxt->input;
457
173k
    xmlEntityPtr entity = input->entity;
458
459
173k
    if ((entity) && (entity->flags & XML_ENT_CHECKED))
460
43.5k
        return(0);
461
462
    /*
463
     * Compute total consumed bytes so far, including input streams of
464
     * external entities.
465
     */
466
130k
    consumed = input->consumed;
467
130k
    xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
468
130k
    xmlSaturatedAdd(&consumed, ctxt->sizeentities);
469
470
130k
    if (entity)
471
4.05k
        expandedSize = &entity->expandedSize;
472
126k
    else
473
126k
        expandedSize = &ctxt->sizeentcopy;
474
475
    /*
476
     * Add extra cost and some fixed cost.
477
     */
478
130k
    xmlSaturatedAdd(expandedSize, extra);
479
130k
    xmlSaturatedAdd(expandedSize, XML_ENT_FIXED_COST);
480
481
    /*
482
     * It's important to always use saturation arithmetic when tracking
483
     * entity sizes to make the size checks reliable. If "sizeentcopy"
484
     * overflows, we have to abort.
485
     */
486
130k
    if ((*expandedSize > XML_PARSER_ALLOWED_EXPANSION) &&
487
130k
        ((*expandedSize >= ULONG_MAX) ||
488
16
         (*expandedSize / ctxt->maxAmpl > consumed))) {
489
16
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
490
16
                       "Maximum entity amplification factor exceeded, see "
491
16
                       "xmlCtxtSetMaxAmplification.\n");
492
16
        xmlHaltParser(ctxt);
493
16
        return(1);
494
16
    }
495
496
130k
    return(0);
497
130k
}
498
499
/************************************************************************
500
 *                  *
501
 *    Library wide options          *
502
 *                  *
503
 ************************************************************************/
504
505
/**
506
  * xmlHasFeature:
507
  * @feature: the feature to be examined
508
  *
509
  * Examines if the library has been compiled with a given feature.
510
  *
511
  * Returns a non-zero value if the feature exist, otherwise zero.
512
  * Returns zero (0) if the feature does not exist or an unknown
513
  * unknown feature is requested, non-zero otherwise.
514
  */
515
int
516
xmlHasFeature(xmlFeature feature)
517
0
{
518
0
    switch (feature) {
519
0
  case XML_WITH_THREAD:
520
0
#ifdef LIBXML_THREAD_ENABLED
521
0
      return(1);
522
#else
523
      return(0);
524
#endif
525
0
        case XML_WITH_TREE:
526
0
#ifdef LIBXML_TREE_ENABLED
527
0
            return(1);
528
#else
529
            return(0);
530
#endif
531
0
        case XML_WITH_OUTPUT:
532
0
#ifdef LIBXML_OUTPUT_ENABLED
533
0
            return(1);
534
#else
535
            return(0);
536
#endif
537
0
        case XML_WITH_PUSH:
538
0
#ifdef LIBXML_PUSH_ENABLED
539
0
            return(1);
540
#else
541
            return(0);
542
#endif
543
0
        case XML_WITH_READER:
544
0
#ifdef LIBXML_READER_ENABLED
545
0
            return(1);
546
#else
547
            return(0);
548
#endif
549
0
        case XML_WITH_PATTERN:
550
0
#ifdef LIBXML_PATTERN_ENABLED
551
0
            return(1);
552
#else
553
            return(0);
554
#endif
555
0
        case XML_WITH_WRITER:
556
0
#ifdef LIBXML_WRITER_ENABLED
557
0
            return(1);
558
#else
559
            return(0);
560
#endif
561
0
        case XML_WITH_SAX1:
562
0
#ifdef LIBXML_SAX1_ENABLED
563
0
            return(1);
564
#else
565
            return(0);
566
#endif
567
0
        case XML_WITH_FTP:
568
#ifdef LIBXML_FTP_ENABLED
569
            return(1);
570
#else
571
0
            return(0);
572
0
#endif
573
0
        case XML_WITH_HTTP:
574
0
#ifdef LIBXML_HTTP_ENABLED
575
0
            return(1);
576
#else
577
            return(0);
578
#endif
579
0
        case XML_WITH_VALID:
580
0
#ifdef LIBXML_VALID_ENABLED
581
0
            return(1);
582
#else
583
            return(0);
584
#endif
585
0
        case XML_WITH_HTML:
586
0
#ifdef LIBXML_HTML_ENABLED
587
0
            return(1);
588
#else
589
            return(0);
590
#endif
591
0
        case XML_WITH_LEGACY:
592
#ifdef LIBXML_LEGACY_ENABLED
593
            return(1);
594
#else
595
0
            return(0);
596
0
#endif
597
0
        case XML_WITH_C14N:
598
0
#ifdef LIBXML_C14N_ENABLED
599
0
            return(1);
600
#else
601
            return(0);
602
#endif
603
0
        case XML_WITH_CATALOG:
604
0
#ifdef LIBXML_CATALOG_ENABLED
605
0
            return(1);
606
#else
607
            return(0);
608
#endif
609
0
        case XML_WITH_XPATH:
610
0
#ifdef LIBXML_XPATH_ENABLED
611
0
            return(1);
612
#else
613
            return(0);
614
#endif
615
0
        case XML_WITH_XPTR:
616
0
#ifdef LIBXML_XPTR_ENABLED
617
0
            return(1);
618
#else
619
            return(0);
620
#endif
621
0
        case XML_WITH_XINCLUDE:
622
0
#ifdef LIBXML_XINCLUDE_ENABLED
623
0
            return(1);
624
#else
625
            return(0);
626
#endif
627
0
        case XML_WITH_ICONV:
628
0
#ifdef LIBXML_ICONV_ENABLED
629
0
            return(1);
630
#else
631
            return(0);
632
#endif
633
0
        case XML_WITH_ISO8859X:
634
0
#ifdef LIBXML_ISO8859X_ENABLED
635
0
            return(1);
636
#else
637
            return(0);
638
#endif
639
0
        case XML_WITH_UNICODE:
640
0
#ifdef LIBXML_UNICODE_ENABLED
641
0
            return(1);
642
#else
643
            return(0);
644
#endif
645
0
        case XML_WITH_REGEXP:
646
0
#ifdef LIBXML_REGEXP_ENABLED
647
0
            return(1);
648
#else
649
            return(0);
650
#endif
651
0
        case XML_WITH_AUTOMATA:
652
0
#ifdef LIBXML_AUTOMATA_ENABLED
653
0
            return(1);
654
#else
655
            return(0);
656
#endif
657
0
        case XML_WITH_EXPR:
658
#ifdef LIBXML_EXPR_ENABLED
659
            return(1);
660
#else
661
0
            return(0);
662
0
#endif
663
0
        case XML_WITH_SCHEMAS:
664
0
#ifdef LIBXML_SCHEMAS_ENABLED
665
0
            return(1);
666
#else
667
            return(0);
668
#endif
669
0
        case XML_WITH_SCHEMATRON:
670
0
#ifdef LIBXML_SCHEMATRON_ENABLED
671
0
            return(1);
672
#else
673
            return(0);
674
#endif
675
0
        case XML_WITH_MODULES:
676
0
#ifdef LIBXML_MODULES_ENABLED
677
0
            return(1);
678
#else
679
            return(0);
680
#endif
681
0
        case XML_WITH_DEBUG:
682
0
#ifdef LIBXML_DEBUG_ENABLED
683
0
            return(1);
684
#else
685
            return(0);
686
#endif
687
0
        case XML_WITH_DEBUG_MEM:
688
#ifdef DEBUG_MEMORY_LOCATION
689
            return(1);
690
#else
691
0
            return(0);
692
0
#endif
693
0
        case XML_WITH_ZLIB:
694
0
#ifdef LIBXML_ZLIB_ENABLED
695
0
            return(1);
696
#else
697
            return(0);
698
#endif
699
0
        case XML_WITH_LZMA:
700
0
#ifdef LIBXML_LZMA_ENABLED
701
0
            return(1);
702
#else
703
            return(0);
704
#endif
705
0
        case XML_WITH_ICU:
706
#ifdef LIBXML_ICU_ENABLED
707
            return(1);
708
#else
709
0
            return(0);
710
0
#endif
711
0
        default:
712
0
      break;
713
0
     }
714
0
     return(0);
715
0
}
716
717
/************************************************************************
718
 *                  *
719
 *      Simple string buffer        *
720
 *                  *
721
 ************************************************************************/
722
723
typedef struct {
724
    xmlChar *mem;
725
    unsigned size;
726
    unsigned cap; /* size < cap */
727
    unsigned max; /* size <= max */
728
    xmlParserErrors code;
729
} xmlSBuf;
730
731
static void
732
104k
xmlSBufInit(xmlSBuf *buf, unsigned max) {
733
104k
    buf->mem = NULL;
734
104k
    buf->size = 0;
735
104k
    buf->cap = 0;
736
104k
    buf->max = max;
737
104k
    buf->code = XML_ERR_OK;
738
104k
}
739
740
static int
741
75.7k
xmlSBufGrow(xmlSBuf *buf, unsigned len) {
742
75.7k
    xmlChar *mem;
743
75.7k
    unsigned cap;
744
745
75.7k
    if (len >= UINT_MAX / 2 - buf->size) {
746
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
747
0
        return(-1);
748
0
    }
749
750
75.7k
    cap = (buf->size + len) * 2;
751
75.7k
    if (cap < 240)
752
68.4k
        cap = 240;
753
754
75.7k
    mem = xmlRealloc(buf->mem, cap);
755
75.7k
    if (mem == NULL) {
756
0
        buf->code = XML_ERR_NO_MEMORY;
757
0
        return(-1);
758
0
    }
759
760
75.7k
    buf->mem = mem;
761
75.7k
    buf->cap = cap;
762
763
75.7k
    return(0);
764
75.7k
}
765
766
static void
767
878k
xmlSBufAddString(xmlSBuf *buf, const xmlChar *str, unsigned len) {
768
878k
    if (buf->max - buf->size < len) {
769
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
770
0
        return;
771
0
    }
772
773
878k
    if (buf->cap - buf->size <= len) {
774
73.5k
        if (xmlSBufGrow(buf, len) < 0)
775
0
            return;
776
73.5k
    }
777
778
878k
    if (len > 0)
779
878k
        memcpy(buf->mem + buf->size, str, len);
780
878k
    buf->size += len;
781
878k
}
782
783
static void
784
286k
xmlSBufAddCString(xmlSBuf *buf, const char *str, unsigned len) {
785
286k
    xmlSBufAddString(buf, (const xmlChar *) str, len);
786
286k
}
787
788
static void
789
26.2k
xmlSBufAddChar(xmlSBuf *buf, int c) {
790
26.2k
    xmlChar *end;
791
792
26.2k
    if (buf->max - buf->size < 4) {
793
0
        buf->code = XML_ERR_RESOURCE_LIMIT;
794
0
        return;
795
0
    }
796
797
26.2k
    if (buf->cap - buf->size <= 4) {
798
2.20k
        if (xmlSBufGrow(buf, 4) < 0)
799
0
            return;
800
2.20k
    }
801
802
26.2k
    end = buf->mem + buf->size;
803
804
26.2k
    if (c < 0x80) {
805
18.6k
        *end = (xmlChar) c;
806
18.6k
        buf->size += 1;
807
18.6k
    } else {
808
7.66k
        buf->size += xmlCopyCharMultiByte(end, c);
809
7.66k
    }
810
26.2k
}
811
812
static void
813
21.0k
xmlSBufAddReplChar(xmlSBuf *buf) {
814
21.0k
    xmlSBufAddCString(buf, "\xEF\xBF\xBD", 3);
815
21.0k
}
816
817
static void
818
0
xmlSBufReportError(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
819
0
    if (buf->code == XML_ERR_NO_MEMORY)
820
0
        xmlCtxtErrMemory(ctxt);
821
0
    else
822
0
        xmlFatalErr(ctxt, buf->code, errMsg);
823
0
}
824
825
static xmlChar *
826
xmlSBufFinish(xmlSBuf *buf, int *sizeOut, xmlParserCtxtPtr ctxt,
827
75.7k
              const char *errMsg) {
828
75.7k
    if (buf->mem == NULL) {
829
10.6k
        buf->mem = xmlMalloc(1);
830
10.6k
        if (buf->mem == NULL) {
831
0
            buf->code = XML_ERR_NO_MEMORY;
832
10.6k
        } else {
833
10.6k
            buf->mem[0] = 0;
834
10.6k
        }
835
65.0k
    } else {
836
65.0k
        buf->mem[buf->size] = 0;
837
65.0k
    }
838
839
75.7k
    if (buf->code == XML_ERR_OK) {
840
75.7k
        if (sizeOut != NULL)
841
2.08k
            *sizeOut = buf->size;
842
75.7k
        return(buf->mem);
843
75.7k
    }
844
845
0
    xmlSBufReportError(buf, ctxt, errMsg);
846
847
0
    xmlFree(buf->mem);
848
849
0
    if (sizeOut != NULL)
850
0
        *sizeOut = 0;
851
0
    return(NULL);
852
75.7k
}
853
854
static void
855
24.2k
xmlSBufCleanup(xmlSBuf *buf, xmlParserCtxtPtr ctxt, const char *errMsg) {
856
24.2k
    if (buf->code != XML_ERR_OK)
857
0
        xmlSBufReportError(buf, ctxt, errMsg);
858
859
24.2k
    xmlFree(buf->mem);
860
24.2k
}
861
862
static int
863
xmlUTF8MultibyteLen(xmlParserCtxtPtr ctxt, const xmlChar *str,
864
216k
                    const char *errMsg) {
865
216k
    int c = str[0];
866
216k
    int c1 = str[1];
867
868
216k
    if ((c1 & 0xC0) != 0x80)
869
7.33k
        goto encoding_error;
870
871
209k
    if (c < 0xE0) {
872
        /* 2-byte sequence */
873
14.8k
        if (c < 0xC2)
874
6.32k
            goto encoding_error;
875
876
8.56k
        return(2);
877
194k
    } else {
878
194k
        int c2 = str[2];
879
880
194k
        if ((c2 & 0xC0) != 0x80)
881
145
            goto encoding_error;
882
883
193k
        if (c < 0xF0) {
884
            /* 3-byte sequence */
885
191k
            if (c == 0xE0) {
886
                /* overlong */
887
122k
                if (c1 < 0xA0)
888
88
                    goto encoding_error;
889
122k
            } else if (c == 0xED) {
890
                /* surrogate */
891
274
                if (c1 >= 0xA0)
892
46
                    goto encoding_error;
893
69.0k
            } else if (c == 0xEF) {
894
                /* U+FFFE and U+FFFF are invalid Chars */
895
64.7k
                if ((c1 == 0xBF) && (c2 >= 0xBE))
896
246
                    xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR, errMsg);
897
64.7k
            }
898
899
191k
            return(3);
900
191k
        } else {
901
            /* 4-byte sequence */
902
2.06k
            if ((str[3] & 0xC0) != 0x80)
903
73
                goto encoding_error;
904
1.98k
            if (c == 0xF0) {
905
                /* overlong */
906
395
                if (c1 < 0x90)
907
74
                    goto encoding_error;
908
1.59k
            } else if (c >= 0xF4) {
909
                /* greater than 0x10FFFF */
910
543
                if ((c > 0xF4) || (c1 >= 0x90))
911
152
                    goto encoding_error;
912
543
            }
913
914
1.76k
            return(4);
915
1.98k
        }
916
193k
    }
917
918
14.2k
encoding_error:
919
    /* Only report the first error */
920
14.2k
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
921
814
        xmlCtxtErrIO(ctxt, XML_ERR_INVALID_ENCODING, NULL);
922
814
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
923
814
    }
924
925
14.2k
    return(0);
926
209k
}
927
928
/************************************************************************
929
 *                  *
930
 *    SAX2 defaulted attributes handling      *
931
 *                  *
932
 ************************************************************************/
933
934
/**
935
 * xmlCtxtInitializeLate:
936
 * @ctxt:  an XML parser context
937
 *
938
 * Final initialization of the parser context before starting to parse.
939
 *
940
 * This accounts for users modifying struct members of parser context
941
 * directly.
942
 */
943
static void
944
18.1k
xmlCtxtInitializeLate(xmlParserCtxtPtr ctxt) {
945
18.1k
    xmlSAXHandlerPtr sax;
946
947
    /* Avoid unused variable warning if features are disabled. */
948
18.1k
    (void) sax;
949
950
    /*
951
     * Changing the SAX struct directly is still widespread practice
952
     * in internal and external code.
953
     */
954
18.1k
    if (ctxt == NULL) return;
955
18.1k
    sax = ctxt->sax;
956
18.1k
#ifdef LIBXML_SAX1_ENABLED
957
    /*
958
     * Only enable SAX2 if there SAX2 element handlers, except when there
959
     * are no element handlers at all.
960
     */
961
18.1k
    if (((ctxt->options & XML_PARSE_SAX1) == 0) &&
962
18.1k
        (sax) &&
963
18.1k
        (sax->initialized == XML_SAX2_MAGIC) &&
964
18.1k
        ((sax->startElementNs != NULL) ||
965
14.3k
         (sax->endElementNs != NULL) ||
966
14.3k
         ((sax->startElement == NULL) && (sax->endElement == NULL))))
967
14.3k
        ctxt->sax2 = 1;
968
#else
969
    ctxt->sax2 = 1;
970
#endif /* LIBXML_SAX1_ENABLED */
971
972
    /*
973
     * Some users replace the dictionary directly in the context struct.
974
     * We really need an API function to do that cleanly.
975
     */
976
18.1k
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
977
18.1k
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
978
18.1k
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
979
18.1k
    if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
980
18.1k
    (ctxt->str_xml_ns == NULL)) {
981
0
        xmlErrMemory(ctxt);
982
0
    }
983
18.1k
}
984
985
typedef struct {
986
    xmlHashedString prefix;
987
    xmlHashedString name;
988
    xmlHashedString value;
989
    const xmlChar *valueEnd;
990
    int external;
991
    int expandedSize;
992
} xmlDefAttr;
993
994
typedef struct _xmlDefAttrs xmlDefAttrs;
995
typedef xmlDefAttrs *xmlDefAttrsPtr;
996
struct _xmlDefAttrs {
997
    int nbAttrs;  /* number of defaulted attributes on that element */
998
    int maxAttrs;       /* the size of the array */
999
#if __STDC_VERSION__ >= 199901L
1000
    /* Using a C99 flexible array member avoids UBSan errors. */
1001
    xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
1002
#else
1003
    xmlDefAttr attrs[1];
1004
#endif
1005
};
1006
1007
/**
1008
 * xmlAttrNormalizeSpace:
1009
 * @src: the source string
1010
 * @dst: the target string
1011
 *
1012
 * Normalize the space in non CDATA attribute values:
1013
 * If the attribute type is not CDATA, then the XML processor MUST further
1014
 * process the normalized attribute value by discarding any leading and
1015
 * trailing space (#x20) characters, and by replacing sequences of space
1016
 * (#x20) characters by a single space (#x20) character.
1017
 * Note that the size of dst need to be at least src, and if one doesn't need
1018
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1019
 * passing src as dst is just fine.
1020
 *
1021
 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1022
 *         is needed.
1023
 */
1024
static xmlChar *
1025
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1026
19.6k
{
1027
19.6k
    if ((src == NULL) || (dst == NULL))
1028
0
        return(NULL);
1029
1030
27.3k
    while (*src == 0x20) src++;
1031
161k
    while (*src != 0) {
1032
141k
  if (*src == 0x20) {
1033
25.3k
      while (*src == 0x20) src++;
1034
12.0k
      if (*src != 0)
1035
10.7k
    *dst++ = 0x20;
1036
129k
  } else {
1037
129k
      *dst++ = *src++;
1038
129k
  }
1039
141k
    }
1040
19.6k
    *dst = 0;
1041
19.6k
    if (dst == src)
1042
11.6k
       return(NULL);
1043
7.99k
    return(dst);
1044
19.6k
}
1045
1046
/**
1047
 * xmlAddDefAttrs:
1048
 * @ctxt:  an XML parser context
1049
 * @fullname:  the element fullname
1050
 * @fullattr:  the attribute fullname
1051
 * @value:  the attribute value
1052
 *
1053
 * Add a defaulted attribute for an element
1054
 */
1055
static void
1056
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1057
               const xmlChar *fullname,
1058
               const xmlChar *fullattr,
1059
16.8k
               const xmlChar *value) {
1060
16.8k
    xmlDefAttrsPtr defaults;
1061
16.8k
    xmlDefAttr *attr;
1062
16.8k
    int len, expandedSize;
1063
16.8k
    xmlHashedString name;
1064
16.8k
    xmlHashedString prefix;
1065
16.8k
    xmlHashedString hvalue;
1066
16.8k
    const xmlChar *localname;
1067
1068
    /*
1069
     * Allows to detect attribute redefinitions
1070
     */
1071
16.8k
    if (ctxt->attsSpecial != NULL) {
1072
14.8k
        if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1073
12.4k
      return;
1074
14.8k
    }
1075
1076
4.38k
    if (ctxt->attsDefault == NULL) {
1077
2.01k
        ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1078
2.01k
  if (ctxt->attsDefault == NULL)
1079
0
      goto mem_error;
1080
2.01k
    }
1081
1082
    /*
1083
     * split the element name into prefix:localname , the string found
1084
     * are within the DTD and then not associated to namespace names.
1085
     */
1086
4.38k
    localname = xmlSplitQName3(fullname, &len);
1087
4.38k
    if (localname == NULL) {
1088
4.21k
        name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1089
4.21k
  prefix.name = NULL;
1090
4.21k
    } else {
1091
166
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1092
166
  prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1093
166
        if (prefix.name == NULL)
1094
0
            goto mem_error;
1095
166
    }
1096
4.38k
    if (name.name == NULL)
1097
0
        goto mem_error;
1098
1099
    /*
1100
     * make sure there is some storage
1101
     */
1102
4.38k
    defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1103
4.38k
    if ((defaults == NULL) ||
1104
4.38k
        (defaults->nbAttrs >= defaults->maxAttrs)) {
1105
2.36k
        xmlDefAttrsPtr temp;
1106
2.36k
        int newSize;
1107
1108
2.36k
        newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1109
2.36k
        temp = xmlRealloc(defaults,
1110
2.36k
                          sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1111
2.36k
  if (temp == NULL)
1112
0
      goto mem_error;
1113
2.36k
        if (defaults == NULL)
1114
2.07k
            temp->nbAttrs = 0;
1115
2.36k
  temp->maxAttrs = newSize;
1116
2.36k
        defaults = temp;
1117
2.36k
  if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1118
2.36k
                          defaults, NULL) < 0) {
1119
0
      xmlFree(defaults);
1120
0
      goto mem_error;
1121
0
  }
1122
2.36k
    }
1123
1124
    /*
1125
     * Split the attribute name into prefix:localname , the string found
1126
     * are within the DTD and hen not associated to namespace names.
1127
     */
1128
4.38k
    localname = xmlSplitQName3(fullattr, &len);
1129
4.38k
    if (localname == NULL) {
1130
3.12k
        name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1131
3.12k
  prefix.name = NULL;
1132
3.12k
    } else {
1133
1.25k
        name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1134
1.25k
  prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1135
1.25k
        if (prefix.name == NULL)
1136
0
            goto mem_error;
1137
1.25k
    }
1138
4.38k
    if (name.name == NULL)
1139
0
        goto mem_error;
1140
1141
    /* intern the string and precompute the end */
1142
4.38k
    len = strlen((const char *) value);
1143
4.38k
    hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1144
4.38k
    if (hvalue.name == NULL)
1145
0
        goto mem_error;
1146
1147
4.38k
    expandedSize = strlen((const char *) name.name);
1148
4.38k
    if (prefix.name != NULL)
1149
1.25k
        expandedSize += strlen((const char *) prefix.name);
1150
4.38k
    expandedSize += len;
1151
1152
4.38k
    attr = &defaults->attrs[defaults->nbAttrs++];
1153
4.38k
    attr->name = name;
1154
4.38k
    attr->prefix = prefix;
1155
4.38k
    attr->value = hvalue;
1156
4.38k
    attr->valueEnd = hvalue.name + len;
1157
4.38k
    attr->external = PARSER_EXTERNAL(ctxt);
1158
4.38k
    attr->expandedSize = expandedSize;
1159
1160
4.38k
    return;
1161
1162
0
mem_error:
1163
0
    xmlErrMemory(ctxt);
1164
0
    return;
1165
4.38k
}
1166
1167
/**
1168
 * xmlAddSpecialAttr:
1169
 * @ctxt:  an XML parser context
1170
 * @fullname:  the element fullname
1171
 * @fullattr:  the attribute fullname
1172
 * @type:  the attribute type
1173
 *
1174
 * Register this attribute type
1175
 */
1176
static void
1177
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1178
      const xmlChar *fullname,
1179
      const xmlChar *fullattr,
1180
      int type)
1181
26.8k
{
1182
26.8k
    if (ctxt->attsSpecial == NULL) {
1183
2.43k
        ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1184
2.43k
  if (ctxt->attsSpecial == NULL)
1185
0
      goto mem_error;
1186
2.43k
    }
1187
1188
26.8k
    if (xmlHashAdd2(ctxt->attsSpecial, fullname, fullattr,
1189
26.8k
                    (void *) (ptrdiff_t) type) < 0)
1190
0
        goto mem_error;
1191
26.8k
    return;
1192
1193
26.8k
mem_error:
1194
0
    xmlErrMemory(ctxt);
1195
0
    return;
1196
26.8k
}
1197
1198
/**
1199
 * xmlCleanSpecialAttrCallback:
1200
 *
1201
 * Removes CDATA attributes from the special attribute table
1202
 */
1203
static void
1204
xmlCleanSpecialAttrCallback(void *payload, void *data,
1205
                            const xmlChar *fullname, const xmlChar *fullattr,
1206
4.93k
                            const xmlChar *unused ATTRIBUTE_UNUSED) {
1207
4.93k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1208
1209
4.93k
    if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1210
674
        xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1211
674
    }
1212
4.93k
}
1213
1214
/**
1215
 * xmlCleanSpecialAttr:
1216
 * @ctxt:  an XML parser context
1217
 *
1218
 * Trim the list of attributes defined to remove all those of type
1219
 * CDATA as they are not special. This call should be done when finishing
1220
 * to parse the DTD and before starting to parse the document root.
1221
 */
1222
static void
1223
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1224
8.79k
{
1225
8.79k
    if (ctxt->attsSpecial == NULL)
1226
6.36k
        return;
1227
1228
2.43k
    xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1229
1230
2.43k
    if (xmlHashSize(ctxt->attsSpecial) == 0) {
1231
200
        xmlHashFree(ctxt->attsSpecial, NULL);
1232
200
        ctxt->attsSpecial = NULL;
1233
200
    }
1234
2.43k
    return;
1235
8.79k
}
1236
1237
/**
1238
 * xmlCheckLanguageID:
1239
 * @lang:  pointer to the string value
1240
 *
1241
 * DEPRECATED: Internal function, do not use.
1242
 *
1243
 * Checks that the value conforms to the LanguageID production:
1244
 *
1245
 * NOTE: this is somewhat deprecated, those productions were removed from
1246
 *       the XML Second edition.
1247
 *
1248
 * [33] LanguageID ::= Langcode ('-' Subcode)*
1249
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
1250
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1251
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1252
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1253
 * [38] Subcode ::= ([a-z] | [A-Z])+
1254
 *
1255
 * The current REC reference the successors of RFC 1766, currently 5646
1256
 *
1257
 * http://www.rfc-editor.org/rfc/rfc5646.txt
1258
 * langtag       = language
1259
 *                 ["-" script]
1260
 *                 ["-" region]
1261
 *                 *("-" variant)
1262
 *                 *("-" extension)
1263
 *                 ["-" privateuse]
1264
 * language      = 2*3ALPHA            ; shortest ISO 639 code
1265
 *                 ["-" extlang]       ; sometimes followed by
1266
 *                                     ; extended language subtags
1267
 *               / 4ALPHA              ; or reserved for future use
1268
 *               / 5*8ALPHA            ; or registered language subtag
1269
 *
1270
 * extlang       = 3ALPHA              ; selected ISO 639 codes
1271
 *                 *2("-" 3ALPHA)      ; permanently reserved
1272
 *
1273
 * script        = 4ALPHA              ; ISO 15924 code
1274
 *
1275
 * region        = 2ALPHA              ; ISO 3166-1 code
1276
 *               / 3DIGIT              ; UN M.49 code
1277
 *
1278
 * variant       = 5*8alphanum         ; registered variants
1279
 *               / (DIGIT 3alphanum)
1280
 *
1281
 * extension     = singleton 1*("-" (2*8alphanum))
1282
 *
1283
 *                                     ; Single alphanumerics
1284
 *                                     ; "x" reserved for private use
1285
 * singleton     = DIGIT               ; 0 - 9
1286
 *               / %x41-57             ; A - W
1287
 *               / %x59-5A             ; Y - Z
1288
 *               / %x61-77             ; a - w
1289
 *               / %x79-7A             ; y - z
1290
 *
1291
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1292
 * The parser below doesn't try to cope with extension or privateuse
1293
 * that could be added but that's not interoperable anyway
1294
 *
1295
 * Returns 1 if correct 0 otherwise
1296
 **/
1297
int
1298
xmlCheckLanguageID(const xmlChar * lang)
1299
2.11k
{
1300
2.11k
    const xmlChar *cur = lang, *nxt;
1301
1302
2.11k
    if (cur == NULL)
1303
87
        return (0);
1304
2.02k
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
1305
2.02k
        ((cur[0] == 'I') && (cur[1] == '-')) ||
1306
2.02k
        ((cur[0] == 'x') && (cur[1] == '-')) ||
1307
2.02k
        ((cur[0] == 'X') && (cur[1] == '-'))) {
1308
        /*
1309
         * Still allow IANA code and user code which were coming
1310
         * from the previous version of the XML-1.0 specification
1311
         * it's deprecated but we should not fail
1312
         */
1313
146
        cur += 2;
1314
628
        while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315
628
               ((cur[0] >= 'a') && (cur[0] <= 'z')))
1316
482
            cur++;
1317
146
        return(cur[0] == 0);
1318
146
    }
1319
1.88k
    nxt = cur;
1320
6.84k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1321
6.84k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1322
4.96k
           nxt++;
1323
1.88k
    if (nxt - cur >= 4) {
1324
        /*
1325
         * Reserved
1326
         */
1327
151
        if ((nxt - cur > 8) || (nxt[0] != 0))
1328
116
            return(0);
1329
35
        return(1);
1330
151
    }
1331
1.73k
    if (nxt - cur < 2)
1332
188
        return(0);
1333
    /* we got an ISO 639 code */
1334
1.54k
    if (nxt[0] == 0)
1335
41
        return(1);
1336
1.50k
    if (nxt[0] != '-')
1337
106
        return(0);
1338
1339
1.39k
    nxt++;
1340
1.39k
    cur = nxt;
1341
    /* now we can have extlang or script or region or variant */
1342
1.39k
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1343
248
        goto region_m49;
1344
1345
5.63k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1346
5.63k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1347
4.48k
           nxt++;
1348
1.14k
    if (nxt - cur == 4)
1349
385
        goto script;
1350
763
    if (nxt - cur == 2)
1351
147
        goto region;
1352
616
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1353
105
        goto variant;
1354
511
    if (nxt - cur != 3)
1355
93
        return(0);
1356
    /* we parsed an extlang */
1357
418
    if (nxt[0] == 0)
1358
97
        return(1);
1359
321
    if (nxt[0] != '-')
1360
66
        return(0);
1361
1362
255
    nxt++;
1363
255
    cur = nxt;
1364
    /* now we can have script or region or variant */
1365
255
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1366
50
        goto region_m49;
1367
1368
1.38k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1369
1.38k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1370
1.18k
           nxt++;
1371
205
    if (nxt - cur == 2)
1372
38
        goto region;
1373
167
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1374
29
        goto variant;
1375
138
    if (nxt - cur != 4)
1376
102
        return(0);
1377
    /* we parsed a script */
1378
421
script:
1379
421
    if (nxt[0] == 0)
1380
71
        return(1);
1381
350
    if (nxt[0] != '-')
1382
98
        return(0);
1383
1384
252
    nxt++;
1385
252
    cur = nxt;
1386
    /* now we can have region or variant */
1387
252
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1388
68
        goto region_m49;
1389
1390
1.07k
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1391
1.07k
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1392
890
           nxt++;
1393
1394
184
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
1395
38
        goto variant;
1396
146
    if (nxt - cur != 2)
1397
71
        return(0);
1398
    /* we parsed a region */
1399
306
region:
1400
306
    if (nxt[0] == 0)
1401
89
        return(1);
1402
217
    if (nxt[0] != '-')
1403
126
        return(0);
1404
1405
91
    nxt++;
1406
91
    cur = nxt;
1407
    /* now we can just have a variant */
1408
830
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1409
830
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1410
739
           nxt++;
1411
1412
91
    if ((nxt - cur < 5) || (nxt - cur > 8))
1413
66
        return(0);
1414
1415
    /* we parsed a variant */
1416
197
variant:
1417
197
    if (nxt[0] == 0)
1418
92
        return(1);
1419
105
    if (nxt[0] != '-')
1420
66
        return(0);
1421
    /* extensions and private use subtags not checked */
1422
39
    return (1);
1423
1424
366
region_m49:
1425
366
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1426
366
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1427
46
        nxt += 3;
1428
46
        goto region;
1429
46
    }
1430
320
    return(0);
1431
366
}
1432
1433
/************************************************************************
1434
 *                  *
1435
 *    Parser stacks related functions and macros    *
1436
 *                  *
1437
 ************************************************************************/
1438
1439
static xmlChar *
1440
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar **str);
1441
1442
/**
1443
 * xmlParserNsCreate:
1444
 *
1445
 * Create a new namespace database.
1446
 *
1447
 * Returns the new obejct.
1448
 */
1449
xmlParserNsData *
1450
18.1k
xmlParserNsCreate(void) {
1451
18.1k
    xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1452
1453
18.1k
    if (nsdb == NULL)
1454
0
        return(NULL);
1455
18.1k
    memset(nsdb, 0, sizeof(*nsdb));
1456
18.1k
    nsdb->defaultNsIndex = INT_MAX;
1457
1458
18.1k
    return(nsdb);
1459
18.1k
}
1460
1461
/**
1462
 * xmlParserNsFree:
1463
 * @nsdb: namespace database
1464
 *
1465
 * Free a namespace database.
1466
 */
1467
void
1468
18.1k
xmlParserNsFree(xmlParserNsData *nsdb) {
1469
18.1k
    if (nsdb == NULL)
1470
0
        return;
1471
1472
18.1k
    xmlFree(nsdb->extra);
1473
18.1k
    xmlFree(nsdb->hash);
1474
18.1k
    xmlFree(nsdb);
1475
18.1k
}
1476
1477
/**
1478
 * xmlParserNsReset:
1479
 * @nsdb: namespace database
1480
 *
1481
 * Reset a namespace database.
1482
 */
1483
static void
1484
0
xmlParserNsReset(xmlParserNsData *nsdb) {
1485
0
    if (nsdb == NULL)
1486
0
        return;
1487
1488
0
    nsdb->hashElems = 0;
1489
0
    nsdb->elementId = 0;
1490
0
    nsdb->defaultNsIndex = INT_MAX;
1491
1492
0
    if (nsdb->hash)
1493
0
        memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1494
0
}
1495
1496
/**
1497
 * xmlParserStartElement:
1498
 * @nsdb: namespace database
1499
 *
1500
 * Signal that a new element has started.
1501
 *
1502
 * Returns 0 on success, -1 if the element counter overflowed.
1503
 */
1504
static int
1505
96.6k
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1506
96.6k
    if (nsdb->elementId == UINT_MAX)
1507
0
        return(-1);
1508
96.6k
    nsdb->elementId++;
1509
1510
96.6k
    return(0);
1511
96.6k
}
1512
1513
/**
1514
 * xmlParserNsLookup:
1515
 * @ctxt: parser context
1516
 * @prefix: namespace prefix
1517
 * @bucketPtr: optional bucket (return value)
1518
 *
1519
 * Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1520
 * be set to the matching bucket, or the first empty bucket if no match
1521
 * was found.
1522
 *
1523
 * Returns the namespace index on success, INT_MAX if no namespace was
1524
 * found.
1525
 */
1526
static int
1527
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1528
186k
                  xmlParserNsBucket **bucketPtr) {
1529
186k
    xmlParserNsBucket *bucket;
1530
186k
    unsigned index, hashValue;
1531
1532
186k
    if (prefix->name == NULL)
1533
101k
        return(ctxt->nsdb->defaultNsIndex);
1534
1535
85.1k
    if (ctxt->nsdb->hashSize == 0)
1536
6.14k
        return(INT_MAX);
1537
1538
78.9k
    hashValue = prefix->hashValue;
1539
78.9k
    index = hashValue & (ctxt->nsdb->hashSize - 1);
1540
78.9k
    bucket = &ctxt->nsdb->hash[index];
1541
1542
5.10M
    while (bucket->hashValue) {
1543
5.09M
        if ((bucket->hashValue == hashValue) &&
1544
5.09M
            (bucket->index != INT_MAX)) {
1545
61.3k
            if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1546
61.3k
                if (bucketPtr != NULL)
1547
46.6k
                    *bucketPtr = bucket;
1548
61.3k
                return(bucket->index);
1549
61.3k
            }
1550
61.3k
        }
1551
1552
5.03M
        index++;
1553
5.03M
        bucket++;
1554
5.03M
        if (index == ctxt->nsdb->hashSize) {
1555
12.1k
            index = 0;
1556
12.1k
            bucket = ctxt->nsdb->hash;
1557
12.1k
        }
1558
5.03M
    }
1559
1560
17.6k
    if (bucketPtr != NULL)
1561
12.7k
        *bucketPtr = bucket;
1562
17.6k
    return(INT_MAX);
1563
78.9k
}
1564
1565
/**
1566
 * xmlParserNsLookupUri:
1567
 * @ctxt: parser context
1568
 * @prefix: namespace prefix
1569
 *
1570
 * Lookup namespace URI with given prefix.
1571
 *
1572
 * Returns the namespace URI on success, NULL if no namespace was found.
1573
 */
1574
static const xmlChar *
1575
82.6k
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1576
82.6k
    const xmlChar *ret;
1577
82.6k
    int nsIndex;
1578
1579
82.6k
    if (prefix->name == ctxt->str_xml)
1580
485
        return(ctxt->str_xml_ns);
1581
1582
    /*
1583
     * minNsIndex is used when building an entity tree. We must
1584
     * ignore namespaces declared outside the entity.
1585
     */
1586
82.1k
    nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1587
82.1k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1588
54.6k
        return(NULL);
1589
1590
27.5k
    ret = ctxt->nsTab[nsIndex * 2 + 1];
1591
27.5k
    if (ret[0] == 0)
1592
759
        ret = NULL;
1593
27.5k
    return(ret);
1594
82.1k
}
1595
1596
/**
1597
 * xmlParserNsLookupSax:
1598
 * @ctxt: parser context
1599
 * @prefix: namespace prefix
1600
 *
1601
 * Lookup extra data for the given prefix. This returns data stored
1602
 * with xmlParserNsUdpateSax.
1603
 *
1604
 * Returns the data on success, NULL if no namespace was found.
1605
 */
1606
void *
1607
6.60k
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1608
6.60k
    xmlHashedString hprefix;
1609
6.60k
    int nsIndex;
1610
1611
6.60k
    if (prefix == ctxt->str_xml)
1612
3.59k
        return(NULL);
1613
1614
3.01k
    hprefix.name = prefix;
1615
3.01k
    if (prefix != NULL)
1616
625
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1617
2.38k
    else
1618
2.38k
        hprefix.hashValue = 0;
1619
3.01k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1620
3.01k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1621
0
        return(NULL);
1622
1623
3.01k
    return(ctxt->nsdb->extra[nsIndex].saxData);
1624
3.01k
}
1625
1626
/**
1627
 * xmlParserNsUpdateSax:
1628
 * @ctxt: parser context
1629
 * @prefix: namespace prefix
1630
 * @saxData: extra data for SAX handler
1631
 *
1632
 * Sets or updates extra data for the given prefix. This value will be
1633
 * returned by xmlParserNsLookupSax as long as the namespace with the
1634
 * given prefix is in scope.
1635
 *
1636
 * Returns the data on success, NULL if no namespace was found.
1637
 */
1638
int
1639
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1640
32.2k
                     void *saxData) {
1641
32.2k
    xmlHashedString hprefix;
1642
32.2k
    int nsIndex;
1643
1644
32.2k
    if (prefix == ctxt->str_xml)
1645
0
        return(-1);
1646
1647
32.2k
    hprefix.name = prefix;
1648
32.2k
    if (prefix != NULL)
1649
11.2k
        hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1650
21.0k
    else
1651
21.0k
        hprefix.hashValue = 0;
1652
32.2k
    nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1653
32.2k
    if ((nsIndex == INT_MAX) || (nsIndex < ctxt->nsdb->minNsIndex))
1654
0
        return(-1);
1655
1656
32.2k
    ctxt->nsdb->extra[nsIndex].saxData = saxData;
1657
32.2k
    return(0);
1658
32.2k
}
1659
1660
/**
1661
 * xmlParserNsGrow:
1662
 * @ctxt: parser context
1663
 *
1664
 * Grows the namespace tables.
1665
 *
1666
 * Returns 0 on success, -1 if a memory allocation failed.
1667
 */
1668
static int
1669
2.33k
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1670
2.33k
    const xmlChar **table;
1671
2.33k
    xmlParserNsExtra *extra;
1672
2.33k
    int newSize;
1673
1674
2.33k
    if (ctxt->nsMax > INT_MAX / 2)
1675
0
        goto error;
1676
2.33k
    newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1677
1678
2.33k
    table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1679
2.33k
    if (table == NULL)
1680
0
        goto error;
1681
2.33k
    ctxt->nsTab = table;
1682
1683
2.33k
    extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1684
2.33k
    if (extra == NULL)
1685
0
        goto error;
1686
2.33k
    ctxt->nsdb->extra = extra;
1687
1688
2.33k
    ctxt->nsMax = newSize;
1689
2.33k
    return(0);
1690
1691
0
error:
1692
0
    xmlErrMemory(ctxt);
1693
0
    return(-1);
1694
2.33k
}
1695
1696
/**
1697
 * xmlParserNsPush:
1698
 * @ctxt: parser context
1699
 * @prefix: prefix with hash value
1700
 * @uri: uri with hash value
1701
 * @saxData: extra data for SAX handler
1702
 * @defAttr: whether the namespace comes from a default attribute
1703
 *
1704
 * Push a new namespace on the table.
1705
 *
1706
 * Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1707
 * -1 if a memory allocation failed.
1708
 */
1709
static int
1710
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1711
55.2k
                const xmlHashedString *uri, void *saxData, int defAttr) {
1712
55.2k
    xmlParserNsBucket *bucket = NULL;
1713
55.2k
    xmlParserNsExtra *extra;
1714
55.2k
    const xmlChar **ns;
1715
55.2k
    unsigned hashValue, nsIndex, oldIndex;
1716
1717
55.2k
    if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1718
203
        return(0);
1719
1720
55.0k
    if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1721
0
        xmlErrMemory(ctxt);
1722
0
        return(-1);
1723
0
    }
1724
1725
    /*
1726
     * Default namespace and 'xml' namespace
1727
     */
1728
55.0k
    if ((prefix == NULL) || (prefix->name == NULL)) {
1729
24.6k
        oldIndex = ctxt->nsdb->defaultNsIndex;
1730
1731
24.6k
        if (oldIndex != INT_MAX) {
1732
15.3k
            extra = &ctxt->nsdb->extra[oldIndex];
1733
1734
15.3k
            if (extra->elementId == ctxt->nsdb->elementId) {
1735
254
                if (defAttr == 0)
1736
169
                    xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1737
254
                return(0);
1738
254
            }
1739
1740
15.1k
            if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1741
15.1k
                (uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1742
334
                return(0);
1743
15.1k
        }
1744
1745
24.0k
        ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1746
24.0k
        goto populate_entry;
1747
24.6k
    }
1748
1749
    /*
1750
     * Hash table lookup
1751
     */
1752
30.3k
    oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1753
30.3k
    if (oldIndex != INT_MAX) {
1754
16.8k
        extra = &ctxt->nsdb->extra[oldIndex];
1755
1756
        /*
1757
         * Check for duplicate definitions on the same element.
1758
         */
1759
16.8k
        if (extra->elementId == ctxt->nsdb->elementId) {
1760
125
            if (defAttr == 0)
1761
115
                xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1762
125
            return(0);
1763
125
        }
1764
1765
16.7k
        if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1766
16.7k
            (uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1767
278
            return(0);
1768
1769
16.4k
        bucket->index = ctxt->nsNr;
1770
16.4k
        goto populate_entry;
1771
16.7k
    }
1772
1773
    /*
1774
     * Insert new bucket
1775
     */
1776
1777
13.5k
    hashValue = prefix->hashValue;
1778
1779
    /*
1780
     * Grow hash table, 50% fill factor
1781
     */
1782
13.5k
    if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1783
1.16k
        xmlParserNsBucket *newHash;
1784
1.16k
        unsigned newSize, i, index;
1785
1786
1.16k
        if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1787
0
            xmlErrMemory(ctxt);
1788
0
            return(-1);
1789
0
        }
1790
1.16k
        newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1791
1.16k
        newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1792
1.16k
        if (newHash == NULL) {
1793
0
            xmlErrMemory(ctxt);
1794
0
            return(-1);
1795
0
        }
1796
1.16k
        memset(newHash, 0, newSize * sizeof(newHash[0]));
1797
1798
40.3k
        for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1799
39.2k
            unsigned hv = ctxt->nsdb->hash[i].hashValue;
1800
39.2k
            unsigned newIndex;
1801
1802
39.2k
            if (hv == 0)
1803
19.6k
                continue;
1804
19.6k
            newIndex = hv & (newSize - 1);
1805
1806
1.84M
            while (newHash[newIndex].hashValue != 0) {
1807
1.82M
                newIndex++;
1808
1.82M
                if (newIndex == newSize)
1809
3.78k
                    newIndex = 0;
1810
1.82M
            }
1811
1812
19.6k
            newHash[newIndex] = ctxt->nsdb->hash[i];
1813
19.6k
        }
1814
1815
1.16k
        xmlFree(ctxt->nsdb->hash);
1816
1.16k
        ctxt->nsdb->hash = newHash;
1817
1.16k
        ctxt->nsdb->hashSize = newSize;
1818
1819
        /*
1820
         * Relookup
1821
         */
1822
1.16k
        index = hashValue & (newSize - 1);
1823
1824
13.2k
        while (newHash[index].hashValue != 0) {
1825
12.0k
            index++;
1826
12.0k
            if (index == newSize)
1827
135
                index = 0;
1828
12.0k
        }
1829
1830
1.16k
        bucket = &newHash[index];
1831
1.16k
    }
1832
1833
13.5k
    bucket->hashValue = hashValue;
1834
13.5k
    bucket->index = ctxt->nsNr;
1835
13.5k
    ctxt->nsdb->hashElems++;
1836
13.5k
    oldIndex = INT_MAX;
1837
1838
54.0k
populate_entry:
1839
54.0k
    nsIndex = ctxt->nsNr;
1840
1841
54.0k
    ns = &ctxt->nsTab[nsIndex * 2];
1842
54.0k
    ns[0] = prefix ? prefix->name : NULL;
1843
54.0k
    ns[1] = uri->name;
1844
1845
54.0k
    extra = &ctxt->nsdb->extra[nsIndex];
1846
54.0k
    extra->saxData = saxData;
1847
54.0k
    extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1848
54.0k
    extra->uriHashValue = uri->hashValue;
1849
54.0k
    extra->elementId = ctxt->nsdb->elementId;
1850
54.0k
    extra->oldIndex = oldIndex;
1851
1852
54.0k
    ctxt->nsNr++;
1853
1854
54.0k
    return(1);
1855
13.5k
}
1856
1857
/**
1858
 * xmlParserNsPop:
1859
 * @ctxt: an XML parser context
1860
 * @nr:  the number to pop
1861
 *
1862
 * Pops the top @nr namespaces and restores the hash table.
1863
 *
1864
 * Returns the number of namespaces popped.
1865
 */
1866
static int
1867
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1868
36.5k
{
1869
36.5k
    int i;
1870
1871
    /* assert(nr <= ctxt->nsNr); */
1872
1873
90.1k
    for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1874
53.6k
        const xmlChar *prefix = ctxt->nsTab[i * 2];
1875
53.6k
        xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1876
1877
53.6k
        if (prefix == NULL) {
1878
23.8k
            ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1879
29.7k
        } else {
1880
29.7k
            xmlHashedString hprefix;
1881
29.7k
            xmlParserNsBucket *bucket = NULL;
1882
1883
29.7k
            hprefix.name = prefix;
1884
29.7k
            hprefix.hashValue = extra->prefixHashValue;
1885
29.7k
            xmlParserNsLookup(ctxt, &hprefix, &bucket);
1886
            /* assert(bucket && bucket->hashValue); */
1887
29.7k
            bucket->index = extra->oldIndex;
1888
29.7k
        }
1889
53.6k
    }
1890
1891
36.5k
    ctxt->nsNr -= nr;
1892
36.5k
    return(nr);
1893
36.5k
}
1894
1895
static int
1896
1.96k
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1897
1.96k
    const xmlChar **atts;
1898
1.96k
    unsigned *attallocs;
1899
1.96k
    int maxatts;
1900
1901
1.96k
    if (nr + 5 > ctxt->maxatts) {
1902
1.96k
  maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1903
1.96k
  atts = (const xmlChar **) xmlMalloc(
1904
1.96k
             maxatts * sizeof(const xmlChar *));
1905
1.96k
  if (atts == NULL) goto mem_error;
1906
1.96k
  attallocs = xmlRealloc(ctxt->attallocs,
1907
1.96k
                               (maxatts / 5) * sizeof(attallocs[0]));
1908
1.96k
  if (attallocs == NULL) {
1909
0
            xmlFree(atts);
1910
0
            goto mem_error;
1911
0
        }
1912
1.96k
        if (ctxt->maxatts > 0)
1913
120
            memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1914
1.96k
        xmlFree(ctxt->atts);
1915
1.96k
  ctxt->atts = atts;
1916
1.96k
  ctxt->attallocs = attallocs;
1917
1.96k
  ctxt->maxatts = maxatts;
1918
1.96k
    }
1919
1.96k
    return(ctxt->maxatts);
1920
0
mem_error:
1921
0
    xmlErrMemory(ctxt);
1922
0
    return(-1);
1923
1.96k
}
1924
1925
/**
1926
 * inputPush:
1927
 * @ctxt:  an XML parser context
1928
 * @value:  the parser input
1929
 *
1930
 * Pushes a new parser input on top of the input stack
1931
 *
1932
 * Returns -1 in case of error, the index in the stack otherwise
1933
 */
1934
int
1935
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1936
72.6k
{
1937
72.6k
    if ((ctxt == NULL) || (value == NULL))
1938
0
        return(-1);
1939
72.6k
    if (ctxt->inputNr >= ctxt->inputMax) {
1940
0
        size_t newSize = ctxt->inputMax * 2;
1941
0
        xmlParserInputPtr *tmp;
1942
1943
0
        tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1944
0
                                               newSize * sizeof(*tmp));
1945
0
        if (tmp == NULL) {
1946
0
            xmlErrMemory(ctxt);
1947
0
            return (-1);
1948
0
        }
1949
0
        ctxt->inputTab = tmp;
1950
0
        ctxt->inputMax = newSize;
1951
0
    }
1952
72.6k
    ctxt->inputTab[ctxt->inputNr] = value;
1953
72.6k
    ctxt->input = value;
1954
72.6k
    return (ctxt->inputNr++);
1955
72.6k
}
1956
/**
1957
 * inputPop:
1958
 * @ctxt: an XML parser context
1959
 *
1960
 * Pops the top parser input from the input stack
1961
 *
1962
 * Returns the input just removed
1963
 */
1964
xmlParserInputPtr
1965
inputPop(xmlParserCtxtPtr ctxt)
1966
108k
{
1967
108k
    xmlParserInputPtr ret;
1968
1969
108k
    if (ctxt == NULL)
1970
0
        return(NULL);
1971
108k
    if (ctxt->inputNr <= 0)
1972
36.2k
        return (NULL);
1973
72.6k
    ctxt->inputNr--;
1974
72.6k
    if (ctxt->inputNr > 0)
1975
54.5k
        ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1976
18.1k
    else
1977
18.1k
        ctxt->input = NULL;
1978
72.6k
    ret = ctxt->inputTab[ctxt->inputNr];
1979
72.6k
    ctxt->inputTab[ctxt->inputNr] = NULL;
1980
72.6k
    return (ret);
1981
108k
}
1982
/**
1983
 * nodePush:
1984
 * @ctxt:  an XML parser context
1985
 * @value:  the element node
1986
 *
1987
 * DEPRECATED: Internal function, do not use.
1988
 *
1989
 * Pushes a new element node on top of the node stack
1990
 *
1991
 * Returns -1 in case of error, the index in the stack otherwise
1992
 */
1993
int
1994
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1995
101k
{
1996
101k
    int maxDepth;
1997
1998
101k
    if (ctxt == NULL)
1999
0
        return(0);
2000
2001
101k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 2048 : 256;
2002
101k
    if (ctxt->nodeNr > maxDepth) {
2003
0
        xmlFatalErrMsgInt(ctxt, XML_ERR_RESOURCE_LIMIT,
2004
0
                "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
2005
0
                ctxt->nodeNr);
2006
0
        xmlHaltParser(ctxt);
2007
0
        return(-1);
2008
0
    }
2009
101k
    if (ctxt->nodeNr >= ctxt->nodeMax) {
2010
406
        xmlNodePtr *tmp;
2011
2012
406
  tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
2013
406
                                      ctxt->nodeMax * 2 *
2014
406
                                      sizeof(ctxt->nodeTab[0]));
2015
406
        if (tmp == NULL) {
2016
0
            xmlErrMemory(ctxt);
2017
0
            return (-1);
2018
0
        }
2019
406
        ctxt->nodeTab = tmp;
2020
406
  ctxt->nodeMax *= 2;
2021
406
    }
2022
101k
    ctxt->nodeTab[ctxt->nodeNr] = value;
2023
101k
    ctxt->node = value;
2024
101k
    return (ctxt->nodeNr++);
2025
101k
}
2026
2027
/**
2028
 * nodePop:
2029
 * @ctxt: an XML parser context
2030
 *
2031
 * DEPRECATED: Internal function, do not use.
2032
 *
2033
 * Pops the top element node from the node stack
2034
 *
2035
 * Returns the node just removed
2036
 */
2037
xmlNodePtr
2038
nodePop(xmlParserCtxtPtr ctxt)
2039
117k
{
2040
117k
    xmlNodePtr ret;
2041
2042
117k
    if (ctxt == NULL) return(NULL);
2043
117k
    if (ctxt->nodeNr <= 0)
2044
19.0k
        return (NULL);
2045
98.7k
    ctxt->nodeNr--;
2046
98.7k
    if (ctxt->nodeNr > 0)
2047
95.3k
        ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2048
3.45k
    else
2049
3.45k
        ctxt->node = NULL;
2050
98.7k
    ret = ctxt->nodeTab[ctxt->nodeNr];
2051
98.7k
    ctxt->nodeTab[ctxt->nodeNr] = NULL;
2052
98.7k
    return (ret);
2053
117k
}
2054
2055
/**
2056
 * nameNsPush:
2057
 * @ctxt:  an XML parser context
2058
 * @value:  the element name
2059
 * @prefix:  the element prefix
2060
 * @URI:  the element namespace name
2061
 * @line:  the current line number for error messages
2062
 * @nsNr:  the number of namespaces pushed on the namespace table
2063
 *
2064
 * Pushes a new element name/prefix/URL on top of the name stack
2065
 *
2066
 * Returns -1 in case of error, the index in the stack otherwise
2067
 */
2068
static int
2069
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2070
           const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2071
135k
{
2072
135k
    xmlStartTag *tag;
2073
2074
135k
    if (ctxt->nameNr >= ctxt->nameMax) {
2075
576
        const xmlChar * *tmp;
2076
576
        xmlStartTag *tmp2;
2077
576
        ctxt->nameMax *= 2;
2078
576
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2079
576
                                    ctxt->nameMax *
2080
576
                                    sizeof(ctxt->nameTab[0]));
2081
576
        if (tmp == NULL) {
2082
0
      ctxt->nameMax /= 2;
2083
0
      goto mem_error;
2084
0
        }
2085
576
  ctxt->nameTab = tmp;
2086
576
        tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2087
576
                                    ctxt->nameMax *
2088
576
                                    sizeof(ctxt->pushTab[0]));
2089
576
        if (tmp2 == NULL) {
2090
0
      ctxt->nameMax /= 2;
2091
0
      goto mem_error;
2092
0
        }
2093
576
  ctxt->pushTab = tmp2;
2094
134k
    } else if (ctxt->pushTab == NULL) {
2095
11.0k
        ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2096
11.0k
                                            sizeof(ctxt->pushTab[0]));
2097
11.0k
        if (ctxt->pushTab == NULL)
2098
0
            goto mem_error;
2099
11.0k
    }
2100
135k
    ctxt->nameTab[ctxt->nameNr] = value;
2101
135k
    ctxt->name = value;
2102
135k
    tag = &ctxt->pushTab[ctxt->nameNr];
2103
135k
    tag->prefix = prefix;
2104
135k
    tag->URI = URI;
2105
135k
    tag->line = line;
2106
135k
    tag->nsNr = nsNr;
2107
135k
    return (ctxt->nameNr++);
2108
0
mem_error:
2109
0
    xmlErrMemory(ctxt);
2110
0
    return (-1);
2111
135k
}
2112
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt: an XML parser context
 *
 * Removes the topmost entry from the name stack. The name below it, if
 * any, becomes the current name; otherwise the current name is set to
 * NULL. The vacated table slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;
    int top;

    if (ctxt->nameNr <= 0)
        return (NULL);

    top = --ctxt->nameNr;
    ctxt->name = (top > 0) ? ctxt->nameTab[top - 1] : NULL;

    popped = ctxt->nameTab[top];
    ctxt->nameTab[top] = NULL;
    return (popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2138
2139
/**
2140
 * namePush:
2141
 * @ctxt:  an XML parser context
2142
 * @value:  the element name
2143
 *
2144
 * DEPRECATED: Internal function, do not use.
2145
 *
2146
 * Pushes a new element name on top of the name stack
2147
 *
2148
 * Returns -1 in case of error, the index in the stack otherwise
2149
 */
2150
int
2151
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2152
0
{
2153
0
    if (ctxt == NULL) return (-1);
2154
2155
0
    if (ctxt->nameNr >= ctxt->nameMax) {
2156
0
        const xmlChar * *tmp;
2157
0
        tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2158
0
                                    ctxt->nameMax * 2 *
2159
0
                                    sizeof(ctxt->nameTab[0]));
2160
0
        if (tmp == NULL) {
2161
0
      goto mem_error;
2162
0
        }
2163
0
  ctxt->nameTab = tmp;
2164
0
        ctxt->nameMax *= 2;
2165
0
    }
2166
0
    ctxt->nameTab[ctxt->nameNr] = value;
2167
0
    ctxt->name = value;
2168
0
    return (ctxt->nameNr++);
2169
0
mem_error:
2170
0
    xmlErrMemory(ctxt);
2171
0
    return (-1);
2172
0
}
2173
2174
/**
2175
 * namePop:
2176
 * @ctxt: an XML parser context
2177
 *
2178
 * DEPRECATED: Internal function, do not use.
2179
 *
2180
 * Pops the top element name from the name stack
2181
 *
2182
 * Returns the name just removed
2183
 */
2184
const xmlChar *
2185
namePop(xmlParserCtxtPtr ctxt)
2186
131k
{
2187
131k
    const xmlChar *ret;
2188
2189
131k
    if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2190
0
        return (NULL);
2191
131k
    ctxt->nameNr--;
2192
131k
    if (ctxt->nameNr > 0)
2193
124k
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2194
6.65k
    else
2195
6.65k
        ctxt->name = NULL;
2196
131k
    ret = ctxt->nameTab[ctxt->nameNr];
2197
131k
    ctxt->nameTab[ctxt->nameNr] = NULL;
2198
131k
    return (ret);
2199
131k
}
2200
2201
155k
/*
 * spacePush:
 *
 * Pushes @val on the context's space stack and points ctxt->space at
 * the new top entry. The table is doubled when it is full.
 *
 * Returns the index of the new entry, or -1 after reporting a memory
 * error.
 */
static int spacePush(xmlParserCtxtPtr ctxt, int val) {
    int pos;

    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                             ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt);
            /* Undo the doubling: the table itself did not grow. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    pos = ctxt->spaceNr;
    ctxt->spaceTab[pos] = val;
    ctxt->space = &ctxt->spaceTab[pos];
    ctxt->spaceNr = pos + 1;
    return(pos);
}
2219
2220
151k
/*
 * spacePop:
 *
 * Pops the top entry of the context's space stack. ctxt->space is moved
 * to the entry below, or to slot 0 when the stack becomes empty, and the
 * vacated slot is overwritten with -1.
 *
 * Returns the popped value, or 0 if the stack was already empty.
 */
static int spacePop(xmlParserCtxtPtr ctxt) {
    int popped;
    int top;

    if (ctxt->spaceNr <= 0)
        return(0);

    top = --ctxt->spaceNr;
    ctxt->space = &ctxt->spaceTab[(top > 0) ? top - 1 : 0];

    popped = ctxt->spaceTab[top];
    ctxt->spaceTab[top] = -1;
    return(popped);
}
2232
2233
/*
2234
 * Macros for accessing the content. Those should be used only by the parser,
2235
 * and not exported.
2236
 *
2237
 * Dirty macros, i.e. one often need to make assumption on the context to
2238
 * use them
2239
 *
2240
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
2241
 *           To be used with extreme caution since operations consuming
2242
 *           characters may move the input buffer to a different location !
2243
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
2244
 *           This should be used internally by the parser
2245
 *           only to compare to ASCII values otherwise it would break when
2246
 *           running with UTF-8 encoding.
2247
 *   RAW     same as CUR but in the input buffer, bypass any token
2248
 *           extraction that may have been done
2249
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
2250
 *           to compare on ASCII based substring.
2251
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
2252
 *           strings without newlines within the parser.
2253
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
2254
 *           defined char within the parser.
2255
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
2256
 *
2257
 *   NEXT    Skip to the next character, this does the proper decoding
2258
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
2259
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
2260
 *   CUR_CHAR(l) returns the current unicode character (int), set l
2261
 *           to the number of xmlChars used for the encoding [0-5].
2262
 *   CUR_SCHAR  same but operate on a string instead of the context
2263
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
2264
 *            the index
2265
 *   GROW, SHRINK  handling of input buffers
2266
 */
2267
2268
2.34M
/* Direct accessors for the current input; all expect a local `ctxt`. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn: compare the first n bytes at s with the given byte values.
 * The argument is parenthesized before the cast so that any pointer
 * expression is compared as unsigned bytes.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == c1 && ((unsigned char *) (s))[ 1 ] == c2 && \
    ((unsigned char *) (s))[ 2 ] == c3 && ((unsigned char *) (s))[ 3 ] == c4 )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == c5 )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == c6 )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == c7 )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == c8 )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == c9 )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == c10 )

/* Advance by val bytes; only the column counter is updated. */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Advance by val bytes one at a time, tracking line and column. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW expand to a complete if statement followed by ';'.
 * Their shape is kept as is because existing call sites rely on it.
 */
#define SHRINK \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW \
    if ((!PARSER_PROGRESSIVE(ctxt)) && \
        (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define SKIP_BLANKS_PE xmlSkipBlankCharsPE(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one non-newline byte. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);						\
    }

/* Advance over one character that is l bytes long. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

/*
 * Append character v to buffer b at index i and advance i. The if/else
 * statement shape is kept so that existing call sites still parse.
 */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2347
2348
/**
2349
 * xmlSkipBlankChars:
2350
 * @ctxt:  the XML parser context
2351
 *
2352
 * DEPRECATED: Internal function, do not use.
2353
 *
2354
 * Skip whitespace in the input stream.
2355
 *
2356
 * Returns the number of space chars skipped
2357
 */
2358
int
2359
516k
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2360
516k
    const xmlChar *cur;
2361
516k
    int res = 0;
2362
2363
    /*
2364
     * It's Okay to use CUR/NEXT here since all the blanks are on
2365
     * the ASCII range.
2366
     */
2367
516k
    cur = ctxt->input->cur;
2368
516k
    while (IS_BLANK_CH(*cur)) {
2369
99.5k
        if (*cur == '\n') {
2370
6.00k
            ctxt->input->line++; ctxt->input->col = 1;
2371
93.5k
        } else {
2372
93.5k
            ctxt->input->col++;
2373
93.5k
        }
2374
99.5k
        cur++;
2375
99.5k
        if (res < INT_MAX)
2376
99.5k
            res++;
2377
99.5k
        if (*cur == 0) {
2378
1.21k
            ctxt->input->cur = cur;
2379
1.21k
            xmlParserGrow(ctxt);
2380
1.21k
            cur = ctxt->input->cur;
2381
1.21k
        }
2382
99.5k
    }
2383
516k
    ctxt->input->cur = cur;
2384
2385
516k
    return(res);
2386
516k
}
2387
2388
static void
2389
53.9k
xmlPopPE(xmlParserCtxtPtr ctxt) {
2390
53.9k
    unsigned long consumed;
2391
53.9k
    xmlEntityPtr ent;
2392
2393
53.9k
    ent = ctxt->input->entity;
2394
2395
53.9k
    ent->flags &= ~XML_ENT_EXPANDING;
2396
2397
53.9k
    if ((ent->flags & XML_ENT_CHECKED) == 0) {
2398
2.08k
        int result;
2399
2400
        /*
2401
         * Read the rest of the stream in case of errors. We want
2402
         * to account for the whole entity size.
2403
         */
2404
2.08k
        do {
2405
2.08k
            ctxt->input->cur = ctxt->input->end;
2406
2.08k
            xmlParserShrink(ctxt);
2407
2.08k
            result = xmlParserGrow(ctxt);
2408
2.08k
        } while (result > 0);
2409
2410
2.08k
        consumed = ctxt->input->consumed;
2411
2.08k
        xmlSaturatedAddSizeT(&consumed,
2412
2.08k
                             ctxt->input->end - ctxt->input->base);
2413
2414
2.08k
        xmlSaturatedAdd(&ent->expandedSize, consumed);
2415
2416
        /*
2417
         * Add to sizeentities when parsing an external entity
2418
         * for the first time.
2419
         */
2420
2.08k
        if (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
2421
0
            xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2422
0
        }
2423
2424
2.08k
        ent->flags |= XML_ENT_CHECKED;
2425
2.08k
    }
2426
2427
53.9k
    xmlPopInput(ctxt);
2428
2429
53.9k
    xmlParserEntityCheck(ctxt, ent->expandedSize);
2430
53.9k
}
2431
2432
/**
2433
 * xmlSkipBlankCharsPE:
2434
 * @ctxt:  the XML parser context
2435
 *
2436
 * Skip whitespace in the input stream, also handling parameter
2437
 * entities.
2438
 *
2439
 * Returns the number of space chars skipped
2440
 */
2441
static int
2442
507k
xmlSkipBlankCharsPE(xmlParserCtxtPtr ctxt) {
2443
507k
    int res = 0;
2444
507k
    int inParam;
2445
507k
    int expandParam;
2446
2447
507k
    inParam = PARSER_IN_PE(ctxt);
2448
507k
    expandParam = PARSER_EXTERNAL(ctxt);
2449
2450
507k
    if (!inParam && !expandParam)
2451
110k
        return(xmlSkipBlankChars(ctxt));
2452
2453
579k
    while (PARSER_STOPPED(ctxt) == 0) {
2454
579k
        if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2455
128k
            NEXT;
2456
450k
        } else if (CUR == '%') {
2457
52.9k
            if ((expandParam == 0) ||
2458
52.9k
                (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2459
52.9k
                break;
2460
2461
            /*
2462
             * Expand parameter entity. We continue to consume
2463
             * whitespace at the start of the entity and possible
2464
             * even consume the whole entity and pop it. We might
2465
             * even pop multiple PEs in this loop.
2466
             */
2467
0
            xmlParsePEReference(ctxt);
2468
2469
0
            inParam = PARSER_IN_PE(ctxt);
2470
0
            expandParam = PARSER_EXTERNAL(ctxt);
2471
397k
        } else if (CUR == 0) {
2472
53.9k
            if (inParam == 0)
2473
2
                break;
2474
2475
53.9k
            xmlPopPE(ctxt);
2476
2477
53.9k
            inParam = PARSER_IN_PE(ctxt);
2478
53.9k
            expandParam = PARSER_EXTERNAL(ctxt);
2479
343k
        } else {
2480
343k
            break;
2481
343k
        }
2482
2483
        /*
2484
         * Also increase the counter when entering or exiting a PERef.
2485
         * The spec says: "When a parameter-entity reference is recognized
2486
         * in the DTD and included, its replacement text MUST be enlarged
2487
         * by the attachment of one leading and one following space (#x20)
2488
         * character."
2489
         */
2490
182k
        if (res < INT_MAX)
2491
182k
            res++;
2492
182k
    }
2493
2494
396k
    return(res);
2495
507k
}
2496
2497
/************************************************************************
2498
 *                  *
2499
 *    Commodity functions to handle entities      *
2500
 *                  *
2501
 ************************************************************************/
2502
2503
/**
2504
 * xmlPopInput:
2505
 * @ctxt:  an XML parser context
2506
 *
2507
 * xmlPopInput: the current input pointed by ctxt->input came to an end
2508
 *          pop it and return the next char.
2509
 *
2510
 * Returns the current xmlChar in the parser context
2511
 */
2512
xmlChar
2513
53.9k
xmlPopInput(xmlParserCtxtPtr ctxt) {
2514
53.9k
    xmlParserInputPtr input;
2515
2516
53.9k
    if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2517
53.9k
    input = inputPop(ctxt);
2518
53.9k
    xmlFreeInputStream(input);
2519
53.9k
    if (*ctxt->input->cur == 0)
2520
306
        xmlParserGrow(ctxt);
2521
53.9k
    return(CUR);
2522
53.9k
}
2523
2524
/**
2525
 * xmlPushInput:
2526
 * @ctxt:  an XML parser context
2527
 * @input:  an XML parser input fragment (entity, XML fragment ...).
2528
 *
2529
 * Push an input stream onto the stack.
2530
 *
2531
 * This makes the parser use an input returned from advanced functions
2532
 * like xmlNewInputURL or xmlNewInputMemory.
2533
 *
2534
 * Returns -1 in case of error or the index in the input stack
2535
 */
2536
int
2537
54.5k
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2538
54.5k
    int maxDepth;
2539
54.5k
    int ret;
2540
2541
54.5k
    if ((ctxt == NULL) || (input == NULL))
2542
0
        return(-1);
2543
2544
54.5k
    maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
2545
54.5k
    if (ctxt->inputNr > maxDepth) {
2546
0
        xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
2547
0
                       "Maximum entity nesting depth exceeded");
2548
0
        xmlHaltParser(ctxt);
2549
0
  return(-1);
2550
0
    }
2551
54.5k
    ret = inputPush(ctxt, input);
2552
54.5k
    GROW;
2553
54.5k
    return(ret);
2554
54.5k
}
2555
2556
/**
2557
 * xmlParseCharRef:
2558
 * @ctxt:  an XML parser context
2559
 *
2560
 * DEPRECATED: Internal function, don't use.
2561
 *
2562
 * Parse a numeric character reference. Always consumes '&'.
2563
 *
2564
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2565
 *                  '&#x' [0-9a-fA-F]+ ';'
2566
 *
2567
 * [ WFC: Legal Character ]
2568
 * Characters referred to using character references must match the
2569
 * production for Char.
2570
 *
2571
 * Returns the value parsed (as an int), 0 in case of error
2572
 */
2573
int
2574
12.7k
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2575
12.7k
    int val = 0;
2576
12.7k
    int count = 0;
2577
2578
    /*
2579
     * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2580
     */
2581
12.7k
    if ((RAW == '&') && (NXT(1) == '#') &&
2582
12.7k
        (NXT(2) == 'x')) {
2583
7.72k
  SKIP(3);
2584
7.72k
  GROW;
2585
31.3k
  while ((RAW != ';') && (PARSER_STOPPED(ctxt) == 0)) {
2586
24.3k
      if (count++ > 20) {
2587
164
    count = 0;
2588
164
    GROW;
2589
164
      }
2590
24.3k
      if ((RAW >= '0') && (RAW <= '9'))
2591
6.21k
          val = val * 16 + (CUR - '0');
2592
18.1k
      else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2593
4.93k
          val = val * 16 + (CUR - 'a') + 10;
2594
13.2k
      else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2595
12.4k
          val = val * 16 + (CUR - 'A') + 10;
2596
725
      else {
2597
725
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2598
725
    val = 0;
2599
725
    break;
2600
725
      }
2601
23.6k
      if (val > 0x110000)
2602
2.45k
          val = 0x110000;
2603
2604
23.6k
      NEXT;
2605
23.6k
      count++;
2606
23.6k
  }
2607
7.72k
  if (RAW == ';') {
2608
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2609
7.00k
      ctxt->input->col++;
2610
7.00k
      ctxt->input->cur++;
2611
7.00k
  }
2612
7.72k
    } else if  ((RAW == '&') && (NXT(1) == '#')) {
2613
5.00k
  SKIP(2);
2614
5.00k
  GROW;
2615
17.3k
  while (RAW != ';') { /* loop blocked by count */
2616
13.3k
      if (count++ > 20) {
2617
197
    count = 0;
2618
197
    GROW;
2619
197
      }
2620
13.3k
      if ((RAW >= '0') && (RAW <= '9'))
2621
12.3k
          val = val * 10 + (CUR - '0');
2622
1.00k
      else {
2623
1.00k
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2624
1.00k
    val = 0;
2625
1.00k
    break;
2626
1.00k
      }
2627
12.3k
      if (val > 0x110000)
2628
1.87k
          val = 0x110000;
2629
2630
12.3k
      NEXT;
2631
12.3k
      count++;
2632
12.3k
  }
2633
5.00k
  if (RAW == ';') {
2634
      /* on purpose to avoid reentrancy problems with NEXT and SKIP */
2635
3.99k
      ctxt->input->col++;
2636
3.99k
      ctxt->input->cur++;
2637
3.99k
  }
2638
5.00k
    } else {
2639
0
        if (RAW == '&')
2640
0
            SKIP(1);
2641
0
        xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2642
0
    }
2643
2644
    /*
2645
     * [ WFC: Legal Character ]
2646
     * Characters referred to using character references must match the
2647
     * production for Char.
2648
     */
2649
12.7k
    if (val >= 0x110000) {
2650
220
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2651
220
                "xmlParseCharRef: character reference out of bounds\n",
2652
220
          val);
2653
12.5k
    } else if (IS_CHAR(val)) {
2654
10.4k
        return(val);
2655
10.4k
    } else {
2656
2.05k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2657
2.05k
                          "xmlParseCharRef: invalid xmlChar value %d\n",
2658
2.05k
                    val);
2659
2.05k
    }
2660
2.27k
    return(0);
2661
12.7k
}
2662
2663
/**
2664
 * xmlParseStringCharRef:
2665
 * @ctxt:  an XML parser context
2666
 * @str:  a pointer to an index in the string
2667
 *
2668
 * parse Reference declarations, variant parsing from a string rather
2669
 * than an an input flow.
2670
 *
2671
 * [66] CharRef ::= '&#' [0-9]+ ';' |
2672
 *                  '&#x' [0-9a-fA-F]+ ';'
2673
 *
2674
 * [ WFC: Legal Character ]
2675
 * Characters referred to using character references must match the
2676
 * production for Char.
2677
 *
2678
 * Returns the value parsed (as an int), 0 in case of error, str will be
2679
 *         updated to the current value of the index
2680
 */
2681
static int
2682
24.2k
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2683
24.2k
    const xmlChar *ptr;
2684
24.2k
    xmlChar cur;
2685
24.2k
    int val = 0;
2686
2687
24.2k
    if ((str == NULL) || (*str == NULL)) return(0);
2688
24.2k
    ptr = *str;
2689
24.2k
    cur = *ptr;
2690
24.2k
    if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2691
5.74k
  ptr += 3;
2692
5.74k
  cur = *ptr;
2693
26.5k
  while (cur != ';') { /* Non input consuming loop */
2694
21.4k
      if ((cur >= '0') && (cur <= '9'))
2695
5.40k
          val = val * 16 + (cur - '0');
2696
16.0k
      else if ((cur >= 'a') && (cur <= 'f'))
2697
3.41k
          val = val * 16 + (cur - 'a') + 10;
2698
12.6k
      else if ((cur >= 'A') && (cur <= 'F'))
2699
12.0k
          val = val * 16 + (cur - 'A') + 10;
2700
669
      else {
2701
669
    xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2702
669
    val = 0;
2703
669
    break;
2704
669
      }
2705
20.8k
      if (val > 0x110000)
2706
611
          val = 0x110000;
2707
2708
20.8k
      ptr++;
2709
20.8k
      cur = *ptr;
2710
20.8k
  }
2711
5.74k
  if (cur == ';')
2712
5.07k
      ptr++;
2713
18.5k
    } else if  ((cur == '&') && (ptr[1] == '#')){
2714
18.5k
  ptr += 2;
2715
18.5k
  cur = *ptr;
2716
61.4k
  while (cur != ';') { /* Non input consuming loops */
2717
43.7k
      if ((cur >= '0') && (cur <= '9'))
2718
42.8k
          val = val * 10 + (cur - '0');
2719
814
      else {
2720
814
    xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2721
814
    val = 0;
2722
814
    break;
2723
814
      }
2724
42.8k
      if (val > 0x110000)
2725
414
          val = 0x110000;
2726
2727
42.8k
      ptr++;
2728
42.8k
      cur = *ptr;
2729
42.8k
  }
2730
18.5k
  if (cur == ';')
2731
17.7k
      ptr++;
2732
18.5k
    } else {
2733
0
  xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2734
0
  return(0);
2735
0
    }
2736
24.2k
    *str = ptr;
2737
2738
    /*
2739
     * [ WFC: Legal Character ]
2740
     * Characters referred to using character references must match the
2741
     * production for Char.
2742
     */
2743
24.2k
    if (val >= 0x110000) {
2744
209
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2745
209
                "xmlParseStringCharRef: character reference out of bounds\n",
2746
209
                val);
2747
24.0k
    } else if (IS_CHAR(val)) {
2748
21.6k
        return(val);
2749
21.6k
    } else {
2750
2.43k
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2751
2.43k
        "xmlParseStringCharRef: invalid xmlChar value %d\n",
2752
2.43k
        val);
2753
2.43k
    }
2754
2.64k
    return(0);
2755
24.2k
}
2756
2757
/**
2758
 * xmlParserHandlePEReference:
2759
 * @ctxt:  the parser context
2760
 *
2761
 * DEPRECATED: Internal function, do not use.
2762
 *
2763
 * [69] PEReference ::= '%' Name ';'
2764
 *
2765
 * [ WFC: No Recursion ]
2766
 * A parsed entity must not contain a recursive
2767
 * reference to itself, either directly or indirectly.
2768
 *
2769
 * [ WFC: Entity Declared ]
2770
 * In a document without any DTD, a document with only an internal DTD
2771
 * subset which contains no parameter entity references, or a document
2772
 * with "standalone='yes'", ...  ... The declaration of a parameter
2773
 * entity must precede any reference to it...
2774
 *
2775
 * [ VC: Entity Declared ]
2776
 * In a document with an external subset or external parameter entities
2777
 * with "standalone='no'", ...  ... The declaration of a parameter entity
2778
 * must precede any reference to it...
2779
 *
2780
 * [ WFC: In DTD ]
2781
 * Parameter-entity references may only appear in the DTD.
2782
 * NOTE: misleading but this is handled.
2783
 *
2784
 * A PEReference may have been detected in the current input stream
2785
 * the handling is done accordingly to
2786
 *      http://www.w3.org/TR/REC-xml#entproc
2787
 * i.e.
2788
 *   - Included in literal in entity values
2789
 *   - Included as Parameter Entity reference within DTDs
2790
 */
2791
void
2792
0
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2793
0
    xmlParsePEReference(ctxt);
2794
0
}
2795
2796
/**
2797
 * xmlStringLenDecodeEntities:
2798
 * @ctxt:  the parser context
2799
 * @str:  the input string
2800
 * @len: the string length
2801
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2802
 * @end:  an end marker xmlChar, 0 if none
2803
 * @end2:  an end marker xmlChar, 0 if none
2804
 * @end3:  an end marker xmlChar, 0 if none
2805
 *
2806
 * DEPRECATED: Internal function, don't use.
2807
 *
2808
 * Returns A newly allocated string with the substitution done. The caller
2809
 *      must deallocate it !
2810
 */
2811
xmlChar *
2812
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2813
                           int what ATTRIBUTE_UNUSED,
2814
0
                           xmlChar end, xmlChar end2, xmlChar end3) {
2815
0
    if ((ctxt == NULL) || (str == NULL) || (len < 0))
2816
0
        return(NULL);
2817
2818
0
    if ((str[len] != 0) ||
2819
0
        (end != 0) || (end2 != 0) || (end3 != 0))
2820
0
        return(NULL);
2821
2822
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2823
0
}
2824
2825
/**
2826
 * xmlStringDecodeEntities:
2827
 * @ctxt:  the parser context
2828
 * @str:  the input string
2829
 * @what:  combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2830
 * @end:  an end marker xmlChar, 0 if none
2831
 * @end2:  an end marker xmlChar, 0 if none
2832
 * @end3:  an end marker xmlChar, 0 if none
2833
 *
2834
 * DEPRECATED: Internal function, don't use.
2835
 *
2836
 * Returns A newly allocated string with the substitution done. The caller
2837
 *      must deallocate it !
2838
 */
2839
xmlChar *
2840
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str,
2841
                        int what ATTRIBUTE_UNUSED,
2842
0
            xmlChar end, xmlChar  end2, xmlChar end3) {
2843
0
    if ((ctxt == NULL) || (str == NULL))
2844
0
        return(NULL);
2845
2846
0
    if ((end != 0) || (end2 != 0) || (end3 != 0))
2847
0
        return(NULL);
2848
2849
0
    return(xmlExpandEntitiesInAttValue(ctxt, str, 0));
2850
0
}
2851
2852
/************************************************************************
2853
 *                  *
2854
 *    Commodity functions, cleanup needed ?     *
2855
 *                  *
2856
 ************************************************************************/
2857
2858
/**
2859
 * areBlanks:
2860
 * @ctxt:  an XML parser context
2861
 * @str:  a xmlChar *
2862
 * @len:  the size of @str
2863
 * @blank_chars: we know the chars are blanks
2864
 *
2865
 * Is this a sequence of blank chars that one can ignore ?
2866
 *
2867
 * Returns 1 if ignorable 0 otherwise.
2868
 */
2869
2870
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2871
7.04k
                     int blank_chars) {
2872
7.04k
    int i;
2873
7.04k
    xmlNodePtr lastChild;
2874
2875
    /*
2876
     * Don't spend time trying to differentiate them, the same callback is
2877
     * used !
2878
     */
2879
7.04k
    if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
2880
1.77k
  return(0);
2881
2882
    /*
2883
     * Check for xml:space value.
2884
     */
2885
5.27k
    if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2886
5.27k
        (*(ctxt->space) == -2))
2887
1.48k
  return(0);
2888
2889
    /*
2890
     * Check that the string is made of blanks
2891
     */
2892
3.78k
    if (blank_chars == 0) {
2893
7.83k
  for (i = 0;i < len;i++)
2894
6.55k
      if (!(IS_BLANK_CH(str[i]))) return(0);
2895
2.27k
    }
2896
2897
    /*
2898
     * Look if the element is mixed content in the DTD if available
2899
     */
2900
2.78k
    if (ctxt->node == NULL) return(0);
2901
2.78k
    if (ctxt->myDoc != NULL) {
2902
2.78k
        xmlElementPtr elemDecl = NULL;
2903
2.78k
        xmlDocPtr doc = ctxt->myDoc;
2904
2.78k
        const xmlChar *prefix = NULL;
2905
2906
2.78k
        if (ctxt->node->ns)
2907
263
            prefix = ctxt->node->ns->prefix;
2908
2.78k
        if (doc->intSubset != NULL)
2909
900
            elemDecl = xmlHashLookup2(doc->intSubset->elements, ctxt->node->name,
2910
900
                                      prefix);
2911
2.78k
        if ((elemDecl == NULL) && (doc->extSubset != NULL))
2912
0
            elemDecl = xmlHashLookup2(doc->extSubset->elements, ctxt->node->name,
2913
0
                                      prefix);
2914
2.78k
        if (elemDecl != NULL) {
2915
593
            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)
2916
220
                return(1);
2917
373
            if ((elemDecl->etype == XML_ELEMENT_TYPE_ANY) ||
2918
373
                (elemDecl->etype == XML_ELEMENT_TYPE_MIXED))
2919
151
                return(0);
2920
373
        }
2921
2.78k
    }
2922
2923
    /*
2924
     * Otherwise, heuristic :-\
2925
     */
2926
2.41k
    if ((RAW != '<') && (RAW != 0xD)) return(0);
2927
2.09k
    if ((ctxt->node->children == NULL) &&
2928
2.09k
  (RAW == '<') && (NXT(1) == '/')) return(0);
2929
2930
1.86k
    lastChild = xmlGetLastChild(ctxt->node);
2931
1.86k
    if (lastChild == NULL) {
2932
1.24k
        if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2933
1.24k
            (ctxt->node->content != NULL)) return(0);
2934
1.24k
    } else if (xmlNodeIsText(lastChild))
2935
205
        return(0);
2936
416
    else if ((ctxt->node->children != NULL) &&
2937
416
             (xmlNodeIsText(ctxt->node->children)))
2938
113
        return(0);
2939
1.54k
    return(1);
2940
1.86k
}
2941
2942
/************************************************************************
2943
 *                  *
2944
 *    Extra stuff for namespace support     *
2945
 *  Relates to http://www.w3.org/TR/WD-xml-names      *
2946
 *                  *
2947
 ************************************************************************/
2948
2949
/**
2950
 * xmlSplitQName:
2951
 * @ctxt:  an XML parser context
2952
 * @name:  an XML parser context
2953
 * @prefixOut:  a xmlChar **
2954
 *
2955
 * parse an UTF8 encoded XML qualified name string
2956
 *
2957
 * [NS 5] QName ::= (Prefix ':')? LocalPart
2958
 *
2959
 * [NS 6] Prefix ::= NCName
2960
 *
2961
 * [NS 7] LocalPart ::= NCName
2962
 *
2963
 * Returns the local part, and prefix is updated
2964
 *   to get the Prefix if any.
2965
 */
2966
2967
xmlChar *
2968
118k
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefixOut) {
2969
118k
    xmlChar buf[XML_MAX_NAMELEN + 5];
2970
118k
    xmlChar *buffer = NULL;
2971
118k
    int len = 0;
2972
118k
    int max = XML_MAX_NAMELEN;
2973
118k
    xmlChar *ret = NULL;
2974
118k
    xmlChar *prefix;
2975
118k
    const xmlChar *cur = name;
2976
118k
    int c;
2977
2978
118k
    if (prefixOut == NULL) return(NULL);
2979
118k
    *prefixOut = NULL;
2980
2981
118k
    if (cur == NULL) return(NULL);
2982
2983
#ifndef XML_XML_NAMESPACE
2984
    /* xml: prefix is not really a namespace */
2985
    if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986
        (cur[2] == 'l') && (cur[3] == ':'))
2987
  return(xmlStrdup(name));
2988
#endif
2989
2990
    /* nasty but well=formed */
2991
118k
    if (cur[0] == ':')
2992
9.89k
  return(xmlStrdup(name));
2993
2994
108k
    c = *cur++;
2995
433k
    while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996
324k
  buf[len++] = c;
2997
324k
  c = *cur++;
2998
324k
    }
2999
108k
    if (len >= max) {
3000
  /*
3001
   * Okay someone managed to make a huge name, so he's ready to pay
3002
   * for the processing speed.
3003
   */
3004
517
  max = len * 2;
3005
3006
517
  buffer = (xmlChar *) xmlMallocAtomic(max);
3007
517
  if (buffer == NULL) {
3008
0
      xmlErrMemory(ctxt);
3009
0
      return(NULL);
3010
0
  }
3011
517
  memcpy(buffer, buf, len);
3012
55.5k
  while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013
55.0k
      if (len + 10 > max) {
3014
409
          xmlChar *tmp;
3015
3016
409
    max *= 2;
3017
409
    tmp = (xmlChar *) xmlRealloc(buffer, max);
3018
409
    if (tmp == NULL) {
3019
0
        xmlFree(buffer);
3020
0
        xmlErrMemory(ctxt);
3021
0
        return(NULL);
3022
0
    }
3023
409
    buffer = tmp;
3024
409
      }
3025
55.0k
      buffer[len++] = c;
3026
55.0k
      c = *cur++;
3027
55.0k
  }
3028
517
  buffer[len] = 0;
3029
517
    }
3030
3031
108k
    if ((c == ':') && (*cur == 0)) {
3032
804
        if (buffer != NULL)
3033
197
      xmlFree(buffer);
3034
804
  return(xmlStrdup(name));
3035
804
    }
3036
3037
108k
    if (buffer == NULL) {
3038
107k
  ret = xmlStrndup(buf, len);
3039
107k
        if (ret == NULL) {
3040
0
      xmlErrMemory(ctxt);
3041
0
      return(NULL);
3042
0
        }
3043
107k
    } else {
3044
320
  ret = buffer;
3045
320
  buffer = NULL;
3046
320
  max = XML_MAX_NAMELEN;
3047
320
    }
3048
3049
3050
108k
    if (c == ':') {
3051
14.8k
  c = *cur;
3052
14.8k
        prefix = ret;
3053
14.8k
  if (c == 0) {
3054
0
      ret = xmlStrndup(BAD_CAST "", 0);
3055
0
            if (ret == NULL) {
3056
0
                xmlFree(prefix);
3057
0
                return(NULL);
3058
0
            }
3059
0
            *prefixOut = prefix;
3060
0
            return(ret);
3061
0
  }
3062
14.8k
  len = 0;
3063
3064
  /*
3065
   * Check that the first character is proper to start
3066
   * a new name
3067
   */
3068
14.8k
  if (!(((c >= 0x61) && (c <= 0x7A)) ||
3069
14.8k
        ((c >= 0x41) && (c <= 0x5A)) ||
3070
14.8k
        (c == '_') || (c == ':'))) {
3071
3.73k
      int l;
3072
3.73k
      int first = CUR_SCHAR(cur, l);
3073
3074
3.73k
      if (!IS_LETTER(first) && (first != '_')) {
3075
2.44k
    xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3076
2.44k
          "Name %s is not XML Namespace compliant\n",
3077
2.44k
          name);
3078
2.44k
      }
3079
3.73k
  }
3080
14.8k
  cur++;
3081
3082
143k
  while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3083
128k
      buf[len++] = c;
3084
128k
      c = *cur++;
3085
128k
  }
3086
14.8k
  if (len >= max) {
3087
      /*
3088
       * Okay someone managed to make a huge name, so he's ready to pay
3089
       * for the processing speed.
3090
       */
3091
753
      max = len * 2;
3092
3093
753
      buffer = (xmlChar *) xmlMallocAtomic(max);
3094
753
      if (buffer == NULL) {
3095
0
          xmlErrMemory(ctxt);
3096
0
                xmlFree(prefix);
3097
0
    return(NULL);
3098
0
      }
3099
753
      memcpy(buffer, buf, len);
3100
28.4k
      while (c != 0) { /* tested bigname2.xml */
3101
27.6k
    if (len + 10 > max) {
3102
236
        xmlChar *tmp;
3103
3104
236
        max *= 2;
3105
236
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3106
236
        if (tmp == NULL) {
3107
0
      xmlErrMemory(ctxt);
3108
0
                        xmlFree(prefix);
3109
0
      xmlFree(buffer);
3110
0
      return(NULL);
3111
0
        }
3112
236
        buffer = tmp;
3113
236
    }
3114
27.6k
    buffer[len++] = c;
3115
27.6k
    c = *cur++;
3116
27.6k
      }
3117
753
      buffer[len] = 0;
3118
753
  }
3119
3120
14.8k
  if (buffer == NULL) {
3121
14.0k
      ret = xmlStrndup(buf, len);
3122
14.0k
            if (ret == NULL) {
3123
0
                xmlFree(prefix);
3124
0
                return(NULL);
3125
0
            }
3126
14.0k
  } else {
3127
753
      ret = buffer;
3128
753
  }
3129
3130
14.8k
        *prefixOut = prefix;
3131
14.8k
    }
3132
3133
108k
    return(ret);
3134
108k
}
3135
3136
/************************************************************************
3137
 *                  *
3138
 *      The parser itself       *
3139
 *  Relates to http://www.w3.org/TR/REC-xml       *
3140
 *                  *
3141
 ************************************************************************/
3142
3143
/************************************************************************
3144
 *                  *
3145
 *  Routines to parse Name, NCName and NmToken      *
3146
 *                  *
3147
 ************************************************************************/
3148
3149
/*
3150
 * The two following functions are related to the change of accepted
3151
 * characters for Name and NmToken in the Revision 5 of XML-1.0
3152
 * They correspond to the modified production [4] and the new production [4a]
3153
 * changes in that revision. Also note that the macros used for the
3154
 * productions Letter, Digit, CombiningChar and Extender are not needed
3155
 * anymore.
3156
 * We still keep compatibility to pre-revision5 parsing semantic if the
3157
 * new XML_PARSE_OLD10 option is given to the parser.
3158
 */
3159
static int
3160
303k
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3161
303k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3162
        /*
3163
   * Use the new checks of production [4] [4a] amd [5] of the
3164
   * Update 5 of XML-1.0
3165
   */
3166
283k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3167
283k
      (((c >= 'a') && (c <= 'z')) ||
3168
282k
       ((c >= 'A') && (c <= 'Z')) ||
3169
282k
       (c == '_') || (c == ':') ||
3170
282k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3171
282k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3172
282k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3173
282k
       ((c >= 0x370) && (c <= 0x37D)) ||
3174
282k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3175
282k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3176
282k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3177
282k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3178
282k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3179
282k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3180
282k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3181
282k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3182
227k
      return(1);
3183
283k
    } else {
3184
19.5k
        if (IS_LETTER(c) || (c == '_') || (c == ':'))
3185
16.7k
      return(1);
3186
19.5k
    }
3187
59.2k
    return(0);
3188
303k
}
3189
3190
static int
3191
526k
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3192
526k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3193
        /*
3194
   * Use the new checks of production [4] [4a] amd [5] of the
3195
   * Update 5 of XML-1.0
3196
   */
3197
495k
  if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3198
495k
      (((c >= 'a') && (c <= 'z')) ||
3199
487k
       ((c >= 'A') && (c <= 'Z')) ||
3200
487k
       ((c >= '0') && (c <= '9')) || /* !start */
3201
487k
       (c == '_') || (c == ':') ||
3202
487k
       (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3203
487k
       ((c >= 0xC0) && (c <= 0xD6)) ||
3204
487k
       ((c >= 0xD8) && (c <= 0xF6)) ||
3205
487k
       ((c >= 0xF8) && (c <= 0x2FF)) ||
3206
487k
       ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3207
487k
       ((c >= 0x370) && (c <= 0x37D)) ||
3208
487k
       ((c >= 0x37F) && (c <= 0x1FFF)) ||
3209
487k
       ((c >= 0x200C) && (c <= 0x200D)) ||
3210
487k
       ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3211
487k
       ((c >= 0x2070) && (c <= 0x218F)) ||
3212
487k
       ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3213
487k
       ((c >= 0x3001) && (c <= 0xD7FF)) ||
3214
487k
       ((c >= 0xF900) && (c <= 0xFDCF)) ||
3215
487k
       ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3216
487k
       ((c >= 0x10000) && (c <= 0xEFFFF))))
3217
261k
       return(1);
3218
495k
    } else {
3219
30.2k
        if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3220
30.2k
            (c == '.') || (c == '-') ||
3221
30.2k
      (c == '_') || (c == ':') ||
3222
30.2k
      (IS_COMBINING(c)) ||
3223
30.2k
      (IS_EXTENDER(c)))
3224
12.4k
      return(1);
3225
30.2k
    }
3226
252k
    return(0);
3227
526k
}
3228
3229
static const xmlChar *
3230
109k
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3231
109k
    const xmlChar *ret;
3232
109k
    int len = 0, l;
3233
109k
    int c;
3234
109k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3235
0
                    XML_MAX_TEXT_LENGTH :
3236
109k
                    XML_MAX_NAME_LENGTH;
3237
3238
    /*
3239
     * Handler for more complex cases
3240
     */
3241
109k
    c = CUR_CHAR(l);
3242
109k
    if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243
        /*
3244
   * Use the new checks of production [4] [4a] amd [5] of the
3245
   * Update 5 of XML-1.0
3246
   */
3247
97.4k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248
97.4k
      (!(((c >= 'a') && (c <= 'z')) ||
3249
86.7k
         ((c >= 'A') && (c <= 'Z')) ||
3250
86.7k
         (c == '_') || (c == ':') ||
3251
86.7k
         ((c >= 0xC0) && (c <= 0xD6)) ||
3252
86.7k
         ((c >= 0xD8) && (c <= 0xF6)) ||
3253
86.7k
         ((c >= 0xF8) && (c <= 0x2FF)) ||
3254
86.7k
         ((c >= 0x370) && (c <= 0x37D)) ||
3255
86.7k
         ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256
86.7k
         ((c >= 0x200C) && (c <= 0x200D)) ||
3257
86.7k
         ((c >= 0x2070) && (c <= 0x218F)) ||
3258
86.7k
         ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259
86.7k
         ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260
86.7k
         ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261
86.7k
         ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262
87.5k
         ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263
87.5k
      return(NULL);
3264
87.5k
  }
3265
9.97k
  len += l;
3266
9.97k
  NEXTL(l);
3267
9.97k
  c = CUR_CHAR(l);
3268
159k
  while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269
159k
         (((c >= 'a') && (c <= 'z')) ||
3270
156k
          ((c >= 'A') && (c <= 'Z')) ||
3271
156k
          ((c >= '0') && (c <= '9')) || /* !start */
3272
156k
          (c == '_') || (c == ':') ||
3273
156k
          (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274
156k
          ((c >= 0xC0) && (c <= 0xD6)) ||
3275
156k
          ((c >= 0xD8) && (c <= 0xF6)) ||
3276
156k
          ((c >= 0xF8) && (c <= 0x2FF)) ||
3277
156k
          ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278
156k
          ((c >= 0x370) && (c <= 0x37D)) ||
3279
156k
          ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280
156k
          ((c >= 0x200C) && (c <= 0x200D)) ||
3281
156k
          ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282
156k
          ((c >= 0x2070) && (c <= 0x218F)) ||
3283
156k
          ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284
156k
          ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285
156k
          ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286
156k
          ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287
156k
          ((c >= 0x10000) && (c <= 0xEFFFF))
3288
156k
    )) {
3289
149k
            if (len <= INT_MAX - l)
3290
149k
          len += l;
3291
149k
      NEXTL(l);
3292
149k
      c = CUR_CHAR(l);
3293
149k
  }
3294
12.4k
    } else {
3295
12.4k
  if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3296
12.4k
      (!IS_LETTER(c) && (c != '_') &&
3297
11.0k
       (c != ':'))) {
3298
8.53k
      return(NULL);
3299
8.53k
  }
3300
3.95k
  len += l;
3301
3.95k
  NEXTL(l);
3302
3.95k
  c = CUR_CHAR(l);
3303
3304
12.3k
  while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3305
12.3k
         ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3306
11.2k
    (c == '.') || (c == '-') ||
3307
11.2k
    (c == '_') || (c == ':') ||
3308
11.2k
    (IS_COMBINING(c)) ||
3309
11.2k
    (IS_EXTENDER(c)))) {
3310
8.36k
            if (len <= INT_MAX - l)
3311
8.36k
          len += l;
3312
8.36k
      NEXTL(l);
3313
8.36k
      c = CUR_CHAR(l);
3314
8.36k
  }
3315
3.95k
    }
3316
13.9k
    if (len > maxLength) {
3317
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3318
0
        return(NULL);
3319
0
    }
3320
13.9k
    if (ctxt->input->cur - ctxt->input->base < len) {
3321
        /*
3322
         * There were a couple of bugs where PERefs lead to to a change
3323
         * of the buffer. Check the buffer size to avoid passing an invalid
3324
         * pointer to xmlDictLookup.
3325
         */
3326
0
        xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3327
0
                    "unexpected change of input buffer");
3328
0
        return (NULL);
3329
0
    }
3330
13.9k
    if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3331
195
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len);
3332
13.7k
    else
3333
13.7k
        ret = xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len);
3334
13.9k
    if (ret == NULL)
3335
0
        xmlErrMemory(ctxt);
3336
13.9k
    return(ret);
3337
13.9k
}
3338
3339
/**
3340
 * xmlParseName:
3341
 * @ctxt:  an XML parser context
3342
 *
3343
 * DEPRECATED: Internal function, don't use.
3344
 *
3345
 * parse an XML name.
3346
 *
3347
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3348
 *                  CombiningChar | Extender
3349
 *
3350
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3351
 *
3352
 * [6] Names ::= Name (#x20 Name)*
3353
 *
3354
 * Returns the Name parsed or NULL
3355
 */
3356
3357
const xmlChar *
3358
441k
xmlParseName(xmlParserCtxtPtr ctxt) {
3359
441k
    const xmlChar *in;
3360
441k
    const xmlChar *ret;
3361
441k
    size_t count = 0;
3362
441k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3363
0
                       XML_MAX_TEXT_LENGTH :
3364
441k
                       XML_MAX_NAME_LENGTH;
3365
3366
441k
    GROW;
3367
3368
    /*
3369
     * Accelerator for simple ASCII names
3370
     */
3371
441k
    in = ctxt->input->cur;
3372
441k
    if (((*in >= 0x61) && (*in <= 0x7A)) ||
3373
441k
  ((*in >= 0x41) && (*in <= 0x5A)) ||
3374
441k
  (*in == '_') || (*in == ':')) {
3375
336k
  in++;
3376
656k
  while (((*in >= 0x61) && (*in <= 0x7A)) ||
3377
656k
         ((*in >= 0x41) && (*in <= 0x5A)) ||
3378
656k
         ((*in >= 0x30) && (*in <= 0x39)) ||
3379
656k
         (*in == '_') || (*in == '-') ||
3380
656k
         (*in == ':') || (*in == '.'))
3381
320k
      in++;
3382
336k
  if ((*in > 0) && (*in < 0x80)) {
3383
331k
      count = in - ctxt->input->cur;
3384
331k
            if (count > maxLength) {
3385
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3386
0
                return(NULL);
3387
0
            }
3388
331k
      ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3389
331k
      ctxt->input->cur = in;
3390
331k
      ctxt->input->col += count;
3391
331k
      if (ret == NULL)
3392
0
          xmlErrMemory(ctxt);
3393
331k
      return(ret);
3394
331k
  }
3395
336k
    }
3396
    /* accelerator for special cases */
3397
109k
    return(xmlParseNameComplex(ctxt));
3398
441k
}
3399
3400
static xmlHashedString
3401
91.5k
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3402
91.5k
    xmlHashedString ret;
3403
91.5k
    int len = 0, l;
3404
91.5k
    int c;
3405
91.5k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3406
0
                    XML_MAX_TEXT_LENGTH :
3407
91.5k
                    XML_MAX_NAME_LENGTH;
3408
91.5k
    size_t startPosition = 0;
3409
3410
91.5k
    ret.name = NULL;
3411
91.5k
    ret.hashValue = 0;
3412
3413
    /*
3414
     * Handler for more complex cases
3415
     */
3416
91.5k
    startPosition = CUR_PTR - BASE_PTR;
3417
91.5k
    c = CUR_CHAR(l);
3418
91.5k
    if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3419
91.5k
  (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3420
86.2k
  return(ret);
3421
86.2k
    }
3422
3423
67.5k
    while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3424
67.5k
     (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3425
62.1k
        if (len <= INT_MAX - l)
3426
62.1k
      len += l;
3427
62.1k
  NEXTL(l);
3428
62.1k
  c = CUR_CHAR(l);
3429
62.1k
    }
3430
5.35k
    if (len > maxLength) {
3431
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3432
0
        return(ret);
3433
0
    }
3434
5.35k
    ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3435
5.35k
    if (ret.name == NULL)
3436
0
        xmlErrMemory(ctxt);
3437
5.35k
    return(ret);
3438
5.35k
}
3439
3440
/**
3441
 * xmlParseNCName:
3442
 * @ctxt:  an XML parser context
3443
 * @len:  length of the string parsed
3444
 *
3445
 * parse an XML name.
3446
 *
3447
 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3448
 *                      CombiningChar | Extender
3449
 *
3450
 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3451
 *
3452
 * Returns the Name parsed or NULL
3453
 */
3454
3455
static xmlHashedString
3456
179k
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3457
179k
    const xmlChar *in, *e;
3458
179k
    xmlHashedString ret;
3459
179k
    size_t count = 0;
3460
179k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3461
0
                       XML_MAX_TEXT_LENGTH :
3462
179k
                       XML_MAX_NAME_LENGTH;
3463
3464
179k
    ret.name = NULL;
3465
3466
    /*
3467
     * Accelerator for simple ASCII names
3468
     */
3469
179k
    in = ctxt->input->cur;
3470
179k
    e = ctxt->input->end;
3471
179k
    if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3472
179k
   ((*in >= 0x41) && (*in <= 0x5A)) ||
3473
179k
   (*in == '_')) && (in < e)) {
3474
89.0k
  in++;
3475
152k
  while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3476
152k
          ((*in >= 0x41) && (*in <= 0x5A)) ||
3477
152k
          ((*in >= 0x30) && (*in <= 0x39)) ||
3478
152k
          (*in == '_') || (*in == '-') ||
3479
152k
          (*in == '.')) && (in < e))
3480
63.1k
      in++;
3481
89.0k
  if (in >= e)
3482
25
      goto complex;
3483
89.0k
  if ((*in > 0) && (*in < 0x80)) {
3484
88.1k
      count = in - ctxt->input->cur;
3485
88.1k
            if (count > maxLength) {
3486
0
                xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3487
0
                return(ret);
3488
0
            }
3489
88.1k
      ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3490
88.1k
      ctxt->input->cur = in;
3491
88.1k
      ctxt->input->col += count;
3492
88.1k
      if (ret.name == NULL) {
3493
0
          xmlErrMemory(ctxt);
3494
0
      }
3495
88.1k
      return(ret);
3496
88.1k
  }
3497
89.0k
    }
3498
91.5k
complex:
3499
91.5k
    return(xmlParseNCNameComplex(ctxt));
3500
179k
}
3501
3502
/**
3503
 * xmlParseNameAndCompare:
3504
 * @ctxt:  an XML parser context
3505
 *
3506
 * parse an XML name and compares for match
3507
 * (specialized for endtag parsing)
3508
 *
3509
 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3510
 * and the name for mismatch
3511
 */
3512
3513
static const xmlChar *
3514
12.1k
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3515
12.1k
    register const xmlChar *cmp = other;
3516
12.1k
    register const xmlChar *in;
3517
12.1k
    const xmlChar *ret;
3518
3519
12.1k
    GROW;
3520
3521
12.1k
    in = ctxt->input->cur;
3522
34.1k
    while (*in != 0 && *in == *cmp) {
3523
22.0k
  ++in;
3524
22.0k
  ++cmp;
3525
22.0k
    }
3526
12.1k
    if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3527
  /* success */
3528
2.65k
  ctxt->input->col += in - ctxt->input->cur;
3529
2.65k
  ctxt->input->cur = in;
3530
2.65k
  return (const xmlChar*) 1;
3531
2.65k
    }
3532
    /* failure (or end of input buffer), check with full function */
3533
9.51k
    ret = xmlParseName (ctxt);
3534
    /* strings coming from the dictionary direct compare possible */
3535
9.51k
    if (ret == other) {
3536
781
  return (const xmlChar*) 1;
3537
781
    }
3538
8.73k
    return ret;
3539
9.51k
}
3540
3541
/**
3542
 * xmlParseStringName:
3543
 * @ctxt:  an XML parser context
3544
 * @str:  a pointer to the string pointer (IN/OUT)
3545
 *
3546
 * parse an XML name.
3547
 *
3548
 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3549
 *                  CombiningChar | Extender
3550
 *
3551
 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3552
 *
3553
 * [6] Names ::= Name (#x20 Name)*
3554
 *
3555
 * Returns the Name parsed or NULL. The @str pointer
3556
 * is updated to the current location in the string.
3557
 */
3558
3559
static xmlChar *
3560
218k
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3561
218k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3562
218k
    xmlChar *ret;
3563
218k
    const xmlChar *cur = *str;
3564
218k
    int len = 0, l;
3565
218k
    int c;
3566
218k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3567
0
                    XML_MAX_TEXT_LENGTH :
3568
218k
                    XML_MAX_NAME_LENGTH;
3569
3570
218k
    c = CUR_SCHAR(cur, l);
3571
218k
    if (!xmlIsNameStartChar(ctxt, c)) {
3572
1.15k
  return(NULL);
3573
1.15k
    }
3574
3575
217k
    COPY_BUF(buf, len, c);
3576
217k
    cur += l;
3577
217k
    c = CUR_SCHAR(cur, l);
3578
323k
    while (xmlIsNameChar(ctxt, c)) {
3579
106k
  COPY_BUF(buf, len, c);
3580
106k
  cur += l;
3581
106k
  c = CUR_SCHAR(cur, l);
3582
106k
  if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3583
      /*
3584
       * Okay someone managed to make a huge name, so he's ready to pay
3585
       * for the processing speed.
3586
       */
3587
310
      xmlChar *buffer;
3588
310
      int max = len * 2;
3589
3590
310
      buffer = (xmlChar *) xmlMallocAtomic(max);
3591
310
      if (buffer == NULL) {
3592
0
          xmlErrMemory(ctxt);
3593
0
    return(NULL);
3594
0
      }
3595
310
      memcpy(buffer, buf, len);
3596
10.1k
      while (xmlIsNameChar(ctxt, c)) {
3597
9.79k
    if (len + 10 > max) {
3598
237
        xmlChar *tmp;
3599
3600
237
        max *= 2;
3601
237
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3602
237
        if (tmp == NULL) {
3603
0
      xmlErrMemory(ctxt);
3604
0
      xmlFree(buffer);
3605
0
      return(NULL);
3606
0
        }
3607
237
        buffer = tmp;
3608
237
    }
3609
9.79k
    COPY_BUF(buffer, len, c);
3610
9.79k
    cur += l;
3611
9.79k
    c = CUR_SCHAR(cur, l);
3612
9.79k
                if (len > maxLength) {
3613
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3614
0
                    xmlFree(buffer);
3615
0
                    return(NULL);
3616
0
                }
3617
9.79k
      }
3618
310
      buffer[len] = 0;
3619
310
      *str = cur;
3620
310
      return(buffer);
3621
310
  }
3622
106k
    }
3623
216k
    if (len > maxLength) {
3624
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3625
0
        return(NULL);
3626
0
    }
3627
216k
    *str = cur;
3628
216k
    ret = xmlStrndup(buf, len);
3629
216k
    if (ret == NULL)
3630
0
        xmlErrMemory(ctxt);
3631
216k
    return(ret);
3632
216k
}
3633
3634
/**
3635
 * xmlParseNmtoken:
3636
 * @ctxt:  an XML parser context
3637
 *
3638
 * DEPRECATED: Internal function, don't use.
3639
 *
3640
 * parse an XML Nmtoken.
3641
 *
3642
 * [7] Nmtoken ::= (NameChar)+
3643
 *
3644
 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3645
 *
3646
 * Returns the Nmtoken parsed or NULL
3647
 */
3648
3649
xmlChar *
3650
31.4k
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3651
31.4k
    xmlChar buf[XML_MAX_NAMELEN + 5];
3652
31.4k
    xmlChar *ret;
3653
31.4k
    int len = 0, l;
3654
31.4k
    int c;
3655
31.4k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3656
0
                    XML_MAX_TEXT_LENGTH :
3657
31.4k
                    XML_MAX_NAME_LENGTH;
3658
3659
31.4k
    c = CUR_CHAR(l);
3660
3661
91.0k
    while (xmlIsNameChar(ctxt, c)) {
3662
60.2k
  COPY_BUF(buf, len, c);
3663
60.2k
  NEXTL(l);
3664
60.2k
  c = CUR_CHAR(l);
3665
60.2k
  if (len >= XML_MAX_NAMELEN) {
3666
      /*
3667
       * Okay someone managed to make a huge token, so he's ready to pay
3668
       * for the processing speed.
3669
       */
3670
589
      xmlChar *buffer;
3671
589
      int max = len * 2;
3672
3673
589
      buffer = (xmlChar *) xmlMallocAtomic(max);
3674
589
      if (buffer == NULL) {
3675
0
          xmlErrMemory(ctxt);
3676
0
    return(NULL);
3677
0
      }
3678
589
      memcpy(buffer, buf, len);
3679
35.3k
      while (xmlIsNameChar(ctxt, c)) {
3680
34.7k
    if (len + 10 > max) {
3681
374
        xmlChar *tmp;
3682
3683
374
        max *= 2;
3684
374
        tmp = (xmlChar *) xmlRealloc(buffer, max);
3685
374
        if (tmp == NULL) {
3686
0
      xmlErrMemory(ctxt);
3687
0
      xmlFree(buffer);
3688
0
      return(NULL);
3689
0
        }
3690
374
        buffer = tmp;
3691
374
    }
3692
34.7k
    COPY_BUF(buffer, len, c);
3693
34.7k
                if (len > maxLength) {
3694
0
                    xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3695
0
                    xmlFree(buffer);
3696
0
                    return(NULL);
3697
0
                }
3698
34.7k
    NEXTL(l);
3699
34.7k
    c = CUR_CHAR(l);
3700
34.7k
      }
3701
589
      buffer[len] = 0;
3702
589
      return(buffer);
3703
589
  }
3704
60.2k
    }
3705
30.8k
    if (len == 0)
3706
1.50k
        return(NULL);
3707
29.3k
    if (len > maxLength) {
3708
0
        xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3709
0
        return(NULL);
3710
0
    }
3711
29.3k
    ret = xmlStrndup(buf, len);
3712
29.3k
    if (ret == NULL)
3713
0
        xmlErrMemory(ctxt);
3714
29.3k
    return(ret);
3715
29.3k
}
3716
3717
/**
3718
 * xmlExpandPEsInEntityValue:
3719
 * @ctxt:  parser context
3720
 * @buf:  string buffer
3721
 * @str:  entity value
3722
 * @length:  size of entity value
3723
 * @depth:  nesting depth
3724
 *
3725
 * Validate an entity value and expand parameter entities.
3726
 */
3727
static void
3728
xmlExpandPEsInEntityValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
3729
12.8k
                          const xmlChar *str, int length, int depth) {
3730
12.8k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3731
12.8k
    const xmlChar *end, *chunk;
3732
12.8k
    int c, l;
3733
3734
12.8k
    if (str == NULL)
3735
0
        return;
3736
3737
12.8k
    depth += 1;
3738
12.8k
    if (depth > maxDepth) {
3739
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3740
0
                       "Maximum entity nesting depth exceeded");
3741
0
  return;
3742
0
    }
3743
3744
12.8k
    end = str + length;
3745
12.8k
    chunk = str;
3746
3747
295k
    while ((str < end) && (!PARSER_STOPPED(ctxt))) {
3748
288k
        c = *str;
3749
3750
288k
        if (c >= 0x80) {
3751
89.8k
            l = xmlUTF8MultibyteLen(ctxt, str,
3752
89.8k
                    "invalid character in entity value\n");
3753
89.8k
            if (l == 0) {
3754
6.95k
                if (chunk < str)
3755
1.35k
                    xmlSBufAddString(buf, chunk, str - chunk);
3756
6.95k
                xmlSBufAddReplChar(buf);
3757
6.95k
                str += 1;
3758
6.95k
                chunk = str;
3759
82.9k
            } else {
3760
82.9k
                str += l;
3761
82.9k
            }
3762
198k
        } else if (c == '&') {
3763
17.0k
            if (str[1] == '#') {
3764
6.28k
                if (chunk < str)
3765
3.95k
                    xmlSBufAddString(buf, chunk, str - chunk);
3766
3767
6.28k
                c = xmlParseStringCharRef(ctxt, &str);
3768
6.28k
                if (c == 0)
3769
2.64k
                    return;
3770
3771
3.64k
                xmlSBufAddChar(buf, c);
3772
3773
3.64k
                chunk = str;
3774
10.7k
            } else {
3775
10.7k
                xmlChar *name;
3776
3777
                /*
3778
                 * General entity references are checked for
3779
                 * syntactic validity.
3780
                 */
3781
10.7k
                str++;
3782
10.7k
                name = xmlParseStringName(ctxt, &str);
3783
3784
10.7k
                if ((name == NULL) || (*str++ != ';')) {
3785
1.35k
                    xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
3786
1.35k
                            "EntityValue: '&' forbidden except for entities "
3787
1.35k
                            "references\n");
3788
1.35k
                    xmlFree(name);
3789
1.35k
                    return;
3790
1.35k
                }
3791
3792
9.43k
                xmlFree(name);
3793
9.43k
            }
3794
181k
        } else if (c == '%') {
3795
2.49k
            xmlEntityPtr ent;
3796
3797
2.49k
            if (chunk < str)
3798
1.71k
                xmlSBufAddString(buf, chunk, str - chunk);
3799
3800
2.49k
            ent = xmlParseStringPEReference(ctxt, &str);
3801
2.49k
            if (ent == NULL)
3802
2.23k
                return;
3803
3804
260
            if (!PARSER_EXTERNAL(ctxt)) {
3805
260
                xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
3806
260
                return;
3807
260
            }
3808
3809
0
            if (ent->content == NULL) {
3810
                /*
3811
                 * Note: external parsed entities will not be loaded,
3812
                 * it is not required for a non-validating parser to
3813
                 * complete external PEReferences coming from the
3814
                 * internal subset
3815
                 */
3816
0
                if (((ctxt->options & XML_PARSE_NO_XXE) == 0) &&
3817
0
                    ((ctxt->replaceEntities) ||
3818
0
                     (ctxt->validate))) {
3819
0
                    xmlLoadEntityContent(ctxt, ent);
3820
0
                } else {
3821
0
                    xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
3822
0
                                  "not validating will not read content for "
3823
0
                                  "PE entity %s\n", ent->name, NULL);
3824
0
                }
3825
0
            }
3826
3827
            /*
3828
             * TODO: Skip if ent->content is still NULL.
3829
             */
3830
3831
0
            if (xmlParserEntityCheck(ctxt, ent->length))
3832
0
                return;
3833
3834
0
            if (ent->flags & XML_ENT_EXPANDING) {
3835
0
                xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3836
0
                xmlHaltParser(ctxt);
3837
0
                return;
3838
0
            }
3839
3840
0
            ent->flags |= XML_ENT_EXPANDING;
3841
0
            xmlExpandPEsInEntityValue(ctxt, buf, ent->content, ent->length,
3842
0
                                      depth);
3843
0
            ent->flags &= ~XML_ENT_EXPANDING;
3844
3845
0
            chunk = str;
3846
179k
        } else {
3847
            /* Normal ASCII char */
3848
179k
            if (!IS_BYTE_CHAR(c)) {
3849
2.21k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3850
2.21k
                        "invalid character in entity value\n");
3851
2.21k
                if (chunk < str)
3852
439
                    xmlSBufAddString(buf, chunk, str - chunk);
3853
2.21k
                xmlSBufAddReplChar(buf);
3854
2.21k
                str += 1;
3855
2.21k
                chunk = str;
3856
177k
            } else {
3857
177k
                str += 1;
3858
177k
            }
3859
179k
        }
3860
288k
    }
3861
3862
6.35k
    if (chunk < str)
3863
5.88k
        xmlSBufAddString(buf, chunk, str - chunk);
3864
3865
6.35k
    return;
3866
12.8k
}
3867
3868
/**
3869
 * xmlParseEntityValue:
3870
 * @ctxt:  an XML parser context
3871
 * @orig:  if non-NULL store a copy of the original entity value
3872
 *
3873
 * DEPRECATED: Internal function, don't use.
3874
 *
3875
 * parse a value for ENTITY declarations
3876
 *
3877
 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3878
 *                 "'" ([^%&'] | PEReference | Reference)* "'"
3879
 *
3880
 * Returns the EntityValue parsed with reference substituted or NULL
3881
 */
3882
xmlChar *
3883
13.8k
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3884
13.8k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3885
0
                         XML_MAX_HUGE_LENGTH :
3886
13.8k
                         XML_MAX_TEXT_LENGTH;
3887
13.8k
    xmlSBuf buf;
3888
13.8k
    const xmlChar *start;
3889
13.8k
    int quote, length;
3890
3891
13.8k
    xmlSBufInit(&buf, maxLength);
3892
3893
13.8k
    GROW;
3894
3895
13.8k
    quote = CUR;
3896
13.8k
    if ((quote != '"') && (quote != '\'')) {
3897
0
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
3898
0
  return(NULL);
3899
0
    }
3900
13.8k
    CUR_PTR++;
3901
3902
13.8k
    length = 0;
3903
3904
    /*
3905
     * Copy raw content of the entity into a buffer
3906
     */
3907
582k
    while (1) {
3908
582k
        int c;
3909
3910
582k
        if (PARSER_STOPPED(ctxt))
3911
0
            goto error;
3912
3913
582k
        if (CUR_PTR >= ctxt->input->end) {
3914
957
            xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3915
957
            goto error;
3916
957
        }
3917
3918
581k
        c = CUR;
3919
3920
581k
        if (c == 0) {
3921
7
            xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3922
7
                    "invalid character in entity value\n");
3923
7
            goto error;
3924
7
        }
3925
581k
        if (c == quote)
3926
12.8k
            break;
3927
569k
        NEXTL(1);
3928
569k
        length += 1;
3929
3930
        /*
3931
         * TODO: Check growth threshold
3932
         */
3933
569k
        if (ctxt->input->end - CUR_PTR < 10)
3934
23.5k
            GROW;
3935
569k
    }
3936
3937
12.8k
    start = CUR_PTR - length;
3938
3939
12.8k
    if (orig != NULL) {
3940
12.8k
        *orig = xmlStrndup(start, length);
3941
12.8k
        if (*orig == NULL)
3942
0
            xmlErrMemory(ctxt);
3943
12.8k
    }
3944
3945
12.8k
    xmlExpandPEsInEntityValue(ctxt, &buf, start, length, ctxt->inputNr);
3946
3947
12.8k
    NEXTL(1);
3948
3949
12.8k
    return(xmlSBufFinish(&buf, NULL, ctxt, "entity length too long"));
3950
3951
964
error:
3952
964
    xmlSBufCleanup(&buf, ctxt, "entity length too long");
3953
964
    return(NULL);
3954
13.8k
}
3955
3956
/**
3957
 * xmlCheckEntityInAttValue:
3958
 * @ctxt:  parser context
3959
 * @pent:  entity
3960
 * @depth:  nesting depth
3961
 *
3962
 * Check an entity reference in an attribute value for validity
3963
 * without expanding it.
3964
 */
3965
static void
3966
868
xmlCheckEntityInAttValue(xmlParserCtxtPtr ctxt, xmlEntityPtr pent, int depth) {
3967
868
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
3968
868
    const xmlChar *str;
3969
868
    unsigned long expandedSize = pent->length;
3970
868
    int c, flags;
3971
3972
868
    depth += 1;
3973
868
    if (depth > maxDepth) {
3974
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
3975
0
                       "Maximum entity nesting depth exceeded");
3976
0
  return;
3977
0
    }
3978
3979
868
    if (pent->flags & XML_ENT_EXPANDING) {
3980
9
        xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
3981
9
        xmlHaltParser(ctxt);
3982
9
        return;
3983
9
    }
3984
3985
    /*
3986
     * If we're parsing a default attribute value in DTD content,
3987
     * the entity might reference other entities which weren't
3988
     * defined yet, so the check isn't reliable.
3989
     */
3990
859
    if (ctxt->inSubset == 0)
3991
823
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
3992
36
    else
3993
36
        flags = XML_ENT_VALIDATED;
3994
3995
859
    str = pent->content;
3996
859
    if (str == NULL)
3997
0
        goto done;
3998
3999
    /*
4000
     * Note that entity values are already validated. We only check
4001
     * for illegal less-than signs and compute the expanded size
4002
     * of the entity. No special handling for multi-byte characters
4003
     * is needed.
4004
     */
4005
49.2k
    while (!PARSER_STOPPED(ctxt)) {
4006
49.2k
        c = *str;
4007
4008
49.2k
  if (c != '&') {
4009
43.2k
            if (c == 0)
4010
838
                break;
4011
4012
42.3k
            if (c == '<')
4013
496
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4014
496
                        "'<' in entity '%s' is not allowed in attributes "
4015
496
                        "values\n", pent->name);
4016
4017
42.3k
            str += 1;
4018
42.3k
        } else if (str[1] == '#') {
4019
293
            int val;
4020
4021
293
      val = xmlParseStringCharRef(ctxt, &str);
4022
293
      if (val == 0) {
4023
2
                pent->content[0] = 0;
4024
2
                break;
4025
2
            }
4026
5.74k
  } else {
4027
5.74k
            xmlChar *name;
4028
5.74k
            xmlEntityPtr ent;
4029
4030
5.74k
      name = xmlParseStringEntityRef(ctxt, &str);
4031
5.74k
      if (name == NULL) {
4032
5
                pent->content[0] = 0;
4033
5
                break;
4034
5
            }
4035
4036
5.73k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4037
5.73k
            xmlFree(name);
4038
4039
5.73k
            if ((ent != NULL) &&
4040
5.73k
                (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4041
4.57k
                if ((ent->flags & flags) != flags) {
4042
297
                    pent->flags |= XML_ENT_EXPANDING;
4043
297
                    xmlCheckEntityInAttValue(ctxt, ent, depth);
4044
297
                    pent->flags &= ~XML_ENT_EXPANDING;
4045
297
                }
4046
4047
4.57k
                xmlSaturatedAdd(&expandedSize, ent->expandedSize);
4048
4.57k
                xmlSaturatedAdd(&expandedSize, XML_ENT_FIXED_COST);
4049
4.57k
            }
4050
5.73k
        }
4051
49.2k
    }
4052
4053
859
done:
4054
859
    if (ctxt->inSubset == 0)
4055
823
        pent->expandedSize = expandedSize;
4056
4057
859
    pent->flags |= flags;
4058
859
}
4059
4060
/**
4061
 * xmlExpandEntityInAttValue:
4062
 * @ctxt:  parser context
4063
 * @buf:  string buffer
4064
 * @str:  entity or attribute value
4065
 * @pent:  entity for entity value, NULL for attribute values
4066
 * @normalize:  whether to collapse whitespace
4067
 * @inSpace:  whitespace state
4068
 * @depth:  nesting depth
4069
 * @check:  whether to check for amplification
4070
 *
4071
 * Expand general entity references in an entity or attribute value.
4072
 * Perform attribute value normalization.
4073
 */
4074
static void
4075
xmlExpandEntityInAttValue(xmlParserCtxtPtr ctxt, xmlSBuf *buf,
4076
                          const xmlChar *str, xmlEntityPtr pent, int normalize,
4077
156k
                          int *inSpace, int depth, int check) {
4078
156k
    int maxDepth = (ctxt->options & XML_PARSE_HUGE) ? 40 : 20;
4079
156k
    int c, chunkSize;
4080
4081
156k
    if (str == NULL)
4082
0
        return;
4083
4084
156k
    depth += 1;
4085
156k
    if (depth > maxDepth) {
4086
0
  xmlFatalErrMsg(ctxt, XML_ERR_RESOURCE_LIMIT,
4087
0
                       "Maximum entity nesting depth exceeded");
4088
0
  return;
4089
0
    }
4090
4091
156k
    if (pent != NULL) {
4092
123k
        if (pent->flags & XML_ENT_EXPANDING) {
4093
0
            xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
4094
0
            xmlHaltParser(ctxt);
4095
0
            return;
4096
0
        }
4097
4098
123k
        if (check) {
4099
0
            if (xmlParserEntityCheck(ctxt, pent->length))
4100
0
                return;
4101
0
        }
4102
123k
    }
4103
4104
156k
    chunkSize = 0;
4105
4106
    /*
4107
     * Note that entity values are already validated. No special
4108
     * handling for multi-byte characters is needed.
4109
     */
4110
27.1M
    while (!PARSER_STOPPED(ctxt)) {
4111
27.1M
        c = *str;
4112
4113
27.1M
  if (c != '&') {
4114
26.9M
            if (c == 0)
4115
150k
                break;
4116
4117
            /*
4118
             * If this function is called without an entity, it is used to
4119
             * expand entities in an attribute content where less-than was
4120
             * already unscaped and is allowed.
4121
             */
4122
26.7M
            if ((pent != NULL) && (c == '<')) {
4123
5.86k
                xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
4124
5.86k
                        "'<' in entity '%s' is not allowed in attributes "
4125
5.86k
                        "values\n", pent->name);
4126
5.86k
                break;
4127
5.86k
            }
4128
4129
26.7M
            if (c <= 0x20) {
4130
614k
                if ((normalize) && (*inSpace)) {
4131
                    /* Skip char */
4132
0
                    if (chunkSize > 0) {
4133
0
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4134
0
                        chunkSize = 0;
4135
0
                    }
4136
614k
                } else if (c < 0x20) {
4137
137k
                    if (chunkSize > 0) {
4138
134k
                        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4139
134k
                        chunkSize = 0;
4140
134k
                    }
4141
4142
137k
                    xmlSBufAddCString(buf, " ", 1);
4143
477k
                } else {
4144
477k
                    chunkSize += 1;
4145
477k
                }
4146
4147
614k
                *inSpace = 1;
4148
26.1M
            } else {
4149
26.1M
                chunkSize += 1;
4150
26.1M
                *inSpace = 0;
4151
26.1M
            }
4152
4153
26.7M
            str += 1;
4154
26.7M
        } else if (str[1] == '#') {
4155
17.7k
            int val;
4156
4157
17.7k
            if (chunkSize > 0) {
4158
12.2k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4159
12.2k
                chunkSize = 0;
4160
12.2k
            }
4161
4162
17.7k
      val = xmlParseStringCharRef(ctxt, &str);
4163
17.7k
      if (val == 0) {
4164
0
                if (pent != NULL)
4165
0
                    pent->content[0] = 0;
4166
0
                break;
4167
0
            }
4168
4169
17.7k
            if (val == ' ') {
4170
922
                if ((!normalize) || (!*inSpace))
4171
922
                    xmlSBufAddCString(buf, " ", 1);
4172
922
                *inSpace = 1;
4173
16.7k
            } else {
4174
16.7k
                xmlSBufAddChar(buf, val);
4175
16.7k
                *inSpace = 0;
4176
16.7k
            }
4177
199k
  } else {
4178
199k
            xmlChar *name;
4179
199k
            xmlEntityPtr ent;
4180
4181
199k
            if (chunkSize > 0) {
4182
108k
                xmlSBufAddString(buf, str - chunkSize, chunkSize);
4183
108k
                chunkSize = 0;
4184
108k
            }
4185
4186
199k
      name = xmlParseStringEntityRef(ctxt, &str);
4187
199k
            if (name == NULL) {
4188
1
                if (pent != NULL)
4189
0
                    pent->content[0] = 0;
4190
1
                break;
4191
1
            }
4192
4193
199k
            ent = xmlLookupGeneralEntity(ctxt, name, /* inAttr */ 1);
4194
199k
            xmlFree(name);
4195
4196
199k
      if ((ent != NULL) &&
4197
199k
    (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4198
57.0k
    if (ent->content == NULL) {
4199
0
        xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
4200
0
          "predefined entity has no content\n");
4201
0
                    break;
4202
0
                }
4203
4204
57.0k
                xmlSBufAddString(buf, ent->content, ent->length);
4205
4206
57.0k
                *inSpace = 0;
4207
142k
      } else if ((ent != NULL) && (ent->content != NULL)) {
4208
123k
                if (pent != NULL)
4209
116k
                    pent->flags |= XML_ENT_EXPANDING;
4210
123k
    xmlExpandEntityInAttValue(ctxt, buf, ent->content, ent,
4211
123k
                                          normalize, inSpace, depth, check);
4212
123k
                if (pent != NULL)
4213
116k
                    pent->flags &= ~XML_ENT_EXPANDING;
4214
123k
      }
4215
199k
        }
4216
27.1M
    }
4217
4218
156k
    if (chunkSize > 0)
4219
152k
        xmlSBufAddString(buf, str - chunkSize, chunkSize);
4220
4221
156k
    return;
4222
156k
}
4223
4224
/**
4225
 * xmlExpandEntitiesInAttValue:
4226
 * @ctxt:  parser context
4227
 * @str:  entity or attribute value
4228
 * @normalize:  whether to collapse whitespace
4229
 *
4230
 * Expand general entity references in an entity or attribute value.
4231
 * Perform attribute value normalization.
4232
 *
4233
 * Returns the expanded attribtue value.
4234
 */
4235
xmlChar *
4236
xmlExpandEntitiesInAttValue(xmlParserCtxtPtr ctxt, const xmlChar *str,
4237
32.7k
                            int normalize) {
4238
32.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4239
0
                         XML_MAX_HUGE_LENGTH :
4240
32.7k
                         XML_MAX_TEXT_LENGTH;
4241
32.7k
    xmlSBuf buf;
4242
32.7k
    int inSpace = 1;
4243
4244
32.7k
    xmlSBufInit(&buf, maxLength);
4245
4246
32.7k
    xmlExpandEntityInAttValue(ctxt, &buf, str, NULL, normalize, &inSpace,
4247
32.7k
                              ctxt->inputNr, /* check */ 0);
4248
4249
32.7k
    if ((normalize) && (inSpace) && (buf.size > 0))
4250
0
        buf.size--;
4251
4252
32.7k
    return(xmlSBufFinish(&buf, NULL, ctxt, "AttValue length too long"));
4253
32.7k
}
4254
4255
/**
4256
 * xmlParseAttValueInternal:
4257
 * @ctxt:  an XML parser context
4258
 * @len:  attribute len result
4259
 * @alloc:  whether the attribute was reallocated as a new string
4260
 * @normalize:  if 1 then further non-CDATA normalization must be done
4261
 *
4262
 * parse a value for an attribute.
4263
 * NOTE: if no normalization is needed, the routine will return pointers
4264
 *       directly from the data buffer.
4265
 *
4266
 * 3.3.3 Attribute-Value Normalization:
4267
 * Before the value of an attribute is passed to the application or
4268
 * checked for validity, the XML processor must normalize it as follows:
4269
 * - a character reference is processed by appending the referenced
4270
 *   character to the attribute value
4271
 * - an entity reference is processed by recursively processing the
4272
 *   replacement text of the entity
4273
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4274
 *   appending #x20 to the normalized value, except that only a single
4275
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4276
 *   parsed entity or the literal entity value of an internal parsed entity
4277
 * - other characters are processed by appending them to the normalized value
4278
 * If the declared value is not CDATA, then the XML processor must further
4279
 * process the normalized attribute value by discarding any leading and
4280
 * trailing space (#x20) characters, and by replacing sequences of space
4281
 * (#x20) characters by a single space (#x20) character.
4282
 * All attributes for which no declaration has been read should be treated
4283
 * by a non-validating parser as if declared CDATA.
4284
 *
4285
 * Returns the AttValue parsed or NULL. The value has to be freed by the
4286
 *     caller if it was copied, this can be detected by val[*len] == 0.
4287
 */
4288
static xmlChar *
4289
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *attlen, int *alloc,
4290
57.7k
                         int normalize) {
4291
57.7k
    unsigned maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4292
0
                         XML_MAX_HUGE_LENGTH :
4293
57.7k
                         XML_MAX_TEXT_LENGTH;
4294
57.7k
    xmlSBuf buf;
4295
57.7k
    xmlChar *ret;
4296
57.7k
    int c, l, quote, flags, chunkSize;
4297
57.7k
    int inSpace = 1;
4298
4299
57.7k
    xmlSBufInit(&buf, maxLength);
4300
4301
57.7k
    GROW;
4302
4303
57.7k
    quote = CUR;
4304
57.7k
    if ((quote != '"') && (quote != '\'')) {
4305
4.31k
  xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4306
4.31k
  return(NULL);
4307
4.31k
    }
4308
53.4k
    NEXTL(1);
4309
4310
53.4k
    if (ctxt->inSubset == 0)
4311
22.8k
        flags = XML_ENT_CHECKED | XML_ENT_VALIDATED;
4312
30.6k
    else
4313
30.6k
        flags = XML_ENT_VALIDATED;
4314
4315
53.4k
    inSpace = 1;
4316
53.4k
    chunkSize = 0;
4317
4318
904k
    while (1) {
4319
904k
        if (PARSER_STOPPED(ctxt))
4320
9
            goto error;
4321
4322
904k
        if (CUR_PTR >= ctxt->input->end) {
4323
9.04k
            xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4324
9.04k
                           "AttValue: ' expected\n");
4325
9.04k
            goto error;
4326
9.04k
        }
4327
4328
        /*
4329
         * TODO: Check growth threshold
4330
         */
4331
895k
        if (ctxt->input->end - CUR_PTR < 10)
4332
104k
            GROW;
4333
4334
895k
        c = CUR;
4335
4336
895k
        if (c >= 0x80) {
4337
126k
            l = xmlUTF8MultibyteLen(ctxt, CUR_PTR,
4338
126k
                    "invalid character in attribute value\n");
4339
126k
            if (l == 0) {
4340
7.28k
                if (chunkSize > 0) {
4341
1.35k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4342
1.35k
                    chunkSize = 0;
4343
1.35k
                }
4344
7.28k
                xmlSBufAddReplChar(&buf);
4345
7.28k
                NEXTL(1);
4346
119k
            } else {
4347
119k
                chunkSize += l;
4348
119k
                NEXTL(l);
4349
119k
            }
4350
4351
126k
            inSpace = 0;
4352
768k
        } else if (c != '&') {
4353
678k
            if (c > 0x20) {
4354
500k
                if (c == quote)
4355
43.3k
                    break;
4356
4357
457k
                if (c == '<')
4358
36.1k
                    xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4359
4360
457k
                chunkSize += 1;
4361
457k
                inSpace = 0;
4362
457k
            } else if (!IS_BYTE_CHAR(c)) {
4363
4.62k
                xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4364
4.62k
                        "invalid character in attribute value\n");
4365
4.62k
                if (chunkSize > 0) {
4366
1.01k
                    xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4367
1.01k
                    chunkSize = 0;
4368
1.01k
                }
4369
4.62k
                xmlSBufAddReplChar(&buf);
4370
4.62k
                inSpace = 0;
4371
173k
            } else {
4372
                /* Whitespace */
4373
173k
                if ((normalize) && (inSpace)) {
4374
                    /* Skip char */
4375
1.49k
                    if (chunkSize > 0) {
4376
660
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4377
660
                        chunkSize = 0;
4378
660
                    }
4379
171k
                } else if (c < 0x20) {
4380
                    /* Convert to space */
4381
7.27k
                    if (chunkSize > 0) {
4382
3.50k
                        xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4383
3.50k
                        chunkSize = 0;
4384
3.50k
                    }
4385
4386
7.27k
                    xmlSBufAddCString(&buf, " ", 1);
4387
164k
                } else {
4388
164k
                    chunkSize += 1;
4389
164k
                }
4390
4391
173k
                inSpace = 1;
4392
4393
173k
                if ((c == 0xD) && (NXT(1) == 0xA))
4394
243
                    CUR_PTR++;
4395
173k
            }
4396
4397
635k
            NEXTL(1);
4398
635k
        } else if (NXT(1) == '#') {
4399
9.17k
            int val;
4400
4401
9.17k
            if (chunkSize > 0) {
4402
6.45k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4403
6.45k
                chunkSize = 0;
4404
6.45k
            }
4405
4406
9.17k
            val = xmlParseCharRef(ctxt);
4407
9.17k
            if (val == 0)
4408
1.00k
                goto error;
4409
4410
8.17k
            if ((val == '&') && (!ctxt->replaceEntities)) {
4411
                /*
4412
                 * The reparsing will be done in xmlStringGetNodeList()
4413
                 * called by the attribute() function in SAX.c
4414
                 */
4415
1.09k
                xmlSBufAddCString(&buf, "&#38;", 5);
4416
1.09k
                inSpace = 0;
4417
7.08k
            } else if (val == ' ') {
4418
1.21k
                if ((!normalize) || (!inSpace))
4419
1.14k
                    xmlSBufAddCString(&buf, " ", 1);
4420
1.21k
                inSpace = 1;
4421
5.86k
            } else {
4422
5.86k
                xmlSBufAddChar(&buf, val);
4423
5.86k
                inSpace = 0;
4424
5.86k
            }
4425
81.0k
        } else {
4426
81.0k
            const xmlChar *name;
4427
81.0k
            xmlEntityPtr ent;
4428
4429
81.0k
            if (chunkSize > 0) {
4430
21.5k
                xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4431
21.5k
                chunkSize = 0;
4432
21.5k
            }
4433
4434
81.0k
            name = xmlParseEntityRefInternal(ctxt);
4435
81.0k
            if (name == NULL) {
4436
                /*
4437
                 * Probably a literal '&' which wasn't escaped.
4438
                 * TODO: Handle gracefully in recovery mode.
4439
                 */
4440
4.85k
                continue;
4441
4.85k
            }
4442
4443
76.2k
            ent = xmlLookupGeneralEntity(ctxt, name, /* isAttr */ 1);
4444
76.2k
            if (ent == NULL)
4445
14.2k
                continue;
4446
4447
61.9k
            if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
4448
5.29k
                if ((ent->content[0] == '&') && (!ctxt->replaceEntities))
4449
4.07k
                    xmlSBufAddCString(&buf, "&#38;", 5);
4450
1.21k
                else
4451
1.21k
                    xmlSBufAddString(&buf, ent->content, ent->length);
4452
5.29k
                inSpace = 0;
4453
56.6k
            } else if (ctxt->replaceEntities) {
4454
0
                xmlExpandEntityInAttValue(ctxt, &buf, ent->content, ent,
4455
0
                                          normalize, &inSpace, ctxt->inputNr,
4456
0
                                          /* check */ 1);
4457
56.6k
            } else {
4458
56.6k
                if ((ent->flags & flags) != flags)
4459
571
                    xmlCheckEntityInAttValue(ctxt, ent, ctxt->inputNr);
4460
4461
56.6k
                if (xmlParserEntityCheck(ctxt, ent->expandedSize)) {
4462
16
                    ent->content[0] = 0;
4463
16
                    goto error;
4464
16
                }
4465
4466
                /*
4467
                 * Just output the reference
4468
                 */
4469
56.6k
                xmlSBufAddCString(&buf, "&", 1);
4470
56.6k
                xmlSBufAddString(&buf, ent->name, xmlStrlen(ent->name));
4471
56.6k
                xmlSBufAddCString(&buf, ";", 1);
4472
4473
56.6k
                inSpace = 0;
4474
56.6k
            }
4475
61.9k
  }
4476
895k
    }
4477
4478
43.3k
    if ((buf.mem == NULL) && (alloc != NULL)) {
4479
13.1k
        ret = (xmlChar *) CUR_PTR - chunkSize;
4480
4481
13.1k
        if (attlen != NULL)
4482
13.1k
            *attlen = chunkSize;
4483
13.1k
        if ((normalize) && (inSpace) && (chunkSize > 0))
4484
88
            *attlen -= 1;
4485
13.1k
        *alloc = 0;
4486
4487
        /* Report potential error */
4488
13.1k
        xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4489
30.1k
    } else {
4490
30.1k
        if (chunkSize > 0)
4491
21.8k
            xmlSBufAddString(&buf, CUR_PTR - chunkSize, chunkSize);
4492
4493
30.1k
        if ((normalize) && (inSpace) && (buf.size > 0))
4494
164
            buf.size--;
4495
4496
30.1k
        ret = xmlSBufFinish(&buf, attlen, ctxt, "AttValue length too long");
4497
4498
30.1k
        if (ret != NULL) {
4499
30.1k
            if (attlen != NULL)
4500
2.08k
                *attlen = buf.size;
4501
30.1k
            if (alloc != NULL)
4502
2.08k
                *alloc = 1;
4503
30.1k
        }
4504
30.1k
    }
4505
4506
43.3k
    NEXTL(1);
4507
4508
43.3k
    return(ret);
4509
4510
10.0k
error:
4511
10.0k
    xmlSBufCleanup(&buf, ctxt, "AttValue length too long");
4512
10.0k
    return(NULL);
4513
53.4k
}
4514
4515
/**
4516
 * xmlParseAttValue:
4517
 * @ctxt:  an XML parser context
4518
 *
4519
 * DEPRECATED: Internal function, don't use.
4520
 *
4521
 * parse a value for an attribute
4522
 * Note: the parser won't do substitution of entities here, this
4523
 * will be handled later in xmlStringGetNodeList
4524
 *
4525
 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4526
 *                   "'" ([^<&'] | Reference)* "'"
4527
 *
4528
 * 3.3.3 Attribute-Value Normalization:
4529
 * Before the value of an attribute is passed to the application or
4530
 * checked for validity, the XML processor must normalize it as follows:
4531
 * - a character reference is processed by appending the referenced
4532
 *   character to the attribute value
4533
 * - an entity reference is processed by recursively processing the
4534
 *   replacement text of the entity
4535
 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4536
 *   appending #x20 to the normalized value, except that only a single
4537
 *   #x20 is appended for a "#xD#xA" sequence that is part of an external
4538
 *   parsed entity or the literal entity value of an internal parsed entity
4539
 * - other characters are processed by appending them to the normalized value
4540
 * If the declared value is not CDATA, then the XML processor must further
4541
 * process the normalized attribute value by discarding any leading and
4542
 * trailing space (#x20) characters, and by replacing sequences of space
4543
 * (#x20) characters by a single space (#x20) character.
4544
 * All attributes for which no declaration has been read should be treated
4545
 * by a non-validating parser as if declared CDATA.
4546
 *
4547
 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4548
 */
4549
4550
4551
xmlChar *
4552
41.1k
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4553
41.1k
    if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4554
41.1k
    return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4555
41.1k
}
4556
4557
/**
4558
 * xmlParseSystemLiteral:
4559
 * @ctxt:  an XML parser context
4560
 *
4561
 * DEPRECATED: Internal function, don't use.
4562
 *
4563
 * parse an XML Literal
4564
 *
4565
 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4566
 *
4567
 * Returns the SystemLiteral parsed or NULL
4568
 */
4569
4570
xmlChar *
4571
4.64k
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4572
4.64k
    xmlChar *buf = NULL;
4573
4.64k
    int len = 0;
4574
4.64k
    int size = XML_PARSER_BUFFER_SIZE;
4575
4.64k
    int cur, l;
4576
4.64k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4577
0
                    XML_MAX_TEXT_LENGTH :
4578
4.64k
                    XML_MAX_NAME_LENGTH;
4579
4.64k
    xmlChar stop;
4580
4581
4.64k
    if (RAW == '"') {
4582
2.68k
        NEXT;
4583
2.68k
  stop = '"';
4584
2.68k
    } else if (RAW == '\'') {
4585
532
        NEXT;
4586
532
  stop = '\'';
4587
1.42k
    } else {
4588
1.42k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4589
1.42k
  return(NULL);
4590
1.42k
    }
4591
4592
3.21k
    buf = (xmlChar *) xmlMallocAtomic(size);
4593
3.21k
    if (buf == NULL) {
4594
0
        xmlErrMemory(ctxt);
4595
0
  return(NULL);
4596
0
    }
4597
3.21k
    cur = CUR_CHAR(l);
4598
42.6k
    while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4599
39.4k
  if (len + 5 >= size) {
4600
230
      xmlChar *tmp;
4601
4602
230
      size *= 2;
4603
230
      tmp = (xmlChar *) xmlRealloc(buf, size);
4604
230
      if (tmp == NULL) {
4605
0
          xmlFree(buf);
4606
0
    xmlErrMemory(ctxt);
4607
0
    return(NULL);
4608
0
      }
4609
230
      buf = tmp;
4610
230
  }
4611
39.4k
  COPY_BUF(buf, len, cur);
4612
39.4k
        if (len > maxLength) {
4613
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4614
0
            xmlFree(buf);
4615
0
            return(NULL);
4616
0
        }
4617
39.4k
  NEXTL(l);
4618
39.4k
  cur = CUR_CHAR(l);
4619
39.4k
    }
4620
3.21k
    buf[len] = 0;
4621
3.21k
    if (!IS_CHAR(cur)) {
4622
940
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4623
2.27k
    } else {
4624
2.27k
  NEXT;
4625
2.27k
    }
4626
3.21k
    return(buf);
4627
3.21k
}
4628
4629
/**
4630
 * xmlParsePubidLiteral:
4631
 * @ctxt:  an XML parser context
4632
 *
4633
 * DEPRECATED: Internal function, don't use.
4634
 *
4635
 * parse an XML public literal
4636
 *
4637
 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4638
 *
4639
 * Returns the PubidLiteral parsed or NULL.
4640
 */
4641
4642
xmlChar *
4643
3.23k
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4644
3.23k
    xmlChar *buf = NULL;
4645
3.23k
    int len = 0;
4646
3.23k
    int size = XML_PARSER_BUFFER_SIZE;
4647
3.23k
    int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4648
0
                    XML_MAX_TEXT_LENGTH :
4649
3.23k
                    XML_MAX_NAME_LENGTH;
4650
3.23k
    xmlChar cur;
4651
3.23k
    xmlChar stop;
4652
4653
3.23k
    if (RAW == '"') {
4654
1.84k
        NEXT;
4655
1.84k
  stop = '"';
4656
1.84k
    } else if (RAW == '\'') {
4657
905
        NEXT;
4658
905
  stop = '\'';
4659
905
    } else {
4660
487
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4661
487
  return(NULL);
4662
487
    }
4663
2.74k
    buf = (xmlChar *) xmlMallocAtomic(size);
4664
2.74k
    if (buf == NULL) {
4665
0
  xmlErrMemory(ctxt);
4666
0
  return(NULL);
4667
0
    }
4668
2.74k
    cur = CUR;
4669
54.6k
    while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop) &&
4670
54.6k
           (PARSER_STOPPED(ctxt) == 0)) { /* checked */
4671
51.8k
  if (len + 1 >= size) {
4672
324
      xmlChar *tmp;
4673
4674
324
      size *= 2;
4675
324
      tmp = (xmlChar *) xmlRealloc(buf, size);
4676
324
      if (tmp == NULL) {
4677
0
    xmlErrMemory(ctxt);
4678
0
    xmlFree(buf);
4679
0
    return(NULL);
4680
0
      }
4681
324
      buf = tmp;
4682
324
  }
4683
51.8k
  buf[len++] = cur;
4684
51.8k
        if (len > maxLength) {
4685
0
            xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4686
0
            xmlFree(buf);
4687
0
            return(NULL);
4688
0
        }
4689
51.8k
  NEXT;
4690
51.8k
  cur = CUR;
4691
51.8k
    }
4692
2.74k
    buf[len] = 0;
4693
2.74k
    if (cur != stop) {
4694
1.82k
  xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4695
1.82k
    } else {
4696
927
  NEXTL(1);
4697
927
    }
4698
2.74k
    return(buf);
4699
2.74k
}
4700
4701
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4702
4703
/*
 * Lookup table driving the inner loop of the fast character-data scanner
 * (indexed by the current input byte; the loop keeps advancing while the
 * entry is non-zero).
 *
 * An entry is non-zero -- it holds the byte value itself -- for tab (0x09)
 * and for the ASCII range 0x20..0x7F, except for '&' (0x26), '<' (0x3C)
 * and ']' (0x5D). Every other byte maps to zero: the remaining control
 * characters (including LF 0x0A and CR 0x0D) and all non-ASCII bytes
 * 0x80..0xFF.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4740
4741
/**
4742
 * xmlParseCharDataInternal:
4743
 * @ctxt:  an XML parser context
4744
 * @partial:  buffer may contain partial UTF-8 sequences
4745
 *
4746
 * Parse character data. Always makes progress if the first char isn't
4747
 * '<' or '&'.
4748
 *
4749
 * The right angle bracket (>) may be represented using the string "&gt;",
4750
 * and must, for compatibility, be escaped using "&gt;" or a character
4751
 * reference when it appears in the string "]]>" in content, when that
4752
 * string is not marking the end of a CDATA section.
4753
 *
4754
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4755
 */
4756
static void
4757
47.7k
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4758
47.7k
    const xmlChar *in;
4759
47.7k
    int nbchar = 0;
4760
47.7k
    int line = ctxt->input->line;
4761
47.7k
    int col = ctxt->input->col;
4762
47.7k
    int ccol;
4763
4764
47.7k
    GROW;
4765
    /*
4766
     * Accelerated common case where input don't need to be
4767
     * modified before passing it to the handler.
4768
     */
4769
47.7k
    in = ctxt->input->cur;
4770
48.2k
    do {
4771
48.5k
get_more_space:
4772
55.6k
        while (*in == 0x20) { in++; ctxt->input->col++; }
4773
48.5k
        if (*in == 0xA) {
4774
1.04k
            do {
4775
1.04k
                ctxt->input->line++; ctxt->input->col = 1;
4776
1.04k
                in++;
4777
1.04k
            } while (*in == 0xA);
4778
299
            goto get_more_space;
4779
299
        }
4780
48.2k
        if (*in == '<') {
4781
3.00k
            nbchar = in - ctxt->input->cur;
4782
3.00k
            if (nbchar > 0) {
4783
3.00k
                const xmlChar *tmp = ctxt->input->cur;
4784
3.00k
                ctxt->input->cur = in;
4785
4786
3.00k
                if ((ctxt->sax != NULL) &&
4787
3.00k
                    (ctxt->disableSAX == 0) &&
4788
3.00k
                    (ctxt->sax->ignorableWhitespace !=
4789
2.35k
                     ctxt->sax->characters)) {
4790
1.80k
                    if (areBlanks(ctxt, tmp, nbchar, 1)) {
4791
822
                        if (ctxt->sax->ignorableWhitespace != NULL)
4792
822
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
4793
822
                                                   tmp, nbchar);
4794
982
                    } else {
4795
982
                        if (ctxt->sax->characters != NULL)
4796
982
                            ctxt->sax->characters(ctxt->userData,
4797
982
                                                  tmp, nbchar);
4798
982
                        if (*ctxt->space == -1)
4799
521
                            *ctxt->space = -2;
4800
982
                    }
4801
1.80k
                } else if ((ctxt->sax != NULL) &&
4802
1.19k
                           (ctxt->disableSAX == 0) &&
4803
1.19k
                           (ctxt->sax->characters != NULL)) {
4804
546
                    ctxt->sax->characters(ctxt->userData,
4805
546
                                          tmp, nbchar);
4806
546
                }
4807
3.00k
            }
4808
3.00k
            return;
4809
3.00k
        }
4810
4811
48.5k
get_more:
4812
48.5k
        ccol = ctxt->input->col;
4813
146k
        while (test_char_data[*in]) {
4814
98.1k
            in++;
4815
98.1k
            ccol++;
4816
98.1k
        }
4817
48.5k
        ctxt->input->col = ccol;
4818
48.5k
        if (*in == 0xA) {
4819
687
            do {
4820
687
                ctxt->input->line++; ctxt->input->col = 1;
4821
687
                in++;
4822
687
            } while (*in == 0xA);
4823
478
            goto get_more;
4824
478
        }
4825
48.0k
        if (*in == ']') {
4826
3.12k
            if ((in[1] == ']') && (in[2] == '>')) {
4827
334
                xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4828
334
                ctxt->input->cur = in + 1;
4829
334
                return;
4830
334
            }
4831
2.78k
            in++;
4832
2.78k
            ctxt->input->col++;
4833
2.78k
            goto get_more;
4834
3.12k
        }
4835
44.9k
        nbchar = in - ctxt->input->cur;
4836
44.9k
        if (nbchar > 0) {
4837
35.0k
            if ((ctxt->sax != NULL) &&
4838
35.0k
                (ctxt->disableSAX == 0) &&
4839
35.0k
                (ctxt->sax->ignorableWhitespace !=
4840
23.5k
                 ctxt->sax->characters) &&
4841
35.0k
                (IS_BLANK_CH(*ctxt->input->cur))) {
4842
1.85k
                const xmlChar *tmp = ctxt->input->cur;
4843
1.85k
                ctxt->input->cur = in;
4844
4845
1.85k
                if (areBlanks(ctxt, tmp, nbchar, 0)) {
4846
552
                    if (ctxt->sax->ignorableWhitespace != NULL)
4847
552
                        ctxt->sax->ignorableWhitespace(ctxt->userData,
4848
552
                                                       tmp, nbchar);
4849
1.30k
                } else {
4850
1.30k
                    if (ctxt->sax->characters != NULL)
4851
1.30k
                        ctxt->sax->characters(ctxt->userData,
4852
1.30k
                                              tmp, nbchar);
4853
1.30k
                    if (*ctxt->space == -1)
4854
722
                        *ctxt->space = -2;
4855
1.30k
                }
4856
1.85k
                line = ctxt->input->line;
4857
1.85k
                col = ctxt->input->col;
4858
33.2k
            } else if ((ctxt->sax != NULL) &&
4859
33.2k
                       (ctxt->disableSAX == 0)) {
4860
21.6k
                if (ctxt->sax->characters != NULL)
4861
21.6k
                    ctxt->sax->characters(ctxt->userData,
4862
21.6k
                                          ctxt->input->cur, nbchar);
4863
21.6k
                line = ctxt->input->line;
4864
21.6k
                col = ctxt->input->col;
4865
21.6k
            }
4866
35.0k
        }
4867
44.9k
        ctxt->input->cur = in;
4868
44.9k
        if (*in == 0xD) {
4869
1.51k
            in++;
4870
1.51k
            if (*in == 0xA) {
4871
541
                ctxt->input->cur = in;
4872
541
                in++;
4873
541
                ctxt->input->line++; ctxt->input->col = 1;
4874
541
                continue; /* while */
4875
541
            }
4876
974
            in--;
4877
974
        }
4878
44.3k
        if (*in == '<') {
4879
28.2k
            return;
4880
28.2k
        }
4881
16.1k
        if (*in == '&') {
4882
3.95k
            return;
4883
3.95k
        }
4884
12.1k
        SHRINK;
4885
12.1k
        GROW;
4886
12.1k
        in = ctxt->input->cur;
4887
12.7k
    } while (((*in >= 0x20) && (*in <= 0x7F)) ||
4888
12.7k
             (*in == 0x09) || (*in == 0x0a));
4889
12.1k
    ctxt->input->line = line;
4890
12.1k
    ctxt->input->col = col;
4891
12.1k
    xmlParseCharDataComplex(ctxt, partial);
4892
12.1k
}
4893
4894
/**
4895
 * xmlParseCharDataComplex:
4896
 * @ctxt:  an XML parser context
4897
 * @cdata:  int indicating whether we are within a CDATA section
4898
 *
4899
 * Always makes progress if the first char isn't '<' or '&'.
4900
 *
4901
 * parse a CharData section.this is the fallback function
4902
 * of xmlParseCharData() when the parsing requires handling
4903
 * of non-ASCII characters.
4904
 */
4905
static void
4906
12.1k
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4907
12.1k
    xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4908
12.1k
    int nbchar = 0;
4909
12.1k
    int cur, l;
4910
4911
12.1k
    cur = CUR_CHAR(l);
4912
70.9k
    while ((cur != '<') && /* checked */
4913
70.9k
           (cur != '&') &&
4914
70.9k
     (IS_CHAR(cur))) {
4915
58.7k
  if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4916
106
      xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4917
106
  }
4918
58.7k
  COPY_BUF(buf, nbchar, cur);
4919
  /* move current position before possible calling of ctxt->sax->characters */
4920
58.7k
  NEXTL(l);
4921
58.7k
  if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4922
346
      buf[nbchar] = 0;
4923
4924
      /*
4925
       * OK the segment is to be consumed as chars.
4926
       */
4927
346
      if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4928
179
    if (areBlanks(ctxt, buf, nbchar, 0)) {
4929
10
        if (ctxt->sax->ignorableWhitespace != NULL)
4930
10
      ctxt->sax->ignorableWhitespace(ctxt->userData,
4931
10
                                     buf, nbchar);
4932
169
    } else {
4933
169
        if (ctxt->sax->characters != NULL)
4934
169
      ctxt->sax->characters(ctxt->userData, buf, nbchar);
4935
169
        if ((ctxt->sax->characters !=
4936
169
             ctxt->sax->ignorableWhitespace) &&
4937
169
      (*ctxt->space == -1))
4938
29
      *ctxt->space = -2;
4939
169
    }
4940
179
      }
4941
346
      nbchar = 0;
4942
346
            SHRINK;
4943
346
  }
4944
58.7k
  cur = CUR_CHAR(l);
4945
58.7k
    }
4946
12.1k
    if (nbchar != 0) {
4947
5.20k
        buf[nbchar] = 0;
4948
  /*
4949
   * OK the segment is to be consumed as chars.
4950
   */
4951
5.20k
  if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4952
3.21k
      if (areBlanks(ctxt, buf, nbchar, 0)) {
4953
380
    if (ctxt->sax->ignorableWhitespace != NULL)
4954
380
        ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4955
2.83k
      } else {
4956
2.83k
    if (ctxt->sax->characters != NULL)
4957
2.83k
        ctxt->sax->characters(ctxt->userData, buf, nbchar);
4958
2.83k
    if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4959
2.83k
        (*ctxt->space == -1))
4960
408
        *ctxt->space = -2;
4961
2.83k
      }
4962
3.21k
  }
4963
5.20k
    }
4964
    /*
4965
     * cur == 0 can mean
4966
     *
4967
     * - End of buffer.
4968
     * - An actual 0 character.
4969
     * - An incomplete UTF-8 sequence. This is allowed if partial is set.
4970
     */
4971
12.1k
    if (ctxt->input->cur < ctxt->input->end) {
4972
10.8k
        if ((cur == 0) && (CUR != 0)) {
4973
7
            if (partial == 0) {
4974
7
                xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4975
7
                        "Incomplete UTF-8 sequence starting with %02X\n", CUR);
4976
7
                NEXTL(1);
4977
7
            }
4978
10.8k
        } else if ((cur != '<') && (cur != '&')) {
4979
            /* Generate the error and skip the offending character */
4980
5.91k
            xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4981
5.91k
                              "PCDATA invalid Char value %d\n", cur);
4982
5.91k
            NEXTL(l);
4983
5.91k
        }
4984
10.8k
    }
4985
12.1k
}
4986
4987
/**
4988
 * xmlParseCharData:
4989
 * @ctxt:  an XML parser context
4990
 * @cdata:  unused
4991
 *
4992
 * DEPRECATED: Internal function, don't use.
4993
 */
4994
void
4995
0
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4996
0
    xmlParseCharDataInternal(ctxt, 0);
4997
0
}
4998
4999
/**
5000
 * xmlParseExternalID:
5001
 * @ctxt:  an XML parser context
5002
 * @publicID:  a xmlChar** receiving PubidLiteral
5003
 * @strict: indicate whether we should restrict parsing to only
5004
 *          production [75], see NOTE below
5005
 *
5006
 * DEPRECATED: Internal function, don't use.
5007
 *
5008
 * Parse an External ID or a Public ID
5009
 *
5010
 * NOTE: Productions [75] and [83] interact badly since [75] can generate
5011
 *       'PUBLIC' S PubidLiteral S SystemLiteral
5012
 *
5013
 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
5014
 *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
5015
 *
5016
 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
5017
 *
5018
 * Returns the function returns SystemLiteral and in the second
5019
 *                case publicID receives PubidLiteral, is strict is off
5020
 *                it is possible to return NULL and have publicID set.
5021
 */
5022
5023
xmlChar *
5024
15.9k
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
5025
15.9k
    xmlChar *URI = NULL;
5026
5027
15.9k
    *publicID = NULL;
5028
15.9k
    if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
5029
3.06k
        SKIP(6);
5030
3.06k
  if (SKIP_BLANKS == 0) {
5031
2.83k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5032
2.83k
                     "Space required after 'SYSTEM'\n");
5033
2.83k
  }
5034
3.06k
  URI = xmlParseSystemLiteral(ctxt);
5035
3.06k
  if (URI == NULL) {
5036
356
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5037
356
        }
5038
12.8k
    } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
5039
3.23k
        SKIP(6);
5040
3.23k
  if (SKIP_BLANKS == 0) {
5041
2.62k
      xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5042
2.62k
        "Space required after 'PUBLIC'\n");
5043
2.62k
  }
5044
3.23k
  *publicID = xmlParsePubidLiteral(ctxt);
5045
3.23k
  if (*publicID == NULL) {
5046
487
      xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
5047
487
  }
5048
3.23k
  if (strict) {
5049
      /*
5050
       * We don't handle [83] so "S SystemLiteral" is required.
5051
       */
5052
1.11k
      if (SKIP_BLANKS == 0) {
5053
919
    xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5054
919
      "Space required after the Public Identifier\n");
5055
919
      }
5056
2.12k
  } else {
5057
      /*
5058
       * We handle [83] so we return immediately, if
5059
       * "S SystemLiteral" is not detected. We skip blanks if no
5060
             * system literal was found, but this is harmless since we must
5061
             * be at the end of a NotationDecl.
5062
       */
5063
2.12k
      if (SKIP_BLANKS == 0) return(NULL);
5064
697
      if ((CUR != '\'') && (CUR != '"')) return(NULL);
5065
697
  }
5066
1.57k
  URI = xmlParseSystemLiteral(ctxt);
5067
1.57k
  if (URI == NULL) {
5068
1.07k
      xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
5069
1.07k
        }
5070
1.57k
    }
5071
14.2k
    return(URI);
5072
15.9k
}
5073
5074
/**
5075
 * xmlParseCommentComplex:
5076
 * @ctxt:  an XML parser context
5077
 * @buf:  the already parsed part of the buffer
5078
 * @len:  number of bytes in the buffer
5079
 * @size:  allocated size of the buffer
5080
 *
5081
 * Skip an XML (SGML) comment <!-- .... -->
5082
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5083
 *  must not occur within comments. "
5084
 * This is the slow routine in case the accelerator for ascii didn't work
5085
 *
5086
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5087
 */
5088
static void
5089
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
5090
9.93k
                       size_t len, size_t size) {
5091
9.93k
    int q, ql;
5092
9.93k
    int r, rl;
5093
9.93k
    int cur, l;
5094
9.93k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5095
0
                       XML_MAX_HUGE_LENGTH :
5096
9.93k
                       XML_MAX_TEXT_LENGTH;
5097
5098
9.93k
    if (buf == NULL) {
5099
4.73k
        len = 0;
5100
4.73k
  size = XML_PARSER_BUFFER_SIZE;
5101
4.73k
  buf = (xmlChar *) xmlMallocAtomic(size);
5102
4.73k
  if (buf == NULL) {
5103
0
      xmlErrMemory(ctxt);
5104
0
      return;
5105
0
  }
5106
4.73k
    }
5107
9.93k
    q = CUR_CHAR(ql);
5108
9.93k
    if (q == 0)
5109
6.58k
        goto not_terminated;
5110
3.35k
    if (!IS_CHAR(q)) {
5111
112
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5112
112
                          "xmlParseComment: invalid xmlChar value %d\n",
5113
112
                    q);
5114
112
  xmlFree (buf);
5115
112
  return;
5116
112
    }
5117
3.24k
    NEXTL(ql);
5118
3.24k
    r = CUR_CHAR(rl);
5119
3.24k
    if (r == 0)
5120
203
        goto not_terminated;
5121
3.03k
    if (!IS_CHAR(r)) {
5122
144
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5123
144
                          "xmlParseComment: invalid xmlChar value %d\n",
5124
144
                    r);
5125
144
  xmlFree (buf);
5126
144
  return;
5127
144
    }
5128
2.89k
    NEXTL(rl);
5129
2.89k
    cur = CUR_CHAR(l);
5130
2.89k
    if (cur == 0)
5131
458
        goto not_terminated;
5132
27.9k
    while (IS_CHAR(cur) && /* checked */
5133
27.9k
           ((cur != '>') ||
5134
27.0k
      (r != '-') || (q != '-'))) {
5135
25.4k
  if ((r == '-') && (q == '-')) {
5136
350
      xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
5137
350
  }
5138
25.4k
  if (len + 5 >= size) {
5139
333
      xmlChar *new_buf;
5140
333
            size_t new_size;
5141
5142
333
      new_size = size * 2;
5143
333
      new_buf = (xmlChar *) xmlRealloc(buf, new_size);
5144
333
      if (new_buf == NULL) {
5145
0
    xmlFree (buf);
5146
0
    xmlErrMemory(ctxt);
5147
0
    return;
5148
0
      }
5149
333
      buf = new_buf;
5150
333
            size = new_size;
5151
333
  }
5152
25.4k
  COPY_BUF(buf, len, q);
5153
25.4k
        if (len > maxLength) {
5154
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5155
0
                         "Comment too big found", NULL);
5156
0
            xmlFree (buf);
5157
0
            return;
5158
0
        }
5159
5160
25.4k
  q = r;
5161
25.4k
  ql = rl;
5162
25.4k
  r = cur;
5163
25.4k
  rl = l;
5164
5165
25.4k
  NEXTL(l);
5166
25.4k
  cur = CUR_CHAR(l);
5167
5168
25.4k
    }
5169
2.43k
    buf[len] = 0;
5170
2.43k
    if (cur == 0) {
5171
758
  xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5172
758
                       "Comment not terminated \n<!--%.50s\n", buf);
5173
1.67k
    } else if (!IS_CHAR(cur)) {
5174
157
        xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
5175
157
                          "xmlParseComment: invalid xmlChar value %d\n",
5176
157
                    cur);
5177
1.52k
    } else {
5178
1.52k
        NEXT;
5179
1.52k
  if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5180
1.52k
      (!ctxt->disableSAX))
5181
1.30k
      ctxt->sax->comment(ctxt->userData, buf);
5182
1.52k
    }
5183
2.43k
    xmlFree(buf);
5184
2.43k
    return;
5185
7.24k
not_terminated:
5186
7.24k
    xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5187
7.24k
       "Comment not terminated\n", NULL);
5188
7.24k
    xmlFree(buf);
5189
7.24k
    return;
5190
2.43k
}
5191
5192
/**
5193
 * xmlParseComment:
5194
 * @ctxt:  an XML parser context
5195
 *
5196
 * DEPRECATED: Internal function, don't use.
5197
 *
5198
 * Parse an XML (SGML) comment. Always consumes '<!'.
5199
 *
5200
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
5201
 *  must not occur within comments. "
5202
 *
5203
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5204
 */
5205
void
5206
12.5k
xmlParseComment(xmlParserCtxtPtr ctxt) {
5207
12.5k
    xmlChar *buf = NULL;
5208
12.5k
    size_t size = XML_PARSER_BUFFER_SIZE;
5209
12.5k
    size_t len = 0;
5210
12.5k
    size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5211
0
                       XML_MAX_HUGE_LENGTH :
5212
12.5k
                       XML_MAX_TEXT_LENGTH;
5213
12.5k
    const xmlChar *in;
5214
12.5k
    size_t nbchar = 0;
5215
12.5k
    int ccol;
5216
5217
    /*
5218
     * Check that there is a comment right here.
5219
     */
5220
12.5k
    if ((RAW != '<') || (NXT(1) != '!'))
5221
0
        return;
5222
12.5k
    SKIP(2);
5223
12.5k
    if ((RAW != '-') || (NXT(1) != '-'))
5224
3
        return;
5225
12.5k
    SKIP(2);
5226
12.5k
    GROW;
5227
5228
    /*
5229
     * Accelerated common case where input don't need to be
5230
     * modified before passing it to the handler.
5231
     */
5232
12.5k
    in = ctxt->input->cur;
5233
12.5k
    do {
5234
12.5k
  if (*in == 0xA) {
5235
443
      do {
5236
443
    ctxt->input->line++; ctxt->input->col = 1;
5237
443
    in++;
5238
443
      } while (*in == 0xA);
5239
217
  }
5240
26.1k
get_more:
5241
26.1k
        ccol = ctxt->input->col;
5242
61.6k
  while (((*in > '-') && (*in <= 0x7F)) ||
5243
61.6k
         ((*in >= 0x20) && (*in < '-')) ||
5244
61.6k
         (*in == 0x09)) {
5245
35.5k
        in++;
5246
35.5k
        ccol++;
5247
35.5k
  }
5248
26.1k
  ctxt->input->col = ccol;
5249
26.1k
  if (*in == 0xA) {
5250
650
      do {
5251
650
    ctxt->input->line++; ctxt->input->col = 1;
5252
650
    in++;
5253
650
      } while (*in == 0xA);
5254
268
      goto get_more;
5255
268
  }
5256
25.8k
  nbchar = in - ctxt->input->cur;
5257
  /*
5258
   * save current set of data
5259
   */
5260
25.8k
  if (nbchar > 0) {
5261
17.0k
            if (buf == NULL) {
5262
6.48k
                if ((*in == '-') && (in[1] == '-'))
5263
1.30k
                    size = nbchar + 1;
5264
5.18k
                else
5265
5.18k
                    size = XML_PARSER_BUFFER_SIZE + nbchar;
5266
6.48k
                buf = (xmlChar *) xmlMallocAtomic(size);
5267
6.48k
                if (buf == NULL) {
5268
0
                    xmlErrMemory(ctxt);
5269
0
                    return;
5270
0
                }
5271
6.48k
                len = 0;
5272
10.5k
            } else if (len + nbchar + 1 >= size) {
5273
715
                xmlChar *new_buf;
5274
715
                size  += len + nbchar + XML_PARSER_BUFFER_SIZE;
5275
715
                new_buf = (xmlChar *) xmlRealloc(buf, size);
5276
715
                if (new_buf == NULL) {
5277
0
                    xmlFree (buf);
5278
0
                    xmlErrMemory(ctxt);
5279
0
                    return;
5280
0
                }
5281
715
                buf = new_buf;
5282
715
            }
5283
17.0k
            memcpy(&buf[len], ctxt->input->cur, nbchar);
5284
17.0k
            len += nbchar;
5285
17.0k
            buf[len] = 0;
5286
17.0k
  }
5287
25.8k
        if (len > maxLength) {
5288
0
            xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5289
0
                         "Comment too big found", NULL);
5290
0
            xmlFree (buf);
5291
0
            return;
5292
0
        }
5293
25.8k
  ctxt->input->cur = in;
5294
25.8k
  if (*in == 0xA) {
5295
0
      in++;
5296
0
      ctxt->input->line++; ctxt->input->col = 1;
5297
0
  }
5298
25.8k
  if (*in == 0xD) {
5299
725
      in++;
5300
725
      if (*in == 0xA) {
5301
196
    ctxt->input->cur = in;
5302
196
    in++;
5303
196
    ctxt->input->line++; ctxt->input->col = 1;
5304
196
    goto get_more;
5305
196
      }
5306
529
      in--;
5307
529
  }
5308
25.6k
  SHRINK;
5309
25.6k
  GROW;
5310
25.6k
  in = ctxt->input->cur;
5311
25.6k
  if (*in == '-') {
5312
15.7k
      if (in[1] == '-') {
5313
12.6k
          if (in[2] == '>') {
5314
2.57k
        SKIP(3);
5315
2.57k
        if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5316
2.57k
            (!ctxt->disableSAX)) {
5317
2.15k
      if (buf != NULL)
5318
1.12k
          ctxt->sax->comment(ctxt->userData, buf);
5319
1.02k
      else
5320
1.02k
          ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5321
2.15k
        }
5322
2.57k
        if (buf != NULL)
5323
1.28k
            xmlFree(buf);
5324
2.57k
        return;
5325
2.57k
    }
5326
10.0k
    if (buf != NULL) {
5327
7.32k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5328
7.32k
                          "Double hyphen within comment: "
5329
7.32k
                                      "<!--%.50s\n",
5330
7.32k
              buf);
5331
7.32k
    } else
5332
2.74k
        xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5333
2.74k
                          "Double hyphen within comment\n", NULL);
5334
10.0k
    in++;
5335
10.0k
    ctxt->input->col++;
5336
10.0k
      }
5337
13.1k
      in++;
5338
13.1k
      ctxt->input->col++;
5339
13.1k
      goto get_more;
5340
15.7k
  }
5341
25.6k
    } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5342
9.93k
    xmlParseCommentComplex(ctxt, buf, len, size);
5343
9.93k
    return;
5344
12.5k
}
5345
5346
5347
/**
5348
 * xmlParsePITarget:
5349
 * @ctxt:  an XML parser context
5350
 *
5351
 * DEPRECATED: Internal function, don't use.
5352
 *
5353
 * parse the name of a PI
5354
 *
5355
 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5356
 *
5357
 * Returns the PITarget name or NULL
5358
 */
5359
5360
const xmlChar *
5361
23.5k
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5362
23.5k
    const xmlChar *name;
5363
5364
23.5k
    name = xmlParseName(ctxt);
5365
23.5k
    if ((name != NULL) &&
5366
23.5k
        ((name[0] == 'x') || (name[0] == 'X')) &&
5367
23.5k
        ((name[1] == 'm') || (name[1] == 'M')) &&
5368
23.5k
        ((name[2] == 'l') || (name[2] == 'L'))) {
5369
1.39k
  int i;
5370
1.39k
  if ((name[0] == 'x') && (name[1] == 'm') &&
5371
1.39k
      (name[2] == 'l') && (name[3] == 0)) {
5372
225
      xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5373
225
     "XML declaration allowed only at the start of the document\n");
5374
225
      return(name);
5375
1.17k
  } else if (name[3] == 0) {
5376
690
      xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5377
690
      return(name);
5378
690
  }
5379
1.25k
  for (i = 0;;i++) {
5380
1.25k
      if (xmlW3CPIs[i] == NULL) break;
5381
964
      if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5382
194
          return(name);
5383
964
  }
5384
288
  xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5385
288
          "xmlParsePITarget: invalid name prefix 'xml'\n",
5386
288
          NULL, NULL);
5387
288
    }
5388
22.4k
    if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5389
1.11k
  xmlNsErr(ctxt, XML_NS_ERR_COLON,
5390
1.11k
     "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5391
1.11k
    }
5392
22.4k
    return(name);
5393
23.5k
}
5394
5395
#ifdef LIBXML_CATALOG_ENABLED
5396
/**
5397
 * xmlParseCatalogPI:
5398
 * @ctxt:  an XML parser context
5399
 * @catalog:  the PI value string
5400
 *
5401
 * parse an XML Catalog Processing Instruction.
5402
 *
5403
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5404
 *
5405
 * Occurs only if allowed by the user and if happening in the Misc
5406
 * part of the document before any doctype information
5407
 * This will add the given catalog to the parsing context in order
5408
 * to be used if there is a resolution need further down in the document
5409
 */
5410
5411
static void
5412
525
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5413
525
    xmlChar *URL = NULL;
5414
525
    const xmlChar *tmp, *base;
5415
525
    xmlChar marker;
5416
5417
525
    tmp = catalog;
5418
525
    while (IS_BLANK_CH(*tmp)) tmp++;
5419
525
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5420
126
  goto error;
5421
399
    tmp += 7;
5422
407
    while (IS_BLANK_CH(*tmp)) tmp++;
5423
399
    if (*tmp != '=') {
5424
62
  return;
5425
62
    }
5426
337
    tmp++;
5427
521
    while (IS_BLANK_CH(*tmp)) tmp++;
5428
337
    marker = *tmp;
5429
337
    if ((marker != '\'') && (marker != '"'))
5430
56
  goto error;
5431
281
    tmp++;
5432
281
    base = tmp;
5433
814
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
5434
281
    if (*tmp == 0)
5435
95
  goto error;
5436
186
    URL = xmlStrndup(base, tmp - base);
5437
186
    tmp++;
5438
500
    while (IS_BLANK_CH(*tmp)) tmp++;
5439
186
    if (*tmp != 0)
5440
55
  goto error;
5441
5442
131
    if (URL != NULL) {
5443
        /*
5444
         * Unfortunately, the catalog API doesn't report OOM errors.
5445
         * xmlGetLastError isn't very helpful since we don't know
5446
         * where the last error came from. We'd have to reset it
5447
         * before this call and restore it afterwards.
5448
         */
5449
131
  ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5450
131
  xmlFree(URL);
5451
131
    }
5452
131
    return;
5453
5454
332
error:
5455
332
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5456
332
            "Catalog PI syntax error: %s\n",
5457
332
      catalog, NULL);
5458
332
    if (URL != NULL)
5459
55
  xmlFree(URL);
5460
332
}
5461
#endif
5462
5463
/**
5464
 * xmlParsePI:
5465
 * @ctxt:  an XML parser context
5466